diff --git a/.gitignore b/.gitignore index 551b1b5361ce2..f483bc772a874 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ *.suo *.vcxproj.user *.patch +*.diff .idea .svn .classpath @@ -50,16 +51,12 @@ patchprocess/ .history/ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/package-lock.json hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/yarn-error.log - -# Ignore files generated by HDDS acceptance tests. -hadoop-ozone/acceptance-test/docker-compose.log -hadoop-ozone/acceptance-test/junit-results.xml +phantomjsdriver.log #robotframework outputs log.html output.xml report.html -hadoop-hdds/docs/public .mvn diff --git a/BUILDING.txt b/BUILDING.txt index d54ce83183846..42ec263095b99 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -57,25 +57,22 @@ Installing required packages for clean install of Ubuntu 14.04 LTS Desktop: $ sudo apt-get install software-properties-common $ sudo add-apt-repository ppa:webupd8team/java $ sudo apt-get update - $ sudo apt-get install oracle-java8-installer + $ sudo apt-get -y install openjdk-8-jdk * Maven $ sudo apt-get -y install maven * Native libraries $ sudo apt-get -y install build-essential autoconf automake libtool cmake zlib1g-dev pkg-config libssl-dev libsasl2-dev * Protocol Buffers 3.7.1 (required to build native code) - $ mkdir -p /opt/protobuf-3.7-src \ - && curl -L -s -S \ - https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz \ - -o /opt/protobuf-3.7.1.tar.gz \ - && tar xzf /opt/protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src \ - && cd /opt/protobuf-3.7-src \ - && ./configure\ - && make install \ - && rm -rf /opt/protobuf-3.7-src + $ curl -L -s -S https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz + $ mkdir protobuf-3.7-src + $ tar xzf protobuf-3.7.1.tar.gz --strip-components 1 -C protobuf-3.7-src && cd protobuf-3.7-src + $ ./configure + $ make -j$(nproc) + $ 
sudo make install Optional packages: -* Snappy compression +* Snappy compression (only used for hadoop-mapreduce-client-nativetask) $ sudo apt-get install snappy libsnappy-dev * Intel ISA-L library for erasure coding Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version @@ -96,7 +93,7 @@ Maven main modules: - hadoop-project (Parent POM for all Hadoop Maven modules. ) (All plugins & dependencies versions are defined here.) - hadoop-project-dist (Parent POM for modules that generate distributions.) - - hadoop-annotations (Generates the Hadoop doclet used to generated the Javadocs) + - hadoop-annotations (Generates the Hadoop doclet used to generate the Javadocs) - hadoop-assemblies (Maven assemblies used by the different modules) - hadoop-maven-plugins (Maven plugins used in project) - hadoop-build-tools (Build tools like checkstyle, etc.) @@ -113,7 +110,7 @@ Maven main modules: ---------------------------------------------------------------------------------- Where to run Maven from? - It can be run from any module. The only catch is that if not run from utrunk + It can be run from any module. The only catch is that if not run from trunk all modules that are not part of the build run must be installed in the local Maven cache or available in a Maven repository. 
@@ -124,11 +121,11 @@ Maven build goals: * Compile : mvn compile [-Pnative] * Run tests : mvn test [-Pnative] [-Pshelltest] * Create JAR : mvn package - * Run findbugs : mvn compile findbugs:findbugs + * Run spotbugs : mvn compile spotbugs:spotbugs * Run checkstyle : mvn compile checkstyle:checkstyle * Install JAR in M2 cache : mvn install * Deploy JAR to Maven repo : mvn deploy - * Run clover : mvn test -Pclover [-DcloverLicenseLocation=${user.name}/.clover.license] + * Run clover : mvn test -Pclover * Run Rat : mvn apache-rat:check * Build javadocs : mvn javadoc:javadoc * Build distribution : mvn package [-Pdist][-Pdocs][-Psrc][-Pnative][-Dtar][-Preleasedocs][-Pyarn-ui] @@ -161,14 +158,14 @@ Maven build goals: Snappy is a compression library that can be utilized by the native code. It is currently an optional component, meaning that Hadoop can be built with - or without this dependency. + or without this dependency. As an optional dependency, the Snappy library is only + used by hadoop-mapreduce-client-nativetask. * Use -Drequire.snappy to fail the build if libsnappy.so is not found. If this option is not specified and the snappy library is missing, we silently build a version of libhadoop.so that cannot make use of snappy. This option is recommended if you plan on making use of snappy and want to get more repeatable builds. - * Use -Dsnappy.prefix to specify a nonstandard location for the libsnappy header files and library files. You do not need this option if you have installed snappy using a package manager. @@ -311,40 +308,35 @@ to update SNAPSHOTs from external repos. ---------------------------------------------------------------------------------- Importing projects to eclipse -When you import the project to eclipse, install hadoop-maven-plugins at first. - - $ cd hadoop-maven-plugins - $ mvn install - -Then, generate eclipse project files. +First, install the artifacts, including hadoop-maven-plugins, from the top of the source tree. 
- $ mvn eclipse:eclipse -DskipTests + $ mvn clean install -DskipTests -DskipShade -At last, import to eclipse by specifying the root directory of the project via -[File] > [Import] > [Existing Projects into Workspace]. +Then, import to eclipse by specifying the root directory of the project via +[File] > [Import] > [Maven] > [Existing Maven Projects]. ---------------------------------------------------------------------------------- Building distributions: -Create binary distribution without native code and without documentation: +Create binary distribution without native code and without Javadocs: $ mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true -Create binary distribution with native code and with documentation: +Create binary distribution with native code: - $ mvn package -Pdist,native,docs -DskipTests -Dtar + $ mvn package -Pdist,native -DskipTests -Dtar Create source distribution: $ mvn package -Psrc -DskipTests -Create source and binary distributions with native code and documentation: +Create source and binary distributions with native code: - $ mvn package -Pdist,native,docs,src -DskipTests -Dtar + $ mvn package -Pdist,native,src -DskipTests -Dtar Create a local staging version of the website (in /tmp/hadoop-site) - $ mvn clean site -Preleasedocs; mvn site:stage -DstagingDirectory=/tmp/hadoop-site + $ mvn site site:stage -Preleasedocs,docs -DstagingDirectory=/tmp/hadoop-site Note that the site needs to be built in a second pass after other artifacts. 
diff --git a/LICENSE-binary b/LICENSE-binary index 921d486b23be2..834eeb2625b51 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -205,55 +205,58 @@ This project bundles some components that are also licensed under the Apache License Version 2.0: - -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js files) hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java -com.aliyun:aliyun-java-sdk-core:3.4.0 -com.aliyun:aliyun-java-sdk-ecs:4.2.0 -com.aliyun:aliyun-java-sdk-ram:3.0.0 +ch.qos.reload4j:reload4j:1.2.22 +com.aliyun:aliyun-java-sdk-core:4.5.10 +com.aliyun:aliyun-java-sdk-kms:2.11.0 +com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 -com.aliyun.oss:aliyun-sdk-oss:3.4.1 -com.amazonaws:aws-java-sdk-bundle:1.11.563 +com.aliyun.oss:aliyun-sdk-oss:3.13.0 +com.amazonaws:aws-java-sdk-bundle:1.12.367 com.cedarsoftware:java-util:1.9.0 com.cedarsoftware:json-io:2.5.1 -com.fasterxml.jackson.core:jackson-annotations:2.9.9 -com.fasterxml.jackson.core:jackson-core:2.9.9 -com.fasterxml.jackson.core:jackson-databind:2.9.9.2 -com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.9.9 -com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.9.9 -com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.9.9 +com.fasterxml.jackson.core:jackson-annotations:2.12.7 +com.fasterxml.jackson.core:jackson-core:2.12.7 +com.fasterxml.jackson.core:jackson-databind:2.12.7.1 +com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.12.7 +com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.12.7 +com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7 com.fasterxml.uuid:java-uuid-generator:3.1.4 -com.fasterxml.woodstox:woodstox-core:5.0.3 +com.fasterxml.woodstox:woodstox-core:5.4.0 com.github.davidmoten:rxjava-extras:0.8.0.17 
com.github.stephenc.jcip:jcip-annotations:1.0-1 com.google:guice:4.0 com.google:guice-servlet:4.0 -com.google.api.grpc:proto-google-common-protos:1.0.0 -com.google.code.gson:2.2.4 -com.google.errorprone:error_prone_annotations:2.2.0 +com.google.android:annotations:jar:4.1.1.4 +com.google.api.grpc:proto-google-common-protos:1.12.0 +com.google.code.findbugs:jsr305:3.0.2 +com.google.code.gson:gson:2.9.0 +com.google.errorprone:error_prone_annotations:2.3.3 com.google.j2objc:j2objc-annotations:1.1 com.google.json-simple:json-simple:1.1.1 com.google.guava:failureaccess:1.0 -com.google.guava:guava:20.0 com.google.guava:guava:27.0-jre +com.google.guava:guava:jar:30.1.1-jre com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava -com.microsoft.azure:azure-storage:7.0.0 -com.nimbusds:nimbus-jose-jwt:4.41.1 -com.squareup.okhttp:okhttp:2.7.5 -com.squareup.okio:okio:1.6.0 +com.google.j2objc:j2objc-annotations:1.3 +com.microsoft.azure:azure-storage:7.0.1 +com.nimbusds:nimbus-jose-jwt:9.8.1 +com.squareup.okhttp3:okhttp:4.9.3 +com.squareup.okio:okio:2.8.0 +com.yammer.metrics:metrics-core:2.2.0 com.zaxxer:HikariCP-java7:2.4.12 -commons-beanutils:commons-beanutils:1.9.3 +commons-beanutils:commons-beanutils:1.9.4 commons-cli:commons-cli:1.2 commons-codec:commons-codec:1.11 commons-collections:commons-collections:3.2.2 commons-daemon:commons-daemon:1.0.13 -commons-io:commons-io:2.5 -commons-lang:commons-lang:2.6 +commons-io:commons-io:2.8.0 commons-logging:commons-logging:1.1.3 -commons-net:commons-net:3.6 +commons-net:commons-net:3.9.0 de.ruedigermoeller:fst:2.50 +io.dropwizard.metrics:metrics-core:3.2.4 io.grpc:grpc-api:1.26.0 io.grpc:grpc-context:1.26.0 io.grpc:grpc-core:1.26.0 @@ -262,17 +265,36 @@ io.grpc:grpc-protobuf:1.26.0 io.grpc:grpc-protobuf-lite:1.26.0 io.grpc:grpc-stub:1.26.0 io.netty:netty:3.10.6.Final -io.netty:netty-all:4.1.42.Final -io.netty:netty-buffer:4.1.27.Final -io.netty:netty-codec:4.1.27.Final -io.netty:netty-codec-http:4.1.27.Final 
-io.netty:netty-codec-http2:4.1.27.Final -io.netty:netty-codec-socks:4.1.27.Final -io.netty:netty-common:4.1.27.Final -io.netty:netty-handler:4.1.27.Final -io.netty:netty-handler-proxy:4.1.27.Final -io.netty:netty-resolver:4.1.27.Final -io.netty:netty-transport:4.1.27.Final +io.netty:netty-all:4.1.89.Final +io.netty:netty-buffer:4.1.89.Final +io.netty:netty-codec:4.1.89.Final +io.netty:netty-codec-dns:4.1.89.Final +io.netty:netty-codec-haproxy:4.1.89.Final +io.netty:netty-codec-http:4.1.89.Final +io.netty:netty-codec-http2:4.1.89.Final +io.netty:netty-codec-memcache:4.1.89.Final +io.netty:netty-codec-mqtt:4.1.89.Final +io.netty:netty-codec-redis:4.1.89.Final +io.netty:netty-codec-smtp:4.1.89.Final +io.netty:netty-codec-socks:4.1.89.Final +io.netty:netty-codec-stomp:4.1.89.Final +io.netty:netty-codec-xml:4.1.89.Final +io.netty:netty-common:4.1.89.Final +io.netty:netty-handler:4.1.89.Final +io.netty:netty-handler-proxy:4.1.89.Final +io.netty:netty-resolver:4.1.89.Final +io.netty:netty-resolver-dns:4.1.89.Final +io.netty:netty-transport:4.1.89.Final +io.netty:netty-transport-rxtx:4.1.89.Final +io.netty:netty-transport-sctp:4.1.89.Final +io.netty:netty-transport-udt:4.1.89.Final +io.netty:netty-transport-classes-epoll:4.1.89.Final +io.netty:netty-transport-native-unix-common:4.1.89.Final +io.netty:netty-transport-classes-kqueue:4.1.89.Final +io.netty:netty-resolver-dns-classes-macos:4.1.89.Final +io.netty:netty-transport-native-epoll:4.1.89.Final +io.netty:netty-transport-native-kqueue:4.1.89.Final +io.netty:netty-resolver-dns-native-macos:4.1.89.Final io.opencensus:opencensus-api:0.12.3 io.opencensus:opencensus-contrib-grpc-metrics:0.12.3 io.reactivex:rxjava:1.3.8 @@ -280,23 +302,21 @@ io.reactivex:rxjava-string:1.1.1 io.reactivex:rxnetty:0.4.20 io.swagger:swagger-annotations:1.5.4 javax.inject:javax.inject:1 -log4j:log4j:1.2.17 net.java.dev.jna:jna:5.2.0 -net.minidev:accessors-smart:1.2 -net.minidev:json-smart:2.3 +net.minidev:accessors-smart:2.4.7 
org.apache.avro:avro:1.7.7 org.apache.commons:commons-collections4:4.2 -org.apache.commons:commons-compress:1.19 -org.apache.commons:commons-configuration2:2.1.1 -org.apache.commons:commons-csv:1.0 +org.apache.commons:commons-compress:1.21 +org.apache.commons:commons-configuration2:2.8.0 +org.apache.commons:commons-csv:1.9.0 org.apache.commons:commons-digester:1.8.1 -org.apache.commons:commons-lang3:3.7 +org.apache.commons:commons-lang3:3.12.0 org.apache.commons:commons-math3:3.1.1 -org.apache.commons:commons-text:1.4 +org.apache.commons:commons-text:1.10.0 org.apache.commons:commons-validator:1.6 -org.apache.curator:curator-client:2.13.0 -org.apache.curator:curator-framework:2.13.0 -org.apache.curator:curator-recipes:2.13.0 +org.apache.curator:curator-client:5.2.0 +org.apache.curator:curator-framework:5.2.0 +org.apache.curator:curator-recipes:5.2.0 org.apache.geronimo.specs:geronimo-jcache_1.0_spec:1.0-alpha-1 org.apache.hbase:hbase-annotations:1.4.8 org.apache.hbase:hbase-client:1.4.8 @@ -304,9 +324,9 @@ org.apache.hbase:hbase-common:1.4.8 org.apache.hbase:hbase-protocol:1.4.8 org.apache.htrace:htrace-core:3.1.0-incubating org.apache.htrace:htrace-core4:4.1.0-incubating -org.apache.httpcomponents:httpclient:4.5.6 -org.apache.httpcomponents:httpcore:4.4.10 -org.apache.kafka:kafka-clients:2.4.0 +org.apache.httpcomponents:httpclient:4.5.13 +org.apache.httpcomponents:httpcore:4.4.13 +org.apache.kafka:kafka-clients:2.8.2 org.apache.kerby:kerb-admin:1.0.1 org.apache.kerby:kerb-client:1.0.1 org.apache.kerby:kerb-common:1.0.1 @@ -323,32 +343,31 @@ org.apache.kerby:kerby-util:1.0.1 org.apache.kerby:kerby-xdr:1.0.1 org.apache.kerby:token-provider:1.0.1 org.apache.yetus:audience-annotations:0.5.0 -org.apache.zookeeper:zookeeper:3.4.13 -org.codehaus.jackson:jackson-core-asl:1.9.13 -org.codehaus.jackson:jackson-jaxrs:1.9.13 -org.codehaus.jackson:jackson-mapper-asl:1.9.13 -org.codehaus.jackson:jackson-xc:1.9.13 -org.codehaus.jettison:jettison:1.1 
-org.eclipse.jetty:jetty-annotations:9.3.27.v20190418 -org.eclipse.jetty:jetty-http:9.3.27.v20190418 -org.eclipse.jetty:jetty-io:9.3.27.v20190418 -org.eclipse.jetty:jetty-jndi:9.3.27.v20190418 -org.eclipse.jetty:jetty-plus:9.3.27.v20190418 -org.eclipse.jetty:jetty-security:9.3.27.v20190418 -org.eclipse.jetty:jetty-server:9.3.27.v20190418 -org.eclipse.jetty:jetty-servlet:9.3.27.v20190418 -org.eclipse.jetty:jetty-util:9.3.27.v20190418 -org.eclipse.jetty:jetty-util-ajax:9.3.27.v20190418 -org.eclipse.jetty:jetty-webapp:9.3.27.v20190418 -org.eclipse.jetty:jetty-xml:9.3.27.v20190418 -org.eclipse.jetty.websocket:javax-websocket-client-impl:9.3.27.v20190418 -org.eclipse.jetty.websocket:javax-websocket-server-impl:9.3.27.v20190418 +org.codehaus.jettison:jettison:1.5.4 +org.eclipse.jetty:jetty-annotations:9.4.51.v20230217 +org.eclipse.jetty:jetty-http:9.4.51.v20230217 +org.eclipse.jetty:jetty-io:9.4.51.v20230217 +org.eclipse.jetty:jetty-jndi:9.4.51.v20230217 +org.eclipse.jetty:jetty-plus:9.4.51.v20230217 +org.eclipse.jetty:jetty-security:9.4.51.v20230217 +org.eclipse.jetty:jetty-server:9.4.51.v20230217 +org.eclipse.jetty:jetty-servlet:9.4.51.v20230217 +org.eclipse.jetty:jetty-util:9.4.51.v20230217 +org.eclipse.jetty:jetty-util-ajax:9.4.51.v20230217 +org.eclipse.jetty:jetty-webapp:9.4.51.v20230217 +org.eclipse.jetty:jetty-xml:9.4.51.v20230217 +org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.51.v20230217 +org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.51.v20230217 +org.apache.zookeeper:zookeeper:3.6.3 org.ehcache:ehcache:3.3.1 -org.lz4:lz4-java:1.6.0 +org.ini4j:ini4j:0.5.4 +org.jetbrains.kotlin:kotlin-stdlib:1.4.10 +org.jetbrains.kotlin:kotlin-stdlib-common:1.4.10 +org.lz4:lz4-java:1.7.1 org.objenesis:objenesis:2.6 -org.xerial.snappy:snappy-java:1.0.5 -org.yaml:snakeyaml:1.16: -org.wildfly.openssl:wildfly-openssl:1.0.7.Final +org.xerial.snappy:snappy-java:1.1.8.2 +org.yaml:snakeyaml:2.0 +org.wildfly.openssl:wildfly-openssl:1.1.3.Final 
-------------------------------------------------------------------------------- @@ -360,13 +379,13 @@ See licenses-binary/ for text of these licenses. BSD 2-Clause ------------ -hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,lz4hc.c} +hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.{c|h} hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h -com.github.luben:zstd-jni:1.4.3-1 +com.github.luben:zstd-jni:1.4.9-1 dnsjava:dnsjava:2.1.7 -org.codehaus.woodstox:stax2-api:3.1.4 +org.codehaus.woodstox:stax2-api:4.2.1 BSD 3-Clause @@ -376,125 +395,87 @@ hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/* hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c +hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/protobuf/protobuf/cpp_helpers.h +hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/gmock-1.7.0/*/*.{cc|h} hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-3.5.17.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-v4.1.1.min.js leveldb v1.13 com.google.protobuf:protobuf-java:2.5.0 com.google.protobuf:protobuf-java:3.6.1 +com.google.protobuf:protobuf-java:3.7.1 com.google.re2j:re2j:1.1 -com.jcraft:jsch:0.1.54 +com.jcraft:jsch:0.1.55 com.thoughtworks.paranamer:paranamer:2.3 -javax.activation:javax.activation-api:1.2.0 +jakarta.activation:jakarta.activation-api:1.2.1 
org.fusesource.leveldbjni:leveldbjni-all:1.8 org.jline:jline:3.9.0 org.hamcrest:hamcrest-core:1.3 org.ow2.asm:asm:5.0.4 -org.ow2.asm:asm-commons:6.0 -org.ow2.asm:asm-tree:6.0 +org.ow2.asm:asm-analysis:9.0 +org.ow2.asm:asm-commons:9.0 +org.ow2.asm:asm-tree:9.0 MIT License ----------- -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-1.6.4.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-nvd3-1.0.9.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-route-1.6.4.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1 hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.4.1.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/node_modules/.bin/r.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/* hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL -bootstrap v3.3.6 -broccoli-asset-rev v2.4.2 -broccoli-funnel v1.0.1 -datatables v1.10.8 -em-helpers v0.5.13 -em-table v0.1.6 -ember v2.2.0 -ember-array-contains-helper v1.0.2 -ember-bootstrap v0.5.1 -ember-cli v1.13.13 -ember-cli-app-version v1.0.0 -ember-cli-babel v5.1.6 -ember-cli-content-security-policy v0.4.0 -ember-cli-dependency-checker v1.2.0 -ember-cli-htmlbars v1.0.2 -ember-cli-htmlbars-inline-precompile v0.3.1 -ember-cli-ic-ajax v0.2.1 -ember-cli-inject-live-reload v1.4.0 -ember-cli-jquery-ui v0.0.20 -ember-cli-qunit v1.2.1 -ember-cli-release v0.2.8 -ember-cli-shims v0.0.6 -ember-cli-sri v1.2.1 -ember-cli-test-loader v0.2.1 -ember-cli-uglify v1.2.0 -ember-d3 v0.1.0 -ember-data v2.1.0 -ember-disable-proxy-controllers v1.0.1 -ember-export-application-global v1.0.5 -ember-load-initializers v0.1.7 -ember-qunit v0.4.16 -ember-qunit-notifications v0.1.0 -ember-resolver v2.0.3 -ember-spin-spinner v0.2.3 -ember-truth-helpers v1.2.0 -jquery v2.1.4 -jquery-ui v1.11.4 -loader.js v3.3.0 -momentjs v2.10.6 -qunit v1.19.0 -select2 v4.0.0 -snippet-ss v1.11.0 -spin.js v2.3.2 +uriparser2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2) com.microsoft.azure:azure-cosmosdb:2.4.5 com.microsoft.azure:azure-cosmosdb-commons:2.4.5 com.microsoft.azure:azure-cosmosdb-direct:2.4.5 com.microsoft.azure:azure-cosmosdb-gateway:2.4.5 -com.microsoft.azure:azure-data-lake-store-sdk:2.3.3 +com.microsoft.azure:azure-data-lake-store-sdk:2.3.9 com.microsoft.azure:azure-keyvault-core:1.0.0 com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7 -org.bouncycastle:bcpkix-jdk15on:1.60 -org.bouncycastle:bcprov-jdk15on:1.60 +org.bouncycastle:bcpkix-jdk15on:1.68 +org.bouncycastle:bcprov-jdk15on:1.68 org.checkerframework:checker-qual:2.5.2 +org.checkerframework:checker-qual:3.8.0 org.codehaus.mojo:animal-sniffer-annotations:1.17 
org.jruby.jcodings:jcodings:1.0.13 org.jruby.joni:joni:2.1.2 -org.slf4j:jul-to-slf4j:jar:1.7.25 -org.ojalgo:ojalgo:43.0:compile -org.slf4j:jul-to-slf4j:1.7.25 -org.slf4j:slf4j-api:1.7.25 -org.slf4j:slf4j-log4j12:1.7.25 +org.ojalgo:ojalgo:43.0 +org.slf4j:jcl-over-slf4j:1.7.35 +org.slf4j:jul-to-slf4j:1.7.35 +org.slf4j:slf4j-api:1.7.35 +org.slf4j:slf4j-reload4j:1.7.35 CDDL 1.1 + GPLv2 with classpath exception ----------------------------------------- -com.sun.jersey:jersey-client:1.19 -com.sun.jersey:jersey-core:1.19 -com.sun.jersey:jersey-guice:1.19 -com.sun.jersey:jersey-json:1.19 -com.sun.jersey:jersey-server:1.19 -com.sun.jersey:jersey-servlet:1.19 +com.github.pjfanning:jersey-json:1.20 +com.sun.jersey:jersey-client:1.19.4 +com.sun.jersey:jersey-core:1.19.4 +com.sun.jersey:jersey-guice:1.19.4 +com.sun.jersey:jersey-server:1.19.4 +com.sun.jersey:jersey-servlet:1.19.4 com.sun.xml.bind:jaxb-impl:2.2.3-1 javax.annotation:javax.annotation-api:1.3.2 javax.servlet:javax.servlet-api:3.1.0 javax.servlet.jsp:jsp-api:2.1 javax.websocket:javax.websocket-api:1.0 +javax.websocket:javax.websocket-client-api:1.0 javax.ws.rs:jsr311-api:1.1.1 javax.xml.bind:jaxb-api:2.2.11 @@ -502,22 +483,38 @@ javax.xml.bind:jaxb-api:2.2.11 Eclipse Public License 1.0 -------------------------- -junit:junit:4.12 +junit:junit:4.13.2 +org.jacoco:org.jacoco.agent:0.8.5 + +Eclipse Distribution License 1.0 +-------------------------- +jakarta.xml.bind:jakarta.xml.bind-api:2.3.2 HSQL License ------------ -org.hsqldb:hsqldb:2.3.4 +org.hsqldb:hsqldb:2.7.1 JDOM License ------------ org.jdom:jdom:1.1 +org.jdom:jdom2:2.0.6.jar +Boost Software License, Version 1.0 +------------- +asio-1.10.2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/asio-1.10.2) +rapidxml-1.13 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/rapidxml-1.13) +tr2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/tr2) Public Domain 
------------- +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/json-bignum.js aopalliance:aopalliance:1.0 + +See LICENSE-binary-yarn-ui for yarn-managed packages at hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp +See LICENSE-binary-yarn-applications-catalog-webapp for yarn-managed packages at hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/yarn.lock +To generate the report, execute 'yarn licenses list' at these two directories. diff --git a/LICENSE-binary-yarn-applications-catalog-webapp b/LICENSE-binary-yarn-applications-catalog-webapp new file mode 100644 index 0000000000000..6abe54cf27d75 --- /dev/null +++ b/LICENSE-binary-yarn-applications-catalog-webapp @@ -0,0 +1,279 @@ +yarn licenses v1.22.5 +├─ Apache-2.0 +│ └─ roboto-fontface@0.10.0 +│ ├─ URL: https://github.com/choffmeister/roboto-fontface-bower.git +│ ├─ VendorName: Christian Hoffmeister +│ └─ VendorUrl: https://github.com/choffmeister/roboto-fontface-bower +├─ BSD-2-Clause +│ └─ entities@1.1.2 +│ ├─ URL: git://github.com/fb55/entities.git +│ └─ VendorName: Felix Boehm +├─ BSD-3-Clause +│ ├─ shelljs@0.2.6 +│ │ ├─ URL: git://github.com/arturadib/shelljs.git +│ │ ├─ VendorName: Artur Adib +│ │ └─ VendorUrl: http://github.com/arturadib/shelljs +│ └─ sprintf-js@1.0.3 +│ ├─ URL: https://github.com/alexei/sprintf.js.git +│ ├─ VendorName: Alexandru Marasteanu +│ └─ VendorUrl: http://alexei.ro/ +├─ ISC +│ ├─ fs.realpath@1.0.0 +│ │ ├─ URL: git+https://github.com/isaacs/fs.realpath.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ glob@7.1.6 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ graceful-fs@4.2.4 +│ │ └─ URL: https://github.com/isaacs/node-graceful-fs +│ ├─ inflight@1.0.6 +│ │ ├─ URL: https://github.com/npm/inflight.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/inflight +│ ├─ inherits@2.0.4 +│ │ └─ URL: git://github.com/isaacs/inherits +│ ├─ minimatch@3.0.4 +│ │ ├─ URL: git://github.com/isaacs/minimatch.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ once@1.4.0 +│ │ ├─ URL: git://github.com/isaacs/once +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ semver@5.3.0 +│ │ └─ URL: https://github.com/npm/node-semver +│ └─ wrappy@1.0.2 +│ ├─ URL: https://github.com/npm/wrappy +│ ├─ VendorName: Isaac Z. Schlueter +│ └─ VendorUrl: https://github.com/npm/wrappy +└─ MIT + ├─ angular-loader@1.6.10 + │ ├─ URL: https://github.com/angular/angular.js.git + │ ├─ VendorName: Angular Core Team + │ └─ VendorUrl: http://angularjs.org/ + ├─ angular-mocks@1.6.10 + │ ├─ URL: https://github.com/angular/angular.js.git + │ ├─ VendorName: Angular Core Team + │ └─ VendorUrl: http://angularjs.org/ + ├─ angular-route@1.6.10 + │ ├─ URL: https://github.com/angular/angular.js.git + │ ├─ VendorName: Angular Core Team + │ └─ VendorUrl: http://angularjs.org/ + ├─ angular@1.6.10 + │ ├─ URL: https://github.com/angular/angular.js.git + │ ├─ VendorName: Angular Core Team + │ └─ VendorUrl: http://angularjs.org/ + ├─ apidoc-core@0.8.3 + │ ├─ URL: https://github.com/apidoc/apidoc-core.git + │ ├─ VendorName: Peter Rottmann + │ └─ VendorUrl: https://github.com/apidoc/apidoc-core + ├─ apidoc@0.17.7 + │ ├─ URL: https://github.com/apidoc/apidoc.git + │ ├─ VendorName: Peter Rottmann + │ └─ VendorUrl: http://apidocjs.com/ + ├─ argparse@1.0.10 + │ └─ URL: https://github.com/nodeca/argparse.git + ├─ async@2.6.3 + │ ├─ URL: https://github.com/caolan/async.git + │ ├─ VendorName: Caolan McMahon + │ └─ VendorUrl: https://caolan.github.io/async/ + ├─ balanced-match@1.0.0 + │ ├─ URL: git://github.com/juliangruber/balanced-match.git + │ ├─ VendorName: Julian Gruber + │ └─ VendorUrl: https://github.com/juliangruber/balanced-match + ├─ bootstrap@3.3.7 + │ 
├─ URL: https://github.com/twbs/bootstrap.git + │ ├─ VendorName: Twitter, Inc. + │ └─ VendorUrl: http://getbootstrap.com/ + ├─ brace-expansion@1.1.11 + │ ├─ URL: git://github.com/juliangruber/brace-expansion.git + │ ├─ VendorName: Julian Gruber + │ └─ VendorUrl: https://github.com/juliangruber/brace-expansion + ├─ color-convert@1.9.3 + │ ├─ URL: https://github.com/Qix-/color-convert.git + │ └─ VendorName: Heather Arthur + ├─ color-name@1.1.3 + │ ├─ URL: git@github.com:dfcreative/color-name.git + │ ├─ VendorName: DY + │ └─ VendorUrl: https://github.com/dfcreative/color-name + ├─ color-name@1.1.4 + │ ├─ URL: git@github.com:colorjs/color-name.git + │ ├─ VendorName: DY + │ └─ VendorUrl: https://github.com/colorjs/color-name + ├─ color-string@1.5.3 + │ ├─ URL: https://github.com/Qix-/color-string.git + │ └─ VendorName: Heather Arthur + ├─ color@3.0.0 + │ └─ URL: https://github.com/Qix-/color.git + ├─ colornames@1.1.1 + │ ├─ URL: git://github.com/timoxley/colornames.git + │ ├─ VendorName: Tim Oxley + │ └─ VendorUrl: https://github.com/timoxley/colornames#readme + ├─ colors@1.4.0 + │ ├─ URL: http://github.com/Marak/colors.js.git + │ ├─ VendorName: Marak Squires + │ └─ VendorUrl: https://github.com/Marak/colors.js + ├─ colorspace@1.1.2 + │ ├─ URL: https://github.com/3rd-Eden/colorspace + │ ├─ VendorName: Arnout Kazemier + │ └─ VendorUrl: https://github.com/3rd-Eden/colorspace + ├─ commander@2.20.3 + │ ├─ URL: https://github.com/tj/commander.js.git + │ └─ VendorName: TJ Holowaychuk + ├─ concat-map@0.0.1 + │ ├─ URL: git://github.com/substack/node-concat-map.git + │ ├─ VendorName: James Halliday + │ └─ VendorUrl: http://substack.net + ├─ core-util-is@1.0.2 + │ ├─ URL: git://github.com/isaacs/core-util-is + │ ├─ VendorName: Isaac Z. 
Schlueter + │ └─ VendorUrl: http://blog.izs.me/ + ├─ diagnostics@1.1.1 + │ ├─ URL: git://github.com/bigpipe/diagnostics.git + │ ├─ VendorName: Arnout Kazemier + │ └─ VendorUrl: https://github.com/bigpipe/diagnostics + ├─ enabled@1.0.2 + │ ├─ URL: git://github.com/bigpipe/enabled.git + │ └─ VendorName: Arnout Kazemier + ├─ env-variable@0.0.6 + │ ├─ URL: https://github.com/3rd-Eden/env-variable + │ ├─ VendorName: Arnout Kazemier + │ └─ VendorUrl: https://github.com/3rd-Eden/env-variable + ├─ fast-safe-stringify@2.0.7 + │ ├─ URL: git+https://github.com/davidmarkclements/fast-safe-stringify.git + │ ├─ VendorName: David Mark Clements + │ └─ VendorUrl: https://github.com/davidmarkclements/fast-safe-stringify#readme + ├─ fecha@2.3.3 + │ ├─ URL: https://taylorhakes@github.com/taylorhakes/fecha.git + │ ├─ VendorName: Taylor Hakes + │ └─ VendorUrl: https://github.com/taylorhakes/fecha + ├─ fs-extra@3.0.1 + │ ├─ URL: https://github.com/jprichardson/node-fs-extra + │ ├─ VendorName: JP Richardson + │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra + ├─ fs-extra@7.0.1 + │ ├─ URL: https://github.com/jprichardson/node-fs-extra + │ ├─ VendorName: JP Richardson + │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra + ├─ iconv-lite@0.4.24 + │ ├─ URL: git://github.com/ashtuchkin/iconv-lite.git + │ ├─ VendorName: Alexander Shtuchkin + │ └─ VendorUrl: https://github.com/ashtuchkin/iconv-lite + ├─ is-arrayish@0.3.2 + │ ├─ URL: https://github.com/qix-/node-is-arrayish.git + │ ├─ VendorName: Qix + │ └─ VendorUrl: http://github.com/qix- + ├─ is-stream@1.1.0 + │ ├─ URL: https://github.com/sindresorhus/is-stream.git + │ ├─ VendorName: Sindre Sorhus + │ └─ VendorUrl: sindresorhus.com + ├─ isarray@1.0.0 + │ ├─ URL: git://github.com/juliangruber/isarray.git + │ ├─ VendorName: Julian Gruber + │ └─ VendorUrl: https://github.com/juliangruber/isarray + ├─ jquery@3.3.1 + │ ├─ URL: https://github.com/jquery/jquery.git + │ ├─ VendorName: JS Foundation and other contributors + │ └─ 
VendorUrl: https://jquery.com/ + ├─ jsonfile@3.0.1 + │ ├─ URL: git@github.com:jprichardson/node-jsonfile.git + │ └─ VendorName: JP Richardson + ├─ jsonfile@4.0.0 + │ ├─ URL: git@github.com:jprichardson/node-jsonfile.git + │ └─ VendorName: JP Richardson + ├─ klaw-sync@2.1.0 + │ ├─ URL: git+https://github.com/manidlou/node-klaw-sync.git + │ ├─ VendorName: Mani Maghsoudlou + │ └─ VendorUrl: https://github.com/manidlou/node-klaw-sync#readme + ├─ kuler@1.0.1 + │ ├─ URL: https://github.com/3rd-Eden/kuler + │ ├─ VendorName: Arnout Kazemier + │ └─ VendorUrl: https://github.com/3rd-Eden/kuler + ├─ linkify-it@2.2.0 + │ └─ URL: https://github.com/markdown-it/linkify-it.git + ├─ lodash@4.17.15 + │ ├─ URL: https://github.com/lodash/lodash.git + │ ├─ VendorName: John-David Dalton + │ └─ VendorUrl: https://lodash.com/ + ├─ logform@2.1.2 + │ ├─ URL: git+https://github.com/winstonjs/logform.git + │ ├─ VendorName: Charlie Robbins + │ └─ VendorUrl: https://github.com/winstonjs/logform#readme + ├─ markdown-it@8.4.2 + │ └─ URL: https://github.com/markdown-it/markdown-it.git + ├─ mdurl@1.0.1 + │ └─ URL: https://github.com/markdown-it/mdurl.git + ├─ ms@2.1.2 + │ └─ URL: https://github.com/zeit/ms.git + ├─ one-time@0.0.4 + │ ├─ URL: https://github.com/unshiftio/one-time + │ └─ VendorName: Arnout Kazemier + ├─ path-is-absolute@1.0.1 + │ ├─ URL: https://github.com/sindresorhus/path-is-absolute.git + │ ├─ VendorName: Sindre Sorhus + │ └─ VendorUrl: sindresorhus.com + ├─ process-nextick-args@2.0.1 + │ ├─ URL: https://github.com/calvinmetcalf/process-nextick-args.git + │ └─ VendorUrl: https://github.com/calvinmetcalf/process-nextick-args + ├─ readable-stream@2.3.7 + │ └─ URL: git://github.com/nodejs/readable-stream + ├─ readable-stream@3.6.0 + │ └─ URL: git://github.com/nodejs/readable-stream + ├─ requirejs@2.3.6 + │ ├─ URL: https://github.com/jrburke/r.js.git + │ ├─ VendorName: James Burke + │ └─ VendorUrl: http://github.com/jrburke/r.js + ├─ safe-buffer@5.1.2 + │ ├─ URL: 
git://github.com/feross/safe-buffer.git + │ ├─ VendorName: Feross Aboukhadijeh + │ └─ VendorUrl: https://github.com/feross/safe-buffer + ├─ safe-buffer@5.2.0 + │ ├─ URL: git://github.com/feross/safe-buffer.git + │ ├─ VendorName: Feross Aboukhadijeh + │ └─ VendorUrl: https://github.com/feross/safe-buffer + ├─ safer-buffer@2.1.2 + │ ├─ URL: git+https://github.com/ChALkeR/safer-buffer.git + │ ├─ VendorName: Nikita Skovoroda + │ └─ VendorUrl: https://github.com/ChALkeR + ├─ simple-swizzle@0.2.2 + │ ├─ URL: https://github.com/qix-/node-simple-swizzle.git + │ ├─ VendorName: Qix + │ └─ VendorUrl: http://github.com/qix- + ├─ stack-trace@0.0.10 + │ ├─ URL: git://github.com/felixge/node-stack-trace.git + │ ├─ VendorName: Felix Geisendörfer + │ └─ VendorUrl: https://github.com/felixge/node-stack-trace + ├─ string_decoder@1.1.1 + │ ├─ URL: git://github.com/nodejs/string_decoder.git + │ └─ VendorUrl: https://github.com/nodejs/string_decoder + ├─ string_decoder@1.3.0 + │ ├─ URL: git://github.com/nodejs/string_decoder.git + │ └─ VendorUrl: https://github.com/nodejs/string_decoder + ├─ text-hex@1.0.0 + │ ├─ URL: https://github.com/3rd-Eden/text-hex + │ ├─ VendorName: Arnout Kazemier + │ └─ VendorUrl: https://github.com/3rd-Eden/text-hex + ├─ triple-beam@1.3.0 + │ ├─ URL: git+https://github.com/winstonjs/triple-beam.git + │ ├─ VendorName: Charlie Robbins + │ └─ VendorUrl: https://github.com/winstonjs/triple-beam#readme + ├─ uc.micro@1.0.6 + │ └─ URL: https://github.com/markdown-it/uc.micro.git + ├─ universalify@0.1.2 + │ ├─ URL: git+https://github.com/RyanZim/universalify.git + │ ├─ VendorName: Ryan Zimmerman + │ └─ VendorUrl: https://github.com/RyanZim/universalify#readme + ├─ util-deprecate@1.0.2 + │ ├─ URL: git://github.com/TooTallNate/util-deprecate.git + │ ├─ VendorName: Nathan Rajlich + │ └─ VendorUrl: https://github.com/TooTallNate/util-deprecate + ├─ winston-transport@4.3.0 + │ ├─ URL: git@github.com:winstonjs/winston-transport.git + │ ├─ VendorName: Charlie Robbins + │ └─ 
VendorUrl: https://github.com/winstonjs/winston-transport#readme + └─ winston@3.2.1 + ├─ URL: https://github.com/winstonjs/winston.git + └─ VendorName: Charlie Robbins +Done in 1.94s. diff --git a/LICENSE-binary-yarn-ui b/LICENSE-binary-yarn-ui new file mode 100644 index 0000000000000..ecc3dbe5af9a4 --- /dev/null +++ b/LICENSE-binary-yarn-ui @@ -0,0 +1,3339 @@ +yarn licenses v1.22.5 +├─ (BSD-2-Clause OR MIT) +│ └─ sha@2.0.1 +│ └─ URL: https://github.com/ForbesLindesay/sha.git +├─ (LGPL-2.0 or MIT) +│ └─ xmldom@0.1.31 +│ ├─ URL: git://github.com/xmldom/xmldom.git +│ ├─ VendorName: jindw +│ └─ VendorUrl: https://github.com/xmldom/xmldom +├─ (MIT AND JSON) +│ └─ jshint@2.10.3 +│ ├─ URL: https://github.com/jshint/jshint.git +│ ├─ VendorName: Anton Kovalyov +│ └─ VendorUrl: http://jshint.com/ +├─ (MIT OR Apache-2.0) +│ └─ ember-test-helpers@0.5.34 +│ └─ URL: https://github.com/switchfly/ember-test-helpers.git +├─ (WTFPL OR MIT) +│ ├─ opener@1.4.3 +│ │ ├─ URL: https://github.com/domenic/opener.git +│ │ ├─ VendorName: Domenic Denicola +│ │ └─ VendorUrl: https://domenic.me/ +│ └─ path-is-inside@1.0.2 +│ ├─ URL: https://github.com/domenic/path-is-inside.git +│ ├─ VendorName: Domenic Denicola +│ └─ VendorUrl: https://domenic.me +├─ Apache-2.0 +│ ├─ aws-sign2@0.6.0 +│ │ ├─ URL: https://github.com/mikeal/aws-sign +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ aws-sign2@0.7.0 +│ │ ├─ URL: https://github.com/mikeal/aws-sign +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ bser@2.1.1 +│ │ ├─ URL: https://github.com/facebook/watchman +│ │ ├─ VendorName: Wez Furlong +│ │ └─ VendorUrl: https://facebook.github.io/watchman/docs/bser.html +│ ├─ caseless@0.11.0 +│ │ ├─ URL: https://github.com/mikeal/caseless +│ │ └─ VendorName: Mikeal Rogers +│ ├─ caseless@0.12.0 +│ │ ├─ URL: https://github.com/mikeal/caseless +│ │ └─ VendorName: Mikeal Rogers +│ ├─ fb-watchman@2.0.1 +│ │ ├─ URL: 
git@github.com:facebook/watchman.git +│ │ ├─ VendorName: Wez Furlong +│ │ └─ VendorUrl: https://facebook.github.io/watchman/ +│ ├─ forever-agent@0.6.1 +│ │ ├─ URL: https://github.com/mikeal/forever-agent +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ kew@0.7.0 +│ │ ├─ URL: https://github.com/Medium/kew.git +│ │ └─ VendorUrl: https://github.com/Medium/kew +│ ├─ less@2.7.3 +│ │ ├─ URL: https://github.com/less/less.js.git +│ │ ├─ VendorName: Alexis Sellier +│ │ └─ VendorUrl: http://lesscss.org/ +│ ├─ oauth-sign@0.8.2 +│ │ ├─ URL: https://github.com/mikeal/oauth-sign +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ oauth-sign@0.9.0 +│ │ ├─ URL: https://github.com/mikeal/oauth-sign +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ phantomjs-prebuilt@2.1.13 +│ │ ├─ URL: git://github.com/Medium/phantomjs.git +│ │ ├─ VendorName: Dan Pupius +│ │ └─ VendorUrl: https://github.com/Medium/phantomjs +│ ├─ request@2.65.0 +│ │ ├─ URL: https://github.com/request/request.git +│ │ └─ VendorName: Mikeal Rogers +│ ├─ request@2.74.0 +│ │ ├─ URL: https://github.com/request/request.git +│ │ └─ VendorName: Mikeal Rogers +│ ├─ request@2.81.0 +│ │ ├─ URL: https://github.com/request/request.git +│ │ └─ VendorName: Mikeal Rogers +│ ├─ request@2.88.0 +│ │ ├─ URL: https://github.com/request/request.git +│ │ └─ VendorName: Mikeal Rogers +│ ├─ spdx-correct@3.1.0 +│ │ ├─ URL: https://github.com/jslicense/spdx-correct.js.git +│ │ ├─ VendorName: Kyle E. Mitchell +│ │ └─ VendorUrl: https://kemitchell.com +│ ├─ spdx@0.4.3 +│ │ ├─ URL: https://github.com/kemitchell/spdx.js.git +│ │ ├─ VendorName: Kyle E. 
Mitchell +│ │ └─ VendorUrl: https://github.com/kemitchell/spdx.js +│ ├─ true-case-path@1.0.3 +│ │ ├─ URL: git+https://github.com/barsh/true-case-path.git +│ │ ├─ VendorName: barsh +│ │ └─ VendorUrl: https://github.com/barsh/true-case-path#readme +│ ├─ tunnel-agent@0.4.3 +│ │ ├─ URL: https://github.com/mikeal/tunnel-agent +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ tunnel-agent@0.6.0 +│ │ ├─ URL: https://github.com/mikeal/tunnel-agent +│ │ ├─ VendorName: Mikeal Rogers +│ │ └─ VendorUrl: http://www.futurealoof.com +│ ├─ validate-npm-package-license@3.0.4 +│ │ ├─ URL: https://github.com/kemitchell/validate-npm-package-license.js.git +│ │ ├─ VendorName: Kyle E. Mitchell +│ │ └─ VendorUrl: https://kemitchell.com +│ ├─ walker@1.0.7 +│ │ ├─ URL: https://github.com/daaku/nodejs-walker +│ │ ├─ VendorName: Naitik Shah +│ │ └─ VendorUrl: https://github.com/daaku/nodejs-walker +│ ├─ websocket-driver@0.7.3 +│ │ ├─ URL: git://github.com/faye/websocket-driver-node.git +│ │ ├─ VendorName: James Coglan +│ │ └─ VendorUrl: https://github.com/faye/websocket-driver-node +│ └─ workerpool@2.3.3 +│ ├─ URL: git://github.com/josdejong/workerpool.git +│ ├─ VendorName: Jos de Jong +│ └─ VendorUrl: https://github.com/josdejong/workerpool +├─ Apache* +│ └─ watch@0.10.0 +│ ├─ URL: git://github.com/mikeal/watch.git +│ ├─ VendorName: Mikeal Rogers +│ └─ VendorUrl: https://github.com/mikeal/watch +├─ Artistic-2.0 +│ └─ npm@2.14.10 +│ ├─ URL: https://github.com/npm/npm +│ ├─ VendorName: Isaac Z. Schlueter +│ └─ VendorUrl: https://docs.npmjs.com/ +├─ BSD +│ ├─ graceful-fs@2.0.3 +│ │ ├─ URL: git://github.com/isaacs/node-graceful-fs.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ mute-stream@0.0.4 +│ │ ├─ URL: git://github.com/isaacs/mute-stream +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ osenv@0.0.3 +│ │ ├─ URL: git://github.com/isaacs/osenv +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ └─ tar@1.0.3 +│ ├─ URL: git://github.com/isaacs/node-tar.git +│ ├─ VendorName: Isaac Z. Schlueter +│ └─ VendorUrl: http://blog.izs.me/ +├─ BSD-2-Clause +│ ├─ cmd-shim@2.0.2 +│ │ └─ URL: https://github.com/ForbesLindesay/cmd-shim.git +│ ├─ configstore@1.2.1 +│ │ ├─ URL: https://github.com/yeoman/configstore.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ domelementtype@1.3.1 +│ │ ├─ URL: git://github.com/fb55/domelementtype.git +│ │ └─ VendorName: Felix Boehm +│ ├─ domelementtype@2.0.1 +│ │ ├─ URL: git://github.com/fb55/domelementtype.git +│ │ └─ VendorName: Felix Boehm +│ ├─ domhandler@2.3.0 +│ │ ├─ URL: git://github.com/fb55/DomHandler.git +│ │ └─ VendorName: Felix Boehm +│ ├─ domutils@1.5.1 +│ │ ├─ URL: git://github.com/FB55/domutils.git +│ │ └─ VendorName: Felix Boehm +│ ├─ entities@1.0.0 +│ │ ├─ URL: git://github.com/fb55/node-entities.git +│ │ └─ VendorName: Felix Boehm +│ ├─ entities@1.1.2 +│ │ ├─ URL: git://github.com/fb55/entities.git +│ │ └─ VendorName: Felix Boehm +│ ├─ entities@2.0.0 +│ │ ├─ URL: git://github.com/fb55/entities.git +│ │ └─ VendorName: Felix Boehm +│ ├─ esprima@2.7.3 +│ │ ├─ URL: https://github.com/jquery/esprima.git +│ │ ├─ VendorName: Ariya Hidayat +│ │ └─ VendorUrl: http://esprima.org/ +│ ├─ esprima@3.1.3 +│ │ ├─ URL: https://github.com/jquery/esprima.git +│ │ ├─ VendorName: Ariya Hidayat +│ │ └─ VendorUrl: http://esprima.org/ +│ ├─ esprima@4.0.1 +│ │ ├─ URL: https://github.com/jquery/esprima.git +│ │ ├─ VendorName: Ariya Hidayat +│ │ └─ VendorUrl: http://esprima.org/ +│ ├─ esutils@2.0.3 +│ │ ├─ URL: http://github.com/estools/esutils.git +│ │ └─ VendorUrl: https://github.com/estools/esutils +│ ├─ extract-zip@1.5.0 +│ │ ├─ URL: git+ssh://git@github.com/maxogden/extract-zip.git +│ │ ├─ VendorName: max ogden +│ │ └─ VendorUrl: https://github.com/maxogden/extract-zip +│ ├─ github-url-from-username-repo@1.0.2 +│ │ ├─ URL: 
git@github.com:robertkowalski/github-url-from-username-repo.git +│ │ └─ VendorName: Robert Kowalski +│ ├─ normalize-package-data@2.3.8 +│ │ ├─ URL: git://github.com/npm/normalize-package-data.git +│ │ └─ VendorName: Meryn Stol +│ ├─ normalize-package-data@2.5.0 +│ │ ├─ URL: git://github.com/npm/normalize-package-data.git +│ │ └─ VendorName: Meryn Stol +│ ├─ npm-install-checks@1.0.7 +│ │ ├─ URL: git://github.com/npm/npm-install-checks.git +│ │ ├─ VendorName: Robert Kowalski +│ │ └─ VendorUrl: https://github.com/npm/npm-install-checks +│ ├─ npm-user-validate@0.1.5 +│ │ ├─ URL: git://github.com/npm/npm-user-validate.git +│ │ └─ VendorName: Robert Kowalski +│ ├─ regenerator@0.8.40 +│ │ ├─ URL: git://github.com/facebook/regenerator.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/facebook/regenerator +│ ├─ regjsparser@0.1.5 +│ │ ├─ URL: git@github.com:jviereck/regjsparser.git +│ │ ├─ VendorName: 'Julian Viereck' +│ │ └─ VendorUrl: https://github.com/jviereck/regjsparser +│ ├─ uglify-js@2.8.29 +│ │ ├─ URL: https://github.com/mishoo/UglifyJS2.git +│ │ ├─ VendorName: Mihai Bazon +│ │ └─ VendorUrl: http://lisperator.net/uglifyjs +│ └─ uri-js@4.2.2 +│ ├─ URL: http://github.com/garycourt/uri-js +│ ├─ VendorName: Gary Court +│ └─ VendorUrl: https://github.com/garycourt/uri-js +├─ BSD-3-Clause +│ ├─ bcrypt-pbkdf@1.0.2 +│ │ └─ URL: git://github.com/joyent/node-bcrypt-pbkdf.git +│ ├─ boom@2.10.1 +│ │ └─ URL: git://github.com/hapijs/boom +│ ├─ cpr@0.4.2 +│ │ ├─ URL: http://github.com/davglass/cpr.git +│ │ └─ VendorName: Dav Glass +│ ├─ cryptiles@2.0.5 +│ │ └─ URL: git://github.com/hapijs/cryptiles +│ ├─ hawk@3.1.3 +│ │ ├─ URL: git://github.com/hueniverse/hawk +│ │ ├─ VendorName: Eran Hammer +│ │ └─ VendorUrl: http://hueniverse.com +│ ├─ hoek@2.16.3 +│ │ └─ URL: git://github.com/hapijs/hoek +│ ├─ js-base64@2.5.1 +│ │ ├─ URL: git://github.com/dankogai/js-base64.git +│ │ └─ VendorName: Dan Kogai +│ ├─ makeerror@1.0.11 +│ │ ├─ URL: 
https://github.com/daaku/nodejs-makeerror +│ │ └─ VendorName: Naitik Shah +│ ├─ printf@0.2.5 +│ │ ├─ URL: https://github.com/wdavidw/node-printf.git +│ │ ├─ VendorName: David Worms +│ │ └─ VendorUrl: http://www.adaltas.com/projects/node-printf +│ ├─ qs@5.1.0 +│ │ ├─ URL: https://github.com/hapijs/qs.git +│ │ └─ VendorUrl: https://github.com/hapijs/qs +│ ├─ qs@5.2.0 +│ │ ├─ URL: https://github.com/hapijs/qs.git +│ │ └─ VendorUrl: https://github.com/hapijs/qs +│ ├─ qs@5.2.1 +│ │ ├─ URL: https://github.com/hapijs/qs.git +│ │ └─ VendorUrl: https://github.com/hapijs/qs +│ ├─ qs@6.2.3 +│ │ ├─ URL: https://github.com/ljharb/qs.git +│ │ └─ VendorUrl: https://github.com/ljharb/qs +│ ├─ qs@6.4.0 +│ │ ├─ URL: https://github.com/ljharb/qs.git +│ │ └─ VendorUrl: https://github.com/ljharb/qs +│ ├─ qs@6.5.2 +│ │ ├─ URL: https://github.com/ljharb/qs.git +│ │ └─ VendorUrl: https://github.com/ljharb/qs +│ ├─ qs@6.7.0 +│ │ ├─ URL: https://github.com/ljharb/qs.git +│ │ └─ VendorUrl: https://github.com/ljharb/qs +│ ├─ shelljs@0.3.0 +│ │ ├─ URL: git://github.com/arturadib/shelljs.git +│ │ ├─ VendorName: Artur Adib +│ │ └─ VendorUrl: http://github.com/arturadib/shelljs +│ ├─ source-map@0.1.32 +│ │ ├─ URL: http://github.com/mozilla/source-map.git +│ │ ├─ VendorName: Nick Fitzgerald +│ │ └─ VendorUrl: https://github.com/mozilla/source-map +│ ├─ source-map@0.1.43 +│ │ ├─ URL: http://github.com/mozilla/source-map.git +│ │ ├─ VendorName: Nick Fitzgerald +│ │ └─ VendorUrl: https://github.com/mozilla/source-map +│ ├─ source-map@0.4.4 +│ │ ├─ URL: http://github.com/mozilla/source-map.git +│ │ ├─ VendorName: Nick Fitzgerald +│ │ └─ VendorUrl: https://github.com/mozilla/source-map +│ ├─ source-map@0.5.7 +│ │ ├─ URL: http://github.com/mozilla/source-map.git +│ │ ├─ VendorName: Nick Fitzgerald +│ │ └─ VendorUrl: https://github.com/mozilla/source-map +│ ├─ sprintf-js@1.0.3 +│ │ ├─ URL: https://github.com/alexei/sprintf.js.git +│ │ ├─ VendorName: Alexandru Marasteanu +│ │ └─ VendorUrl: 
http://alexei.ro/ +│ ├─ sprintf-js@1.1.2 +│ │ ├─ URL: https://github.com/alexei/sprintf.js.git +│ │ └─ VendorName: Alexandru Mărășteanu +│ ├─ tmpl@1.0.4 +│ │ ├─ URL: https://github.com/daaku/nodejs-tmpl +│ │ ├─ VendorName: Naitik Shah +│ │ └─ VendorUrl: https://github.com/nshah/nodejs-tmpl +│ ├─ tough-cookie@2.2.2 +│ │ ├─ URL: git://github.com/SalesforceEng/tough-cookie.git +│ │ ├─ VendorName: Jeremy Stashewsky +│ │ └─ VendorUrl: https://github.com/SalesforceEng/tough-cookie +│ ├─ tough-cookie@2.3.4 +│ │ ├─ URL: git://github.com/salesforce/tough-cookie.git +│ │ ├─ VendorName: Jeremy Stashewsky +│ │ └─ VendorUrl: https://github.com/salesforce/tough-cookie +│ └─ tough-cookie@2.4.3 +│ ├─ URL: git://github.com/salesforce/tough-cookie.git +│ ├─ VendorName: Jeremy Stashewsky +│ └─ VendorUrl: https://github.com/salesforce/tough-cookie +├─ BSD-3-Clause OR MIT +│ └─ amdefine@1.0.1 +│ ├─ URL: https://github.com/jrburke/amdefine.git +│ ├─ VendorName: James Burke +│ └─ VendorUrl: http://github.com/jrburke/amdefine +├─ BSD* +│ ├─ diff@1.4.0 +│ │ └─ URL: git://github.com/kpdecker/jsdiff.git +│ ├─ esprima-fb@12001.1.0-dev-harmony-fb +│ │ ├─ URL: http://github.com/facebook/esprima.git +│ │ ├─ VendorName: Ariya Hidayat +│ │ └─ VendorUrl: https://github.com/facebook/esprima/tree/fb-harmony +│ ├─ esprima-fb@15001.1001.0-dev-harmony-fb +│ │ ├─ URL: http://github.com/facebook/esprima.git +│ │ ├─ VendorName: Ariya Hidayat +│ │ └─ VendorUrl: https://github.com/facebook/esprima/tree/fb-harmony +│ └─ json-schema@0.2.3 +│ ├─ URL: http://github.com/kriszyp/json-schema +│ └─ VendorName: Kris Zyp +├─ CC-BY-3.0 +│ └─ spdx-exceptions@2.2.0 +│ ├─ URL: https://github.com/kemitchell/spdx-exceptions.json.git +│ └─ VendorName: The Linux Foundation +├─ CC0-1.0 +│ └─ spdx-license-ids@3.0.5 +│ ├─ URL: https://github.com/shinnn/spdx-license-ids.git +│ ├─ VendorName: Shinnosuke Watanabe +│ └─ VendorUrl: https://github.com/shinnn +├─ ISC +│ ├─ abbrev@1.0.9 +│ │ ├─ URL: http://github.com/isaacs/abbrev-js +│ 
│ └─ VendorName: Isaac Z. Schlueter +│ ├─ abbrev@1.1.1 +│ │ ├─ URL: http://github.com/isaacs/abbrev-js +│ │ └─ VendorName: Isaac Z. Schlueter +│ ├─ anymatch@1.3.2 +│ │ ├─ URL: https://github.com/es128/anymatch +│ │ ├─ VendorName: Elan Shanker +│ │ └─ VendorUrl: https://github.com/es128/anymatch +│ ├─ aproba@1.2.0 +│ │ ├─ URL: https://github.com/iarna/aproba +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/aproba +│ ├─ are-we-there-yet@1.0.6 +│ │ ├─ URL: https://github.com/iarna/are-we-there-yet.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/are-we-there-yet +│ ├─ are-we-there-yet@1.1.5 +│ │ ├─ URL: https://github.com/iarna/are-we-there-yet.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/are-we-there-yet +│ ├─ async-some@1.0.2 +│ │ ├─ URL: https://github.com/othiym23/async-some.git +│ │ ├─ VendorName: Forrest L Norvell +│ │ └─ VendorUrl: https://github.com/othiym23/async-some +│ ├─ block-stream@0.0.8 +│ │ ├─ URL: git://github.com/isaacs/block-stream.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ block-stream@0.0.9 +│ │ ├─ URL: git://github.com/isaacs/block-stream.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ broccoli-viz@2.0.1 +│ │ ├─ URL: https://github.com/stefanpenner/broccoli-viz.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/broccoli-viz +│ ├─ char-spinner@1.0.1 +│ │ ├─ URL: git://github.com/isaacs/char-spinner +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/char-spinner +│ ├─ chmodr@1.0.2 +│ │ ├─ URL: git://github.com/isaacs/chmodr.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ chownr@1.0.1 +│ │ ├─ URL: git://github.com/isaacs/chownr.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ chownr@1.1.3 +│ │ ├─ URL: git://github.com/isaacs/chownr.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ clean-base-url@1.0.0 +│ │ ├─ URL: git+ssh://git@github.com/stefanpenner/clean-base-url.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/clean-base-url#readme +│ ├─ cliui@2.1.0 +│ │ ├─ URL: http://github.com/bcoe/cliui.git +│ │ └─ VendorName: Ben Coe +│ ├─ cliui@3.2.0 +│ │ ├─ URL: http://github.com/yargs/cliui.git +│ │ └─ VendorName: Ben Coe +│ ├─ console-control-strings@1.1.0 +│ │ ├─ URL: https://github.com/iarna/console-control-strings +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: http://re-becca.org/ +│ ├─ d@1.0.1 +│ │ ├─ URL: git://github.com/medikoo/d.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ dezalgo@1.0.3 +│ │ ├─ URL: https://github.com/npm/dezalgo +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/dezalgo +│ ├─ ember-cli-get-dependency-depth@1.0.0 +│ │ ├─ URL: git+https://github.com/ember-cli/get-dependency-depth.git +│ │ ├─ VendorName: ember-cli Constributors +│ │ └─ VendorUrl: https://github.com/ember-cli/get-dependency-depth#readme +│ ├─ ember-cli-is-package-missing@1.0.0 +│ │ ├─ URL: git+https://github.com/ember-cli/ember-cli.git +│ │ ├─ VendorName: ember-cli contributors +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli#readme +│ ├─ ember-cli-normalize-entity-name@1.0.0 +│ │ ├─ URL: git+https://github.com/ember-cli/ember-cli.git +│ │ ├─ VendorName: ember-cli contributors +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli#readme +│ ├─ ember-cli-path-utils@1.0.0 +│ │ ├─ URL: git+https://github.com/ember-cli/ember-cli-path-utils.git +│ │ ├─ VendorName: ember-cli contributors +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-path-utils#readme +│ ├─ ember-cli-preprocess-registry@1.1.0 +│ │ ├─ URL: 
git+https://github.com/ember-cli/ember-cli-preprocessor-registry.git +│ │ ├─ VendorName: ember-cli Contributors +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-preprocessor-registry#readme +│ ├─ ember-cli-string-utils@1.1.0 +│ │ ├─ URL: git+https://github.com/ember-cli/ember-cli-string-utils.git +│ │ ├─ VendorName: ember-cli contributors +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-string-utils#readme +│ ├─ ember-cli-test-info@1.0.0 +│ │ ├─ URL: git+https://github.com/ember-cli/ember-cli-test-info.git +│ │ ├─ VendorName: ember-cli contributors +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-test-info#readme +│ ├─ ensure-posix-path@1.1.1 +│ │ ├─ URL: git+https://github.com/stefanpenner/ensure-posix-path.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/ensure-posix-path#readme +│ ├─ es5-ext@0.10.53 +│ │ ├─ URL: https://github.com/medikoo/es5-ext.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ es6-symbol@3.1.3 +│ │ ├─ URL: git://github.com/medikoo/es6-symbol.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ events-to-array@1.1.2 +│ │ ├─ URL: https://github.com/isaacs/events-to-array +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/events-to-array +│ ├─ exists-sync@0.0.3 +│ │ ├─ URL: git+https://github.com/ember-cli/exists-sync.git +│ │ ├─ VendorName: Jake Bixby +│ │ └─ VendorUrl: https://github.com/ember-cli/exists-sync#readme +│ ├─ exists-sync@0.0.4 +│ │ ├─ URL: git+https://github.com/ember-cli/exists-sync.git +│ │ ├─ VendorName: Jake Bixby +│ │ └─ VendorUrl: https://github.com/ember-cli/exists-sync#readme +│ ├─ ext@1.4.0 +│ │ ├─ URL: https://github.com/medikoo/es5-ext/tree/ext +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ fast-ordered-set@1.0.3 +│ ├─ fs-monitor-stack@1.1.1 +│ │ ├─ URL: git+https://github.com/stefanpenner/fs-monitor-stack.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/fs-monitor-stack#readme +│ ├─ fs-vacuum@1.2.10 +│ │ ├─ URL: https://github.com/npm/fs-vacuum.git +│ │ ├─ VendorName: Forrest L Norvell +│ │ └─ VendorUrl: https://github.com/npm/fs-vacuum +│ ├─ fs-write-stream-atomic@1.0.10 +│ │ ├─ URL: https://github.com/npm/fs-write-stream-atomic +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/fs-write-stream-atomic +│ ├─ fs.realpath@1.0.0 +│ │ ├─ URL: git+https://github.com/isaacs/fs.realpath.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ fstream-ignore@1.0.5 +│ │ ├─ URL: git://github.com/isaacs/fstream-ignore.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ fstream-npm@1.0.7 +│ │ ├─ URL: https://github.com/npm/fstream-npm.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ fstream@1.0.12 +│ │ ├─ URL: https://github.com/npm/fstream.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ gauge@1.2.7 +│ │ ├─ URL: https://github.com/iarna/gauge +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/gauge +│ ├─ gauge@2.7.4 +│ │ ├─ URL: https://github.com/iarna/gauge +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/gauge +│ ├─ get-caller-file@1.0.3 +│ │ ├─ URL: git+https://github.com/stefanpenner/get-caller-file.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/get-caller-file#readme +│ ├─ glob-parent@2.0.0 +│ │ ├─ URL: https://github.com/es128/glob-parent +│ │ ├─ VendorName: Elan Shanker +│ │ └─ VendorUrl: https://github.com/es128/glob-parent +│ ├─ glob@4.3.5 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ glob@4.5.3 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ glob@5.0.13 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ glob@5.0.15 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ glob@6.0.4 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ glob@7.1.6 +│ │ ├─ URL: git://github.com/isaacs/node-glob.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ graceful-fs@3.0.12 +│ │ ├─ URL: git://github.com/isaacs/node-graceful-fs.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ graceful-fs@4.1.15 +│ │ └─ URL: https://github.com/isaacs/node-graceful-fs +│ ├─ graceful-fs@4.2.3 +│ │ └─ URL: https://github.com/isaacs/node-graceful-fs +│ ├─ har-schema@1.0.5 +│ │ ├─ URL: https://github.com/ahmadnassri/har-schema.git +│ │ ├─ VendorName: Ahmad Nassri +│ │ └─ VendorUrl: https://github.com/ahmadnassri/har-schema +│ ├─ har-schema@2.0.0 +│ │ ├─ URL: https://github.com/ahmadnassri/har-schema.git +│ │ ├─ VendorName: Ahmad Nassri +│ │ └─ VendorUrl: https://github.com/ahmadnassri/har-schema +│ ├─ har-validator@2.0.6 +│ │ ├─ URL: https://github.com/ahmadnassri/har-validator.git +│ │ ├─ VendorName: Ahmad Nassri +│ │ └─ VendorUrl: https://github.com/ahmadnassri/har-validator +│ ├─ har-validator@4.2.1 +│ │ ├─ URL: https://github.com/ahmadnassri/har-validator.git +│ │ ├─ VendorName: Ahmad Nassri +│ │ └─ VendorUrl: https://github.com/ahmadnassri/har-validator +│ ├─ has-binary-data@0.1.3 +│ │ └─ VendorName: Kevin Roark +│ ├─ has-unicode@2.0.1 +│ │ ├─ URL: https://github.com/iarna/has-unicode +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/has-unicode +│ ├─ hash-for-dep@1.5.1 +│ │ ├─ URL: git+https://github.com/stefanpenner/hash-for-dep.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/hash-for-dep#readme +│ ├─ hosted-git-info@2.1.5 +│ │ ├─ URL: git+https://github.com/npm/hosted-git-info.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/npm/hosted-git-info +│ ├─ hosted-git-info@2.8.5 +│ │ ├─ URL: git+https://github.com/npm/hosted-git-info.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/npm/hosted-git-info +│ ├─ in-publish@2.0.0 +│ │ ├─ URL: https://github.com/iarna/in-publish +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/in-publish +│ ├─ inflight@1.0.6 +│ │ ├─ URL: https://github.com/npm/inflight.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/inflight +│ ├─ inherits@2.0.3 +│ │ └─ URL: git://github.com/isaacs/inherits +│ ├─ inherits@2.0.4 +│ │ └─ URL: git://github.com/isaacs/inherits +│ ├─ ini@1.3.5 +│ │ ├─ URL: git://github.com/isaacs/ini.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ init-package-json@1.9.6 +│ │ ├─ URL: https://github.com/npm/init-package-json.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ isexe@2.0.0 +│ │ ├─ URL: git+https://github.com/isaacs/isexe.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/isexe#readme +│ ├─ json-stringify-safe@5.0.1 +│ │ ├─ URL: git://github.com/isaacs/json-stringify-safe +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/json-stringify-safe +│ ├─ lockfile@1.0.4 +│ │ ├─ URL: https://github.com/npm/lockfile.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ lru-cache@2.7.3 +│ │ ├─ URL: git://github.com/isaacs/node-lru-cache.git +│ │ └─ VendorName: Isaac Z. Schlueter +│ ├─ lru-cache@4.1.5 +│ │ ├─ URL: git://github.com/isaacs/node-lru-cache.git +│ │ └─ VendorName: Isaac Z. Schlueter +│ ├─ matcher-collection@1.1.2 +│ │ ├─ URL: git+https://github.com/stefanpenner/matcher-collection.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/matcher-collection#readme +│ ├─ minimatch@2.0.10 +│ │ ├─ URL: git://github.com/isaacs/minimatch.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ minimatch@3.0.4 +│ │ ├─ URL: git://github.com/isaacs/minimatch.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ mute-stream@0.0.8 +│ │ ├─ URL: git://github.com/isaacs/mute-stream +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ natives@1.1.6 +│ │ ├─ URL: git+https://github.com/addaleax/natives.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/addaleax/natives#readme +│ ├─ node-modules-path@1.0.2 +│ │ ├─ URL: git+https://github.com/ember-cli/node-modules-path.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/ember-cli/node-modules-path#readme +│ ├─ nopt@3.0.6 +│ │ ├─ URL: https://github.com/npm/nopt.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ normalize-git-url@3.0.2 +│ │ ├─ URL: https://github.com/npm/normalize-git-url.git +│ │ ├─ VendorName: Forrest L Norvell +│ │ └─ VendorUrl: https://github.com/npm/normalize-git-url +│ ├─ npm-cache-filename@1.0.2 +│ │ ├─ URL: git://github.com/npm/npm-cache-filename +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/npm-cache-filename +│ ├─ npm-normalize-package-bin@1.0.1 +│ │ ├─ URL: git+https://github.com/npm/npm-normalize-package-bin +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://izs.me +│ ├─ npm-package-arg@4.0.2 +│ │ ├─ URL: https://github.com/npm/npm-package-arg +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/npm-package-arg +│ ├─ npm-package-arg@4.2.1 +│ │ ├─ URL: https://github.com/npm/npm-package-arg +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/npm-package-arg +│ ├─ npm-package-arg@5.1.2 +│ │ ├─ URL: https://github.com/npm/npm-package-arg +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/npm-package-arg +│ ├─ npm-registry-client@7.0.9 +│ │ ├─ URL: https://github.com/npm/npm-registry-client.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ npmlog@1.2.1 +│ │ ├─ URL: git://github.com/isaacs/npmlog.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ npmlog@2.0.4 +│ │ ├─ URL: https://github.com/npm/npmlog.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ npmlog@4.1.2 +│ │ ├─ URL: https://github.com/npm/npmlog.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ once@1.3.3 +│ │ ├─ URL: git://github.com/isaacs/once +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ once@1.4.0 +│ │ ├─ URL: git://github.com/isaacs/once +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ osenv@0.1.5 +│ │ ├─ URL: https://github.com/npm/osenv +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ path-posix@1.0.0 +│ │ ├─ URL: git@github.com:jden/node-path-posix.git +│ │ └─ VendorName: jden +│ ├─ pleasant-progress@1.1.0 +│ │ ├─ URL: https://github.com/stefanpenner/pleasant-progress.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/pleasant-progress +│ ├─ process-relative-require@1.0.0 +│ │ ├─ URL: git+https://github.com/ember-cli/process-relative-require.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/ember-cli/process-relative-require#readme +│ ├─ promzard@0.3.0 +│ │ ├─ URL: git://github.com/isaacs/promzard +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ proto-list@1.2.4 +│ │ ├─ URL: https://github.com/isaacs/proto-list +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ pseudomap@1.0.2 +│ │ ├─ URL: git+https://github.com/isaacs/pseudomap.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/isaacs/pseudomap#readme +│ ├─ read-installed@4.0.3 +│ │ ├─ URL: git://github.com/isaacs/read-installed +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ read-package-json@2.0.13 +│ │ ├─ URL: https://github.com/npm/read-package-json.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ read-package-json@2.1.1 +│ │ ├─ URL: https://github.com/npm/read-package-json.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ read@1.0.7 +│ │ ├─ URL: git://github.com/isaacs/read.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ readdir-scoped-modules@1.1.0 +│ │ ├─ URL: https://github.com/npm/readdir-scoped-modules +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: https://github.com/npm/readdir-scoped-modules +│ ├─ realize-package-specifier@3.0.3 +│ │ ├─ URL: https://github.com/npm/realize-package-specifier.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/npm/realize-package-specifier +│ ├─ remove-trailing-separator@1.1.0 +│ │ ├─ URL: git+https://github.com/darsain/remove-trailing-separator.git +│ │ ├─ VendorName: darsain +│ │ └─ VendorUrl: https://github.com/darsain/remove-trailing-separator#readme +│ ├─ require-main-filename@1.0.1 +│ │ ├─ URL: git+ssh://git@github.com/yargs/require-main-filename.git +│ │ ├─ VendorName: Ben Coe +│ │ └─ VendorUrl: https://github.com/yargs/require-main-filename#readme +│ ├─ rimraf@2.4.5 +│ │ ├─ URL: git://github.com/isaacs/rimraf.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ rimraf@2.7.1 +│ │ ├─ URL: git://github.com/isaacs/rimraf.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ semver@4.3.6 +│ │ └─ URL: git://github.com/npm/node-semver.git +│ ├─ semver@5.0.3 +│ │ └─ URL: https://github.com/npm/node-semver +│ ├─ semver@5.3.0 +│ │ └─ URL: https://github.com/npm/node-semver +│ ├─ semver@5.7.1 +│ │ └─ URL: https://github.com/npm/node-semver +│ ├─ set-blocking@2.0.0 +│ │ ├─ URL: git+https://github.com/yargs/set-blocking.git +│ │ ├─ VendorName: Ben Coe +│ │ └─ VendorUrl: https://github.com/yargs/set-blocking#readme +│ ├─ setprototypeof@1.1.1 +│ │ ├─ URL: https://github.com/wesleytodd/setprototypeof.git +│ │ ├─ VendorName: Wes Todd +│ │ └─ VendorUrl: https://github.com/wesleytodd/setprototypeof +│ ├─ sigmund@1.0.1 +│ │ ├─ URL: git://github.com/isaacs/sigmund +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ signal-exit@3.0.2 +│ │ ├─ URL: https://github.com/tapjs/signal-exit.git +│ │ ├─ VendorName: Ben Coe +│ │ └─ VendorUrl: https://github.com/tapjs/signal-exit +│ ├─ silent-error@1.1.1 +│ │ ├─ URL: git+https://github.com/stefanpenner/silent-error.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/silent-error#readme +│ ├─ slide@1.1.6 +│ │ ├─ URL: git://github.com/isaacs/slide-flow-control.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ tar@2.2.2 +│ │ ├─ URL: git://github.com/isaacs/node-tar.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ timers-ext@0.1.7 +│ │ ├─ URL: git://github.com/medikoo/timers-ext.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ tree-sync@1.4.0 +│ │ ├─ URL: https://github.com/stefanpenner/tree-sync/ +│ │ └─ VendorName: Stefan Penner +│ ├─ type@1.2.0 +│ │ ├─ URL: https://github.com/medikoo/type.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: https://www.medikoo.com/ +│ ├─ type@2.0.0 +│ │ ├─ URL: https://github.com/medikoo/type.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: https://www.medikoo.com/ +│ ├─ uid-number@0.0.6 +│ │ ├─ URL: git://github.com/isaacs/uid-number.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ validate-npm-package-name@2.2.2 +│ │ ├─ URL: https://github.com/npm/validate-npm-package-name +│ │ ├─ VendorName: zeke +│ │ └─ VendorUrl: https://github.com/npm/validate-npm-package-name +│ ├─ validate-npm-package-name@3.0.0 +│ │ ├─ URL: https://github.com/npm/validate-npm-package-name +│ │ ├─ VendorName: zeke +│ │ └─ VendorUrl: https://github.com/npm/validate-npm-package-name +│ ├─ which-module@1.0.0 +│ │ ├─ URL: git+https://github.com/nexdrew/which-module.git +│ │ ├─ VendorName: nexdrew +│ │ └─ VendorUrl: https://github.com/nexdrew/which-module#readme +│ ├─ which@1.2.14 +│ │ ├─ URL: git://github.com/isaacs/node-which.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ which@1.3.1 +│ │ ├─ URL: git://github.com/isaacs/node-which.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ wide-align@1.1.3 +│ │ ├─ URL: https://github.com/iarna/wide-align +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: http://re-becca.org/ +│ ├─ wrappy@1.0.2 +│ │ ├─ URL: https://github.com/npm/wrappy +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: https://github.com/npm/wrappy +│ ├─ write-file-atomic@1.1.4 +│ │ ├─ URL: git@github.com:iarna/write-file-atomic.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/write-file-atomic +│ ├─ write-file-atomic@1.3.4 +│ │ ├─ URL: git@github.com:iarna/write-file-atomic.git +│ │ ├─ VendorName: Rebecca Turner +│ │ └─ VendorUrl: https://github.com/iarna/write-file-atomic +│ ├─ y18n@3.2.2 +│ │ ├─ URL: git@github.com:yargs/y18n.git +│ │ ├─ VendorName: Ben Coe +│ │ └─ VendorUrl: https://github.com/yargs/y18n +│ ├─ yallist@2.1.2 +│ │ ├─ URL: git+https://github.com/isaacs/yallist.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ └─ yargs-parser@5.0.0 +│ ├─ URL: git@github.com:yargs/yargs-parser.git +│ └─ VendorName: Ben Coe +├─ MIT +│ ├─ @sailshq/lodash@3.10.4 +│ │ ├─ URL: git://github.com/treelinehq/lodash.git +│ │ └─ VendorName: Mike McNeil +│ ├─ accepts@1.3.7 +│ │ └─ URL: https://github.com/jshttp/accepts.git +│ ├─ acorn@5.7.3 +│ │ ├─ URL: https://github.com/acornjs/acorn.git +│ │ └─ VendorUrl: https://github.com/acornjs/acorn +│ ├─ ajv@4.11.8 +│ │ ├─ URL: https://github.com/epoberezkin/ajv.git +│ │ ├─ VendorName: Evgeny Poberezkin +│ │ └─ VendorUrl: https://github.com/epoberezkin/ajv +│ ├─ ajv@6.10.2 +│ │ ├─ URL: https://github.com/epoberezkin/ajv.git +│ │ ├─ VendorName: Evgeny Poberezkin +│ │ └─ VendorUrl: https://github.com/epoberezkin/ajv +│ ├─ align-text@0.1.4 +│ │ ├─ URL: git://github.com/jonschlinkert/align-text.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/align-text +│ ├─ alter@0.2.0 +│ │ ├─ URL: https://github.com/olov/alter.git +│ │ └─ VendorName: Olov Lassus +│ ├─ amd-name-resolver@0.0.2 +│ │ └─ VendorName: Ember CLI contributors +│ ├─ ansi-regex@0.2.1 +│ │ ├─ URL: https://github.com/sindresorhus/ansi-regex.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ ansi-regex@1.1.1 +│ │ ├─ URL: 
https://github.com/sindresorhus/ansi-regex.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ ansi-regex@2.1.1 +│ │ ├─ URL: https://github.com/chalk/ansi-regex.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ ansi-regex@3.0.0 +│ │ ├─ URL: https://github.com/chalk/ansi-regex.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ ansi-styles@1.0.0 +│ │ ├─ URL: git://github.com/sindresorhus/ansi-styles.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: https://github.com/sindresorhus/ansi-styles +│ ├─ ansi-styles@1.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/ansi-styles.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ ansi-styles@2.2.1 +│ │ ├─ URL: https://github.com/chalk/ansi-styles.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ ansi-styles@3.2.1 +│ │ ├─ URL: https://github.com/chalk/ansi-styles.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ ansi@0.3.1 +│ │ ├─ URL: git://github.com/TooTallNate/ansi.js.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: http://tootallnate.net +│ ├─ ansicolors@0.2.1 +│ │ ├─ URL: git://github.com/thlorenz/ansicolors.git +│ │ ├─ VendorName: Thorsten Lorenz +│ │ └─ VendorUrl: thlorenz.com +│ ├─ ansicolors@0.3.2 +│ │ ├─ URL: git://github.com/thlorenz/ansicolors.git +│ │ ├─ VendorName: Thorsten Lorenz +│ │ └─ VendorUrl: thlorenz.com +│ ├─ ansistyles@0.1.3 +│ │ ├─ URL: git://github.com/thlorenz/ansistyles.git +│ │ ├─ VendorName: Thorsten Lorenz +│ │ └─ VendorUrl: thlorenz.com +│ ├─ archy@1.0.0 +│ │ ├─ URL: http://github.com/substack/node-archy.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ argparse@1.0.10 +│ │ └─ URL: https://github.com/nodeca/argparse.git +│ ├─ arr-diff@2.0.0 +│ │ ├─ URL: https://github.com/jonschlinkert/arr-diff.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: 
https://github.com/jonschlinkert/arr-diff +│ ├─ arr-flatten@1.1.0 +│ │ ├─ URL: https://github.com/jonschlinkert/arr-flatten.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/arr-flatten +│ ├─ array-equal@1.0.0 +│ │ ├─ URL: https://github.com/component/array-equal.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ array-find-index@1.0.2 +│ │ ├─ URL: https://github.com/sindresorhus/array-find-index.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ array-flatten@1.1.1 +│ │ ├─ URL: git://github.com/blakeembrey/array-flatten.git +│ │ ├─ VendorName: Blake Embrey +│ │ └─ VendorUrl: https://github.com/blakeembrey/array-flatten +│ ├─ array-index@1.0.0 +│ │ ├─ URL: git://github.com/TooTallNate/array-index.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: http://tootallnate.net +│ ├─ array-unique@0.2.1 +│ │ ├─ URL: git://github.com/jonschlinkert/array-unique.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/array-unique +│ ├─ arraybuffer.slice@0.0.6 +│ │ ├─ URL: git@github.com:rase-/arraybuffer.slice.git +│ │ └─ VendorUrl: https://github.com/rase-/arraybuffer.slice +│ ├─ asap@2.0.6 +│ │ └─ URL: https://github.com/kriskowal/asap.git +│ ├─ asn1@0.1.11 +│ │ ├─ URL: git://github.com/mcavage/node-asn1.git +│ │ └─ VendorName: Mark Cavage +│ ├─ asn1@0.2.4 +│ │ ├─ URL: git://github.com/joyent/node-asn1.git +│ │ ├─ VendorName: Joyent +│ │ └─ VendorUrl: joyent.com +│ ├─ assert-plus@0.2.0 +│ │ ├─ URL: https://github.com/mcavage/node-assert-plus.git +│ │ └─ VendorName: Mark Cavage +│ ├─ assert-plus@1.0.0 +│ │ ├─ URL: https://github.com/mcavage/node-assert-plus.git +│ │ └─ VendorName: Mark Cavage +│ ├─ ast-traverse@0.1.1 +│ │ ├─ URL: https://github.com/olov/ast-traverse.git +│ │ └─ VendorName: Olov Lassus +│ ├─ ast-types@0.8.12 +│ │ ├─ URL: git://github.com/benjamn/ast-types.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: 
http://github.com/benjamn/ast-types +│ ├─ ast-types@0.8.15 +│ │ ├─ URL: git://github.com/benjamn/ast-types.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/ast-types +│ ├─ ast-types@0.9.6 +│ │ ├─ URL: git://github.com/benjamn/ast-types.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/ast-types +│ ├─ async-disk-cache@1.3.5 +│ │ ├─ URL: https://github.com/stefanpenner/async-disk-cache.git +│ │ └─ VendorName: Stefan Penner +│ ├─ async-promise-queue@1.0.5 +│ │ ├─ URL: https://github.com/stefanpenner/async-promise-queue +│ │ └─ VendorName: Stefan Penner +│ ├─ async@0.2.10 +│ │ ├─ URL: https://github.com/caolan/async.git +│ │ └─ VendorName: Caolan McMahon +│ ├─ async@0.8.0 +│ │ ├─ URL: https://github.com/caolan/async.git +│ │ └─ VendorName: Caolan McMahon +│ ├─ async@0.9.0 +│ │ ├─ URL: https://github.com/caolan/async.git +│ │ └─ VendorName: Caolan McMahon +│ ├─ async@1.5.2 +│ │ ├─ URL: https://github.com/caolan/async.git +│ │ └─ VendorName: Caolan McMahon +│ ├─ async@2.6.3 +│ │ ├─ URL: https://github.com/caolan/async.git +│ │ ├─ VendorName: Caolan McMahon +│ │ └─ VendorUrl: https://caolan.github.io/async/ +│ ├─ asynckit@0.4.0 +│ │ ├─ URL: git+https://github.com/alexindigo/asynckit.git +│ │ ├─ VendorName: Alex Indigo +│ │ └─ VendorUrl: https://github.com/alexindigo/asynckit#readme +│ ├─ aws4@1.9.0 +│ │ ├─ URL: https://github.com/mhart/aws4.git +│ │ ├─ VendorName: Michael Hart +│ │ └─ VendorUrl: http://github.com/mhart +│ ├─ babel-core@5.8.38 +│ │ ├─ URL: https://github.com/babel/babel.git +│ │ ├─ VendorName: Sebastian McKenzie +│ │ └─ VendorUrl: https://babeljs.io/ +│ ├─ babel-plugin-constant-folding@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-constant-folding.git +│ ├─ babel-plugin-dead-code-elimination@1.0.2 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-dead-code-elimination.git +│ ├─ babel-plugin-eval@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-eval.git 
+│ ├─ babel-plugin-htmlbars-inline-precompile@0.0.5 +│ │ ├─ URL: https://github.com/pangratz/babel-plugin-htmlbars-inline-precompile +│ │ └─ VendorName: Clemens Müller +│ ├─ babel-plugin-inline-environment-variables@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-inline-environment-variables.git +│ ├─ babel-plugin-jscript@1.0.4 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-jscript.git +│ ├─ babel-plugin-member-expression-literals@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-member-expression-literals.git +│ ├─ babel-plugin-property-literals@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-property-literals.git +│ ├─ babel-plugin-proto-to-assign@1.0.4 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-proto-to-assign.git +│ ├─ babel-plugin-react-constant-elements@1.0.3 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-react-constant-elements.git +│ ├─ babel-plugin-react-display-name@1.0.3 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-react-display-name.git +│ ├─ babel-plugin-remove-console@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-remove-console.git +│ ├─ babel-plugin-remove-debugger@1.0.1 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-remove-debugger.git +│ ├─ babel-plugin-runtime@1.0.7 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-runtime.git +│ ├─ babel-plugin-undeclared-variables-check@1.0.2 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-undeclared-variables-check.git +│ ├─ babel-plugin-undefined-to-void@1.1.6 +│ │ └─ URL: https://github.com/babel-plugins/babel-plugin-undefined-to-void.git +│ ├─ babylon@5.8.38 +│ │ ├─ URL: https://github.com/babel/babel.git +│ │ ├─ VendorName: Sebastian McKenzie +│ │ └─ VendorUrl: https://babeljs.io/ +│ ├─ backbone@1.4.0 +│ │ ├─ URL: https://github.com/jashkenas/backbone.git +│ │ └─ VendorName: Jeremy Ashkenas +│ ├─ backo2@1.0.2 +│ │ └─ URL: 
https://github.com/mokesmokes/backo.git +│ ├─ balanced-match@1.0.0 +│ │ ├─ URL: git://github.com/juliangruber/balanced-match.git +│ │ ├─ VendorName: Julian Gruber +│ │ └─ VendorUrl: https://github.com/juliangruber/balanced-match +│ ├─ base64-arraybuffer@0.1.2 +│ │ ├─ URL: https://github.com/niklasvh/base64-arraybuffer +│ │ ├─ VendorName: Niklas von Hertzen +│ │ └─ VendorUrl: https://github.com/niklasvh/base64-arraybuffer +│ ├─ basic-auth@2.0.1 +│ │ └─ URL: https://github.com/jshttp/basic-auth.git +│ ├─ benchmark@1.0.0 +│ │ ├─ URL: https://github.com/bestiejs/benchmark.js.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: http://benchmarkjs.com/ +│ ├─ better-assert@1.0.2 +│ │ ├─ URL: https://github.com/visionmedia/better-assert.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ binaryextensions@2.2.0 +│ │ ├─ URL: https://github.com/bevry/binaryextensions.git +│ │ ├─ VendorName: 2013+ Bevry Pty Ltd +│ │ └─ VendorUrl: https://github.com/bevry/binaryextensions +│ ├─ bl@1.0.3 +│ │ ├─ URL: https://github.com/rvagg/bl.git +│ │ └─ VendorUrl: https://github.com/rvagg/bl +│ ├─ bl@1.1.2 +│ │ ├─ URL: https://github.com/rvagg/bl.git +│ │ └─ VendorUrl: https://github.com/rvagg/bl +│ ├─ blank-object@1.0.2 +│ │ ├─ URL: git+https://github.com/stefanpenner/blank-object.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/blank-object#readme +│ ├─ bluebird@2.11.0 +│ │ ├─ URL: git://github.com/petkaantonov/bluebird.git +│ │ ├─ VendorName: Petka Antonov +│ │ └─ VendorUrl: https://github.com/petkaantonov/bluebird +│ ├─ body-parser@1.14.2 +│ │ └─ URL: https://github.com/expressjs/body-parser.git +│ ├─ body-parser@1.19.0 +│ │ └─ URL: https://github.com/expressjs/body-parser.git +│ ├─ bower-config@0.6.1 +│ │ ├─ URL: git://github.com/bower/config.git +│ │ ├─ VendorName: Twitter +│ │ └─ VendorUrl: http://bower.io/ +│ ├─ bower-endpoint-parser@0.2.2 +│ │ ├─ URL: git://github.com/bower/endpoint-parser.git +│ │ └─ VendorName: Twitter +│ ├─ 
bower-shrinkwrap-resolver-ext@0.1.0 +│ │ └─ URL: https://github.com/sreenaths/bower-shrinkwrap-resolver-ext +│ ├─ bower@1.8.8 +│ │ ├─ URL: https://github.com/bower/bower.git +│ │ ├─ VendorName: Twitter +│ │ └─ VendorUrl: http://bower.io/ +│ ├─ brace-expansion@1.1.11 +│ │ ├─ URL: git://github.com/juliangruber/brace-expansion.git +│ │ ├─ VendorName: Julian Gruber +│ │ └─ VendorUrl: https://github.com/juliangruber/brace-expansion +│ ├─ braces@1.8.5 +│ │ ├─ URL: https://github.com/jonschlinkert/braces.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/braces +│ ├─ breakable@1.0.0 +│ │ ├─ URL: https://github.com/olov/breakable.git +│ │ └─ VendorName: Olov Lassus +│ ├─ broccoli-asset-rev@2.4.2 +│ │ ├─ URL: git://github.com/rickharrison/broccoli-asset-rev +│ │ ├─ VendorName: Rick Harrison +│ │ └─ VendorUrl: https://github.com/rickharrison/broccoli-asset-rev +│ ├─ broccoli-asset-rewrite@1.1.0 +│ │ ├─ URL: git://github.com/rickharrison/broccoli-asset-rewrite +│ │ ├─ VendorName: Rick Harrison +│ │ └─ VendorUrl: https://github.com/rickharrison/broccoli-asset-rewrite +│ ├─ broccoli-babel-transpiler@5.7.4 +│ │ ├─ URL: https://github.com/babel/broccoli-babel-transpiler.git +│ │ ├─ VendorName: nightire +│ │ └─ VendorUrl: https://github.com/babel/broccoli-babel-transpiler +│ ├─ broccoli-caching-writer@2.3.1 +│ │ ├─ URL: https://github.com/ember-cli/broccoli-caching-writer.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-caching-writer@3.0.3 +│ │ ├─ URL: https://github.com/ember-cli/broccoli-caching-writer.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-clean-css@0.2.0 +│ │ ├─ URL: https://github.com/shinnn/broccoli-clean-css.git +│ │ ├─ VendorName: Shinnosuke Watanabe +│ │ └─ VendorUrl: https://github.com/shinnn/broccoli-clean-css +│ ├─ broccoli-config-loader@1.0.1 +│ │ ├─ URL: git+https://github.com/ember-cli/broccoli-config-loader.git +│ │ ├─ VendorName: yaymukund +│ │ └─ VendorUrl: 
https://github.com/ember-cli/broccoli-config-loader#readme +│ ├─ broccoli-config-replace@1.1.2 +│ │ ├─ URL: https://github.com/ember-cli/broccoli-config-replace +│ │ ├─ VendorName: yaymukund +│ │ └─ VendorUrl: https://github.com/ember-cli/broccoli-config-replace#readme +│ ├─ broccoli-debug@0.6.5 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-debug.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: http: +│ ├─ broccoli-filter@0.1.14 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-filter +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-filter@1.3.0 +│ │ └─ URL: https://github.com/broccolijs/broccoli-filter +│ ├─ broccoli-funnel@1.0.1 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-funnel.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-funnel@1.2.0 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-funnel.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-funnel@2.0.2 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-funnel.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-jshint@1.2.0 +│ │ ├─ URL: https://github.com/rwjblue/broccoli-jshint.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-kitchen-sink-helpers@0.2.9 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-kitchen-sink-helpers +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-kitchen-sink-helpers@0.3.1 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-kitchen-sink-helpers +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-less-single@0.6.4 +│ │ ├─ URL: https://github.com/gabrielgrant/broccoli-less-single +│ │ ├─ VendorName: Gabriel Grant +│ │ └─ VendorUrl: https://github.com/gabrielgrant/broccoli-less-single +│ ├─ broccoli-merge-trees@1.1.1 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-merge-trees +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-merge-trees@1.2.4 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-merge-trees +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-merge-trees@2.0.1 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-merge-trees +│ │ └─ VendorName: Jo Liss +│ ├─ 
broccoli-persistent-filter@1.4.6 +│ │ ├─ URL: git+https://github.com/stefanpenner/broccoli-persistent-filter.git +│ │ ├─ VendorName: Stefan Penner +│ │ └─ VendorUrl: https://github.com/stefanpenner/broccoli-persistent-filter#readme +│ ├─ broccoli-plugin@1.1.0 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-plugin +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-plugin@1.3.1 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-plugin +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-sane-watcher@1.1.5 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-sane-watcher.git +│ │ ├─ VendorName: Kris Selden +│ │ └─ VendorUrl: https://github.com/broccolijs/broccoli-sane-watcher +│ ├─ broccoli-sass-source-maps@2.2.0 +│ │ ├─ URL: https://github.com/aexmachina/broccoli-sass-source-maps +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-slow-trees@1.1.0 +│ │ ├─ URL: https://github.com/rwjblue/broccoli-slow-trees.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/rwjblue/broccoli-slow-trees +│ ├─ broccoli-source@1.1.0 +│ │ ├─ URL: https://github.com/broccolijs/broccoli-source +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli-sourcemap-concat@1.1.6 +│ │ ├─ URL: https://github.com/ef4/broccoli-sourcemap-concat +│ │ └─ VendorName: Edward Faulkner +│ ├─ broccoli-sourcemap-concat@2.0.2 +│ │ ├─ URL: https://github.com/ef4/broccoli-sourcemap-concat +│ │ └─ VendorName: Edward Faulkner +│ ├─ broccoli-sri-hash@1.2.2 +│ │ ├─ URL: https://github.com/jonathanKingston/broccoli-sri-hash.git +│ │ └─ VendorName: Jonathan Kingston +│ ├─ broccoli-stew@1.6.0 +│ │ ├─ URL: https://github.com/stefanpenner/broccoli-stew.git +│ │ ├─ VendorName: Stefan Penner & Robert Jackson +│ │ └─ VendorUrl: https://github.com/stefanpenner/broccoli-stew +│ ├─ broccoli-uglify-sourcemap@1.5.2 +│ │ ├─ URL: git+https://github.com/ember-cli/broccoli-uglify-sourcemap.git +│ │ ├─ VendorName: Edward Faulkner +│ │ └─ VendorUrl: https://github.com/ember-cli/broccoli-uglify-sourcemap#readme +│ ├─ broccoli-unwatched-tree@0.1.3 
+│ │ ├─ URL: https://github.com/rjackson/broccoli-unwatched-tree.git +│ │ └─ VendorName: Robert Jackson +│ ├─ broccoli-writer@0.1.1 +│ │ ├─ URL: https://github.com/joliss/broccoli-writer +│ │ └─ VendorName: Jo Liss +│ ├─ broccoli@0.16.8 +│ │ ├─ URL: https://github.com/broccolijs/broccoli +│ │ ├─ VendorName: Jo Liss +│ │ └─ VendorUrl: https://github.com/broccolijs/broccoli +│ ├─ buffer-from@1.1.1 +│ │ └─ URL: https://github.com/LinusU/buffer-from.git +│ ├─ builtin-modules@1.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/builtin-modules.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ builtins@0.0.7 +│ │ └─ URL: https://github.com/juliangruber/builtins.git +│ ├─ builtins@1.0.3 +│ │ └─ URL: https://github.com/juliangruber/builtins.git +│ ├─ bytes@2.2.0 +│ │ ├─ URL: https://github.com/visionmedia/bytes.js.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ bytes@2.4.0 +│ │ ├─ URL: https://github.com/visionmedia/bytes.js.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ bytes@3.0.0 +│ │ ├─ URL: https://github.com/visionmedia/bytes.js.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ bytes@3.1.0 +│ │ ├─ URL: https://github.com/visionmedia/bytes.js.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ camelcase-keys@2.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/camelcase-keys.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ camelcase@1.2.1 +│ │ ├─ URL: https://github.com/sindresorhus/camelcase.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ camelcase@2.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/camelcase.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ camelcase@3.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/camelcase.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ 
VendorUrl: http://sindresorhus.com +│ ├─ can-symlink@1.0.0 +│ │ ├─ URL: https://github.com/raytiley/can-symlink +│ │ └─ VendorName: raytiley +│ ├─ cardinal@0.5.0 +│ │ ├─ URL: git://github.com/thlorenz/cardinal.git +│ │ ├─ VendorName: Thorsten Lorenz +│ │ └─ VendorUrl: thlorenz.com +│ ├─ center-align@0.1.3 +│ │ ├─ URL: https://github.com/jonschlinkert/center-align.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/center-align +│ ├─ chalk@0.4.0 +│ │ ├─ URL: https://github.com/sindresorhus/chalk.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ chalk@0.5.1 +│ │ └─ URL: https://github.com/sindresorhus/chalk.git +│ ├─ chalk@1.1.0 +│ │ └─ URL: https://github.com/chalk/chalk.git +│ ├─ chalk@1.1.3 +│ │ └─ URL: https://github.com/chalk/chalk.git +│ ├─ chalk@2.4.2 +│ │ └─ URL: https://github.com/chalk/chalk.git +│ ├─ charm@1.0.2 +│ │ ├─ URL: http://github.com/substack/node-charm.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ clean-css@2.2.23 +│ │ ├─ URL: https://github.com/GoalSmashers/clean-css.git +│ │ ├─ VendorName: Jakub Pawlowicz +│ │ └─ VendorUrl: https://github.com/GoalSmashers/clean-css +│ ├─ cli-color@0.3.3 +│ │ ├─ URL: git://github.com/medikoo/cli-color.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ cli-table@0.3.1 +│ │ ├─ URL: https://github.com/Automattic/cli-table.git +│ │ └─ VendorName: Guillermo Rauch +│ ├─ cli@1.0.1 +│ │ ├─ URL: http://github.com/node-js-libs/cli.git +│ │ ├─ VendorName: Chris O'Hara +│ │ └─ VendorUrl: http://github.com/node-js-libs/cli +│ ├─ clone@0.2.0 +│ │ ├─ URL: git://github.com/pvorb/node-clone.git +│ │ ├─ VendorName: Paul Vorbach +│ │ └─ VendorUrl: http://paul.vorba.ch/ +│ ├─ clone@1.0.4 +│ │ ├─ URL: git://github.com/pvorb/node-clone.git +│ │ ├─ VendorName: Paul Vorbach +│ │ └─ VendorUrl: http://paul.vorba.ch/ +│ ├─ clone@2.1.2 +│ │ ├─ URL: git://github.com/pvorb/node-clone.git +│ │ 
├─ VendorName: Paul Vorbach +│ │ └─ VendorUrl: http://paul.vorba.ch/ +│ ├─ co@4.6.0 +│ │ └─ URL: https://github.com/tj/co.git +│ ├─ code-point-at@1.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/code-point-at.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ color-convert@1.9.3 +│ │ ├─ URL: https://github.com/Qix-/color-convert.git +│ │ └─ VendorName: Heather Arthur +│ ├─ color-name@1.1.3 +│ │ ├─ URL: git@github.com:dfcreative/color-name.git +│ │ ├─ VendorName: DY +│ │ └─ VendorUrl: https://github.com/dfcreative/color-name +│ ├─ colors@1.0.3 +│ │ ├─ URL: http://github.com/Marak/colors.js.git +│ │ ├─ VendorName: Marak Squires +│ │ └─ VendorUrl: https://github.com/Marak/colors.js +│ ├─ columnify@1.5.4 +│ │ ├─ URL: git://github.com/timoxley/columnify.git +│ │ ├─ VendorName: Tim Oxley +│ │ └─ VendorUrl: https://github.com/timoxley/columnify +│ ├─ combined-stream@1.0.8 +│ │ ├─ URL: git://github.com/felixge/node-combined-stream.git +│ │ ├─ VendorName: Felix Geisendörfer +│ │ └─ VendorUrl: https://github.com/felixge/node-combined-stream +│ ├─ commander@2.1.0 +│ │ ├─ URL: https://github.com/visionmedia/commander.js.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ commander@2.2.0 +│ │ ├─ URL: https://github.com/visionmedia/commander.js.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ commander@2.20.3 +│ │ ├─ URL: https://github.com/tj/commander.js.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ commoner@0.10.8 +│ │ ├─ URL: git://github.com/benjamn/commoner.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/commoner +│ ├─ compressible@2.0.17 +│ │ └─ URL: https://github.com/jshttp/compressible.git +│ ├─ compression@1.7.4 +│ │ └─ URL: https://github.com/expressjs/compression.git +│ ├─ concat-map@0.0.1 +│ │ ├─ URL: git://github.com/substack/node-concat-map.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ concat-stream@1.5.0 +│ │ ├─ URL: http://github.com/maxogden/concat-stream.git +│ │ └─ 
VendorName: Max Ogden +│ ├─ concat-stream@1.6.2 +│ │ ├─ URL: http://github.com/maxogden/concat-stream.git +│ │ └─ VendorName: Max Ogden +│ ├─ config-chain@1.1.12 +│ │ ├─ URL: https://github.com/dominictarr/config-chain.git +│ │ ├─ VendorName: Dominic Tarr +│ │ └─ VendorUrl: http://github.com/dominictarr/config-chain +│ ├─ connect@3.7.0 +│ │ ├─ URL: https://github.com/senchalabs/connect.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ console-browserify@1.1.0 +│ │ ├─ URL: git://github.com/Raynos/console-browserify.git +│ │ ├─ VendorName: Raynos +│ │ └─ VendorUrl: https://github.com/Raynos/console-browserify +│ ├─ consolidate@0.13.1 +│ │ ├─ URL: https://github.com/visionmedia/consolidate.js.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ content-disposition@0.5.3 +│ │ ├─ URL: https://github.com/jshttp/content-disposition.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ content-type@1.0.4 +│ │ ├─ URL: https://github.com/jshttp/content-type.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ convert-source-map@1.7.0 +│ │ ├─ URL: git://github.com/thlorenz/convert-source-map.git +│ │ ├─ VendorName: Thorsten Lorenz +│ │ └─ VendorUrl: https://github.com/thlorenz/convert-source-map +│ ├─ cookie-signature@1.0.6 +│ │ ├─ URL: https://github.com/visionmedia/node-cookie-signature.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ cookie@0.4.0 +│ │ ├─ URL: https://github.com/jshttp/cookie.git +│ │ └─ VendorName: Roman Shtylman +│ ├─ copy-dereference@1.0.0 +│ │ ├─ URL: https://github.com/broccolijs/node-copy-dereference +│ │ └─ VendorName: Jo Liss +│ ├─ core-js@1.2.7 +│ │ └─ URL: https://github.com/zloirock/core-js.git +│ ├─ core-object@0.0.2 +│ │ ├─ URL: https://github.com/stefanpenner/core_object.git +│ │ ├─ VendorName: Ember core team and ember cli contributors +│ │ └─ VendorUrl: https://github.com/stefanpenner/core_object +│ ├─ core-util-is@1.0.2 +│ │ ├─ URL: git://github.com/isaacs/core-util-is +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ cross-spawn-async@2.2.5 +│ │ ├─ URL: git://github.com/IndigoUnited/node-cross-spawn-async.git +│ │ ├─ VendorName: IndigoUnited +│ │ └─ VendorUrl: http://indigounited.com +│ ├─ cross-spawn@3.0.1 +│ │ ├─ URL: git://github.com/IndigoUnited/node-cross-spawn.git +│ │ ├─ VendorName: IndigoUnited +│ │ └─ VendorUrl: http://indigounited.com +│ ├─ ctype@0.5.3 +│ │ ├─ URL: https://github.com/rmustacc/node-ctype.git +│ │ ├─ VendorName: Robert Mustacchi +│ │ └─ VendorUrl: https://github.com/rmustacc/node-ctype +│ ├─ currently-unhandled@0.4.1 +│ │ ├─ URL: https://github.com/jamestalmage/currently-unhandled.git +│ │ ├─ VendorName: James Talmage +│ │ └─ VendorUrl: github.com/jamestalmage +│ ├─ d@0.1.1 +│ │ ├─ URL: git://github.com/medikoo/d.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ dashdash@1.14.1 +│ │ ├─ URL: git://github.com/trentm/node-dashdash.git +│ │ ├─ VendorName: Trent Mick +│ │ └─ VendorUrl: http://trentm.com +│ ├─ date-now@0.1.4 +│ │ ├─ URL: git://github.com/Colingo/date-now.git +│ │ ├─ VendorName: Raynos +│ │ └─ VendorUrl: https://github.com/Colingo/date-now +│ ├─ debug@0.7.4 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@1.0.2 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@1.0.3 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@1.0.4 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@2.1.0 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@2.2.0 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@2.6.9 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ debug@3.2.6 +│ │ ├─ URL: git://github.com/visionmedia/debug.git +│ │ └─ VendorName: TJ 
Holowaychuk +│ ├─ debuglog@1.0.1 +│ │ ├─ URL: https://github.com/sam-github/node-debuglog.git +│ │ └─ VendorName: Sam Roberts +│ ├─ decamelize@1.2.0 +│ │ ├─ URL: https://github.com/sindresorhus/decamelize.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ defaults@1.0.3 +│ │ ├─ URL: git://github.com/tmpvar/defaults.git +│ │ └─ VendorName: Elijah Insua +│ ├─ defined@1.0.0 +│ │ ├─ URL: git://github.com/substack/defined.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/defined +│ ├─ defs@1.1.1 +│ │ ├─ URL: https://github.com/olov/defs.git +│ │ └─ VendorName: Olov Lassus +│ ├─ delayed-stream@1.0.0 +│ │ ├─ URL: git://github.com/felixge/node-delayed-stream.git +│ │ ├─ VendorName: Felix Geisendörfer +│ │ └─ VendorUrl: https://github.com/felixge/node-delayed-stream +│ ├─ delegates@1.0.0 +│ │ └─ URL: https://github.com/visionmedia/node-delegates.git +│ ├─ depd@1.1.2 +│ │ ├─ URL: https://github.com/dougwilson/nodejs-depd.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ destroy@1.0.4 +│ │ ├─ URL: https://github.com/stream-utils/destroy.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ detect-indent@3.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/detect-indent.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ detective@4.7.1 +│ │ ├─ URL: git://github.com/browserify/detective.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ did_it_work@0.0.6 +│ │ └─ VendorName: Toby Ho +│ ├─ dom-serializer@0.2.2 +│ │ ├─ URL: git://github.com/cheeriojs/dom-renderer.git +│ │ └─ VendorName: Felix Boehm +│ ├─ ecc-jsbn@0.1.2 +│ │ ├─ URL: https://github.com/quartzjer/ecc-jsbn.git +│ │ ├─ VendorName: Jeremie Miller +│ │ └─ VendorUrl: https://github.com/quartzjer/ecc-jsbn +│ ├─ editions@1.3.4 +│ │ ├─ URL: https://github.com/bevry/editions.git +│ │ ├─ VendorName: 2016+ Bevry Pty Ltd +│ │ └─ VendorUrl: 
https://github.com/bevry/editions +│ ├─ editor@1.0.0 +│ │ ├─ URL: git://github.com/substack/node-editor.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/node-editor +│ ├─ ee-first@1.1.1 +│ │ ├─ URL: https://github.com/jonathanong/ee-first.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ em-helpers@0.8.0 +│ │ ├─ URL: https://github.com/sreenaths/em-helpers.git +│ │ └─ VendorName: Sreenath Somarajapuram +│ ├─ em-table@0.12.0 +│ │ ├─ URL: https://github.com/sreenaths/em-table.git +│ │ └─ VendorName: Sreenath Somarajapuram +│ ├─ ember-array-contains-helper@1.0.2 +│ │ ├─ URL: https://github.com/bmeurant/ember-array-contains-helper +│ │ └─ VendorName: Baptiste Meurant +│ ├─ ember-bootstrap@0.5.1 +│ │ ├─ URL: https://github.com/kaliber5/ember-bootstrap +│ │ ├─ VendorName: Simon Ihmig +│ │ └─ VendorUrl: http://kaliber5.github.io/ember-bootstrap/ +│ ├─ ember-cli-app-version@1.0.0 +│ │ ├─ URL: https://github.com/embersherpa/ember-cli-app-version.git +│ │ └─ VendorName: Taras Mankovski +│ ├─ ember-cli-babel@5.1.6 +│ │ ├─ URL: git://github.com/babel/ember-cli-babel.git +│ │ ├─ VendorName: Gordon Kristan +│ │ └─ VendorUrl: https://github.com/babel/ember-cli-babel +│ ├─ ember-cli-babel@5.2.8 +│ │ ├─ URL: git://github.com/babel/ember-cli-babel.git +│ │ └─ VendorName: Gordon Kristan +│ ├─ ember-cli-content-security-policy@0.4.0 +│ │ └─ URL: https://github.com/rwjblue/ember-cli-content-security-policy +│ ├─ ember-cli-copy-dereference@1.0.0 +│ │ ├─ URL: https://github.com/broccolijs/node-copy-dereference +│ │ └─ VendorName: Jo Liss +│ ├─ ember-cli-dependency-checker@1.2.0 +│ │ ├─ URL: https://github.com/quaertym/ember-cli-dependency-checker.git +│ │ └─ VendorName: Emre Unal +│ ├─ ember-cli-htmlbars-inline-precompile@0.3.1 +│ │ ├─ URL: https://github.com/pangratz/ember-cli-htmlbars-inline-precompile +│ │ └─ VendorName: Clemens Müller +│ ├─ ember-cli-htmlbars@0.7.6 +│ │ ├─ URL: 
git@github.com:ember-cli/ember-cli-htmlbars.git +│ │ ├─ VendorName: Jonathan Jackson & Chase McCarthy +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-htmlbars +│ ├─ ember-cli-htmlbars@1.0.2 +│ │ ├─ URL: git@github.com:ember-cli/ember-cli-htmlbars.git +│ │ ├─ VendorName: Jonathan Jackson & Chase McCarthy +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-htmlbars +│ ├─ ember-cli-htmlbars@1.3.5 +│ │ ├─ URL: git@github.com:ember-cli/ember-cli-htmlbars.git +│ │ ├─ VendorName: Jonathan Jackson & Chase McCarthy +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-htmlbars +│ ├─ ember-cli-ic-ajax@0.2.1 +│ │ ├─ URL: https://github.com/rjackson/ember-cli-ic-ajax +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/rjackson/ember-cli-ic-ajax +│ ├─ ember-cli-inject-live-reload@1.4.0 +│ │ ├─ URL: git://github.com/rwjblue/ember-cli-inject-live-reload.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/rwjblue/ember-cli-inject-live-reload +│ ├─ ember-cli-jquery-ui@0.0.20 +│ │ ├─ URL: https://github.com/gaurav0/ember-cli-jquery-ui +│ │ └─ VendorName: Gaurav Munjal +│ ├─ ember-cli-less@1.5.7 +│ │ ├─ URL: https://github.com/gdub22/ember-cli-less +│ │ └─ VendorName: Garth Poitras +│ ├─ ember-cli-moment-shim@0.7.3 +│ │ ├─ URL: git://github.com/jasonmit/ember-cli-moment-shim.git +│ │ ├─ VendorName: Jason Mitchell +│ │ └─ VendorUrl: https://github.com/jasonmit/ember-cli-moment-shim +│ ├─ ember-cli-node-assets@0.1.6 +│ │ ├─ URL: https://github.com/dfreeman/ember-cli-node-assets.git +│ │ └─ VendorName: Dan Freeman +│ ├─ ember-cli-numeral@0.2.0 +│ │ ├─ URL: http://github.com/josemarluedke/ember-cli-numeral +│ │ ├─ VendorName: Jay Phelps +│ │ └─ VendorUrl: https://github.com/josemarluedke/ember-cli-numeral +│ ├─ ember-cli-qunit@1.2.1 +│ │ ├─ URL: https://github.com/ember-cli/ember-cli-qunit.git +│ │ ├─ VendorName: Jake Craige +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-cli-qunit +│ ├─ ember-cli-release@0.2.8 +│ │ 
├─ URL: https://github.com/lytics/ember-cli-release.git +│ │ └─ VendorName: Steven Lindberg +│ ├─ ember-cli-sass@7.0.0 +│ │ ├─ URL: git://github.com/aexmachina/ember-cli-sass.git +│ │ ├─ VendorName: @aexmachina +│ │ └─ VendorUrl: https://github.com/aexmachina/ember-cli-sass +│ ├─ ember-cli-sri@1.2.1 +│ │ ├─ URL: https://github.com/jonathanKingston/ember-cli-sri +│ │ └─ VendorName: Jonathan Kingston +│ ├─ ember-cli-uglify@1.2.0 +│ │ └─ URL: https://github.com/ember-cli/ember-cli-uglify.git +│ ├─ ember-cli-version-checker@1.3.1 +│ │ ├─ URL: https://github.com/rwjblue/ember-cli-version-checker.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/rwjblue/ember-cli-version-checker +│ ├─ ember-cli@1.13.14 +│ │ ├─ URL: https://github.com/ember-cli/ember-cli.git +│ │ └─ VendorName: Stefan Penner, Robert Jackson and ember-cli contributors +│ ├─ ember-d3@0.1.0 +│ │ ├─ URL: https://github.com/brzpegasus/ember-d3 +│ │ └─ VendorName: Estelle DeBlois +│ ├─ ember-data@2.1.0 +│ │ └─ URL: git://github.com/emberjs/data.git +│ ├─ ember-disable-proxy-controllers@1.0.1 +│ │ ├─ URL: https://github.com/cibernox/ember-disable-proxy-controllers +│ │ └─ VendorName: Miguel Camba +│ ├─ ember-export-application-global@1.0.5 +│ │ ├─ URL: https://github.com/ember-cli/ember-export-application-global.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-export-application-global +│ ├─ ember-lodash@0.0.10 +│ │ ├─ URL: https://github.com/levanto-financial/ember-lodash.git +│ │ ├─ VendorName: Mike North +│ │ └─ VendorUrl: https://github.com/levanto-financial/ember-lodash +│ ├─ ember-qunit@0.4.24 +│ │ └─ URL: https://github.com/rwjblue/ember-qunit.git +│ ├─ ember-resolver@2.0.3 +│ │ ├─ URL: git+https://github.com/ember-cli/ember-resolver.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-resolver#readme +│ ├─ ember-router-generator@1.2.3 +│ │ ├─ URL: 
https://github.com/ember-cli/ember-router-generator.git +│ │ ├─ VendorName: Adolfo Builes +│ │ └─ VendorUrl: https://github.com/ember-cli/ember-router-generator +│ ├─ ember-spin-spinner@0.2.3 +│ │ ├─ URL: https://github.com/rsschermer/ember-spin-spinner +│ │ ├─ VendorName: Roland Schermer +│ │ └─ VendorUrl: https://github.com/rsschermer/ember-spin-spinner +│ ├─ ember-truth-helpers@1.3.0 +│ │ ├─ URL: https://github.com/jmurphyau/ember-truth-helpers +│ │ └─ VendorName: James Murphy +│ ├─ ember-wormhole@0.3.6 +│ │ ├─ URL: https://github.com/yapplabs/ember-wormhole.git +│ │ └─ VendorName: Yapp Labs +│ ├─ encodeurl@1.0.2 +│ │ └─ URL: https://github.com/pillarjs/encodeurl.git +│ ├─ engine.io-parser@1.2.2 +│ │ ├─ URL: git@github.com:Automattic/engine.io-parser.git +│ │ └─ VendorUrl: https://github.com/Automattic/engine.io-parser +│ ├─ engine.io-pure@1.5.9 +│ │ ├─ URL: git@github.com:Automattic/engine.io.git +│ │ ├─ VendorName: Guillermo Rauch +│ │ └─ VendorUrl: https://github.com/LearnBoost/engine.io +│ ├─ errno@0.1.7 +│ │ └─ URL: https://github.com/rvagg/node-errno.git +│ ├─ error-ex@1.3.2 +│ │ └─ URL: https://github.com/qix-/node-error-ex.git +│ ├─ es6-iterator@0.1.3 +│ │ ├─ URL: git://github.com/medikoo/es6-iterator.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ es6-iterator@2.0.3 +│ │ ├─ URL: git://github.com/medikoo/es6-iterator.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ es6-promise@4.0.5 +│ │ ├─ URL: git://github.com/stefanpenner/es6-promise.git +│ │ ├─ VendorName: Yehuda Katz, Tom Dale, Stefan Penner and contributors +│ │ └─ VendorUrl: Conversion to ES6 API by Jake Archibald +│ ├─ es6-symbol@2.0.1 +│ │ ├─ URL: git://github.com/medikoo/es6-symbol.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ es6-weak-map@0.1.4 +│ │ ├─ URL: git://github.com/medikoo/es6-weak-map.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ 
├─ escape-html@1.0.3 +│ │ └─ URL: https://github.com/component/escape-html.git +│ ├─ escape-string-regexp@1.0.5 +│ │ ├─ URL: https://github.com/sindresorhus/escape-string-regexp.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ etag@1.8.1 +│ │ └─ URL: https://github.com/jshttp/etag.git +│ ├─ event-emitter@0.3.5 +│ │ ├─ URL: git://github.com/medikoo/event-emitter.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ eventemitter3@4.0.0 +│ │ ├─ URL: git://github.com/primus/eventemitter3.git +│ │ └─ VendorName: Arnout Kazemier +│ ├─ exec-sh@0.2.2 +│ │ ├─ URL: git@github.com:tsertkov/exec-sh.git +│ │ └─ VendorName: Aleksandr Tsertkov +│ ├─ exit@0.1.2 +│ │ ├─ URL: git://github.com/cowboy/node-exit.git +│ │ ├─ VendorName: "Cowboy" Ben Alman +│ │ └─ VendorUrl: https://github.com/cowboy/node-exit +│ ├─ expand-brackets@0.1.5 +│ │ ├─ URL: https://github.com/jonschlinkert/expand-brackets.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/expand-brackets +│ ├─ expand-range@1.8.2 +│ │ ├─ URL: https://github.com/jonschlinkert/expand-range.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/expand-range +│ ├─ express@4.17.1 +│ │ ├─ URL: https://github.com/expressjs/express.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://expressjs.com/ +│ ├─ extend@3.0.2 +│ │ ├─ URL: https://github.com/justmoon/node-extend.git +│ │ ├─ VendorName: Stefan Thomas +│ │ └─ VendorUrl: http://www.justmoon.net +│ ├─ extglob@0.3.2 +│ │ ├─ URL: git://github.com/jonschlinkert/extglob.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/extglob +│ ├─ extsprintf@1.3.0 +│ │ └─ URL: git://github.com/davepacheco/node-extsprintf.git +│ ├─ extsprintf@1.4.0 +│ │ └─ URL: git://github.com/davepacheco/node-extsprintf.git +│ ├─ fast-deep-equal@2.0.1 +│ │ ├─ URL: git+https://github.com/epoberezkin/fast-deep-equal.git +│ │ ├─ 
VendorName: Evgeny Poberezkin +│ │ └─ VendorUrl: https://github.com/epoberezkin/fast-deep-equal#readme +│ ├─ fast-json-stable-stringify@2.1.0 +│ │ ├─ URL: git://github.com/epoberezkin/fast-json-stable-stringify.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/epoberezkin/fast-json-stable-stringify +│ ├─ fast-sourcemap-concat@0.2.7 +│ │ ├─ URL: https://github.com/ef4/fast-sourcemap-concat +│ │ └─ VendorName: Edward Faulkner +│ ├─ faye-websocket@0.10.0 +│ │ ├─ URL: git://github.com/faye/faye-websocket-node.git +│ │ ├─ VendorName: James Coglan +│ │ └─ VendorUrl: http://github.com/faye/faye-websocket-node +│ ├─ fd-slicer@1.0.1 +│ │ ├─ URL: git://github.com/andrewrk/node-fd-slicer.git +│ │ └─ VendorName: Andrew Kelley +│ ├─ filename-regex@2.0.1 +│ │ ├─ URL: https://github.com/regexhq/filename-regex.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/regexhq/filename-regex +│ ├─ fileset@0.2.1 +│ │ ├─ URL: git://github.com/mklabs/node-fileset.git +│ │ ├─ VendorName: mklabs +│ │ └─ VendorUrl: https://github.com/mklabs/node-fileset +│ ├─ fill-range@2.2.4 +│ │ ├─ URL: https://github.com/jonschlinkert/fill-range.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/fill-range +│ ├─ finalhandler@1.1.2 +│ │ ├─ URL: https://github.com/pillarjs/finalhandler.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ find-up@1.1.2 +│ │ ├─ URL: https://github.com/sindresorhus/find-up.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ findup-sync@0.2.1 +│ │ ├─ URL: git://github.com/cowboy/node-findup-sync.git +│ │ ├─ VendorName: "Cowboy" Ben Alman +│ │ └─ VendorUrl: https://github.com/cowboy/node-findup-sync +│ ├─ findup-sync@0.3.0 +│ │ ├─ URL: git://github.com/cowboy/node-findup-sync.git +│ │ ├─ VendorName: "Cowboy" Ben Alman +│ │ └─ VendorUrl: https://github.com/cowboy/node-findup-sync +│ ├─ findup@0.1.5 +│ │ ├─ URL: https://github.com/Filirom1/findup.git +│ │ └─ 
VendorName: Filirom1 +│ ├─ fireworm@0.6.6 +│ │ ├─ URL: git@github.com:airportyh/fireworm.git +│ │ └─ VendorName: Toby Ho +│ ├─ follow-redirects@1.9.0 +│ │ ├─ URL: git@github.com:follow-redirects/follow-redirects.git +│ │ ├─ VendorName: Ruben Verborgh +│ │ └─ VendorUrl: https://github.com/follow-redirects/follow-redirects +│ ├─ for-in@1.0.2 +│ │ ├─ URL: https://github.com/jonschlinkert/for-in.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/for-in +│ ├─ for-own@0.1.5 +│ │ ├─ URL: https://github.com/jonschlinkert/for-own.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/for-own +│ ├─ form-data@1.0.1 +│ │ ├─ URL: git://github.com/form-data/form-data.git +│ │ ├─ VendorName: Felix Geisendörfer +│ │ └─ VendorUrl: http://debuggable.com/ +│ ├─ form-data@2.1.4 +│ │ ├─ URL: git://github.com/form-data/form-data.git +│ │ ├─ VendorName: Felix Geisendörfer +│ │ └─ VendorUrl: http://debuggable.com/ +│ ├─ form-data@2.3.3 +│ │ ├─ URL: git://github.com/form-data/form-data.git +│ │ ├─ VendorName: Felix Geisendörfer +│ │ └─ VendorUrl: http://debuggable.com/ +│ ├─ forwarded@0.1.2 +│ │ └─ URL: https://github.com/jshttp/forwarded.git +│ ├─ fresh@0.5.2 +│ │ ├─ URL: https://github.com/jshttp/fresh.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ fs-extra@0.16.5 +│ │ ├─ URL: https://github.com/jprichardson/node-fs-extra +│ │ ├─ VendorName: JP Richardson +│ │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra +│ ├─ fs-extra@0.22.1 +│ │ ├─ URL: https://github.com/jprichardson/node-fs-extra +│ │ ├─ VendorName: JP Richardson +│ │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra +│ ├─ fs-extra@0.24.0 +│ │ ├─ URL: https://github.com/jprichardson/node-fs-extra +│ │ ├─ VendorName: JP Richardson +│ │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra +│ ├─ fs-extra@0.30.0 +│ │ ├─ URL: https://github.com/jprichardson/node-fs-extra +│ │ ├─ VendorName: 
JP Richardson +│ │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra +│ ├─ fs-extra@5.0.0 +│ │ ├─ URL: https://github.com/jprichardson/node-fs-extra +│ │ ├─ VendorName: JP Richardson +│ │ └─ VendorUrl: https://github.com/jprichardson/node-fs-extra +│ ├─ fs-readdir-recursive@0.1.2 +│ │ ├─ URL: https://github.com/fs-utils/fs-readdir-recursive.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ fs-tree-diff@0.3.1 +│ │ └─ VendorName: Stefan Penner, David J. Hamilton, Chad Hietala +│ ├─ fs-tree-diff@0.4.4 +│ │ └─ VendorName: Stefan Penner, David J. Hamilton, Chad Hietala +│ ├─ fs-tree-diff@0.5.9 +│ │ ├─ URL: git://github.com/stefanpenner/fs-tree-diff.git +│ │ └─ VendorName: Stefan Penner, David J. Hamilton, Chad Hietala +│ ├─ gaze@1.1.3 +│ │ ├─ URL: https://github.com/shama/gaze.git +│ │ ├─ VendorName: Kyle Robinson Young +│ │ └─ VendorUrl: https://github.com/shama/gaze +│ ├─ generate-function@2.3.1 +│ │ ├─ URL: https://github.com/mafintosh/generate-function +│ │ ├─ VendorName: Mathias Buus +│ │ └─ VendorUrl: https://github.com/mafintosh/generate-function +│ ├─ generate-object-property@1.2.0 +│ │ ├─ URL: https://github.com/mafintosh/generate-object-property +│ │ ├─ VendorName: Mathias Buus +│ │ └─ VendorUrl: https://github.com/mafintosh/generate-object-property +│ ├─ get-stdin@4.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/get-stdin.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ getpass@0.1.7 +│ │ ├─ URL: https://github.com/arekinath/node-getpass.git +│ │ └─ VendorName: Alex Wilson +│ ├─ git-repo-info@1.4.1 +│ │ ├─ URL: https://github.com/rwjblue/git-repo-info.git +│ │ ├─ VendorName: Robert Jackson +│ │ └─ VendorUrl: https://github.com/rwjblue/git-repo-info +│ ├─ git-repo-version@0.3.0 +│ │ ├─ URL: https://github.com/cibernox/git-repo-version +│ │ └─ VendorName: Miguel Camba +│ ├─ github-url-from-git@1.4.0 +│ │ ├─ URL: https://github.com/visionmedia/node-github-url-from-git.git +│ │ 
└─ VendorName: TJ Holowaychuk +│ ├─ glob-base@0.3.0 +│ │ ├─ URL: git://github.com/jonschlinkert/glob-base.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/glob-base +│ ├─ globals@6.4.1 +│ │ ├─ URL: https://github.com/sindresorhus/globals.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ globule@1.3.0 +│ │ ├─ URL: git://github.com/cowboy/node-globule.git +│ │ ├─ VendorName: "Cowboy" Ben Alman +│ │ └─ VendorUrl: https://github.com/cowboy/node-globule +│ ├─ growl@1.10.5 +│ │ ├─ URL: git://github.com/tj/node-growl.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ handlebars@3.0.7 +│ │ ├─ URL: https://github.com/wycats/handlebars.js.git +│ │ ├─ VendorName: Yehuda Katz +│ │ └─ VendorUrl: http://www.handlebarsjs.com/ +│ ├─ har-validator@5.1.3 +│ │ ├─ URL: https://github.com/ahmadnassri/node-har-validator.git +│ │ ├─ VendorName: Ahmad Nassri +│ │ └─ VendorUrl: https://github.com/ahmadnassri/node-har-validator +│ ├─ has-ansi@0.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/has-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ has-ansi@2.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/has-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ has-binary@0.1.6 +│ │ └─ VendorName: Kevin Roark +│ ├─ has-color@0.1.7 +│ │ ├─ URL: https://github.com/sindresorhus/has-color.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ has-cors@1.1.0 +│ │ ├─ URL: git://github.com/component/has-cors.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: http://n8.io/ +│ ├─ has-flag@3.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/has-flag.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ hasha@2.2.0 +│ │ ├─ URL: https://github.com/sindresorhus/hasha.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ heimdalljs-logger@0.1.10 +│ │ ├─ URL: 
git+https://github.com/heimdalljs/heimdalljs-logger.git +│ │ ├─ VendorName: David J. Hamilton +│ │ └─ VendorUrl: https://github.com/heimdalljs/heimdalljs-logger#README.md +│ ├─ heimdalljs@0.2.6 +│ │ ├─ URL: git+https://github.com/heimdalljs/heimdalljs-lib.git +│ │ └─ VendorUrl: https://github.com/hjdivad/heimdalljs-lib#readme +│ ├─ home-or-tmp@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/home-or-tmp.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ htmlparser2@3.8.3 +│ │ ├─ URL: git://github.com/fb55/htmlparser2.git +│ │ └─ VendorName: Felix Boehm +│ ├─ http-errors@1.3.1 +│ │ ├─ URL: https://github.com/jshttp/http-errors.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ http-errors@1.7.2 +│ │ ├─ URL: https://github.com/jshttp/http-errors.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ http-errors@1.7.3 +│ │ ├─ URL: https://github.com/jshttp/http-errors.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ http-parser-js@0.4.10 +│ │ ├─ URL: git://github.com/creationix/http-parser-js.git +│ │ ├─ VendorName: Tim Caswell +│ │ └─ VendorUrl: https://github.com/creationix +│ ├─ http-proxy@1.18.0 +│ │ ├─ URL: https://github.com/http-party/node-http-proxy.git +│ │ └─ VendorName: Charlie Robbins +│ ├─ http-signature@0.11.0 +│ │ ├─ URL: git://github.com/joyent/node-http-signature.git +│ │ ├─ VendorName: Joyent, Inc +│ │ └─ VendorUrl: https://github.com/joyent/node-http-signature/ +│ ├─ http-signature@1.1.1 +│ │ ├─ URL: git://github.com/joyent/node-http-signature.git +│ │ ├─ VendorName: Joyent, Inc +│ │ └─ VendorUrl: https://github.com/joyent/node-http-signature/ +│ ├─ http-signature@1.2.0 +│ │ ├─ URL: git://github.com/joyent/node-http-signature.git +│ │ ├─ VendorName: Joyent, Inc +│ │ └─ VendorUrl: https://github.com/joyent/node-http-signature/ +│ ├─ ic-ajax@2.0.2 +│ │ ├─ URL: https://github.com/instructure/ic-ajax.git +│ │ ├─ VendorName: 
Ryan Florence +│ │ └─ VendorUrl: https://github.com/instructure/ic-ajax +│ ├─ iconv-lite@0.4.13 +│ │ ├─ URL: git://github.com/ashtuchkin/iconv-lite.git +│ │ ├─ VendorName: Alexander Shtuchkin +│ │ └─ VendorUrl: https://github.com/ashtuchkin/iconv-lite +│ ├─ iconv-lite@0.4.24 +│ │ ├─ URL: git://github.com/ashtuchkin/iconv-lite.git +│ │ ├─ VendorName: Alexander Shtuchkin +│ │ └─ VendorUrl: https://github.com/ashtuchkin/iconv-lite +│ ├─ iferr@0.1.5 +│ │ ├─ URL: https://github.com/shesek/iferr +│ │ ├─ VendorName: Nadav Ivgi +│ │ └─ VendorUrl: https://github.com/shesek/iferr +│ ├─ image-size@0.5.5 +│ │ ├─ URL: https://github.com/image-size/image-size.git +│ │ ├─ VendorName: netroy +│ │ └─ VendorUrl: http://netroy.in/ +│ ├─ imurmurhash@0.1.4 +│ │ ├─ URL: https://github.com/jensyt/imurmurhash-js +│ │ ├─ VendorName: Jens Taylor +│ │ └─ VendorUrl: https://github.com/jensyt/imurmurhash-js +│ ├─ include-path-searcher@0.1.0 +│ │ ├─ URL: https://github.com/joliss/include-path-searcher +│ │ └─ VendorName: Jo Liss +│ ├─ indent-string@2.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/indent-string.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ inflection@1.12.0 +│ │ ├─ URL: https://github.com/dreamerslab/node.inflection.git +│ │ └─ VendorName: dreamerslab +│ ├─ inquirer@0.5.1 +│ │ ├─ URL: git://github.com/SBoudrias/Inquirer.js.git +│ │ └─ VendorName: Simon Boudrias +│ ├─ invert-kv@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/invert-kv.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ ipaddr.js@1.9.0 +│ │ ├─ URL: git://github.com/whitequark/ipaddr.js +│ │ └─ VendorName: whitequark +│ ├─ is-arrayish@0.2.1 +│ │ ├─ URL: https://github.com/qix-/node-is-arrayish.git +│ │ ├─ VendorName: Qix +│ │ └─ VendorUrl: http://github.com/qix- +│ ├─ is-buffer@1.1.6 +│ │ ├─ URL: git://github.com/feross/is-buffer.git +│ │ ├─ VendorName: Feross Aboukhadijeh +│ │ └─ VendorUrl: http://feross.org/ +│ ├─ is-builtin-module@1.0.0 +│ 
│ ├─ URL: https://github.com/sindresorhus/is-builtin-module.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ is-dotfile@1.0.3 +│ │ ├─ URL: https://github.com/jonschlinkert/is-dotfile.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-dotfile +│ ├─ is-equal-shallow@0.1.3 +│ │ ├─ URL: git://github.com/jonschlinkert/is-equal-shallow.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-equal-shallow +│ ├─ is-extendable@0.1.1 +│ │ ├─ URL: https://github.com/jonschlinkert/is-extendable.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-extendable +│ ├─ is-extglob@1.0.0 +│ │ ├─ URL: https://github.com/jonschlinkert/is-extglob.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-extglob +│ ├─ is-finite@1.0.2 +│ │ ├─ URL: https://github.com/sindresorhus/is-finite.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ is-fullwidth-code-point@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/is-fullwidth-code-point.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ is-fullwidth-code-point@2.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/is-fullwidth-code-point.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ is-git-url@0.2.0 +│ │ ├─ URL: git://github.com/jonschlinkert/is-git-url.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-git-url +│ ├─ is-git-url@0.2.3 +│ │ ├─ URL: https://github.com/jonschlinkert/is-git-url.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-git-url +│ ├─ is-glob@2.0.1 +│ │ ├─ URL: https://github.com/jonschlinkert/is-glob.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-glob +│ ├─ is-my-ip-valid@1.0.0 +│ │ └─ URL: 
https://github.com/LinusU/is-my-ip-valid.git +│ ├─ is-my-json-valid@2.20.0 +│ │ └─ URL: https://github.com/mafintosh/is-my-json-valid.git +│ ├─ is-number@2.1.0 +│ │ ├─ URL: https://github.com/jonschlinkert/is-number.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-number +│ ├─ is-number@4.0.0 +│ │ ├─ URL: https://github.com/jonschlinkert/is-number.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-number +│ ├─ is-posix-bracket@0.1.1 +│ │ ├─ URL: https://github.com/jonschlinkert/is-posix-bracket.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-posix-bracket +│ ├─ is-primitive@2.0.0 +│ │ ├─ URL: git://github.com/jonschlinkert/is-primitive.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/is-primitive +│ ├─ is-property@1.0.2 +│ │ ├─ URL: git://github.com/mikolalysenko/is-property.git +│ │ └─ VendorName: Mikola Lysenko +│ ├─ is-stream@1.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/is-stream.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ is-type@0.0.1 +│ │ ├─ URL: git://github.com/juliangruber/is-type.git +│ │ ├─ VendorName: Julian Gruber +│ │ └─ VendorUrl: https://github.com/juliangruber/is-type +│ ├─ is-typedarray@1.0.0 +│ │ ├─ URL: git://github.com/hughsk/is-typedarray.git +│ │ ├─ VendorName: Hugh Kennedy +│ │ └─ VendorUrl: https://github.com/hughsk/is-typedarray +│ ├─ is-utf8@0.2.1 +│ │ ├─ URL: https://github.com/wayfind/is-utf8.git +│ │ └─ VendorName: wayfind +│ ├─ isarray@0.0.1 +│ │ ├─ URL: git://github.com/juliangruber/isarray.git +│ │ ├─ VendorName: Julian Gruber +│ │ └─ VendorUrl: https://github.com/juliangruber/isarray +│ ├─ isarray@1.0.0 +│ │ ├─ URL: git://github.com/juliangruber/isarray.git +│ │ ├─ VendorName: Julian Gruber +│ │ └─ VendorUrl: https://github.com/juliangruber/isarray +│ ├─ isbinaryfile@2.0.4 +│ │ └─ URL: 
https://github.com/gjtorikian/isBinaryFile +│ ├─ isobject@2.1.0 +│ │ ├─ URL: https://github.com/jonschlinkert/isobject.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/isobject +│ ├─ isstream@0.1.2 +│ │ ├─ URL: https://github.com/rvagg/isstream.git +│ │ ├─ VendorName: Rod Vagg +│ │ └─ VendorUrl: https://github.com/rvagg/isstream +│ ├─ istextorbinary@2.1.0 +│ │ ├─ URL: http://github.com/bevry/istextorbinary.git +│ │ ├─ VendorName: 2012+ Bevry Pty Ltd +│ │ └─ VendorUrl: https://github.com/bevry/istextorbinary +│ ├─ js-tokens@1.0.1 +│ │ ├─ URL: https://github.com/lydell/js-tokens.git +│ │ └─ VendorName: Simon Lydell +│ ├─ js-yaml@3.13.1 +│ │ ├─ URL: https://github.com/nodeca/js-yaml.git +│ │ ├─ VendorName: Vladimir Zapparov +│ │ └─ VendorUrl: https://github.com/nodeca/js-yaml +│ ├─ jsbn@0.1.1 +│ │ ├─ URL: https://github.com/andyperlitch/jsbn.git +│ │ └─ VendorName: Tom Wu +│ ├─ jsesc@0.5.0 +│ │ ├─ URL: https://github.com/mathiasbynens/jsesc.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: http://mths.be/jsesc +│ ├─ json-parse-better-errors@1.0.2 +│ │ ├─ URL: https://github.com/zkat/json-parse-better-errors +│ │ └─ VendorName: Kat Marchán +│ ├─ json-schema-traverse@0.4.1 +│ │ ├─ URL: git+https://github.com/epoberezkin/json-schema-traverse.git +│ │ ├─ VendorName: Evgeny Poberezkin +│ │ └─ VendorUrl: https://github.com/epoberezkin/json-schema-traverse#readme +│ ├─ json-stable-stringify@1.0.1 +│ │ ├─ URL: git://github.com/substack/json-stable-stringify.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/json-stable-stringify +│ ├─ json3@3.2.6 +│ │ ├─ URL: git://github.com/bestiejs/json3.git +│ │ ├─ VendorName: Kit Cambridge +│ │ └─ VendorUrl: http://bestiejs.github.io/json3 +│ ├─ json5@0.4.0 +│ │ ├─ URL: https://github.com/aseemk/json5.git +│ │ ├─ VendorName: Aseem Kishore +│ │ └─ VendorUrl: http://json5.org/ +│ ├─ jsonfile@2.4.0 +│ │ ├─ URL: git@github.com:jprichardson/node-jsonfile.git 
+│ │ └─ VendorName: JP Richardson +│ ├─ jsonfile@4.0.0 +│ │ ├─ URL: git@github.com:jprichardson/node-jsonfile.git +│ │ └─ VendorName: JP Richardson +│ ├─ jsonpointer@4.0.1 +│ │ ├─ URL: http://github.com/janl/node-jsonpointer.git +│ │ └─ VendorName: Jan Lehnardt +│ ├─ jsprim@1.4.1 +│ │ └─ URL: git://github.com/joyent/node-jsprim.git +│ ├─ kind-of@3.2.2 +│ │ ├─ URL: https://github.com/jonschlinkert/kind-of.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/kind-of +│ ├─ kind-of@6.0.2 +│ │ ├─ URL: https://github.com/jonschlinkert/kind-of.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/kind-of +│ ├─ klassy@0.1.3 +│ │ └─ URL: https://github.com/cerebris/klassy.js.git +│ ├─ klaw@1.3.1 +│ │ ├─ URL: git+https://github.com/jprichardson/node-klaw.git +│ │ ├─ VendorName: JP Richardson +│ │ └─ VendorUrl: https://github.com/jprichardson/node-klaw#readme +│ ├─ lazy-cache@1.0.4 +│ │ ├─ URL: https://github.com/jonschlinkert/lazy-cache.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/lazy-cache +│ ├─ lcid@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/lcid.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ leek@0.0.18 +│ │ ├─ URL: https://github.com/twokul/leek +│ │ ├─ VendorName: Alex Navasardyan +│ │ └─ VendorUrl: http://twokul.io +│ ├─ leven@1.0.2 +│ │ ├─ URL: https://github.com/sindresorhus/leven.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ linkify-it@1.2.4 +│ │ └─ URL: https://github.com/markdown-it/linkify-it.git +│ ├─ livereload-js@2.4.0 +│ │ ├─ URL: git://github.com/livereload/livereload-js.git +│ │ └─ VendorUrl: https://github.com/livereload/livereload-js +│ ├─ load-json-file@1.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/load-json-file.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ loader.js@4.2.3 +│ │ ├─ URL: 
https://github.com/ember-cli/loader.js.git +│ │ └─ VendorUrl: https://github.com/ember-cli/loader.js +│ ├─ lodash-es@3.10.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/custom-builds +│ ├─ lodash-node@2.4.1 +│ │ ├─ URL: https://github.com/lodash/lodash-node.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: http://lodash.com/custom-builds +│ ├─ lodash-node@3.10.2 +│ │ ├─ URL: https://github.com/lodash/lodash-node.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._arraycopy@3.0.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._arrayeach@3.0.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._baseassign@3.2.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._basecallback@3.3.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._basecopy@3.0.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._basefor@3.0.3 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._baseindexof@3.1.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._baseisequal@3.0.7 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._baseuniq@3.0.3 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: 
https://lodash.com/ +│ ├─ lodash._bindcallback@3.0.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._cacheindexof@3.0.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._createassigner@3.1.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._createcache@3.1.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._getnative@3.9.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash._isiterateecall@3.0.9 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.assign@3.2.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.defaults@3.1.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.isarguments@3.1.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.isarray@3.0.4 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.isplainobject@3.2.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.istypedarray@3.0.6 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.keys@3.1.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ 
├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.keysin@3.0.8 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.merge@3.3.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.merge@4.6.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.pad@4.5.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.padend@4.6.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.padstart@4.6.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.pairs@3.0.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.restparam@3.6.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.toplainobject@3.0.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash.uniq@3.2.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash@2.3.0 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: http://lodash.com/ +│ ├─ lodash@2.4.2 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: http://lodash.com/ +│ ├─ lodash@3.10.1 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David 
Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ lodash@4.17.15 +│ │ ├─ URL: https://github.com/lodash/lodash.git +│ │ ├─ VendorName: John-David Dalton +│ │ └─ VendorUrl: https://lodash.com/ +│ ├─ longest@1.0.1 +│ │ ├─ URL: https://github.com/jonschlinkert/longest.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/longest +│ ├─ loud-rejection@1.6.0 +│ │ ├─ URL: https://github.com/sindresorhus/loud-rejection.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ lru-queue@0.1.0 +│ │ ├─ URL: git://github.com/medikoo/lru-queue.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ make-array@0.1.2 +│ │ ├─ URL: git://github.com/kaelzhang/make-array.git +│ │ └─ VendorName: kael +│ ├─ map-obj@1.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/map-obj.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ markdown-it-terminal@0.0.2 +│ │ ├─ URL: http://github.com/trabus/markdown-it-terminal +│ │ ├─ VendorName: Jake Bixby +│ │ └─ VendorUrl: https://github.com/trabus/markdown-it-terminal +│ ├─ markdown-it@4.3.0 +│ │ ├─ URL: https://github.com/markdown-it/markdown-it.git +│ │ └─ VendorUrl: https://github.com/markdown-it/markdown-it +│ ├─ markdown-it@4.4.0 +│ │ ├─ URL: https://github.com/markdown-it/markdown-it.git +│ │ └─ VendorUrl: https://github.com/markdown-it/markdown-it +│ ├─ math-random@1.0.4 +│ │ ├─ URL: https://github.com/michaelrhodes/math-random.git +│ │ └─ VendorName: Michael Rhodes +│ ├─ mdurl@1.0.1 +│ │ └─ URL: https://github.com/markdown-it/mdurl.git +│ ├─ media-typer@0.3.0 +│ │ ├─ URL: https://github.com/jshttp/media-typer.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ memoizee@0.3.10 +│ │ ├─ URL: git://github.com/medikoo/memoizee.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ meow@3.7.0 +│ │ ├─ URL: https://github.com/sindresorhus/meow.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ 
VendorUrl: sindresorhus.com +│ ├─ merge-defaults@0.2.2 +│ │ ├─ URL: git://github.com/mikermcneil/merge-defaults.git +│ │ ├─ VendorName: Mike McNeil +│ │ └─ VendorUrl: https://github.com/mikermcneil/merge-defaults +│ ├─ merge-descriptors@1.0.1 +│ │ ├─ URL: https://github.com/component/merge-descriptors.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ merge-trees@1.0.1 +│ │ ├─ URL: https://github.com/broccolijs/node-merge-trees +│ │ └─ VendorName: Jo Liss +│ ├─ merge@1.2.1 +│ │ ├─ URL: https://github.com/yeikos/js.merge.git +│ │ ├─ VendorName: yeikos +│ │ └─ VendorUrl: https://github.com/yeikos/js.merge +│ ├─ methods@1.1.2 +│ │ └─ URL: https://github.com/jshttp/methods.git +│ ├─ micromatch@2.3.11 +│ │ ├─ URL: https://github.com/jonschlinkert/micromatch.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/micromatch +│ ├─ mime-db@1.42.0 +│ │ └─ URL: https://github.com/jshttp/mime-db.git +│ ├─ mime-types@2.1.25 +│ │ └─ URL: https://github.com/jshttp/mime-types.git +│ ├─ mime@1.6.0 +│ │ ├─ URL: https://github.com/broofa/node-mime +│ │ ├─ VendorName: Robert Kieffer +│ │ └─ VendorUrl: http://github.com/broofa +│ ├─ minimatch@0.2.14 +│ │ ├─ URL: git://github.com/isaacs/minimatch.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ minimatch@1.0.0 +│ │ ├─ URL: git://github.com/isaacs/minimatch.git +│ │ ├─ VendorName: Isaac Z. 
Schlueter +│ │ └─ VendorUrl: http://blog.izs.me +│ ├─ minimist@0.0.10 +│ │ ├─ URL: git://github.com/substack/minimist.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/minimist +│ ├─ minimist@0.0.8 +│ │ ├─ URL: git://github.com/substack/minimist.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/minimist +│ ├─ minimist@1.2.0 +│ │ ├─ URL: git://github.com/substack/minimist.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/minimist +│ ├─ mkdirp@0.3.5 +│ │ ├─ URL: http://github.com/substack/node-mkdirp.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ mkdirp@0.4.2 +│ │ ├─ URL: https://github.com/substack/node-mkdirp.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ mkdirp@0.5.0 +│ │ ├─ URL: https://github.com/substack/node-mkdirp.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ mkdirp@0.5.1 +│ │ ├─ URL: https://github.com/substack/node-mkdirp.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ mktemp@0.3.5 +│ │ ├─ URL: git://github.com/sasaplus1/mktemp.git +│ │ └─ VendorName: sasa+1 +│ ├─ mktemp@0.4.0 +│ │ ├─ URL: git://github.com/sasaplus1/mktemp.git +│ │ └─ VendorName: sasa+1 +│ ├─ moment-timezone@0.3.1 +│ │ ├─ URL: https://github.com/moment/moment-timezone.git +│ │ ├─ VendorName: Tim Wood +│ │ └─ VendorUrl: http://momentjs.com/timezone/ +│ ├─ moment@2.24.0 +│ │ ├─ URL: https://github.com/moment/moment.git +│ │ ├─ VendorName: Iskren Ivov Chernev +│ │ └─ VendorUrl: http://momentjs.com/ +│ ├─ morgan@1.9.1 +│ │ └─ URL: https://github.com/expressjs/morgan.git +│ ├─ mout@0.9.1 +│ │ ├─ URL: git://github.com/mout/mout.git +│ │ └─ VendorUrl: http://moutjs.com/ +│ ├─ ms@0.7.1 +│ │ └─ URL: git://github.com/guille/ms.js.git +│ ├─ ms@2.0.0 +│ │ └─ URL: https://github.com/zeit/ms.git +│ ├─ ms@2.1.1 +│ │ └─ URL: https://github.com/zeit/ms.git 
+│ ├─ ms@2.1.2 +│ │ └─ URL: https://github.com/zeit/ms.git +│ ├─ mustache@2.3.2 +│ │ ├─ URL: https://github.com/janl/mustache.js.git +│ │ ├─ VendorName: mustache.js Authors +│ │ └─ VendorUrl: https://github.com/janl/mustache.js +│ ├─ nan@2.14.0 +│ │ └─ URL: git://github.com/nodejs/nan.git +│ ├─ negotiator@0.6.2 +│ │ └─ URL: https://github.com/jshttp/negotiator.git +│ ├─ next-tick@0.2.2 +│ │ ├─ URL: git://github.com/medikoo/next-tick.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ next-tick@1.0.0 +│ │ ├─ URL: git://github.com/medikoo/next-tick.git +│ │ ├─ VendorName: Mariusz Nowak +│ │ └─ VendorUrl: http://www.medikoo.com/ +│ ├─ node-gyp@3.0.3 +│ │ ├─ URL: git://github.com/nodejs/node-gyp.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: http://tootallnate.net +│ ├─ node-gyp@3.8.0 +│ │ ├─ URL: git://github.com/nodejs/node-gyp.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: http://tootallnate.net +│ ├─ node-int64@0.4.0 +│ │ ├─ URL: https://github.com/broofa/node-int64 +│ │ └─ VendorName: Robert Kieffer +│ ├─ node-sass@4.13.0 +│ │ ├─ URL: https://github.com/sass/node-sass +│ │ ├─ VendorName: Andrew Nesbitt +│ │ └─ VendorUrl: https://github.com/sass/node-sass +│ ├─ node-uuid@1.4.8 +│ │ ├─ URL: https://github.com/broofa/node-uuid.git +│ │ ├─ VendorName: Robert Kieffer +│ │ └─ VendorUrl: https://github.com/broofa/node-uuid +│ ├─ normalize-path@2.1.1 +│ │ ├─ URL: https://github.com/jonschlinkert/normalize-path.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/normalize-path +│ ├─ number-is-nan@1.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/number-is-nan.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ numeral@1.5.6 +│ │ ├─ URL: https://github.com/adamwdraper/Numeral-js +│ │ ├─ VendorName: Adam Draper +│ │ └─ VendorUrl: http://numeraljs.com/ +│ ├─ object-assign@2.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/object-assign.git +│ │ ├─ 
VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ object-assign@3.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/object-assign.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ object-assign@4.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/object-assign.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ object-keys@1.0.1 +│ │ ├─ URL: git://github.com/ljharb/object-keys.git +│ │ └─ VendorName: Jordan Harband +│ ├─ object.omit@2.0.1 +│ │ ├─ URL: https://github.com/jonschlinkert/object.omit.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/object.omit +│ ├─ on-finished@2.3.0 +│ │ └─ URL: https://github.com/jshttp/on-finished.git +│ ├─ on-headers@1.0.2 +│ │ ├─ URL: https://github.com/jshttp/on-headers.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ optimist@0.6.1 +│ │ ├─ URL: http://github.com/substack/node-optimist.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ options@0.0.6 +│ │ ├─ URL: git://github.com/einaros/options.js.git +│ │ ├─ VendorName: Einar Otto Stangvik +│ │ └─ VendorUrl: http://2x.io +│ ├─ os-homedir@1.0.2 +│ │ ├─ URL: https://github.com/sindresorhus/os-homedir.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ os-locale@1.4.0 +│ │ ├─ URL: https://github.com/sindresorhus/os-locale.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ os-tmpdir@1.0.2 +│ │ ├─ URL: https://github.com/sindresorhus/os-tmpdir.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ output-file-sync@1.1.2 +│ │ ├─ URL: https://github.com/shinnn/output-file-sync.git +│ │ ├─ VendorName: Shinnosuke Watanabe +│ │ └─ VendorUrl: https://github.com/shinnn +│ ├─ parse-glob@3.0.4 +│ │ ├─ URL: https://github.com/jonschlinkert/parse-glob.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: 
https://github.com/jonschlinkert/parse-glob +│ ├─ parse-json@2.2.0 +│ │ ├─ URL: https://github.com/sindresorhus/parse-json.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ parsejson@0.0.1 +│ ├─ parseqs@0.0.2 +│ ├─ parseuri@0.0.2 +│ ├─ parseuri@0.0.4 +│ │ ├─ URL: https://github.com/get/parseuri.git +│ │ └─ VendorUrl: https://github.com/get/parseuri +│ ├─ parseurl@1.3.3 +│ │ └─ URL: https://github.com/pillarjs/parseurl.git +│ ├─ path-array@1.0.1 +│ │ ├─ URL: git://github.com/TooTallNate/node-path-array.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: https://github.com/TooTallNate/node-path-array +│ ├─ path-exists@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/path-exists.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ path-exists@2.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/path-exists.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ path-is-absolute@1.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/path-is-absolute.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ path-parse@1.0.6 +│ │ ├─ URL: https://github.com/jbgutierrez/path-parse.git +│ │ ├─ VendorName: Javier Blanco +│ │ └─ VendorUrl: https://github.com/jbgutierrez/path-parse#readme +│ ├─ path-root-regex@0.1.2 +│ │ ├─ URL: https://github.com/regexhq/path-root-regex.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/regexhq/path-root-regex +│ ├─ path-root@0.1.1 +│ │ ├─ URL: https://github.com/jonschlinkert/path-root.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/path-root +│ ├─ path-to-regexp@0.1.7 +│ │ └─ URL: https://github.com/component/path-to-regexp.git +│ ├─ path-type@1.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/path-type.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ pend@1.2.0 +│ │ ├─ URL: git://github.com/andrewrk/node-pend.git +│ │ └─ VendorName: 
Andrew Kelley +│ ├─ performance-now@0.2.0 +│ │ ├─ URL: git://github.com/meryn/performance-now.git +│ │ ├─ VendorName: Meryn Stol +│ │ └─ VendorUrl: https://github.com/meryn/performance-now +│ ├─ performance-now@2.1.0 +│ │ ├─ URL: git://github.com/braveg1rl/performance-now.git +│ │ ├─ VendorName: Braveg1rl +│ │ └─ VendorUrl: https://github.com/braveg1rl/performance-now +│ ├─ pify@2.3.0 +│ │ ├─ URL: https://github.com/sindresorhus/pify.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ pinkie-promise@2.0.1 +│ │ ├─ URL: https://github.com/floatdrop/pinkie-promise.git +│ │ ├─ VendorName: Vsevolod Strukchinsky +│ │ └─ VendorUrl: github.com/floatdrop +│ ├─ pinkie@2.0.4 +│ │ ├─ URL: https://github.com/floatdrop/pinkie.git +│ │ ├─ VendorName: Vsevolod Strukchinsky +│ │ └─ VendorUrl: github.com/floatdrop +│ ├─ portfinder@0.4.0 +│ │ ├─ URL: git@github.com:indexzero/node-portfinder.git +│ │ └─ VendorName: Charlie Robbins +│ ├─ preserve@0.2.0 +│ │ ├─ URL: git://github.com/jonschlinkert/preserve.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/preserve +│ ├─ private@0.1.8 +│ │ ├─ URL: git://github.com/benjamn/private.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/private +│ ├─ process-nextick-args@1.0.7 +│ │ ├─ URL: https://github.com/calvinmetcalf/process-nextick-args.git +│ │ └─ VendorUrl: https://github.com/calvinmetcalf/process-nextick-args +│ ├─ process-nextick-args@2.0.1 +│ │ ├─ URL: https://github.com/calvinmetcalf/process-nextick-args.git +│ │ └─ VendorUrl: https://github.com/calvinmetcalf/process-nextick-args +│ ├─ progress@1.1.8 +│ │ ├─ URL: git://github.com/visionmedia/node-progress +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ promise-map-series@0.2.3 +│ │ ├─ URL: https://github.com/joliss/promise-map-series +│ │ └─ VendorName: Jo Liss +│ ├─ promise@7.3.1 +│ │ ├─ URL: https://github.com/then/promise.git +│ │ └─ VendorName: ForbesLindesay +│ ├─ proxy-addr@2.0.5 +│ │ ├─ 
URL: https://github.com/jshttp/proxy-addr.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ prr@1.0.1 +│ │ ├─ URL: https://github.com/rvagg/prr.git +│ │ ├─ VendorName: Rod Vagg +│ │ └─ VendorUrl: https://github.com/rvagg/prr +│ ├─ psl@1.6.0 +│ │ ├─ URL: git@github.com:lupomontero/psl.git +│ │ ├─ VendorName: Lupo Montero +│ │ └─ VendorUrl: https://lupomontero.com/ +│ ├─ punycode@1.4.1 +│ │ ├─ URL: https://github.com/bestiejs/punycode.js.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: https://mths.be/punycode +│ ├─ punycode@2.1.1 +│ │ ├─ URL: https://github.com/bestiejs/punycode.js.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: https://mths.be/punycode +│ ├─ q@1.5.1 +│ │ ├─ URL: git://github.com/kriskowal/q.git +│ │ ├─ VendorName: Kris Kowal +│ │ └─ VendorUrl: https://github.com/kriskowal/q +│ ├─ quick-temp@0.1.3 +│ │ ├─ URL: https://github.com/joliss/node-quick-temp +│ │ └─ VendorName: Jo Liss +│ ├─ quick-temp@0.1.8 +│ │ ├─ URL: https://github.com/joliss/node-quick-temp +│ │ └─ VendorName: Jo Liss +│ ├─ qunitjs@1.23.1 +│ │ ├─ URL: git://github.com/jquery/qunit.git +│ │ ├─ VendorName: jQuery Foundation and other contributors +│ │ └─ VendorUrl: https://qunitjs.com/ +│ ├─ randomatic@3.1.1 +│ │ ├─ URL: https://github.com/jonschlinkert/randomatic.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/randomatic +│ ├─ range-parser@1.2.1 +│ │ ├─ URL: https://github.com/jshttp/range-parser.git +│ │ ├─ VendorName: TJ Holowaychuk +│ │ └─ VendorUrl: http://tjholowaychuk.com +│ ├─ raw-body@2.1.7 +│ │ ├─ URL: https://github.com/stream-utils/raw-body.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ raw-body@2.4.0 +│ │ ├─ URL: https://github.com/stream-utils/raw-body.git +│ │ ├─ VendorName: Jonathan Ong +│ │ └─ VendorUrl: http://jongleberry.com +│ ├─ read-pkg-up@1.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/read-pkg-up.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: 
sindresorhus.com +│ ├─ read-pkg@1.1.0 +│ │ ├─ URL: https://github.com/sindresorhus/read-pkg.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ readable-stream@1.1.13 +│ │ ├─ URL: git://github.com/isaacs/readable-stream +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ readable-stream@1.1.14 +│ │ ├─ URL: git://github.com/isaacs/readable-stream +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ readable-stream@2.0.6 +│ │ └─ URL: git://github.com/nodejs/readable-stream +│ ├─ readable-stream@2.3.6 +│ │ └─ URL: git://github.com/nodejs/readable-stream +│ ├─ readline2@0.1.1 +│ │ ├─ URL: https://github.com/SBoudrias/readline2.git +│ │ └─ VendorName: Simon Boudrias +│ ├─ recast@0.10.33 +│ │ ├─ URL: git://github.com/benjamn/recast.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/recast +│ ├─ recast@0.10.43 +│ │ ├─ URL: git://github.com/benjamn/recast.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/recast +│ ├─ recast@0.11.23 +│ │ ├─ URL: git://github.com/benjamn/recast.git +│ │ ├─ VendorName: Ben Newman +│ │ └─ VendorUrl: http://github.com/benjamn/recast +│ ├─ redent@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/redent.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ redeyed@0.5.0 +│ │ ├─ URL: git://github.com/thlorenz/redeyed.git +│ │ ├─ VendorName: Thorsten Lorenz +│ │ └─ VendorUrl: thlorenz.com +│ ├─ regenerate@1.4.0 +│ │ ├─ URL: https://github.com/mathiasbynens/regenerate.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: https://mths.be/regenerate +│ ├─ regex-cache@0.4.4 +│ │ ├─ URL: https://github.com/jonschlinkert/regex-cache.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/regex-cache +│ ├─ regexpu@1.3.0 +│ │ ├─ URL: https://github.com/mathiasbynens/regexpu.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: 
https://mths.be/regexpu +│ ├─ regjsgen@0.2.0 +│ │ ├─ URL: https://github.com/d10/regjsgen.git +│ │ ├─ VendorName: Benjamin Tan +│ │ └─ VendorUrl: https://github.com/d10/regjsgen +│ ├─ repeat-element@1.1.3 +│ │ ├─ URL: https://github.com/jonschlinkert/repeat-element.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/repeat-element +│ ├─ repeat-string@1.6.1 +│ │ ├─ URL: https://github.com/jonschlinkert/repeat-string.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/repeat-string +│ ├─ repeating@1.1.3 +│ │ ├─ URL: https://github.com/sindresorhus/repeating.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ repeating@2.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/repeating.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ request-progress@2.0.1 +│ │ ├─ URL: git://github.com/IndigoUnited/node-request-progress +│ │ ├─ VendorName: IndigoUnited +│ │ └─ VendorUrl: http://indigounited.com +│ ├─ require-directory@2.1.1 +│ │ ├─ URL: git://github.com/troygoode/node-require-directory.git +│ │ ├─ VendorName: Troy Goode +│ │ └─ VendorUrl: https://github.com/troygoode/node-require-directory/ +│ ├─ requires-port@1.0.0 +│ │ ├─ URL: https://github.com/unshiftio/requires-port +│ │ ├─ VendorName: Arnout Kazemier +│ │ └─ VendorUrl: https://github.com/unshiftio/requires-port +│ ├─ resolve-package-path@1.2.7 +│ ├─ resolve@1.14.1 +│ │ ├─ URL: git://github.com/browserify/resolve.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ retry@0.8.0 +│ │ ├─ URL: git://github.com/tim-kos/node-retry.git +│ │ ├─ VendorName: Tim Koschützki +│ │ └─ VendorUrl: https://github.com/tim-kos/node-retry +│ ├─ right-align@0.1.3 +│ │ ├─ URL: git://github.com/jonschlinkert/right-align.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/right-align +│ ├─ rimraf@2.2.8 +│ │ ├─ URL: 
git://github.com/isaacs/rimraf.git +│ │ ├─ VendorName: Isaac Z. Schlueter +│ │ └─ VendorUrl: http://blog.izs.me/ +│ ├─ rsvp@3.0.21 +│ │ ├─ URL: https://github.com/tildeio/rsvp.js.git +│ │ └─ VendorName: Tilde, Inc. & Stefan Penner +│ ├─ rsvp@3.2.1 +│ │ ├─ URL: https://github.com/tildeio/rsvp.js.git +│ │ └─ VendorName: Tilde, Inc. & Stefan Penner +│ ├─ rsvp@3.6.2 +│ │ ├─ URL: https://github.com/tildeio/rsvp.js.git +│ │ └─ VendorName: Tilde, Inc. & Stefan Penner +│ ├─ rsvp@4.8.5 +│ │ ├─ URL: https://github.com/tildeio/rsvp.js.git +│ │ ├─ VendorName: Tilde, Inc. & Stefan Penner +│ │ └─ VendorUrl: https://github.com/tildeio/rsvp.js +│ ├─ safe-buffer@5.1.2 +│ │ ├─ URL: git://github.com/feross/safe-buffer.git +│ │ ├─ VendorName: Feross Aboukhadijeh +│ │ └─ VendorUrl: https://github.com/feross/safe-buffer +│ ├─ safe-buffer@5.2.0 +│ │ ├─ URL: git://github.com/feross/safe-buffer.git +│ │ ├─ VendorName: Feross Aboukhadijeh +│ │ └─ VendorUrl: https://github.com/feross/safe-buffer +│ ├─ safer-buffer@2.1.2 +│ │ ├─ URL: git+https://github.com/ChALkeR/safer-buffer.git +│ │ ├─ VendorName: Nikita Skovoroda +│ │ └─ VendorUrl: https://github.com/ChALkeR +│ ├─ sane@1.7.0 +│ │ ├─ URL: https://github.com/amasad/sane +│ │ ├─ VendorName: amasad +│ │ └─ VendorUrl: https://github.com/amasad/sane +│ ├─ sass-graph@2.2.4 +│ │ ├─ URL: https://github.com/xzyfer/sass-graph.git +│ │ └─ VendorName: xzyfer +│ ├─ scss-tokenizer@0.2.3 +│ │ ├─ URL: https://github.com/sasstools/scss-tokenizer.git +│ │ ├─ VendorName: xzyfer +│ │ └─ VendorUrl: https://github.com/sasstools/scss-tokenizer +│ ├─ select2@4.0.0 +│ │ ├─ URL: git://github.com/select2/select2.git +│ │ ├─ VendorName: Kevin Brown +│ │ └─ VendorUrl: https://select2.github.io/ +│ ├─ send@0.17.1 +│ │ ├─ URL: https://github.com/pillarjs/send.git +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ serve-static@1.14.1 +│ │ ├─ URL: https://github.com/expressjs/serve-static.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ shebang-regex@1.0.0 +│ │ ├─ URL: 
https://github.com/sindresorhus/shebang-regex.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ simple-fmt@0.1.0 +│ │ ├─ URL: https://github.com/olov/simple-fmt.git +│ │ └─ VendorName: Olov Lassus +│ ├─ simple-is@0.2.0 +│ │ ├─ URL: https://github.com/olov/simple-is.git +│ │ └─ VendorName: Olov Lassus +│ ├─ slash@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/slash.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ socket.io-client-pure@1.3.12 +│ │ └─ URL: https://github.com/Automattic/socket.io-client.git +│ ├─ socket.io-parser@2.2.2 +│ │ └─ URL: https://github.com/Automattic/socket.io-parser.git +│ ├─ socket.io-parser@2.2.4 +│ │ └─ URL: https://github.com/Automattic/socket.io-parser.git +│ ├─ socket.io-pure@1.3.12 +│ │ └─ URL: git://github.com/Automattic/socket.io +│ ├─ source-map-support@0.2.10 +│ │ └─ URL: https://github.com/evanw/node-source-map-support +│ ├─ source-map-url@0.3.0 +│ │ ├─ URL: https://github.com/lydell/source-map-url.git +│ │ └─ VendorName: Simon Lydell +│ ├─ spawnback@1.0.0 +│ │ ├─ URL: git://github.com/scottgonzalez/spawnback.git +│ │ ├─ VendorName: Scott González +│ │ └─ VendorUrl: https://github.com/scottgonzalez/spawnback +│ ├─ spdx-expression-parse@3.0.0 +│ │ ├─ URL: https://github.com/jslicense/spdx-expression-parse.js.git +│ │ ├─ VendorName: Kyle E. 
Mitchell +│ │ └─ VendorUrl: http://kemitchell.com +│ ├─ sri-toolbox@0.2.0 +│ │ ├─ URL: https://github.com/neftaly/npm-sri-toolbox.git +│ │ └─ VendorName: Neftaly Hernandez +│ ├─ sshpk@1.16.1 +│ │ ├─ URL: git+https://github.com/joyent/node-sshpk.git +│ │ ├─ VendorName: Joyent, Inc +│ │ └─ VendorUrl: https://github.com/arekinath/node-sshpk#readme +│ ├─ stable@0.1.8 +│ │ ├─ URL: https://github.com/Two-Screen/stable.git +│ │ └─ VendorName: Angry Bytes +│ ├─ statuses@1.5.0 +│ │ └─ URL: https://github.com/jshttp/statuses.git +│ ├─ stdout-stream@1.4.1 +│ │ └─ URL: https://github.com/mafintosh/stdout-stream.git +│ ├─ string_decoder@0.10.31 +│ │ ├─ URL: git://github.com/rvagg/string_decoder.git +│ │ └─ VendorUrl: https://github.com/rvagg/string_decoder +│ ├─ string_decoder@1.1.1 +│ │ ├─ URL: git://github.com/nodejs/string_decoder.git +│ │ └─ VendorUrl: https://github.com/nodejs/string_decoder +│ ├─ string-width@1.0.2 +│ │ ├─ URL: https://github.com/sindresorhus/string-width.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ string-width@2.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/string-width.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ string.prototype.endswith@0.2.0 +│ │ ├─ URL: https://github.com/mathiasbynens/String.prototype.endsWith.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: http://mths.be/endswith +│ ├─ stringmap@0.2.2 +│ │ ├─ URL: https://github.com/olov/stringmap.git +│ │ └─ VendorName: Olov Lassus +│ ├─ stringset@0.2.1 +│ │ ├─ URL: https://github.com/olov/stringset.git +│ │ └─ VendorName: Olov Lassus +│ ├─ stringstream@0.0.6 +│ │ ├─ URL: https://github.com/mhart/StringStream.git +│ │ ├─ VendorName: Michael Hart +│ │ └─ VendorUrl: http://github.com/mhart +│ ├─ strip-ansi@0.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/strip-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ strip-ansi@0.3.0 +│ │ ├─ URL: 
https://github.com/sindresorhus/strip-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ strip-ansi@2.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/strip-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ strip-ansi@3.0.1 +│ │ ├─ URL: https://github.com/chalk/strip-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ strip-ansi@4.0.0 +│ │ ├─ URL: https://github.com/chalk/strip-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ strip-bom@2.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/strip-bom.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ strip-indent@1.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/strip-indent.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ strip-json-comments@1.0.4 +│ │ ├─ URL: https://github.com/sindresorhus/strip-json-comments.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ styled_string@0.0.1 +│ │ └─ VendorName: Toby Ho +│ ├─ supports-color@0.2.0 +│ │ ├─ URL: https://github.com/sindresorhus/supports-color.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ supports-color@2.0.0 +│ │ ├─ URL: https://github.com/chalk/supports-color.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ supports-color@5.5.0 +│ │ ├─ URL: https://github.com/chalk/supports-color.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ symlink-or-copy@1.3.1 +│ │ ├─ URL: https://github.com/broccolijs/node-symlink-or-copy +│ │ └─ VendorName: Jo Liss +│ ├─ tap-parser@1.3.2 +│ │ ├─ URL: git://github.com/substack/tap-parser.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/tap-parser +│ ├─ temp@0.8.1 +│ │ ├─ URL: git://github.com/bruce/node-temp.git +│ │ └─ VendorName: Bruce Williams +│ ├─ 
testem@0.9.11 +│ │ ├─ URL: git://github.com/airportyh/testem.git +│ │ └─ VendorName: Toby Ho +│ ├─ text-table@0.2.0 +│ │ ├─ URL: git://github.com/substack/text-table.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/text-table +│ ├─ textextensions@2.6.0 +│ │ ├─ URL: https://github.com/bevry/textextensions.git +│ │ ├─ VendorName: 2013+ Bevry Pty Ltd +│ │ └─ VendorUrl: https://github.com/bevry/textextensions +│ ├─ throttleit@1.0.0 +│ │ └─ URL: git://github.com/component/throttle.git +│ ├─ through@2.3.8 +│ │ ├─ URL: https://github.com/dominictarr/through.git +│ │ ├─ VendorName: Dominic Tarr +│ │ └─ VendorUrl: https://github.com/dominictarr/through +│ ├─ tiny-lr@0.2.0 +│ │ ├─ URL: git://github.com/mklabs/tiny-lr.git +│ │ ├─ VendorName: mklabs +│ │ └─ VendorUrl: https://github.com/mklabs/tiny-lr +│ ├─ tmp@0.0.28 +│ │ ├─ URL: git://github.com/raszi/node-tmp.git +│ │ ├─ VendorName: KARASZI István +│ │ └─ VendorUrl: http://github.com/raszi/node-tmp +│ ├─ to-array@0.1.3 +│ │ ├─ URL: git://github.com/Raynos/to-array.git +│ │ ├─ VendorName: Raynos +│ │ └─ VendorUrl: https://github.com/Raynos/to-array +│ ├─ to-fast-properties@1.0.3 +│ │ ├─ URL: https://github.com/sindresorhus/to-fast-properties.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ toidentifier@1.0.0 +│ │ ├─ URL: https://github.com/component/toidentifier.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ trim-newlines@1.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/trim-newlines.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ trim-right@1.0.1 +│ │ ├─ URL: https://github.com/sindresorhus/trim-right.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ try-resolve@1.0.1 +│ │ ├─ URL: https://github.com/sebmck/try-resolve.git +│ │ └─ VendorName: Sebastian McKenzie +│ ├─ tryor@0.1.2 +│ │ ├─ URL: https://github.com/olov/tryor.git +│ │ └─ VendorName: Olov Lassus +│ ├─ type-is@1.6.18 +│ │ └─ 
URL: https://github.com/jshttp/type-is.git +│ ├─ typedarray@0.0.6 +│ │ ├─ URL: git://github.com/substack/typedarray.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: https://github.com/substack/typedarray +│ ├─ uc.micro@1.0.6 +│ │ └─ URL: https://github.com/markdown-it/uc.micro.git +│ ├─ uglify-to-browserify@1.0.2 +│ │ ├─ URL: https://github.com/ForbesLindesay/uglify-to-browserify.git +│ │ └─ VendorName: ForbesLindesay +│ ├─ ultron@1.0.2 +│ │ ├─ URL: https://github.com/unshiftio/ultron +│ │ ├─ VendorName: Arnout Kazemier +│ │ └─ VendorUrl: https://github.com/unshiftio/ultron +│ ├─ umask@1.1.0 +│ │ ├─ URL: https://github.com/smikes/umask.git +│ │ ├─ VendorName: Sam Mikes +│ │ └─ VendorUrl: https://github.com/smikes/umask +│ ├─ underscore.string@2.3.3 +│ │ ├─ URL: https://github.com/epeli/underscore.string.git +│ │ └─ VendorUrl: http://epeli.github.com/underscore.string/ +│ ├─ underscore.string@3.3.5 +│ │ ├─ URL: https://github.com/epeli/underscore.string.git +│ │ └─ VendorUrl: http://epeli.github.com/underscore.string/ +│ ├─ underscore@1.9.1 +│ │ ├─ URL: git://github.com/jashkenas/underscore.git +│ │ ├─ VendorName: Jeremy Ashkenas +│ │ └─ VendorUrl: http://underscorejs.org/ +│ ├─ universalify@0.1.2 +│ │ ├─ URL: git+https://github.com/RyanZim/universalify.git +│ │ ├─ VendorName: Ryan Zimmerman +│ │ └─ VendorUrl: https://github.com/RyanZim/universalify#readme +│ ├─ unpipe@1.0.0 +│ │ ├─ URL: https://github.com/stream-utils/unpipe.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ user-home@1.1.1 +│ │ ├─ URL: https://github.com/sindresorhus/user-home.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: http://sindresorhus.com +│ ├─ username-sync@1.0.2 +│ │ ├─ URL: git@github.com:stefanpenner/username-sync +│ │ └─ VendorName: Stefan Penner +│ ├─ utf8@2.1.0 +│ │ ├─ URL: https://github.com/mathiasbynens/utf8.js.git +│ │ ├─ VendorName: Mathias Bynens +│ │ └─ VendorUrl: https://mths.be/utf8js +│ ├─ util-deprecate@1.0.2 +│ │ ├─ URL: 
git://github.com/TooTallNate/util-deprecate.git +│ │ ├─ VendorName: Nathan Rajlich +│ │ └─ VendorUrl: https://github.com/TooTallNate/util-deprecate +│ ├─ util-extend@1.0.3 +│ │ └─ URL: git://github.com/isaacs/util-extend +│ ├─ utils-merge@1.0.1 +│ │ ├─ URL: git://github.com/jaredhanson/utils-merge.git +│ │ ├─ VendorName: Jared Hanson +│ │ └─ VendorUrl: http://www.jaredhanson.net/ +│ ├─ uuid@2.0.3 +│ │ ├─ URL: https://github.com/defunctzombie/node-uuid.git +│ │ └─ VendorName: Robert Kieffer +│ ├─ uuid@3.3.3 +│ │ └─ URL: https://github.com/kelektiv/node-uuid.git +│ ├─ vary@1.1.2 +│ │ ├─ URL: https://github.com/jshttp/vary.git +│ │ └─ VendorName: Douglas Christopher Wilson +│ ├─ verror@1.10.0 +│ │ └─ URL: git://github.com/davepacheco/node-verror.git +│ ├─ walk-sync@0.1.3 +│ │ ├─ URL: https://github.com/joliss/node-walk-sync +│ │ └─ VendorName: Jo Liss +│ ├─ walk-sync@0.2.7 +│ │ ├─ URL: https://github.com/joliss/node-walk-sync +│ │ └─ VendorName: Jo Liss +│ ├─ walk-sync@0.3.4 +│ │ ├─ URL: https://github.com/joliss/node-walk-sync +│ │ └─ VendorName: Jo Liss +│ ├─ wcwidth@1.0.1 +│ │ ├─ URL: git+https://github.com/timoxley/wcwidth.git +│ │ ├─ VendorName: Tim Oxley +│ │ └─ VendorUrl: https://github.com/timoxley/wcwidth#readme +│ ├─ websocket-extensions@0.1.3 +│ │ ├─ URL: git://github.com/faye/websocket-extensions-node.git +│ │ ├─ VendorName: James Coglan +│ │ └─ VendorUrl: http://github.com/faye/websocket-extensions-node +│ ├─ window-size@0.1.0 +│ │ ├─ URL: https://github.com/jonschlinkert/window-size.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/window-size +│ ├─ window-size@0.1.4 +│ │ ├─ URL: https://github.com/jonschlinkert/window-size.git +│ │ ├─ VendorName: Jon Schlinkert +│ │ └─ VendorUrl: https://github.com/jonschlinkert/window-size +│ ├─ wordwrap@0.0.2 +│ │ ├─ URL: git://github.com/substack/node-wordwrap.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ wordwrap@0.0.3 +│ │ ├─ URL: 
git://github.com/substack/node-wordwrap.git +│ │ ├─ VendorName: James Halliday +│ │ └─ VendorUrl: http://substack.net +│ ├─ wrap-ansi@2.1.0 +│ │ ├─ URL: https://github.com/chalk/wrap-ansi.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ ws-pure@0.8.0 +│ │ ├─ URL: git://github.com/patocallaghan/ws.git +│ │ └─ VendorName: Pat O'Callaghan +│ ├─ xdg-basedir@2.0.0 +│ │ ├─ URL: https://github.com/sindresorhus/xdg-basedir.git +│ │ ├─ VendorName: Sindre Sorhus +│ │ └─ VendorUrl: sindresorhus.com +│ ├─ xmlhttprequest-ssl@1.5.1 +│ │ ├─ URL: git://github.com/mjwwit/node-XMLHttpRequest.git +│ │ └─ VendorName: Michael de Wit +│ ├─ xtend@4.0.2 +│ │ ├─ URL: git://github.com/Raynos/xtend.git +│ │ ├─ VendorName: Raynos +│ │ └─ VendorUrl: https://github.com/Raynos/xtend +│ ├─ yam@0.0.18 +│ │ ├─ URL: https://github.com/twokul/yam +│ │ ├─ VendorName: Alex Navasardyan +│ │ └─ VendorUrl: http://twokul.io +│ ├─ yargs@3.10.0 +│ │ ├─ URL: http://github.com/bcoe/yargs.git +│ │ ├─ VendorName: Alex Ford +│ │ └─ VendorUrl: http://CodeTunnel.com +│ ├─ yargs@3.27.0 +│ │ ├─ URL: http://github.com/bcoe/yargs.git +│ │ ├─ VendorName: Alex Ford +│ │ └─ VendorUrl: http://CodeTunnel.com +│ ├─ yargs@7.1.0 +│ │ ├─ URL: http://github.com/yargs/yargs.git +│ │ └─ VendorUrl: http://yargs.js.org/ +│ └─ yauzl@2.4.1 +│ ├─ URL: https://github.com/thejoshwolfe/yauzl.git +│ ├─ VendorName: Josh Wolfe +│ └─ VendorUrl: https://github.com/thejoshwolfe/yauzl +├─ MIT* +│ ├─ after@0.8.1 +│ │ ├─ URL: git://github.com/Raynos/after.git +│ │ └─ VendorName: Raynos +│ ├─ assert-plus@0.1.5 +│ │ ├─ URL: https://github.com/mcavage/node-assert-plus.git +│ │ └─ VendorName: Mark Cavage +│ ├─ async-foreach@0.1.3 +│ │ ├─ URL: git://github.com/cowboy/javascript-sync-async-foreach.git +│ │ ├─ VendorName: "Cowboy" Ben Alman +│ │ └─ VendorUrl: http://github.com/cowboy/javascript-sync-async-foreach +│ ├─ blob@0.0.4 +│ │ ├─ URL: git@github.com:rase-/blob.git +│ │ └─ VendorUrl: https://github.com/rase-/blob +│ 
├─ callsite@1.0.0 +│ │ └─ VendorName: TJ Holowaychuk +│ ├─ component-bind@1.0.0 +│ │ └─ URL: https://github.com/component/bind.git +│ ├─ component-emitter@1.1.2 +│ │ └─ URL: https://github.com/component/emitter.git +│ ├─ component-inherit@0.0.3 +│ │ └─ URL: https://github.com/component/inherit.git +│ ├─ engine.io-client-pure@1.5.9 +│ │ ├─ URL: https://github.com/Automattic/engine.io-client.git +│ │ └─ VendorUrl: http://socket.io/ +│ ├─ git-tools@0.1.4 +│ │ ├─ URL: git://github.com/scottgonzalez/node-git-tools.git +│ │ ├─ VendorName: Scott González +│ │ └─ VendorUrl: https://github.com/scottgonzalez/node-git-tools +│ ├─ indexof@0.0.1 +│ ├─ ms@0.6.2 +│ │ └─ URL: git://github.com/guille/ms.js.git +│ ├─ object-component@0.0.3 +│ └─ socket.io-adapter@0.3.1 +│ └─ URL: git://github.com/Automattic/socket.io-adapter.git +├─ Public Domain +│ └─ jsonify@0.0.0 +│ ├─ URL: http://github.com/substack/jsonify.git +│ ├─ VendorName: Douglas Crockford +│ └─ VendorUrl: http://crockford.com/ +├─ SEE LICENSE IN LICENSE +│ └─ sntp@1.0.9 +│ ├─ URL: git://github.com/hueniverse/sntp +│ ├─ VendorName: Eran Hammer +│ └─ VendorUrl: http://hueniverse.com +├─ UNKNOWN +│ ├─ base64id@0.1.0 +│ │ ├─ URL: https://github.com/faeldt/base64id.git +│ │ └─ VendorName: Kristian Faeldt +│ └─ colors@0.6.2 +│ ├─ URL: http://github.com/Marak/colors.js.git +│ ├─ VendorName: Marak Squires +│ └─ VendorUrl: https://github.com/Marak/colors.js +├─ Unlicense +│ ├─ spdx-license-ids@1.2.2 +│ │ ├─ URL: https://github.com/shinnn/spdx-license-ids.git +│ │ ├─ VendorName: Shinnosuke Watanabe +│ │ └─ VendorUrl: https://github.com/shinnn +│ └─ tweetnacl@0.14.5 +│ ├─ URL: https://github.com/dchest/tweetnacl-js.git +│ ├─ VendorName: TweetNaCl-js contributors +│ └─ VendorUrl: https://tweetnacl.js.org/ +├─ WTFPL +│ └─ sorted-object@1.0.0 +│ ├─ URL: git://github.com/domenic/sorted-object.git +│ ├─ VendorName: Domenic Denicola +│ └─ VendorUrl: http://domenic.me/ +└─ WTFPL OR ISC + └─ is-integer@1.0.7 + ├─ URL: 
git@github.com:parshap/js-is-integer + └─ VendorName: Parsha Pourkhomami +Done in 0.84s. diff --git a/LICENSE.txt b/LICENSE.txt index c8e90f27f293f..9db61788de794 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -210,7 +210,6 @@ See licenses/ for text of these licenses. Apache Software Foundation License 2.0 -------------------------------------- -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js files) hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java @@ -218,7 +217,7 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/data BSD 2-Clause ------------ -hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,lz4hc.c} +hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.{c|h} hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h @@ -230,52 +229,42 @@ hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/* hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c +hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/protobuf/protobuf/cpp_helpers.h +hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/gmock-1.7.0/*/*.{cc|h} hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-3.5.17.min.js 
+hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-v4.1.1.min.js MIT License ----------- -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-1.6.4.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-nvd3-1.0.9.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-route-1.6.4.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1 hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js -hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.4.1.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/node_modules/.bin/r.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/* hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL -======= -For 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/cJSON.[ch]: +uriparser2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2) +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/cJSON.[ch] -Copyright (c) 2009-2017 Dave Gamble and cJSON contributors +Boost Software License, Version 1.0 +------------- +asio-1.10.2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/asio-1.10.2) +rapidxml-1.13 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/rapidxml-1.13) +tr2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/tr2) -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
+Public Domain +------------- +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/json-bignum.js diff --git a/NOTICE-binary b/NOTICE-binary index 2f8a9241a8d00..2189de34e37ed 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -66,7 +66,7 @@ available from http://www.digip.org/jansson/. AWS SDK for Java -Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Copyright 2010-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. This product includes software developed by Amazon Technologies, Inc (http://www.amazon.com/). @@ -838,3 +838,56 @@ Copyright 2009-2018 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). + +---- +jaxb-api + +Notices for Jakarta XML Binding +This content is produced and maintained by the Jakarta XML Binding project. + +Project home: https://projects.eclipse.org/projects/ee4j.jaxb +Trademarks +Jakarta XML Binding is a trademark of the Eclipse Foundation. + +Copyright +All content is the property of the respective authors or their employers. For more information regarding authorship of content, please consult the listed source code repository logs. + +Declared Project Licenses +This program and the accompanying materials are made available under the terms of the Eclipse Distribution License v. 1.0 which is available at http://www.eclipse.org/org/documents/edl-v10.php. + +SPDX-License-Identifier: BSD-3-Clause + +Source Code +The project maintains the following source code repositories: + +https://github.com/eclipse-ee4j/jaxb-api +https://github.com/eclipse-ee4j/jaxb-tck +Third-party Content +This project leverages the following third party content. 
+ +Apache River (3.0.0) + +License: Apache-2.0 AND BSD-3-Clause +ASM 7 (n/a) + +License: BSD-3-Clause +Project: https://asm.ow2.io/ +Source: https://repository.ow2.org/nexus/#nexus-search;gav~org.ow2.asm~asm-commons~~~~kw,versionexpand +JTHarness (5.0) + +License: (GPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0) +Project: https://wiki.openjdk.java.net/display/CodeTools/JT+Harness +Source: http://hg.openjdk.java.net/code-tools/jtharness/ +normalize.css (3.0.2) + +License: MIT +SigTest (n/a) + +License: GPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 +Cryptography +Content may contain encryption software. The country in which you are currently +may have restrictions on the import, possession, and use, and/or re-export to +another country, of encryption software. BEFORE using any encryption software, +please check the country's laws, regulations and policies concerning the import, +possession, or use, and re-export of encryption software, to see if this is +permitted. \ No newline at end of file diff --git a/Jenkinsfile b/dev-support/Jenkinsfile similarity index 81% rename from Jenkinsfile rename to dev-support/Jenkinsfile index 17e74661fc471..9edd77b58e5c2 100644 --- a/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -23,7 +23,7 @@ pipeline { options { buildDiscarder(logRotator(numToKeepStr: '5')) - timeout (time: 5, unit: 'HOURS') + timeout (time: 48, unit: 'HOURS') timestamps() checkoutToSubdirectory('src') } @@ -35,7 +35,7 @@ pipeline { DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile" YETUS='yetus' // Branch or tag name. 
Yetus release tags are 'rel/X.Y.Z' - YETUS_VERSION='rel/0.11.1' + YETUS_VERSION='rel/0.14.0' } parameters { @@ -61,11 +61,8 @@ pipeline { steps { withCredentials( [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', - passwordVariable: 'GITHUB_PASSWORD', - usernameVariable: 'GITHUB_USER'), - usernamePassword(credentialsId: 'hadoopqa-at-asf-jira', - passwordVariable: 'JIRA_PASSWORD', - usernameVariable: 'JIRA_USER')]) { + passwordVariable: 'GITHUB_TOKEN', + usernameVariable: 'GITHUB_USER')]) { sh '''#!/usr/bin/env bash set -e @@ -96,8 +93,8 @@ pipeline { YETUS_ARGS+=("--basedir=${WORKSPACE}/${SOURCEDIR}") # our project defaults come from a personality file - # which will get loaded automatically by setting the project name YETUS_ARGS+=("--project=hadoop") + YETUS_ARGS+=("--personality=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/hadoop.sh") # lots of different output formats YETUS_ARGS+=("--brief-report-file=${WORKSPACE}/${PATCHDIR}/brief.txt") @@ -105,12 +102,7 @@ pipeline { YETUS_ARGS+=("--html-report-file=${WORKSPACE}/${PATCHDIR}/report.html") # enable writing back to Github - YETUS_ARGS+=(--github-password="${GITHUB_PASSWORD}") - YETUS_ARGS+=(--github-user=${GITHUB_USER}) - - # enable writing back to ASF JIRA - YETUS_ARGS+=(--jira-password="${JIRA_PASSWORD}") - YETUS_ARGS+=(--jira-user="${JIRA_USER}") + YETUS_ARGS+=(--github-token="${GITHUB_TOKEN}") # auto-kill any surefire stragglers during unit test runs YETUS_ARGS+=("--reapermode=kill") @@ -119,23 +111,20 @@ pipeline { # changing these to higher values may cause problems # with other jobs on systemd-enabled machines YETUS_ARGS+=("--proclimit=5500") - YETUS_ARGS+=("--dockermemlimit=20g") + YETUS_ARGS+=("--dockermemlimit=22g") - # -1 findbugs issues that show up prior to the patch being applied - YETUS_ARGS+=("--findbugs-strict-precheck") + # -1 spotbugs issues that show up prior to the patch being applied + YETUS_ARGS+=("--spotbugs-strict-precheck") # rsync these files back into the archive dir - 
YETUS_ARGS+=("--archive-list=checkstyle-errors.xml,findbugsXml.xml") + YETUS_ARGS+=("--archive-list=checkstyle-errors.xml,spotbugsXml.xml") # URL for user-side presentation in reports and such to our artifacts # (needs to match the archive bits below) YETUS_ARGS+=("--build-url-artifacts=artifact/out") # plugins to enable - YETUS_ARGS+=("--plugins=all") - - # use Hadoop's bundled shelldocs - YETUS_ARGS+=("--shelldocs=/testptch/hadoop/dev-support/bin/shelldocs") + YETUS_ARGS+=("--plugins=all,-jira") # don't let these tests cause -1s because we aren't really paying that # much attention to them @@ -145,6 +134,7 @@ pipeline { # Dockerfile since we don't want to use the auto-pulled version. YETUS_ARGS+=("--docker") YETUS_ARGS+=("--dockerfile=${DOCKERFILE}") + YETUS_ARGS+=("--mvn-custom-repos") # effectively treat dev-suport as a custom maven module YETUS_ARGS+=("--skip-dirs=dev-support") @@ -152,7 +142,11 @@ pipeline { # help keep the ASF boxes clean YETUS_ARGS+=("--sentinel") - # use emoji vote so it is easier to find the broken line + # custom javadoc goals + YETUS_ARGS+=("--mvn-javadoc-goals=process-sources,javadoc:javadoc-no-fork") + + # write Yetus report as GitHub comment (YETUS-1102) + YETUS_ARGS+=("--github-write-comment") YETUS_ARGS+=("--github-use-emoji-vote") "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" @@ -166,6 +160,19 @@ pipeline { post { always { script { + // Publish status if it was missed (YETUS-1059) + withCredentials( + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', + passwordVariable: 'GITHUB_TOKEN', + usernameVariable: 'GITHUB_USER')]) { + sh '''#!/usr/bin/env bash + YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}") + YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}") + TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh" + /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true + ''' + } + // Yetus output archiveArtifacts "${env.PATCHDIR}/**" // Publish the HTML report so 
that it can be looked at diff --git a/dev-support/bin/checkcompatibility.py b/dev-support/bin/checkcompatibility.py index ad1e9cbe47ff2..e8c0e26a712db 100755 --- a/dev-support/bin/checkcompatibility.py +++ b/dev-support/bin/checkcompatibility.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -30,33 +30,16 @@ import shutil import subprocess import sys -import urllib2 -try: - import argparse -except ImportError: - sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.") - sys.exit(2) +import urllib.request +import argparse # Various relative paths REPO_DIR = os.getcwd() def check_output(*popenargs, **kwargs): - r"""Run command with arguments and return its output as a byte string. - Backported from Python 2.7 as it's implemented as pure python on stdlib. - >>> check_output(['/usr/bin/python', '--version']) - Python 2.6.2 - """ - process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) - output, _ = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - error = subprocess.CalledProcessError(retcode, cmd) - error.output = output - raise error - return output + """ Run command with arguments and return its output as a string. """ + return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8') + def get_repo_dir(): """ Return the path to the top of the repo. 
""" @@ -139,7 +122,7 @@ def checkout_java_acc(force): url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz" scratch_dir = get_scratch_dir() path = os.path.join(scratch_dir, os.path.basename(url)) - jacc = urllib2.urlopen(url) + jacc = urllib.request.urlopen(url) with open(path, 'wb') as w: w.write(jacc.read()) @@ -192,9 +175,9 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations): if annotations is not None: annotations_path = os.path.join(get_scratch_dir(), "annotations.txt") - with file(annotations_path, "w") as f: + with open(annotations_path, "w") as f: for ann in annotations: - print >>f, ann + print(ann, file=f) args += ["-annotations-list", annotations_path] subprocess.check_call(args) @@ -264,8 +247,8 @@ def main(): parser.add_argument("--skip-build", action="store_true", help="Skip building the projects.") - parser.add_argument("src_rev", nargs=1, help="Source revision.") - parser.add_argument("dst_rev", nargs="?", default="HEAD", + parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.") + parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD", help="Destination revision. 
" + "If not specified, will use HEAD.") diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release index f4851d1c76498..fc0602ab18619 100755 --- a/dev-support/bin/create-release +++ b/dev-support/bin/create-release @@ -205,7 +205,8 @@ function set_defaults DOCKERRAN=false CPU_ARCH=$(echo "$MACHTYPE" | cut -d- -f1) - if [ "$CPU_ARCH" = "aarch64" ]; then + if [[ "$CPU_ARCH" = "aarch64" || "$CPU_ARCH" = "arm64" ]]; then + echo "Using aarch64 docker file" DOCKERFILE="${BASEDIR}/dev-support/docker/Dockerfile_aarch64" fi @@ -293,6 +294,7 @@ function usage echo "--security Emergency security release" echo "--sign Use .gnupg dir to sign the artifacts and jars" echo "--version=[version] Use an alternative version string" + echo "--mvnargs=[args] Extra Maven args to be provided when running mvn commands" } function option_parse @@ -347,6 +349,9 @@ function option_parse --version=*) HADOOP_VERSION=${i#*=} ;; + --mvnargs=*) + MVNEXTRAARGS=${i#*=} + ;; esac done @@ -413,6 +418,9 @@ function option_parse MVN_ARGS=("-Dmaven.repo.local=${MVNCACHE}") fi fi + if [ -n "$MVNEXTRAARGS" ]; then + MVN_ARGS+=("$MVNEXTRAARGS") + fi if [[ "${SECURITYRELEASE}" = true ]]; then if [[ ! -d "${BASEDIR}/hadoop-common-project/hadoop-common/src/site/markdown/release/${HADOOP_VERSION}" ]]; then @@ -506,7 +514,7 @@ function dockermode # we always force build with the OpenJDK JDK # but with the correct version - if [ "$CPU_ARCH" = "aarch64" ]; then + if [[ "$CPU_ARCH" = "aarch64" || "$CPU_ARCH" = "arm64" ]]; then echo "ENV JAVA_HOME /usr/lib/jvm/java-${JVM_VERSION}-openjdk-arm64" else echo "ENV JAVA_HOME /usr/lib/jvm/java-${JVM_VERSION}-openjdk-amd64" @@ -535,6 +543,10 @@ function makearelease big_console_header "Cleaning the Source Tree" + # Since CVE-2022-24765 in April 2022, git refuses to work in directories + # whose owner != the current user, unless explicitly told to trust it. 
+ git config --global --add safe.directory /build/source + # git clean to clear any remnants from previous build run "${GIT}" clean -xdf -e /patchprocess @@ -651,10 +663,12 @@ function signartifacts big_console_header "Signing the release" - for i in ${ARTIFACTS_DIR}/*; do + run cd "${ARTIFACTS_DIR}" + for i in *; do ${GPG} --use-agent --armor --output "${i}.asc" --detach-sig "${i}" sha512sum --tag "${i}" > "${i}.sha512" done + run cd "${BASEDIR}" if [[ "${ASFRELEASE}" = true ]]; then echo "Fetching the Apache Hadoop KEYS file..." diff --git a/dev-support/bin/dist-copynativelibs b/dev-support/bin/dist-copynativelibs index ffc82b8fb1b05..95de186e7e729 100755 --- a/dev-support/bin/dist-copynativelibs +++ b/dev-support/bin/dist-copynativelibs @@ -111,9 +111,6 @@ for i in "$@"; do --openssllibbundle=*) OPENSSLLIBBUNDLE=${i#*=} ;; - --snappybinbundle=*) - SNAPPYBINBUNDLE=${i#*=} - ;; --snappylib=*) SNAPPYLIB=${i#*=} ;; @@ -167,7 +164,7 @@ fi # Windows doesn't have a LIB_DIR, everything goes into bin -if [[ -d "${BIN_DIR}" ]] ; then +if [[ -d "${BIN_DIR}" && $(ls -A "${BIN_DIR}") ]] ; then mkdir -p "${TARGET_BIN_DIR}" cd "${BIN_DIR}" || exit 1 ${TAR} ./* | (cd "${TARGET_BIN_DIR}"/ || exit 1; ${UNTAR}) @@ -176,8 +173,6 @@ if [[ -d "${BIN_DIR}" ]] ; then exit 1 fi - bundle_native_bin "${SNAPPYBINBUNDLE}" "${SNAPPYLIBBUNDLE}" "snappy.lib" "snappy" "${SNAPPYLIB}" - bundle_native_bin "${ZSTDBINBUNDLE}" "${ZSTDLIBBUNDLE}" "zstd.lib" "zstd" "${ZSTDLIB}" bundle_native_bin "${OPENSSLBINBUNDLE}" "${OPENSSLLIBBUNDLE}" "openssl.lib" "crypto" "${OPENSSLLIB}" diff --git a/dev-support/bin/dist-layout-stitching b/dev-support/bin/dist-layout-stitching index 20e8cf27805e2..d4bfd8aaada3b 100755 --- a/dev-support/bin/dist-layout-stitching +++ b/dev-support/bin/dist-layout-stitching @@ -21,9 +21,6 @@ VERSION=$1 # project.build.directory BASEDIR=$2 -#hdds.version -HDDS_VERSION=$3 - function run() { declare res diff --git a/dev-support/bin/hadoop.sh b/dev-support/bin/hadoop.sh new file mode 
100755 index 0000000000000..7835b752f956a --- /dev/null +++ b/dev-support/bin/hadoop.sh @@ -0,0 +1,570 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# SHELLDOC-IGNORE +# +# Override these to match Apache Hadoop's requirements +personality_plugins "all,-ant,-gradle,-scalac,-scaladoc" + +## @description Globals specific to this personality +## @audience private +## @stability evolving +function personality_globals +{ + # shellcheck disable=SC2034 + BUILDTOOL=maven + #shellcheck disable=SC2034 + PATCH_BRANCH_DEFAULT=trunk + #shellcheck disable=SC2034 + PATCH_NAMING_RULE="https://cwiki.apache.org/confluence/display/HADOOP/How+To+Contribute" + #shellcheck disable=SC2034 + JIRA_ISSUE_RE='^(HADOOP|YARN|MAPREDUCE|HDFS)-[0-9]+$' + #shellcheck disable=SC2034 + GITHUB_REPO_DEFAULT="apache/hadoop" + + HADOOP_HOMEBREW_DIR=${HADOOP_HOMEBREW_DIR:-$(brew --prefix 2>/dev/null)} + if [[ -z "${HADOOP_HOMEBREW_DIR}" ]]; then + HADOOP_HOMEBREW_DIR=/usr/local + fi +} + +function personality_parse_args +{ + declare i + + for i in "$@"; do + case ${i} in + --hadoop-isal-prefix=*) + delete_parameter "${i}" + ISAL_HOME=${i#*=} + ;; + --hadoop-openssl-prefix=*) + delete_parameter "${i}" + OPENSSL_HOME=${i#*=} + ;; + 
--hadoop-snappy-prefix=*) + delete_parameter "${i}" + SNAPPY_HOME=${i#*=} + ;; + esac + done +} + +## @description Calculate the actual module ordering +## @audience private +## @stability evolving +## @param ordering +function hadoop_order +{ + declare ordering=$1 + declare hadoopm + + if [[ ${ordering} = normal ]]; then + hadoopm="${CHANGED_MODULES[*]}" + elif [[ ${ordering} = union ]]; then + hadoopm="${CHANGED_UNION_MODULES}" + elif [[ ${ordering} = mvnsrc ]]; then + hadoopm="${MAVEN_SRC_MODULES[*]}" + elif [[ ${ordering} = mvnsrctest ]]; then + hadoopm="${MAVEN_SRCTEST_MODULES[*]}" + else + hadoopm="${ordering}" + fi + echo "${hadoopm}" +} + +## @description Determine if it is safe to run parallel tests +## @audience private +## @stability evolving +## @param ordering +function hadoop_test_parallel +{ + if [[ -f "${BASEDIR}/pom.xml" ]]; then + HADOOP_VERSION=$(grep '<version>' "${BASEDIR}/pom.xml" \ + | head -1 \ + | "${SED}" -e 's|^ *<version>||' -e 's|</version>.*$||' \ + | cut -f1 -d- ) + export HADOOP_VERSION + else + return 1 + fi + + hmajor=${HADOOP_VERSION%%\.*} + hmajorminor=${HADOOP_VERSION%\.*} + hminor=${hmajorminor##*\.} + # ... 
and just for reference + #hmicro=${HADOOP_VERSION##*\.} + + # Apache Hadoop v2.8.0 was the first one to really + # get working parallel unit tests + if [[ ${hmajor} -lt 3 && ${hminor} -lt 8 ]]; then + return 1 + fi + + return 0 +} + +## @description Install extra modules for unit tests +## @audience private +## @stability evolving +## @param ordering +function hadoop_unittest_prereqs +{ + declare input=$1 + declare mods + declare need_common=0 + declare building_common=0 + declare module + declare flags + declare fn + + # prior to running unit tests, hdfs needs libhadoop.so built + # if we're building root, then this extra work is moot + + #shellcheck disable=SC2086 + mods=$(hadoop_order ${input}) + + for module in ${mods}; do + if [[ ${module} = hadoop-hdfs-project* ]]; then + need_common=1 + elif [[ ${module} = hadoop-common-project/hadoop-common + || ${module} = hadoop-common-project ]]; then + building_common=1 + elif [[ ${module} = . ]]; then + return + fi + done + + # Windows builds *ALWAYS* need hadoop-common compiled + case ${OSTYPE} in + Windows_NT|CYGWIN*|MINGW*|MSYS*) + need_common=1 + ;; + esac + + if [[ ${need_common} -eq 1 + && ${building_common} -eq 0 ]]; then + echo "unit test pre-reqs:" + module="hadoop-common-project/hadoop-common" + fn=$(module_file_fragment "${module}") + flags="$(hadoop_native_flags) $(yarn_ui2_flag)" + pushd "${BASEDIR}/${module}" >/dev/null || return 1 + # shellcheck disable=SC2086 + echo_and_redirect "${PATCH_DIR}/maven-unit-prereq-${fn}-install.txt" \ + "${MAVEN}" "${MAVEN_ARGS[@]}" install -DskipTests ${flags} + popd >/dev/null || return 1 + fi +} + +## @description Calculate the flags/settings for yarn-ui v2 build +## @description based upon the OS +## @audience private +## @stability evolving +function yarn_ui2_flag +{ + + if [[ ${BUILD_NATIVE} != true ]]; then + return + fi + + # Only tested on Linux/OSX so far; don't enable the profile on + # Windows until it has been verified + case ${OSTYPE} in + Linux) + # shellcheck 
disable=SC2086 + echo -Pyarn-ui + ;; + Darwin) + echo -Pyarn-ui + ;; + *) + # Do nothing + ;; + esac +} + +## @description Calculate the flags/settings for native code +## @description based upon the OS +## @audience private +## @stability evolving +function hadoop_native_flags +{ + if [[ ${BUILD_NATIVE} != true ]]; then + return + fi + + declare -a args + + # Based upon HADOOP-11937 + # + # Some notes: + # + # - getting fuse to compile on anything but Linux + # is always tricky. + # - Darwin assumes homebrew is in use. + # - HADOOP-12027 required for bzip2 on OS X. + # - bzip2 is broken in lots of places + # (the shared library is considered experimental) + # e.g, HADOOP-12027 for OS X. so no -Drequire.bzip2 + # + + args=("-Drequire.test.libhadoop") + + if [[ -d "${ISAL_HOME}/include" ]]; then + args=("${args[@]}" "-Disal.prefix=${ISAL_HOME}") + fi + + if [[ -d "${OPENSSL_HOME}/include" ]]; then + args=("${args[@]}" "-Dopenssl.prefix=${OPENSSL_HOME}") + elif [[ -d "${HADOOP_HOMEBREW_DIR}/opt/openssl/" ]]; then + args=("${args[@]}" "-Dopenssl.prefix=${HADOOP_HOMEBREW_DIR}/opt/openssl/") + fi + + if [[ -d "${SNAPPY_HOME}/include" ]]; then + args=("${args[@]}" "-Dsnappy.prefix=${SNAPPY_HOME}") + elif [[ -d "${HADOOP_HOMEBREW_DIR}/include/snappy.h" ]]; then + args=("${args[@]}" "-Dsnappy.prefix=${HADOOP_HOMEBREW_DIR}/opt/snappy") + fi + + case ${OSTYPE} in + Linux) + # shellcheck disable=SC2086 + echo \ + -Pnative \ + -Drequire.fuse \ + -Drequire.openssl \ + -Drequire.snappy \ + -Drequire.valgrind \ + -Drequire.zstd \ + "${args[@]}" + ;; + Darwin) + echo \ + "${args[@]}" \ + -Pnative \ + -Drequire.snappy \ + -Drequire.openssl + ;; + Windows_NT|CYGWIN*|MINGW*|MSYS*) + echo \ + "${args[@]}" \ + -Drequire.snappy -Drequire.openssl -Pnative-win + ;; + *) + echo \ + "${args[@]}" + ;; + esac +} + +## @description Queue up modules for this personality +## @audience private +## @stability evolving +## @param repostatus +## @param testtype +function personality_modules +{ + 
declare repostatus=$1 + declare testtype=$2 + declare extra="" + declare ordering="normal" + declare needflags=false + declare foundbats=false + declare flags + declare fn + declare i + declare hadoopm + + yetus_debug "Personality: ${repostatus} ${testtype}" + + clear_personality_queue + + case ${testtype} in + asflicense) + # this is very fast and provides the full path if we do it from + # the root of the source + personality_enqueue_module . + return + ;; + checkstyle) + ordering="union" + extra="-DskipTests" + ;; + compile) + ordering="union" + extra="-DskipTests" + needflags=true + + # if something in common changed, we build the whole world + if [[ "${CHANGED_MODULES[*]}" =~ hadoop-common ]]; then + yetus_debug "hadoop personality: javac + hadoop-common = ordering set to . " + ordering="." + fi + ;; + distclean) + ordering="." + extra="-DskipTests" + ;; + javadoc) + if [[ "${CHANGED_MODULES[*]}" =~ \. ]]; then + ordering=. + fi + + if [[ "${repostatus}" = patch && "${BUILDMODE}" = patch ]]; then + echo "javadoc pre-reqs:" + for i in hadoop-project \ + hadoop-common-project/hadoop-annotations; do + fn=$(module_file_fragment "${i}") + pushd "${BASEDIR}/${i}" >/dev/null || return 1 + echo "cd ${i}" + echo_and_redirect "${PATCH_DIR}/maven-${fn}-install.txt" \ + "${MAVEN}" "${MAVEN_ARGS[@]}" install + popd >/dev/null || return 1 + done + fi + extra="-Pdocs -DskipTests" + ;; + mvneclipse) + if [[ "${CHANGED_MODULES[*]}" =~ \. ]]; then + ordering=. + fi + ;; + mvninstall) + extra="-DskipTests" + if [[ "${repostatus}" = branch || "${BUILDMODE}" = full ]]; then + ordering=. + fi + ;; + mvnsite) + if [[ "${CHANGED_MODULES[*]}" =~ \. ]]; then + ordering=. + fi + ;; + unit) + if [[ "${BUILDMODE}" = full ]]; then + ordering=mvnsrc + elif [[ "${CHANGED_MODULES[*]}" =~ \. ]]; then + ordering=. 
+ fi + + if [[ ${TEST_PARALLEL} = "true" ]] ; then + if hadoop_test_parallel; then + extra="-Pparallel-tests" + if [[ -n ${TEST_THREADS:-} ]]; then + extra="${extra} -DtestsThreadCount=${TEST_THREADS}" + fi + fi + fi + needflags=true + hadoop_unittest_prereqs "${ordering}" + + if ! verify_needed_test javac; then + yetus_debug "hadoop: javac not requested" + if ! verify_needed_test native; then + yetus_debug "hadoop: native not requested" + yetus_debug "hadoop: adding -DskipTests to unit test" + extra="-DskipTests" + fi + fi + + for i in "${CHANGED_FILES[@]}"; do + if [[ "${i}" =~ \.bats ]]; then + foundbats=true + fi + done + + if ! verify_needed_test shellcheck && [[ ${foundbats} = false ]]; then + yetus_debug "hadoop: NO shell code change detected; disabling shelltest profile" + extra="${extra} -P!shelltest" + else + extra="${extra} -Pshelltest" + fi + ;; + *) + extra="-DskipTests" + ;; + esac + + if [[ ${needflags} = true ]]; then + flags="$(hadoop_native_flags) $(yarn_ui2_flag)" + extra="${extra} ${flags}" + fi + + extra="-Ptest-patch ${extra}" + for module in $(hadoop_order ${ordering}); do + # shellcheck disable=SC2086 + personality_enqueue_module ${module} ${extra} + done +} + +## @description Add tests based upon personality needs +## @audience private +## @stability evolving +## @param filename +function personality_file_tests +{ + declare filename=$1 + + yetus_debug "Using Hadoop-specific personality_file_tests" + + if [[ ${filename} =~ src/main/webapp ]]; then + yetus_debug "tests/webapp: ${filename}" + add_test shadedclient + elif [[ ${filename} =~ \.sh + || ${filename} =~ \.cmd + || ${filename} =~ src/scripts + || ${filename} =~ src/test/scripts + || ${filename} =~ src/main/bin + || ${filename} =~ shellprofile\.d + || ${filename} =~ src/main/conf + ]]; then + yetus_debug "tests/shell: ${filename}" + add_test mvnsite + add_test unit + elif [[ ${filename} =~ \.md$ + || ${filename} =~ \.md\.vm$ + || ${filename} =~ src/site + ]]; then + yetus_debug 
"tests/site: ${filename}" + add_test mvnsite + elif [[ ${filename} =~ \.c$ + || ${filename} =~ \.cc$ + || ${filename} =~ \.h$ + || ${filename} =~ \.hh$ + || ${filename} =~ \.proto$ + || ${filename} =~ \.cmake$ + || ${filename} =~ CMakeLists.txt + ]]; then + yetus_debug "tests/units: ${filename}" + add_test compile + add_test cc + add_test mvnsite + add_test javac + add_test unit + elif [[ ${filename} =~ build.xml$ + || ${filename} =~ pom.xml$ + || ${filename} =~ \.java$ + || ${filename} =~ src/main + ]]; then + yetus_debug "tests/javadoc+units: ${filename}" + add_test compile + add_test javac + add_test javadoc + add_test mvninstall + add_test mvnsite + add_test unit + add_test shadedclient + fi + + # if we change anything in here, e.g. the test scripts + # then run the client artifact tests + if [[ ${filename} =~ hadoop-client-modules ]]; then + add_test shadedclient + fi + + if [[ ${filename} =~ src/test ]]; then + yetus_debug "tests: src/test" + add_test unit + fi + + if [[ ${filename} =~ \.java$ ]]; then + add_test spotbugs + fi +} + +## @description Image to print on success +## @audience private +## @stability evolving +function hadoop_console_success +{ + printf "IF9fX19fX19fX18gCjwgU3VjY2VzcyEgPgogLS0tLS0tLS0tLSAKIFwgICAg"; + printf "IC9cICBfX18gIC9cCiAgXCAgIC8vIFwvICAgXC8gXFwKICAgICAoKCAgICBP"; + printf "IE8gICAgKSkKICAgICAgXFwgLyAgICAgXCAvLwogICAgICAgXC8gIHwgfCAg"; + printf "XC8gCiAgICAgICAgfCAgfCB8ICB8ICAKICAgICAgICB8ICB8IHwgIHwgIAog"; + printf "ICAgICAgIHwgICBvICAgfCAgCiAgICAgICAgfCB8ICAgfCB8ICAKICAgICAg"; + printf "ICB8bXwgICB8bXwgIAo" +} + +################################################### +# Hadoop project specific check of IT for shaded artifacts + +add_test_type shadedclient + +## @description check for test modules and add test/plugins as needed +## @audience private +## @stability evolving +function shadedclient_initialize +{ + maven_add_install shadedclient +} + +## @description build client facing shaded and non-shaded artifacts and test them 
+## @audience private +## @stability evolving +## @param repostatus +function shadedclient_rebuild +{ + declare repostatus=$1 + declare logfile="${PATCH_DIR}/${repostatus}-shadedclient.txt" + declare module + declare -a modules=() + + if [[ ${OSTYPE} = Windows_NT || + ${OSTYPE} =~ ^CYGWIN.* || + ${OSTYPE} =~ ^MINGW32.* || + ${OSTYPE} =~ ^MSYS.* ]]; then + echo "hadoop personality: building on windows, skipping check of client artifacts." + return 0 + fi + + yetus_debug "hadoop personality: seeing if we need the test of client artifacts." + for module in hadoop-client-modules/hadoop-client-check-invariants \ + hadoop-client-modules/hadoop-client-check-test-invariants \ + hadoop-client-modules/hadoop-client-integration-tests; do + if [ -d "${module}" ]; then + yetus_debug "hadoop personality: test module '${module}' is present." + modules+=(-pl "${module}") + fi + done + if [ ${#modules[@]} -eq 0 ]; then + echo "hadoop personality: no test modules present, skipping check of client artifacts." + return 0 + fi + + big_console_header "Checking client artifacts on ${repostatus} with shaded clients" + + echo_and_redirect "${logfile}" \ + "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \ + "${modules[@]}" \ + -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true + + big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients" + + echo_and_redirect "${logfile}" \ + "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \ + "${modules[@]}" \ + -DskipShade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true + + count=$("${GREP}" -c '\[ERROR\]' "${logfile}") + if [[ ${count} -gt 0 ]]; then + add_vote_table -1 shadedclient "${repostatus} has errors when building and testing our client artifacts." + return 1 + fi + + add_vote_table +1 shadedclient "${repostatus} has no errors when building and testing our client artifacts." 
+ return 0 +} diff --git a/dev-support/bin/test-patch b/dev-support/bin/test-patch index 8ff8119b3e086..5faf472d325e8 100755 --- a/dev-support/bin/test-patch +++ b/dev-support/bin/test-patch @@ -15,4 +15,4 @@ # limitations under the License. BINDIR=$(cd -P -- "$(dirname -- "${BASH_SOURCE-0}")" >/dev/null && pwd -P) -exec "${BINDIR}/yetus-wrapper" test-patch --project=hadoop --skip-dir=dev-support "$@" +exec "${BINDIR}/yetus-wrapper" test-patch --project=hadoop --skip-dirs=dev-support "$@" diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper index b0f71f105d85e..a93833767a201 100755 --- a/dev-support/bin/yetus-wrapper +++ b/dev-support/bin/yetus-wrapper @@ -77,7 +77,7 @@ WANTED="$1" shift ARGV=("$@") -HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0} +HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.14.0} BIN=$(yetus_abs "${BASH_SOURCE-$0}") BINDIR=$(dirname "${BIN}") @@ -144,7 +144,7 @@ else exit 1 fi -if [[ -n "${GPGBIN}" ]]; then +if [[ -n "${GPGBIN}" && ! "${HADOOP_SKIP_YETUS_VERIFICATION}" = true ]]; then if ! mkdir -p .gpg; then yetus_error "ERROR: yetus-dl: Unable to create ${HADOOP_PATCHPROCESS}/.gpg" exit 1 diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py deleted file mode 100755 index 8644299bba4a2..0000000000000 --- a/dev-support/determine-flaky-tests-hadoop.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Given a jenkins test job, this script examines all runs of the job done -# within specified period of time (number of days prior to the execution -# time of this script), and reports all failed tests. -# -# The output of this script includes a section for each run that has failed -# tests, with each failed test name listed. -# -# More importantly, at the end, it outputs a summary section to list all failed -# tests within all examined runs, and indicate how many runs a same test -# failed, and sorted all failed tests by how many runs each test failed. -# -# This way, when we see failed tests in PreCommit build, we can quickly tell -# whether a failed test is a new failure, or it failed before and how often it -# failed, so to have idea whether it may just be a flaky test. -# -# Of course, to be 100% sure about the reason of a test failure, closer look -# at the failed test for the specific run is necessary. 
-# -import sys -import platform -sysversion = sys.hexversion -onward30 = False -if sysversion < 0x020600F0: - sys.exit("Minimum supported python version is 2.6, the current version is " + - "Python" + platform.python_version()) - -if sysversion == 0x030000F0: - sys.exit("There is a known bug with Python" + platform.python_version() + - ", please try a different version"); - -if sysversion < 0x03000000: - import urllib2 -else: - onward30 = True - import urllib.request - -import datetime -import json as simplejson -import logging -from optparse import OptionParser -import time - -# Configuration -DEFAULT_JENKINS_URL = "https://builds.apache.org" -DEFAULT_JOB_NAME = "Hadoop-Common-trunk" -DEFAULT_NUM_PREVIOUS_DAYS = 14 -DEFAULT_TOP_NUM_FAILED_TEST = -1 - -SECONDS_PER_DAY = 86400 - -# total number of runs to examine -numRunsToExamine = 0 - -#summary mode -summary_mode = False - -#total number of errors -error_count = 0 - -""" Parse arguments """ -def parse_args(): - parser = OptionParser() - parser.add_option("-J", "--jenkins-url", type="string", - dest="jenkins_url", help="Jenkins URL", - default=DEFAULT_JENKINS_URL) - parser.add_option("-j", "--job-name", type="string", - dest="job_name", help="Job name to look at", - default=DEFAULT_JOB_NAME) - parser.add_option("-n", "--num-days", type="int", - dest="num_prev_days", help="Number of days to examine", - default=DEFAULT_NUM_PREVIOUS_DAYS) - parser.add_option("-t", "--top", type="int", - dest="num_failed_tests", - help="Summary Mode, only show top number of failed tests", - default=DEFAULT_TOP_NUM_FAILED_TEST) - - (options, args) = parser.parse_args() - if args: - parser.error("unexpected arguments: " + repr(args)) - return options - -""" Load data from specified url """ -def load_url_data(url): - if onward30: - ourl = urllib.request.urlopen(url) - codec = ourl.info().get_param('charset') - content = ourl.read().decode(codec) - data = simplejson.loads(content, strict=False) - else: - ourl = urllib2.urlopen(url) - data 
= simplejson.load(ourl, strict=False) - return data - -""" List all builds of the target project. """ -def list_builds(jenkins_url, job_name): - global summary_mode - url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( - jenkins=jenkins_url, - job_name=job_name) - - try: - data = load_url_data(url) - - except: - if not summary_mode: - logging.error("Could not fetch: %s" % url) - error_count += 1 - raise - return data['builds'] - -""" Find the names of any tests which failed in the given build output URL. """ -def find_failing_tests(testReportApiJson, jobConsoleOutput): - global summary_mode - global error_count - ret = set() - try: - data = load_url_data(testReportApiJson) - - except: - if not summary_mode: - logging.error(" Could not open testReport, check " + - jobConsoleOutput + " for why it was reported failed") - error_count += 1 - return ret - - for suite in data['suites']: - for cs in suite['cases']: - status = cs['status'] - errDetails = cs['errorDetails'] - if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): - ret.add(cs['className'] + "." 
+ cs['name']) - - if len(ret) == 0 and (not summary_mode): - logging.info(" No failed tests in testReport, check " + - jobConsoleOutput + " for why it was reported failed.") - return ret - -""" Iterate runs of specfied job within num_prev_days and collect results """ -def find_flaky_tests(jenkins_url, job_name, num_prev_days): - global numRunsToExamine - global summary_mode - all_failing = dict() - # First list all builds - builds = list_builds(jenkins_url, job_name) - - # Select only those in the last N days - min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days - builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time] - - # Filter out only those that failed - failing_build_urls = [(b['url'] , b['timestamp']) for b in builds - if (b['result'] in ('UNSTABLE', 'FAILURE'))] - - tnum = len(builds) - num = len(failing_build_urls) - numRunsToExamine = tnum - if not summary_mode: - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) - + ") that have failed tests in the past " + str(num_prev_days) + " days" - + ((".", ", as listed below:\n")[num > 0])) - - for failed_build_with_time in failing_build_urls: - failed_build = failed_build_with_time[0]; - jobConsoleOutput = failed_build + "Console"; - testReport = failed_build + "testReport"; - testReportApiJson = testReport + "/api/json"; - - ts = float(failed_build_with_time[1]) / 1000. 
- st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') - if not summary_mode: - logging.info("===>%s" % str(testReport) + " (" + st + ")") - failing = find_failing_tests(testReportApiJson, jobConsoleOutput) - if failing: - for ftest in failing: - if not summary_mode: - logging.info(" Failed test: %s" % ftest) - all_failing[ftest] = all_failing.get(ftest,0)+1 - - return all_failing - -def main(): - global numRunsToExamine - global summary_mode - logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - - # set up logger to write to stdout - soh = logging.StreamHandler(sys.stdout) - soh.setLevel(logging.INFO) - logger = logging.getLogger() - logger.removeHandler(logger.handlers[0]) - logger.addHandler(soh) - - opts = parse_args() - logging.info("****Recently FAILED builds in url: " + opts.jenkins_url - + "/job/" + opts.job_name + "") - - if opts.num_failed_tests != -1: - summary_mode = True - - all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, - opts.num_prev_days) - if len(all_failing) == 0: - raise SystemExit(0) - - if summary_mode and opts.num_failed_tests < len(all_failing): - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, top " + str(opts.num_failed_tests) + - " failed tests <#failedRuns: testName>:") - else: - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, all failed tests <#failedRuns: testName>:") - - # print summary section: all failed tests sorted by how many times they failed - line_count = 0 - for tn in sorted(all_failing, key=all_failing.get, reverse=True): - logging.info(" " + str(all_failing[tn])+ ": " + tn) - if summary_mode: - line_count += 1 - if line_count == opts.num_failed_tests: - break - - if summary_mode and error_count > 0: - logging.info("\n" + str(error_count) + " errors found, you may " - + "re-run in non summary mode to see error details."); - -if __name__ == "__main__": - main() diff --git a/dev-support/docker/Dockerfile 
b/dev-support/docker/Dockerfile index de416fa84093c..b2ee1062a0364 100644 --- a/dev-support/docker/Dockerfile +++ b/dev-support/docker/Dockerfile @@ -18,7 +18,7 @@ # Dockerfile for installing the necessary dependencies for building Hadoop. # See BUILDING.txt. -FROM ubuntu:xenial +FROM ubuntu:bionic WORKDIR /root @@ -44,9 +44,11 @@ ENV DEBCONF_TERSE true RUN apt-get -q update \ && apt-get -q install -y --no-install-recommends \ apt-utils \ + bats \ build-essential \ bzip2 \ clang \ + cmake \ curl \ doxygen \ fuse \ @@ -62,27 +64,47 @@ RUN apt-get -q update \ libsasl2-dev \ libsnappy-dev \ libssl-dev \ + libsnappy-dev \ libtool \ libzstd1-dev \ locales \ make \ pinentry-curses \ pkg-config \ - python \ - python2.7 \ - python-pip \ - python-pkg-resources \ - python-setuptools \ - python-wheel \ + python3 \ + python3-pip \ + python3-pkg-resources \ + python3-setuptools \ + python3-wheel \ rsync \ + shellcheck \ software-properties-common \ - snappy \ sudo \ valgrind \ zlib1g-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +RUN locale-gen en_US.UTF-8 +ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8' +ENV PYTHONIOENCODING=utf-8 + +###### +# Set env vars required to build Hadoop +###### +ENV MAVEN_HOME /usr +# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003) +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 + +####### +# Install SpotBugs 4.2.2 +####### +RUN mkdir -p /opt/spotbugs \ + && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \ + -o /opt/spotbugs.tgz \ + && tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs \ + && chmod +x /opt/spotbugs/bin/* +ENV SPOTBUGS_HOME /opt/spotbugs ####### # OpenJDK 8 @@ -93,20 +115,8 @@ RUN apt-get -q update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* - -###### -# Install cmake 3.1.0 (3.5.1 ships with Xenial) ###### -RUN mkdir -p /opt/cmake \ - && curl -L -s -S \ - https://cmake.org/files/v3.1/cmake-3.1.0-Linux-x86_64.tar.gz \ - -o 
/opt/cmake.tar.gz \ - && tar xzf /opt/cmake.tar.gz --strip-components 1 -C /opt/cmake -ENV CMAKE_HOME /opt/cmake -ENV PATH "${PATH}:/opt/cmake/bin" - -###### -# Install Google Protobuf 3.7.1 (2.6.0 ships with Xenial) +# Install Google Protobuf 3.7.1 (3.0.0 ships with Bionic) ###### # hadolint ignore=DL3003 RUN mkdir -p /opt/protobuf-src \ @@ -116,6 +126,7 @@ RUN mkdir -p /opt/protobuf-src \ && tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \ && cd /opt/protobuf-src \ && ./configure --prefix=/opt/protobuf \ + && make "-j$(nproc)" \ && make install \ && cd /root \ && rm -rf /opt/protobuf-src @@ -123,7 +134,7 @@ ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" ###### -# Install Apache Maven 3.3.9 (3.3.9 ships with Xenial) +# Install Apache Maven 3.6.0 (3.6.0 ships with Bionic) ###### # hadolint ignore=DL3008 RUN apt-get -q update \ @@ -131,66 +142,11 @@ RUN apt-get -q update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* ENV MAVEN_HOME /usr +# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003) +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 -###### -# Install findbugs 3.0.1 (3.0.1 ships with Xenial) -# Ant is needed for findbugs -###### -# hadolint ignore=DL3008 -RUN apt-get -q update \ - && apt-get -q install -y --no-install-recommends findbugs ant \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* -ENV FINDBUGS_HOME /usr - -#### -# Install shellcheck (0.4.6, the latest as of 2017-09-26) -#### -# hadolint ignore=DL3008 -RUN add-apt-repository -y ppa:hvr/ghc \ - && apt-get -q update \ - && apt-get -q install -y --no-install-recommends shellcheck ghc-8.0.2 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -#### -# Install bats (0.4.0, the latest as of 2017-09-26, ships with Xenial) -#### -# hadolint ignore=DL3008 -RUN apt-get -q update \ - && apt-get -q install -y --no-install-recommends bats \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -#### -# Install pylint at fixed version (2.0.0 removed 
python2 support) -# https://github.com/PyCQA/pylint/issues/2294 -#### -RUN pip2 install pylint==1.9.2 - -#### -# Install dateutil.parser -#### -RUN pip2 install python-dateutil==2.7.3 - -### -# Install node.js 8.17.0 for web UI framework (4.2.6 ships with Xenial) -### -RUN curl -L -s -S https://deb.nodesource.com/setup_8.x | bash - \ - && apt-get install -y --no-install-recommends nodejs=8.17.0-1nodesource1 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && npm install -g bower@1.8.8 - -### -## Install Yarn 1.12.1 for web UI framework -#### -RUN curl -s -S https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - \ - && echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list \ - && apt-get -q update \ - && apt-get install -y --no-install-recommends yarn=1.21.1-1 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +# Install pylint and python-dateutil +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 ### # Install hadolint @@ -202,10 +158,33 @@ RUN curl -L -s -S \ && shasum -a 512 /bin/hadolint | \ awk '$1!="734e37c1f6619cbbd86b9b249e69c9af8ee1ea87a2b1ff71dccda412e9dac35e63425225a95d71572091a3f0a11e9a04c2fc25d9e91b840530c26af32b9891ca" {exit(1)}' +###### +# Intel ISA-L 2.29.0 +###### +# hadolint ignore=DL3003,DL3008 +RUN mkdir -p /opt/isa-l-src \ + && apt-get -q update \ + && apt-get install -y --no-install-recommends automake yasm \ + && apt-get clean \ + && curl -L -s -S \ + https://github.com/intel/isa-l/archive/v2.29.0.tar.gz \ + -o /opt/isa-l.tar.gz \ + && tar xzf /opt/isa-l.tar.gz --strip-components 1 -C /opt/isa-l-src \ + && cd /opt/isa-l-src \ + && ./autogen.sh \ + && ./configure \ + && make "-j$(nproc)" \ + && make install \ + && cd /root \ + && rm -rf /opt/isa-l-src + ### # Avoid out of memory errors in builds ### -ENV MAVEN_OPTS -Xms256m -Xmx1536m +ENV MAVEN_OPTS -Xms256m -Xmx3072m + +# Skip gpg verification when downloading Yetus via yetus-wrapper +ENV HADOOP_SKIP_YETUS_VERIFICATION true ### # 
Everything past this point is either not needed for testing or breaks Yetus. @@ -213,7 +192,7 @@ ENV MAVEN_OPTS -Xms256m -Xmx1536m # YETUS CUT HERE ### -# Hugo static website generator (for new hadoop site and Ozone docs) +# Hugo static website generator for new hadoop site RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.58.3/hugo_0.58.3_Linux-64bit.deb \ && dpkg --install hugo.deb \ && rm hugo.deb diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64 index 8d3c3ad41ce02..29e9d83a39abc 100644 --- a/dev-support/docker/Dockerfile_aarch64 +++ b/dev-support/docker/Dockerfile_aarch64 @@ -17,7 +17,7 @@ # Dockerfile for installing the necessary dependencies for building Hadoop. # See BUILDING.txt. -FROM ubuntu:xenial +FROM ubuntu:focal WORKDIR /root @@ -35,17 +35,17 @@ ENV DEBCONF_TERSE true ###### # Install common dependencies from packages. Versions here are either # sufficient or irrelevant. -# -# WARNING: DO NOT PUT JAVA APPS HERE! Otherwise they will install default -# Ubuntu Java. See Java section below! 
###### # hadolint ignore=DL3008 RUN apt-get -q update \ && apt-get -q install -y --no-install-recommends \ + ant \ apt-utils \ + bats \ build-essential \ bzip2 \ clang \ + cmake \ curl \ doxygen \ fuse \ @@ -53,6 +53,8 @@ RUN apt-get -q update \ gcc \ git \ gnupg-agent \ + hugo \ + libbcprov-java \ libbz2-dev \ libcurl4-openssl-dev \ libfuse-dev \ @@ -62,59 +64,56 @@ RUN apt-get -q update \ libsnappy-dev \ libssl-dev \ libtool \ - libzstd1-dev \ + libzstd-dev \ locales \ make \ + maven \ + nodejs \ + node-yarn \ + npm \ + openjdk-11-jdk \ + openjdk-8-jdk \ + phantomjs \ pinentry-curses \ pkg-config \ - python \ python2.7 \ - python-pip \ - python-pkg-resources \ - python-setuptools \ - python-wheel \ + python3 \ + python3-pip \ + python3-pkg-resources \ + python3-setuptools \ + python3-wheel \ rsync \ + shellcheck \ software-properties-common \ - snappy \ sudo \ valgrind \ zlib1g-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* - -####### -# OpenJDK 8 -####### -# hadolint ignore=DL3008 -RUN apt-get -q update \ - && apt-get -q install -y --no-install-recommends openjdk-8-jdk libbcprov-java \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - +RUN locale-gen en_US.UTF-8 +ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8' +ENV PYTHONIOENCODING=utf-8 ###### -# Install cmake 3.1.0 (3.5.1 ships with Xenial) -# There is no cmake binary available for aarch64. Build from source. 
+# Set env vars required to build Hadoop ###### -# hadolint ignore=DL3003 -RUN mkdir -p /opt/cmake/src \ - && curl -L -s -S \ - https://cmake.org/files/v3.1/cmake-3.1.0-1-src.tar.bz2 \ - -o /opt/cmake/cmake-src.tar.bz2 \ - && tar xvjf /opt/cmake/cmake-src.tar.bz2 -C /opt/cmake/src \ - && cd /opt/cmake/src \ - && tar xvjf cmake-3.1.0.tar.bz2 \ - && cd cmake-3.1.0 && patch -p0 -i ../cmake-3.1.0-1.patch && mkdir .build && cd .build \ - && ../bootstrap --parallel=2 \ - && make -j2 && ./bin/cpack \ - && tar xzf cmake-3.1.0-Linux-aarch64.tar.gz --strip-components 1 -C /opt/cmake \ - && cd /opt/cmake && rm -rf /opt/cmake/src -ENV CMAKE_HOME /opt/cmake -ENV PATH "${PATH}:/opt/cmake/bin" +ENV MAVEN_HOME /usr +# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003) +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-arm64 + +####### +# Install SpotBugs 4.2.2 +####### +RUN mkdir -p /opt/spotbugs \ + && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \ + -o /opt/spotbugs.tgz \ + && tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs \ + && chmod +x /opt/spotbugs/bin/* +ENV SPOTBUGS_HOME /opt/spotbugs ###### -# Install Google Protobuf 3.7.1 (2.6.0 ships with Xenial) +# Install Google Protobuf 3.7.1 (3.6.1 ships with Focal) ###### # hadolint ignore=DL3003 RUN mkdir -p /opt/protobuf-src \ @@ -124,97 +123,34 @@ RUN mkdir -p /opt/protobuf-src \ && tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \ && cd /opt/protobuf-src \ && ./configure --prefix=/opt/protobuf \ + && make "-j$(nproc)" \ && make install \ && cd /root \ && rm -rf /opt/protobuf-src ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" -###### -# Install Apache Maven 3.3.9 (3.3.9 ships with Xenial) -###### -# hadolint ignore=DL3008 -RUN apt-get -q update \ - && apt-get -q install -y --no-install-recommends maven \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* -ENV MAVEN_HOME /usr - -###### -# Install findbugs 3.0.1 (3.0.1 ships 
with Xenial) -# Ant is needed for findbugs -###### -# hadolint ignore=DL3008 -RUN apt-get -q update \ - && apt-get -q install -y --no-install-recommends findbugs ant \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* -ENV FINDBUGS_HOME /usr - #### -# Install shellcheck (0.4.6, the latest as of 2017-09-26) +# Install pylint and python-dateutil #### -# hadolint ignore=DL3008 -RUN add-apt-repository -y ppa:hvr/ghc \ - && apt-get -q update \ - && apt-get -q install -y --no-install-recommends shellcheck \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### -# Install bats (0.4.0, the latest as of 2017-09-26, ships with Xenial) +# Install bower #### # hadolint ignore=DL3008 -RUN apt-get -q update \ - && apt-get -q install -y --no-install-recommends bats \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -#### -# Install pylint at fixed version (2.0.0 removed python2 support) -# https://github.com/PyCQA/pylint/issues/2294 -#### -RUN pip2 install pylint==1.9.2 - -#### -# Install dateutil.parser -#### -RUN pip2 install python-dateutil==2.7.3 +RUN npm install -g bower@1.8.8 ### -# Install node.js 8.17.0 for web UI framework (4.2.6 ships with Xenial) -### -RUN curl -L -s -S https://deb.nodesource.com/setup_8.x | bash - \ - && apt-get install -y --no-install-recommends nodejs=8.17.0-1nodesource1 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && npm install -g bower@1.8.8 - +# Avoid out of memory errors in builds ### -## Install Yarn 1.12.1 for web UI framework -#### -RUN curl -s -S https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - \ - && echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list \ - && apt-get -q update \ - && apt-get install -y --no-install-recommends yarn=1.21.1-1 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +ENV MAVEN_OPTS -Xms256m -Xmx3072m -### -# Install phantomjs built for aarch64 -#### -RUN mkdir -p /opt/phantomjs \ - 
&& curl -L -s -S \ - https://github.com/liusheng/phantomjs/releases/download/2.1.1/phantomjs-2.1.1-linux-aarch64.tar.bz2 \ - -o /opt/phantomjs/phantomjs-2.1.1-linux-aarch64.tar.bz2 \ - && tar xvjf /opt/phantomjs/phantomjs-2.1.1-linux-aarch64.tar.bz2 --strip-components 1 -C /opt/phantomjs \ - && cp /opt/phantomjs/bin/phantomjs /usr/bin/ \ - && rm -rf /opt/phantomjs +# Skip gpg verification when downloading Yetus via yetus-wrapper +ENV HADOOP_SKIP_YETUS_VERIFICATION true -### -# Avoid out of memory errors in builds -### -ENV MAVEN_OPTS -Xms256m -Xmx1536m +# Force PhantomJS to be in 'headless' mode, do not connect to Xwindow +ENV QT_QPA_PLATFORM offscreen ### # Everything past this point is either not needed for testing or breaks Yetus. @@ -222,12 +158,6 @@ ENV MAVEN_OPTS -Xms256m -Xmx1536m # YETUS CUT HERE ### -# Hugo static website generator (for new hadoop site docs) -RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.58.3/hugo_0.58.3_Linux-ARM64.deb \ - && dpkg --install hugo.deb \ - && rm hugo.deb - - # Add a welcome message and environment checks. COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh RUN chmod 755 /root/hadoop_env_checks.sh diff --git a/dev-support/git-jira-validation/README.md b/dev-support/git-jira-validation/README.md new file mode 100644 index 0000000000000..308c54228d17c --- /dev/null +++ b/dev-support/git-jira-validation/README.md @@ -0,0 +1,134 @@ + + +Apache Hadoop Git/Jira FixVersion validation +============================================================ + +Git commits in Apache Hadoop contains Jira number of the format +HADOOP-XXXX or HDFS-XXXX or YARN-XXXX or MAPREDUCE-XXXX. +While creating a release candidate, we also include changelist +and this changelist can be identified based on Fixed/Closed Jiras +with the correct fix versions. However, sometimes we face few +inconsistencies between fixed Jira and Git commit message. 
+ +git_jira_fix_version_check.py script takes care of +identifying all git commits with commit +messages with any of these issues: + +1. commit is reverted as per commit message +2. commit does not contain Jira number format in message +3. Jira does not have expected fixVersion +4. Jira has expected fixVersion, but it is not yet resolved + +Moreover, this script also finds any resolved Jira with expected +fixVersion but without any corresponding commit present. + +This should be useful as part of RC preparation. + +git_jira_fix_version_check supports python3 and it requires +installation of the jira package: + +``` +$ python3 --version +Python 3.9.7 + +$ python3 -m venv ./venv + +$ ./venv/bin/pip install -r dev-support/git-jira-validation/requirements.txt + +$ ./venv/bin/python dev-support/git-jira-validation/git_jira_fix_version_check.py + +``` + +The script also requires the inputs below: +``` +1. First commit hash to start excluding commits from history: + Usually we can provide latest commit hash from last tagged release + so that the script will only loop through the commits in git commit + history that are newer than this commit hash. e.g for 3.3.2 release, we can provide + git hash: fa4915fdbbbec434ab41786cb17b82938a613f16 + because this commit bumps up hadoop pom versions to 3.3.2: + https://github.com/apache/hadoop/commit/fa4915fdbbbec434ab41786cb17b82938a613f16 + +2. Fix Version: + Exact fixVersion that we would like to compare all Jira's fixVersions + with. e.g for 3.3.2 release, it should be 3.3.2. + +3. JIRA Project Name: + The exact, case-sensitive name of the project, e.g HADOOP / OZONE + +4. Path of project's working dir with release branch checked-in: + Path of the project whose git hashes we want to compare. Local fork + of the project should be up to date with upstream and expected release + branch should be checked out. + +5.
Jira server url (default url: https://issues.apache.org/jira): + Default value of server points to ASF Jiras but this script can be + used outside of ASF Jira too. +``` + + +Example of script execution: +``` +JIRA Project Name (e.g HADOOP / OZONE etc): HADOOP +First commit hash to start excluding commits from history: fa4915fdbbbec434ab41786cb17b82938a613f16 +Fix Version: 3.3.2 +Jira server url (default: https://issues.apache.org/jira): +Path of project's working dir with release branch checked-in: /Users/vjasani/Documents/src/hadoop-3.3/hadoop + +Check git status output and verify expected branch + +On branch branch-3.3.2 +Your branch is up to date with 'origin/branch-3.3.2'. + +nothing to commit, working tree clean + + +Jira/Git commit message diff starting: ############################################## +Jira not present with version: 3.3.2. Commit: 8cd8e435fb43a251467ca74fadcb14f21a3e8163 HADOOP-17198. Support S3 Access Points (#3260) (branch-3.3.2) (#3955) +WARN: Jira not found. Commit: 8af28b7cca5c6020de94e739e5373afc69f399e5 Updated the index as per 3.3.2 release +WARN: Jira not found. Commit: e42e483d0085aa46543ebcb1196dd155ddb447d0 Make upstream aware of 3.3.1 release +Commit seems reverted. Commit: 6db1165380cd308fb74c9d17a35c1e57174d1e09 Revert "HDFS-14099. Unknown frame descriptor when decompressing multiple frames (#3836)" +Commit seems reverted. Commit: 1e3f94fa3c3d4a951d4f7438bc13e6f008f228f4 Revert "HDFS-16333. fix balancer bug when transfer an EC block (#3679)" +Jira not present with version: 3.3.2. Commit: ce0bc7b473a62a580c1227a4de6b10b64b045d3a HDFS-16344. Improve DirectoryScanner.Stats#toString (#3695) +Jira not present with version: 3.3.2. Commit: 30f0629d6e6f735c9f4808022f1a1827c5531f75 HDFS-16339. Show the threshold when mover threads quota is exceeded (#3689) +Jira not present with version: 3.3.2. Commit: e449daccf486219e3050254d667b74f92e8fc476 YARN-11007. Correct words in YARN documents (#3680) +Commit seems reverted. 
Commit: 5c189797828e60a3329fd920ecfb99bcbccfd82d Revert "HDFS-16336. Addendum: De-flake TestRollingUpgrade#testRollback (#3686)" +Jira not present with version: 3.3.2. Commit: 544dffd179ed756bc163e4899e899a05b93d9234 HDFS-16171. De-flake testDecommissionStatus (#3280) +Jira not present with version: 3.3.2. Commit: c6914b1cb6e4cab8263cd3ae5cc00bc7a8de25de HDFS-16350. Datanode start time should be set after RPC server starts successfully (#3711) +Jira not present with version: 3.3.2. Commit: 328d3b84dfda9399021ccd1e3b7afd707e98912d HDFS-16336. Addendum: De-flake TestRollingUpgrade#testRollback (#3686) +Jira not present with version: 3.3.2. Commit: 3ae8d4ccb911c9ababd871824a2fafbb0272c016 HDFS-16336. De-flake TestRollingUpgrade#testRollback (#3686) +Jira not present with version: 3.3.2. Commit: 15d3448e25c797b7d0d401afdec54683055d4bb5 HADOOP-17975. Fallback to simple auth does not work for a secondary DistributedFileSystem instance. (#3579) +Jira not present with version: 3.3.2. Commit: dd50261219de71eaa0a1ad28529953e12dfb92e0 YARN-10991. Fix to ignore the grouping "[]" for resourcesStr in parseResourcesString method (#3592) +Jira not present with version: 3.3.2. Commit: ef462b21bf03b10361d2f9ea7b47d0f7360e517f HDFS-16332. Handle invalid token exception in sasl handshake (#3677) +WARN: Jira not found. Commit: b55edde7071419410ea5bea4ce6462b980e48f5b Also update hadoop.version to 3.3.2 +... +... +... +Found first commit hash after which git history is redundant. 
commit: fa4915fdbbbec434ab41786cb17b82938a613f16 +Exiting successfully +Jira/Git commit message diff completed: ############################################## + +Any resolved Jira with fixVersion 3.3.2 but corresponding commit not present +Starting diff: ############################################## +HADOOP-18066 is marked resolved with fixVersion 3.3.2 but no corresponding commit found +HADOOP-17936 is marked resolved with fixVersion 3.3.2 but no corresponding commit found +Completed diff: ############################################## + + +``` + diff --git a/dev-support/git-jira-validation/git_jira_fix_version_check.py b/dev-support/git-jira-validation/git_jira_fix_version_check.py new file mode 100644 index 0000000000000..513cc8e25ffdb --- /dev/null +++ b/dev-support/git-jira-validation/git_jira_fix_version_check.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +############################################################################ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +############################################################################ +"""An application to assist Release Managers with ensuring that histories in +Git and fixVersions in JIRA are in agreement. See README.md for a detailed +explanation. 
+""" + + +import os +import re +import subprocess + +from jira import JIRA + +jira_project_name = input("JIRA Project Name (e.g HADOOP / OZONE etc): ") \ + or "HADOOP" +# Define project_jira_keys with - appended. e.g for HADOOP Jiras, +# project_jira_keys should include HADOOP-, HDFS-, YARN-, MAPREDUCE- +project_jira_keys = [jira_project_name + '-'] +if jira_project_name == 'HADOOP': + project_jira_keys.append('HDFS-') + project_jira_keys.append('YARN-') + project_jira_keys.append('MAPREDUCE-') + +first_exclude_commit_hash = input("First commit hash to start excluding commits from history: ") +fix_version = input("Fix Version: ") + +jira_server_url = input( + "Jira server url (default: https://issues.apache.org/jira): ") \ + or "https://issues.apache.org/jira" + +jira = JIRA(server=jira_server_url) + +local_project_dir = input("Path of project's working dir with release branch checked-in: ") +os.chdir(local_project_dir) + +GIT_STATUS_MSG = subprocess.check_output(['git', 'status']).decode("utf-8") +print('\nCheck git status output and verify expected branch\n') +print(GIT_STATUS_MSG) + +print('\nJira/Git commit message diff starting: ##############################################') + +issue_set_from_commit_msg = set() + +for commit in subprocess.check_output(['git', 'log', '--pretty=oneline']).decode( + "utf-8").splitlines(): + if commit.startswith(first_exclude_commit_hash): + print("Found first commit hash after which git history is redundant. commit: " + + first_exclude_commit_hash) + print("Exiting successfully") + break + if re.search('revert', commit, re.IGNORECASE): + print("Commit seems reverted. \t\t\t Commit: " + commit) + continue + ACTUAL_PROJECT_JIRA = None + matches = re.findall('|'.join(project_jira_keys), commit) + if matches: + ACTUAL_PROJECT_JIRA = matches[0] + if not ACTUAL_PROJECT_JIRA: + print("WARN: Jira not found. 
\t\t\t Commit: " + commit) + continue + JIRA_NUM = '' + for c in commit.split(ACTUAL_PROJECT_JIRA)[1]: + if c.isdigit(): + JIRA_NUM = JIRA_NUM + c + else: + break + issue = jira.issue(ACTUAL_PROJECT_JIRA + JIRA_NUM) + EXPECTED_FIX_VERSION = False + for version in issue.fields.fixVersions: + if version.name == fix_version: + EXPECTED_FIX_VERSION = True + break + if not EXPECTED_FIX_VERSION: + print("Jira not present with version: " + fix_version + ". \t Commit: " + commit) + continue + if issue.fields.status is None or issue.fields.status.name not in ('Resolved', 'Closed'): + print("Jira is not resolved yet? \t\t Commit: " + commit) + else: + # This means Jira corresponding to current commit message is resolved with expected + # fixVersion. + # This is no-op by default, if needed, convert to print statement. + issue_set_from_commit_msg.add(ACTUAL_PROJECT_JIRA + JIRA_NUM) + +print('Jira/Git commit message diff completed: ##############################################') + +print('\nAny resolved Jira with fixVersion ' + fix_version + + ' but corresponding commit not present') +print('Starting diff: ##############################################') +all_issues_with_fix_version = jira.search_issues( + 'project=' + jira_project_name + ' and status in (Resolved,Closed) and fixVersion=' + + fix_version) + +for issue in all_issues_with_fix_version: + if issue.key not in issue_set_from_commit_msg: + print(issue.key + ' is marked resolved with fixVersion ' + fix_version + + ' but no corresponding commit found') + +print('Completed diff: ##############################################') diff --git a/dev-support/git-jira-validation/requirements.txt b/dev-support/git-jira-validation/requirements.txt new file mode 100644 index 0000000000000..ae7535a119fa9 --- /dev/null +++ b/dev-support/git-jira-validation/requirements.txt @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +jira==3.1.1 diff --git a/hadoop-assemblies/pom.xml b/hadoop-assemblies/pom.xml index b0fd7325c6eb1..85deac7d6818c 100644 --- a/hadoop-assemblies/pom.xml +++ b/hadoop-assemblies/pom.xml @@ -23,11 +23,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../hadoop-project hadoop-assemblies - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Assemblies Apache Hadoop Assemblies diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-dynamometer.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-dynamometer.xml index 448035262e12d..b2ce562231c5a 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-dynamometer.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-dynamometer.xml @@ -66,7 +66,7 @@ org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-hdfs-nfs-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-hdfs-nfs-dist.xml index 0edfdeb7b0d52..af5d89d7efe48 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-hdfs-nfs-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-hdfs-nfs-dist.xml @@ -40,7 +40,7 @@ org.apache.hadoop:hadoop-hdfs org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j 
org.hsqldb:hsqldb diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-httpfs-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-httpfs-dist.xml index d698a3005d429..bec2f94b95ea1 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-httpfs-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-httpfs-dist.xml @@ -69,7 +69,7 @@ org.apache.hadoop:hadoop-hdfs org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j org.hsqldb:hsqldb diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-kms-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-kms-dist.xml index ff6f99080cafd..e5e6834b04206 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-kms-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-kms-dist.xml @@ -69,7 +69,7 @@ org.apache.hadoop:hadoop-hdfs org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j org.hsqldb:hsqldb diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml index 06a55d6d06a72..28d5ebe9f605d 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml @@ -179,7 +179,7 @@ org.apache.hadoop:hadoop-hdfs org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j org.hsqldb:hsqldb jdiff:jdiff:jar diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-nfs-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-nfs-dist.xml index cb3d9cdf24978..59000c071131c 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-nfs-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-nfs-dist.xml @@ -40,7 +40,7 @@ org.apache.hadoop:hadoop-hdfs org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j org.hsqldb:hsqldb diff 
--git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml index 7c725d73e041e..871694209393f 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml @@ -56,6 +56,7 @@ **/build/** **/file:/** **/SecurityAuth.audit* + patchprocess/** diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml index 054d8c0ace2bd..1b9140f419b4b 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml @@ -214,7 +214,7 @@ org.apache.hadoop:hadoop-pipes org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml index 4da4ac5acb98b..cd86ce4e41766 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml @@ -309,7 +309,7 @@ org.apache.hadoop:* org.slf4j:slf4j-api - org.slf4j:slf4j-log4j12 + org.slf4j:slf4j-reload4j org.hsqldb:hsqldb diff --git a/hadoop-build-tools/pom.xml b/hadoop-build-tools/pom.xml index ed4c0ef9ce9ff..eab72dc7ec4bd 100644 --- a/hadoop-build-tools/pom.xml +++ b/hadoop-build-tools/pom.xml @@ -18,7 +18,7 @@ hadoop-main org.apache.hadoop - 3.3.0-SNAPSHOT + 3.3.6 4.0.0 hadoop-build-tools diff --git a/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml b/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml index 8f3d3f13824ef..adffe4ecc96ea 100644 --- a/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml +++ b/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml @@ -69,7 +69,9 @@ - + + + @@ -119,7 +121,12 @@ - + + + + + @@ -154,7 
+161,9 @@ - + + + diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index 7ee7b85fec937..f49bc0abf1bd6 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-client-api - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop Client @@ -67,6 +67,13 @@ + + + org.xerial.snappy + snappy-java + @@ -87,6 +94,10 @@ org.apache.maven.plugins maven-shade-plugin + + true + true + org.apache.hadoop @@ -105,6 +116,10 @@ org.apache.hadoop:* + + + org.xerial.snappy:* + @@ -126,9 +141,7 @@ org/apache/hadoop/* org/apache/hadoop/**/* - - org/apache/htrace/* - org/apache/htrace/**/* + org/slf4j/* org/slf4j/**/* org/apache/commons/logging/* @@ -143,6 +156,14 @@ org/w3c/dom/**/* org/xml/sax/* org/xml/sax/**/* + org/bouncycastle/* + org/bouncycastle/**/* + + org/xerial/snappy/* + org/xerial/snappy/**/* + + org/wildfly/openssl/* + org/wildfly/openssl/**/* @@ -161,6 +182,8 @@ com/sun/security/**/* com/sun/jndi/**/* com/sun/management/**/* + com/ibm/security/* + com/ibm/security/**/* @@ -219,6 +242,9 @@ net/topology/* net/topology/**/* + + net/jpountz/* + net/jpountz/**/* diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml index 757b374ec0ab0..c733d1c0c86a3 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-client-check-invariants - 3.3.0-SNAPSHOT + 3.3.6 pom @@ -80,18 +80,18 @@ but enforcer still sees it. 
--> org.apache.hadoop:hadoop-annotations - - org.apache.htrace:htrace-core4 org.slf4j:slf4j-api commons-logging:commons-logging - - log4j:log4j + + ch.qos.reload4j:reload4j com.google.code.findbugs:jsr305 org.bouncycastle:* + + org.xerial.snappy:* @@ -180,7 +180,6 @@ ${shell-executable} ${project.build.testOutputDirectory} - false ensure-jars-have-correct-contents.sh ${hadoop-client-artifacts} diff --git a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh index 7242ade356fda..2e927402d2542 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh +++ b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -67,6 +67,8 @@ allowed_expr+="|^krb5_udp-template.conf$" # Jetty uses this style sheet for directory listings. TODO ensure our # internal use of jetty disallows directory listings and remove this. allowed_expr+="|^jetty-dir.css$" +# Snappy java is native library. We cannot relocate it to under org/apache/hadoop. +allowed_expr+="|^org/xerial/" allowed_expr+=")" declare -i bad_artifacts=0 diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml index 08b4fb27befd9..3249bcdccbfcb 100644 --- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-client-check-test-invariants - 3.3.0-SNAPSHOT + 3.3.6 pom @@ -84,14 +84,12 @@ but enforcer still sees it. 
--> org.apache.hadoop:hadoop-annotations - - org.apache.htrace:htrace-core4 org.slf4j:slf4j-api commons-logging:commons-logging - - log4j:log4j + + ch.qos.reload4j:reload4j junit:junit @@ -100,6 +98,8 @@ com.google.code.findbugs:jsr305 org.bouncycastle:* + + org.xerial.snappy:* @@ -190,7 +190,6 @@ ${shell-executable} ${project.build.testOutputDirectory} - false ensure-jars-have-correct-contents.sh ${hadoop-client-artifacts} diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh index 08f9202972735..0dbfefbf4f16d 100644 --- a/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -43,6 +43,8 @@ allowed_expr+="|^org/apache/hadoop/" allowed_expr+="|^META-INF/" # * whatever under the "webapps" directory; for things shipped by yarn allowed_expr+="|^webapps/" +# * Resources files used by Hadoop YARN mini cluster +allowed_expr+="|^TERMINAL/" # * Hadoop's default configuration files, which have the form # "_module_-default.xml" allowed_expr+="|^[^-]*-default.xml$" @@ -54,13 +56,8 @@ allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" # * Used by JavaSandboxLinuxContainerRuntime as a default, loaded # from root, so can't relocate. :( allowed_expr+="|^java.policy$" -# * allowing native libraries from rocksdb. Leaving native libraries as it is. 
-allowed_expr+="|^librocksdbjni-linux32.so" -allowed_expr+="|^librocksdbjni-linux64.so" -allowed_expr+="|^librocksdbjni-osx.jnilib" -allowed_expr+="|^librocksdbjni-win64.dll" -allowed_expr+="|^librocksdbjni-linux-ppc64le.so" - +# * Used by javax.annotation +allowed_expr+="|^jndi.properties$" allowed_expr+=")" declare -i bad_artifacts=0 diff --git a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml index 1a14549250c3e..5f3ebc732bcb0 100644 --- a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml +++ b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-client-integration-tests - 3.3.0-SNAPSHOT + 3.3.6 Checks that we can use the generated artifacts Apache Hadoop Client Packaging Integration Tests @@ -33,8 +33,8 @@ - log4j - log4j + ch.qos.reload4j + reload4j test @@ -43,13 +43,13 @@ test - org.slf4j - slf4j-log4j12 + junit + junit test - junit - junit + org.lz4 + lz4-java test @@ -75,6 +75,27 @@ hadoop-client-minicluster test + + org.bouncycastle + bcprov-jdk15on + test + + + org.bouncycastle + bcpkix-jdk15on + test + + + javax.xml.bind + jaxb-api + test + + + javax.activation + activation + 1.1.1 + test + @@ -159,6 +180,18 @@ test test-jar + + org.apache.hadoop + hadoop-yarn-server-tests + test + test-jar + + + org.apache.hadoop + hadoop-common + test + test-jar + diff --git a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java new file mode 100644 index 0000000000000..fd0effa143b95 --- /dev/null +++ b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.hadoop.example; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import java.io.*; +import java.util.Arrays; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.RandomDatum; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.util.ReflectionUtils; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Ensure that we can perform codec operations given the API and runtime jars + * by performing some simple smoke tests. 
+ */ +public class ITUseHadoopCodecs { + + private static final Logger LOG = LoggerFactory.getLogger(ITUseHadoopCodecs.class); + + private Configuration haddopConf = new Configuration(); + private int dataCount = 100; + private int dataSeed = new Random().nextInt(); + + @Test + public void testGzipCodec() throws IOException { + ZlibFactory.setNativeZlibLoaded(false); + assertFalse(ZlibFactory.isNativeZlibLoaded(haddopConf)); + codecTest(haddopConf, dataSeed, 0, "org.apache.hadoop.io.compress.GzipCodec"); + codecTest(haddopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.GzipCodec"); + } + + @Test + public void testSnappyCodec() throws IOException { + codecTest(haddopConf, dataSeed, 0, "org.apache.hadoop.io.compress.SnappyCodec"); + codecTest(haddopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.SnappyCodec"); + } + + @Test + public void testLz4Codec() { + Arrays.asList(false, true).forEach(config -> { + haddopConf.setBoolean( + CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, + config); + try { + codecTest(haddopConf, dataSeed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); + codecTest(haddopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.Lz4Codec"); + } catch (IOException e) { + throw new RuntimeException("failed when running codecTest", e); + } + }); + } + + private void codecTest(Configuration conf, int seed, int count, String codecClass) + throws IOException { + + // Create the codec + CompressionCodec codec = null; + try { + codec = (CompressionCodec) + ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf); + } catch (ClassNotFoundException cnfe) { + throw new IOException("Illegal codec!"); + } + LOG.info("Created a Codec object of type: " + codecClass); + + // Generate data + DataOutputBuffer data = new DataOutputBuffer(); + RandomDatum.Generator generator = new RandomDatum.Generator(seed); + for(int i = 0; i < count; ++i) { + generator.next(); + RandomDatum key = generator.getKey(); + RandomDatum value = 
generator.getValue(); + + key.write(data); + value.write(data); + } + LOG.info("Generated " + count + " records"); + + // Compress data + DataOutputBuffer compressedDataBuffer = new DataOutputBuffer(); + try (CompressionOutputStream deflateFilter = + codec.createOutputStream(compressedDataBuffer); + DataOutputStream deflateOut = + new DataOutputStream(new BufferedOutputStream(deflateFilter))) { + deflateOut.write(data.getData(), 0, data.getLength()); + deflateOut.flush(); + deflateFilter.finish(); + } + + // De-compress data + DataInputBuffer deCompressedDataBuffer = new DataInputBuffer(); + deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, + compressedDataBuffer.getLength()); + DataInputBuffer originalData = new DataInputBuffer(); + originalData.reset(data.getData(), 0, data.getLength()); + try (CompressionInputStream inflateFilter = + codec.createInputStream(deCompressedDataBuffer); + DataInputStream originalIn = + new DataInputStream(new BufferedInputStream(originalData))) { + + // Check + int expected; + do { + expected = originalIn.read(); + assertEquals("Inflated stream read by byte does not match", + expected, inflateFilter.read()); + } while (expected != -1); + } + + LOG.info("SUCCESS! 
Completed checking " + count + " records"); + } +} diff --git a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java index 6022fbc688dde..2e304861babbd 100644 --- a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java +++ b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -43,6 +44,7 @@ import org.apache.hadoop.hdfs.web.WebHdfsTestUtil; import org.apache.hadoop.hdfs.web.WebHdfsConstants; +import org.apache.hadoop.yarn.server.MiniYARNCluster; /** * Ensure that we can perform operations against the shaded minicluster @@ -54,6 +56,7 @@ public class ITUseMiniCluster { LoggerFactory.getLogger(ITUseMiniCluster.class); private MiniDFSCluster cluster; + private MiniYARNCluster yarnCluster; private static final String TEST_PATH = "/foo/bar/cats/dee"; private static final String FILENAME = "test.file"; @@ -73,6 +76,12 @@ public void clusterUp() throws IOException { .numDataNodes(3) .build(); cluster.waitActive(); + + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.setInt("yarn.scheduler.capacity.root.default.capacity", 100); + yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1, 1); + yarnCluster.init(conf); + yarnCluster.start(); } @After @@ -80,6 +89,7 @@ public void clusterDown() { if (cluster != null) { cluster.close(); } + IOUtils.cleanupWithLogger(LOG, yarnCluster); } @Test diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 
52595d93523e9..2619603c0f87d 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-client-minicluster - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop Minicluster for Clients @@ -40,6 +40,12 @@ hadoop-client-api runtime + + + org.xerial.snappy + snappy-java + runtime + org.apache.hadoop hadoop-client-runtime @@ -187,8 +193,12 @@ slf4j-log4j12 - log4j - log4j + org.slf4j + slf4j-reload4j + + + ch.qos.reload4j + reload4j com.fasterxml.jackson.core @@ -322,6 +332,10 @@ dnsjava dnsjava + + org.apache.hadoop.thirdparty + hadoop-shaded-guava + @@ -491,6 +515,10 @@ org.eclipse.jetty jetty-util + + org.apache.hadoop.thirdparty + hadoop-shaded-guava + com.google.guava guava @@ -546,6 +574,10 @@ com.google.guava guava + + org.apache.hadoop.thirdparty + hadoop-shaded-guava + commons-logging commons-logging @@ -660,19 +692,18 @@ org.apache.hadoop:hadoop-client-api org.apache.hadoop:hadoop-client-runtime - org.apache.htrace:htrace-core4 org.slf4j:slf4j-api commons-logging:commons-logging junit:junit com.google.code.findbugs:jsr305 - log4j:log4j - org.eclipse.jetty.websocket:* - javax.websocket:javax.websocket-api - javax.annotation:javax.annotation-api - org.eclipse.jetty:jetty-jndi + ch.qos.reload4j:reload4j + org.eclipse.jetty.websocket:websocket-common + org.eclipse.jetty.websocket:websocket-api org.bouncycastle:* + + org.xerial.snappy:* @@ -719,11 +750,16 @@ testdata/* - - org.apache.hadoop:hadoop-yarn-server-nodemanager:* + com.fasterxml.jackson.*:* - TERMINAL/**/* + META-INF/versions/9/module-info.class + + + + com.google.code.gson:gson + + META-INF/versions/9/module-info.class @@ -758,16 +794,16 @@ xml.xsd - - org.rocksdb:rocksdbjni - - HISTORY-JAVA.md - + + org.eclipse.jetty:* + + about.html + - org.eclipse.jetty:* + org.eclipse.jetty.websocket:* about.html @@ -811,15 +847,37 @@ */** - + 
org.eclipse.jetty:jetty-client */** + + org.eclipse.jetty:jetty-xml + + */** + + + + org.eclipse.jetty:jetty-http + + */** + + + + org.eclipse.jetty:jetty-util-ajax + + */** + + + + org.eclipse.jetty:jetty-server + + jetty-dir.css + + @@ -838,9 +896,7 @@ org/apache/hadoop/* org/apache/hadoop/**/* - - org/apache/htrace/* - org/apache/htrace/**/* + org/slf4j/* org/slf4j/**/* org/apache/commons/logging/* @@ -859,6 +915,11 @@ org/w3c/dom/**/* org/xml/sax/* org/xml/sax/**/* + org/bouncycastle/* + org/bouncycastle/**/* + + org/xerial/snappy/* + org/xerial/snappy/**/* @@ -884,6 +945,8 @@ com/sun/security/**/* com/sun/jndi/**/* com/sun/management/**/* + com/ibm/security/* + com/ibm/security/**/* @@ -955,6 +1018,13 @@ **/pom.xml + + javax/annotation/ + ${shaded.dependency.prefix}.javax.websocket. + + **/pom.xml + + jersey/ ${shaded.dependency.prefix}.jersey. @@ -970,6 +1040,9 @@ net/topology/* net/topology/**/* + + net/jpountz/* + net/jpountz/**/* diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml index 496023521bf89..440bbfcdc04a4 100644 --- a/hadoop-client-modules/hadoop-client-runtime/pom.xml +++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-client-runtime - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop Client @@ -60,6 +60,12 @@ hadoop-client-api runtime + + + org.xerial.snappy + snappy-java + runtime + @@ -75,15 +81,9 @@ - - org.apache.htrace - htrace-core4 - runtime - org.slf4j slf4j-api @@ -103,8 +103,8 @@ * one of the three custom log4j appenders we have --> - log4j - log4j + ch.qos.reload4j + reload4j runtime true @@ -146,27 +146,23 @@ org.apache.hadoop:hadoop-client-api - - org.apache.htrace:htrace-core4 org.slf4j:slf4j-api commons-logging:commons-logging - - log4j:log4j + + ch.qos.reload4j:reload4j com.google.code.findbugs:jsr305 io.dropwizard.metrics:metrics-core - 
org.eclipse.jetty.websocket:* org.eclipse.jetty:jetty-servlet org.eclipse.jetty:jetty-security - org.eclipse.jetty:jetty-client - org.eclipse.jetty:jetty-http - org.eclipse.jetty:jetty-xml org.ow2.asm:* org.bouncycastle:* + + org.xerial.snappy:* @@ -213,6 +209,13 @@ about.html + + + org.eclipse.jetty.websocket:* + + about.html + + org.apache.kerby:kerb-util @@ -239,6 +242,19 @@ google/protobuf/**/*.proto + + com.fasterxml.jackson.*:* + + META-INF/versions/9/module-info.class + + + + com.google.code.gson:gson + + META-INF/versions/9/module-info.class + + + @@ -247,9 +263,7 @@ org/apache/hadoop/* org/apache/hadoop/**/* - - org/apache/htrace/* - org/apache/htrace/**/* + org/slf4j/* org/slf4j/**/* org/apache/commons/logging/* @@ -264,6 +278,11 @@ org/w3c/dom/**/* org/xml/sax/* org/xml/sax/**/* + org/bouncycastle/* + org/bouncycastle/**/* + + org/xerial/snappy/* + org/xerial/snappy/**/* @@ -282,6 +301,8 @@ com/sun/security/**/* com/sun/jndi/**/* com/sun/management/**/* + com/ibm/security/* + com/ibm/security/**/* @@ -354,6 +375,9 @@ net/topology/* net/topology/**/* + + net/jpountz/* + net/jpountz/**/* diff --git a/hadoop-client-modules/hadoop-client/pom.xml b/hadoop-client-modules/hadoop-client/pom.xml index 9216a2e54a397..d73adc2e9422b 100644 --- a/hadoop-client-modules/hadoop-client/pom.xml +++ b/hadoop-client-modules/hadoop-client/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project-dist hadoop-client - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Client aggregation pom with dependencies exposed Apache Hadoop Client Aggregator @@ -66,7 +66,7 @@ jersey-core - com.sun.jersey + com.github.pjfanning jersey-json @@ -114,6 +114,18 @@ org.eclipse.jetty jetty-server + + org.jetbrains.kotlin + kotlin-stdlib + + + org.jetbrains.kotlin + kotlin-stdlib-common + + + com.squareup.okhttp3 + okhttp + com.sun.jersey jersey-core @@ -167,7 +179,7 @@ jersey-core - com.sun.jersey + com.github.pjfanning jersey-json @@ -206,8 +218,8 @@ 
commons-cli - log4j - log4j + ch.qos.reload4j + reload4j com.sun.jersey @@ -218,7 +230,7 @@ jersey-server - com.sun.jersey + com.github.pjfanning jersey-json @@ -275,18 +287,13 @@ guice-servlet - com.sun.jersey + com.github.pjfanning jersey-json io.netty netty - - - org.slf4j - slf4j-log4j12 - @@ -315,11 +322,6 @@ io.netty netty - - - org.slf4j - slf4j-log4j12 - diff --git a/hadoop-client-modules/pom.xml b/hadoop-client-modules/pom.xml index 0895e31ca307f..be3e0490b7692 100644 --- a/hadoop-client-modules/pom.xml +++ b/hadoop-client-modules/pom.xml @@ -18,7 +18,7 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../hadoop-project hadoop-client-modules diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index b5e35b079f9fd..01c7597783f10 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-cloud-storage - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop Cloud Storage @@ -125,7 +125,7 @@ org.apache.hadoop - hadoop-openstack + hadoop-cos compile diff --git a/hadoop-cloud-storage-project/hadoop-cos/dev-support/findbugs-exclude.xml b/hadoop-cloud-storage-project/hadoop-cos/dev-support/findbugs-exclude.xml index 40d78d0cd6cec..f8c3472640f25 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/dev-support/findbugs-exclude.xml +++ b/hadoop-cloud-storage-project/hadoop-cos/dev-support/findbugs-exclude.xml @@ -15,4 +15,9 @@ limitations under the License. 
--> + + + + + diff --git a/hadoop-cloud-storage-project/hadoop-cos/pom.xml b/hadoop-cloud-storage-project/hadoop-cos/pom.xml index 839bd04c9b643..8c89506981c46 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cos/pom.xml @@ -20,7 +20,7 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-cos @@ -64,10 +64,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${basedir}/dev-support/findbugs-exclude.xml @@ -81,6 +80,22 @@ 3600 + + org.apache.maven.plugins + maven-dependency-plugin + + + deplist + compile + + list + + + ${project.basedir}/target/hadoop-cloud-storage-deps/${project.artifactId}.cloud-storage-optional.txt + + + + @@ -93,8 +108,8 @@ com.qcloud - cos_api - 5.4.9 + cos_api-bundle + 5.6.69 compile diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/BufferPool.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/BufferPool.java index a4ee4d5be9ac8..409c9cb42f966 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/BufferPool.java +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/BufferPool.java @@ -63,32 +63,27 @@ private BufferPool() { private File createDir(String dirPath) throws IOException { File dir = new File(dirPath); - if (null != dir) { - if (!dir.exists()) { - LOG.debug("Buffer dir: [{}] does not exists. create it first.", - dirPath); - if (dir.mkdirs()) { - if (!dir.setWritable(true) || !dir.setReadable(true) - || !dir.setExecutable(true)) { - LOG.warn("Set the buffer dir: [{}]'s permission [writable," - + "readable, executable] failed.", dir.getAbsolutePath()); - } - LOG.debug("Buffer dir: [{}] is created successfully.", - dir.getAbsolutePath()); - } else { - // Once again, check if it has been created successfully. 
- // Prevent problems created by multiple processes at the same time. - if (!dir.exists()) { - throw new IOException("buffer dir:" + dir.getAbsolutePath() - + " is created unsuccessfully"); - } + if (!dir.exists()) { + LOG.debug("Buffer dir: [{}] does not exists. create it first.", + dirPath); + if (dir.mkdirs()) { + if (!dir.setWritable(true) || !dir.setReadable(true) + || !dir.setExecutable(true)) { + LOG.warn("Set the buffer dir: [{}]'s permission [writable," + + "readable, executable] failed.", dir.getAbsolutePath()); } + LOG.debug("Buffer dir: [{}] is created successfully.", + dir.getAbsolutePath()); } else { - LOG.debug("buffer dir: {} already exists.", dirPath); + // Once again, check if it has been created successfully. + // Prevent problems created by multiple processes at the same time. + if (!dir.exists()) { + throw new IOException("buffer dir:" + dir.getAbsolutePath() + + " is created unsuccessfully"); + } } } else { - throw new IOException("creating buffer dir: " + dir.getAbsolutePath() - + "unsuccessfully."); + LOG.debug("buffer dir: {} already exists.", dirPath); } return dir; diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileReadTask.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileReadTask.java index a5dcdda07120b..249e9e1ade82a 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileReadTask.java +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileReadTask.java @@ -80,7 +80,6 @@ public CosNFileReadTask( public void run() { int retries = 0; RetryPolicy.RetryAction retryAction; - LOG.info(Thread.currentThread().getName() + "read ..."); try { this.readBuffer.lock(); do { diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileSystem.java 
b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileSystem.java index 333b34929ecda..4dda1260731d3 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileSystem.java +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNFileSystem.java @@ -28,11 +28,11 @@ import java.util.HashMap; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.util.concurrent.ListeningExecutorService; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -71,8 +71,8 @@ public class CosNFileSystem extends FileSystem { private String owner = "Unknown"; private String group = "Unknown"; - private ListeningExecutorService boundedIOThreadPool; - private ListeningExecutorService boundedCopyThreadPool; + private ExecutorService boundedIOThreadPool; + private ExecutorService boundedCopyThreadPool; public CosNFileSystem() { } diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNOutputStream.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNOutputStream.java index c437dde613d2c..bd449622906f1 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNOutputStream.java +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNOutputStream.java @@ -32,10 +32,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; +import 
org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import com.qcloud.cos.model.PartETag; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNUtils.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNUtils.java index 39981caba24bb..cdac15ffc619e 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNUtils.java +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNUtils.java @@ -22,15 +22,16 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.lang.reflect.Modifier; +import java.net.URI; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.qcloud.cos.auth.COSCredentialsProvider; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.cosn.auth.COSCredentialProviderList; -import org.apache.hadoop.fs.cosn.auth.EnvironmentVariableCredentialProvider; -import org.apache.hadoop.fs.cosn.auth.SimpleCredentialProvider; +import org.apache.hadoop.fs.cosn.auth.COSCredentialsProviderList; +import org.apache.hadoop.fs.cosn.auth.EnvironmentVariableCredentialsProvider; +import org.apache.hadoop.fs.cosn.auth.SimpleCredentialsProvider; /** * Utility methods for CosN code. 
@@ -48,21 +49,23 @@ public final class CosNUtils { private CosNUtils() { } - public static COSCredentialProviderList createCosCredentialsProviderSet( + public static COSCredentialsProviderList createCosCredentialsProviderSet( + URI uri, Configuration conf) throws IOException { - COSCredentialProviderList credentialProviderList = - new COSCredentialProviderList(); + COSCredentialsProviderList credentialProviderList = + new COSCredentialsProviderList(); Class[] cosClasses = CosNUtils.loadCosProviderClasses( conf, CosNConfigKeys.COSN_CREDENTIALS_PROVIDER); if (0 == cosClasses.length) { - credentialProviderList.add(new SimpleCredentialProvider(conf)); - credentialProviderList.add(new EnvironmentVariableCredentialProvider()); + credentialProviderList.add( + new SimpleCredentialsProvider(uri, conf)); + credentialProviderList.add( + new EnvironmentVariableCredentialsProvider(uri, conf)); } else { for (Class credClass : cosClasses) { - credentialProviderList.add(createCOSCredentialProvider( - conf, + credentialProviderList.add(createCOSCredentialProvider(uri, conf, credClass)); } } @@ -83,16 +86,17 @@ public static Class[] loadCosProviderClasses( } public static COSCredentialsProvider createCOSCredentialProvider( + URI uri, Configuration conf, Class credClass) throws IOException { COSCredentialsProvider credentialsProvider; if (!COSCredentialsProvider.class.isAssignableFrom(credClass)) { - throw new IllegalArgumentException( - "class " + credClass + " " + NOT_COS_CREDENTIAL_PROVIDER); + throw new IllegalArgumentException("class " + credClass + " " + + NOT_COS_CREDENTIAL_PROVIDER); } if (Modifier.isAbstract(credClass.getModifiers())) { - throw new IllegalArgumentException( - "class " + credClass + " " + ABSTRACT_CREDENTIAL_PROVIDER); + throw new IllegalArgumentException("class " + credClass + " " + + ABSTRACT_CREDENTIAL_PROVIDER); } LOG.debug("Credential Provider class: " + credClass.getName()); @@ -112,8 +116,18 @@ public static COSCredentialsProvider 
createCOSCredentialProvider( return credentialsProvider; } - Method factory = getFactoryMethod( - credClass, COSCredentialsProvider.class, "getInstance"); + // new credClass(uri, conf) + constructor = getConstructor(credClass, URI.class, + Configuration.class); + if (null != constructor) { + credentialsProvider = + (COSCredentialsProvider) constructor.newInstance(uri, + conf); + return credentialsProvider; + } + + Method factory = getFactoryMethod(credClass, + COSCredentialsProvider.class, "getInstance"); if (null != factory) { credentialsProvider = (COSCredentialsProvider) factory.invoke(null); return credentialsProvider; diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNativeFileSystemStore.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNativeFileSystemStore.java index 833f42d7be6e7..d2484c0e47b3c 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNativeFileSystemStore.java +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/CosNativeFileSystemStore.java @@ -34,6 +34,7 @@ import com.qcloud.cos.ClientConfig; import com.qcloud.cos.auth.BasicCOSCredentials; import com.qcloud.cos.auth.COSCredentials; +import com.qcloud.cos.endpoint.SuffixEndpointBuilder; import com.qcloud.cos.exception.CosClientException; import com.qcloud.cos.exception.CosServiceException; import com.qcloud.cos.http.HttpProtocol; @@ -64,7 +65,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.cosn.auth.COSCredentialProviderList; +import org.apache.hadoop.fs.cosn.auth.COSCredentialsProviderList; import org.apache.hadoop.util.VersionInfo; import org.apache.http.HttpStatus; @@ -89,9 +90,9 @@ class CosNativeFileSystemStore implements NativeFileSystemStore { * @throws IOException Initialize the 
COS client failed, * caused by incorrect options. */ - private void initCOSClient(Configuration conf) throws IOException { - COSCredentialProviderList credentialProviderList = - CosNUtils.createCosCredentialsProviderSet(conf); + private void initCOSClient(URI uri, Configuration conf) throws IOException { + COSCredentialsProviderList credentialProviderList = + CosNUtils.createCosCredentialsProviderSet(uri, conf); String region = conf.get(CosNConfigKeys.COSN_REGION_KEY); String endpointSuffix = conf.get( CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY); @@ -113,7 +114,7 @@ private void initCOSClient(Configuration conf) throws IOException { ClientConfig config; if (null == region) { config = new ClientConfig(new Region("")); - config.setEndPointSuffix(endpointSuffix); + config.setEndpointBuilder(new SuffixEndpointBuilder(endpointSuffix)); } else { config = new ClientConfig(new Region(region)); } @@ -146,7 +147,7 @@ private void initCOSClient(Configuration conf) throws IOException { @Override public void initialize(URI uri, Configuration conf) throws IOException { try { - initCOSClient(conf); + initCOSClient(uri, conf); this.bucketName = uri.getHost(); } catch (Exception e) { handleException(e, ""); @@ -174,8 +175,8 @@ private void storeFileWithRetry(String key, InputStream inputStream, PutObjectResult putObjectResult = (PutObjectResult) callCOSClientWithRetry(putObjectRequest); - LOG.debug("Store file successfully. COS key: [{}], ETag: [{}], " - + "MD5: [{}].", key, putObjectResult.getETag(), new String(md5Hash)); + LOG.debug("Store file successfully. COS key: [{}], ETag: [{}].", + key, putObjectResult.getETag()); } catch (Exception e) { String errMsg = String.format("Store file failed. COS key: [%s], " + "exception: [%s]", key, e.toString()); @@ -196,8 +197,7 @@ private void storeFileWithRetry(String key, InputStream inputStream, public void storeFile(String key, File file, byte[] md5Hash) throws IOException { LOG.info("Store file from local path: [{}]. 
file length: [{}] COS key: " + - "[{}] MD5: [{}].", file.getCanonicalPath(), file.length(), key, - new String(md5Hash)); + "[{}]", file.getCanonicalPath(), file.length(), key); storeFileWithRetry(key, new BufferedInputStream(new FileInputStream(file)), md5Hash, file.length()); } @@ -218,7 +218,7 @@ public void storeFile( byte[] md5Hash, long contentLength) throws IOException { LOG.info("Store file from input stream. COS key: [{}], " - + "length: [{}], MD5: [{}].", key, contentLength, md5Hash); + + "length: [{}].", key, contentLength); storeFileWithRetry(key, inputStream, md5Hash, contentLength); } @@ -250,7 +250,11 @@ public void storeEmptyFile(String key) throws IOException { public PartETag uploadPart(File file, String key, String uploadId, int partNum) throws IOException { InputStream inputStream = new FileInputStream(file); - return uploadPart(inputStream, key, uploadId, partNum, file.length()); + try { + return uploadPart(inputStream, key, uploadId, partNum, file.length()); + } finally { + inputStream.close(); + } } @Override diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/AbstractCOSCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/AbstractCOSCredentialsProvider.java new file mode 100644 index 0000000000000..1363a7934cba0 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/AbstractCOSCredentialsProvider.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.cosn.auth; + +import com.qcloud.cos.auth.COSCredentialsProvider; +import org.apache.hadoop.conf.Configuration; + +import javax.annotation.Nullable; +import java.net.URI; + +/** + * The base class for COS credential providers which take a URI or + * configuration in their constructor. + */ +public abstract class AbstractCOSCredentialsProvider + implements COSCredentialsProvider { + private final URI uri; + private final Configuration conf; + + public AbstractCOSCredentialsProvider(@Nullable URI uri, + Configuration conf) { + this.uri = uri; + this.conf = conf; + } + + public URI getUri() { + return uri; + } + + public Configuration getConf() { + return conf; + } +} \ No newline at end of file diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialProviderList.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialProviderList.java deleted file mode 100644 index e900b997e4858..0000000000000 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialProviderList.java +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.cosn.auth; - -import java.io.Closeable; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; - -import com.google.common.base.Preconditions; -import com.qcloud.cos.auth.AnonymousCOSCredentials; -import com.qcloud.cos.auth.COSCredentials; -import com.qcloud.cos.auth.COSCredentialsProvider; -import com.qcloud.cos.exception.CosClientException; -import com.qcloud.cos.utils.StringUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * a list of cos credentials provider. - */ -public class COSCredentialProviderList implements - COSCredentialsProvider, AutoCloseable { - private static final Logger LOG = - LoggerFactory.getLogger(COSCredentialProviderList.class); - - private static final String NO_COS_CREDENTIAL_PROVIDERS = - "No COS Credential Providers"; - private static final String CREDENTIALS_REQUESTED_WHEN_CLOSED = - "Credentials requested after provider list was closed"; - - private final List providers = - new ArrayList<>(1); - private boolean reuseLastProvider = true; - private COSCredentialsProvider lastProvider; - - private final AtomicInteger refCount = new AtomicInteger(1); - private final AtomicBoolean isClosed = new AtomicBoolean(false); - - public COSCredentialProviderList() { - } - - public COSCredentialProviderList( - Collection providers) { - this.providers.addAll(providers); - } - - public void add(COSCredentialsProvider provider) { - this.providers.add(provider); - } - - public int getRefCount() { - return this.refCount.get(); - } - - public void 
checkNotEmpty() { - if (this.providers.isEmpty()) { - throw new NoAuthWithCOSException(NO_COS_CREDENTIAL_PROVIDERS); - } - } - - public COSCredentialProviderList share() { - Preconditions.checkState(!this.closed(), "Provider list is closed"); - this.refCount.incrementAndGet(); - return this; - } - - public boolean closed() { - return this.isClosed.get(); - } - - @Override - public COSCredentials getCredentials() { - if (this.closed()) { - throw new NoAuthWithCOSException(CREDENTIALS_REQUESTED_WHEN_CLOSED); - } - - this.checkNotEmpty(); - - if (this.reuseLastProvider && this.lastProvider != null) { - return this.lastProvider.getCredentials(); - } - - for (COSCredentialsProvider provider : this.providers) { - try { - COSCredentials credentials = provider.getCredentials(); - if (!StringUtils.isNullOrEmpty(credentials.getCOSAccessKeyId()) - && !StringUtils.isNullOrEmpty(credentials.getCOSSecretKey()) - || credentials instanceof AnonymousCOSCredentials) { - this.lastProvider = provider; - return credentials; - } - } catch (CosClientException e) { - LOG.warn("No credentials provided by {}: {}", provider, e.toString()); - } - } - - throw new NoAuthWithCOSException( - "No COS Credentials provided by " + this.providers.toString()); - } - - @Override - public void close() throws Exception { - if (this.closed()) { - return; - } - - int remainder = this.refCount.decrementAndGet(); - if (remainder != 0) { - return; - } - this.isClosed.set(true); - - for (COSCredentialsProvider provider : this.providers) { - if (provider instanceof Closeable) { - ((Closeable) provider).close(); - } - } - } - -} diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java new file mode 100644 index 0000000000000..d2d2f8c9a7cab --- /dev/null +++ 
b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.cosn.auth; + +import java.io.Closeable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import com.qcloud.cos.auth.AnonymousCOSCredentials; +import com.qcloud.cos.auth.COSCredentials; +import com.qcloud.cos.auth.COSCredentialsProvider; +import com.qcloud.cos.utils.StringUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * a list of cos credentials provider. + */ +public class COSCredentialsProviderList implements + COSCredentialsProvider, AutoCloseable { + private static final Logger LOG = + LoggerFactory.getLogger(COSCredentialsProviderList.class); + + private static final String NO_COS_CREDENTIAL_PROVIDERS = + "No COS Credential Providers"; + private static final String CREDENTIALS_REQUESTED_WHEN_CLOSED = + "Credentials requested after provider list was closed"; + + private final List providers = + new ArrayList(1); + private boolean reuseLastProvider = true; + private COSCredentialsProvider lastProvider; + + private final AtomicInteger refCount = new AtomicInteger(1); + private final AtomicBoolean isClosed = new AtomicBoolean(false); + + public COSCredentialsProviderList() { + } + + public COSCredentialsProviderList( + Collection providers) { + this.providers.addAll(providers); + } + + public void add(COSCredentialsProvider provider) { + this.providers.add(provider); + } + + public int getRefCount() { + return this.refCount.get(); + } + + public void checkNotEmpty() { + 
if (this.providers.isEmpty()) { + throw new NoAuthWithCOSException(NO_COS_CREDENTIAL_PROVIDERS); + } + } + + public COSCredentialsProviderList share() { + Preconditions.checkState(!this.closed(), "Provider list is closed"); + this.refCount.incrementAndGet(); + return this; + } + + public boolean closed() { + return this.isClosed.get(); + } + + @Override + public COSCredentials getCredentials() { + if (this.closed()) { + throw new NoAuthWithCOSException(CREDENTIALS_REQUESTED_WHEN_CLOSED); + } + + this.checkNotEmpty(); + + if (this.reuseLastProvider && this.lastProvider != null) { + return this.lastProvider.getCredentials(); + } + + for (COSCredentialsProvider provider : this.providers) { + COSCredentials credentials = provider.getCredentials(); + if (null != credentials + && !StringUtils.isNullOrEmpty(credentials.getCOSAccessKeyId()) + && !StringUtils.isNullOrEmpty(credentials.getCOSSecretKey()) + || credentials instanceof AnonymousCOSCredentials) { + this.lastProvider = provider; + return credentials; + } + } + + throw new NoAuthWithCOSException( + "No COS Credentials provided by " + this.providers.toString()); + } + + @Override + public void refresh() { + if (this.closed()) { + return; + } + + for (COSCredentialsProvider cosCredentialsProvider : this.providers) { + cosCredentialsProvider.refresh(); + } + } + + @Override + public void close() throws Exception { + if (this.closed()) { + return; + } + + int remainder = this.refCount.decrementAndGet(); + if (remainder != 0) { + return; + } + this.isClosed.set(true); + + for (COSCredentialsProvider provider : this.providers) { + if (provider instanceof Closeable) { + ((Closeable) provider).close(); + } + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/EnvironmentVariableCredentialProvider.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/EnvironmentVariableCredentialProvider.java deleted file mode 100644 index 
0a7786b882f8b..0000000000000 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/EnvironmentVariableCredentialProvider.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.cosn.auth; - -import com.qcloud.cos.auth.BasicCOSCredentials; -import com.qcloud.cos.auth.COSCredentials; -import com.qcloud.cos.auth.COSCredentialsProvider; -import com.qcloud.cos.exception.CosClientException; -import com.qcloud.cos.utils.StringUtils; - -import org.apache.hadoop.fs.cosn.Constants; - -/** - * the provider obtaining the cos credentials from the environment variables. - */ -public class EnvironmentVariableCredentialProvider - implements COSCredentialsProvider { - @Override - public COSCredentials getCredentials() { - String secretId = System.getenv(Constants.COSN_SECRET_ID_ENV); - String secretKey = System.getenv(Constants.COSN_SECRET_KEY_ENV); - - secretId = StringUtils.trim(secretId); - secretKey = StringUtils.trim(secretKey); - - if (!StringUtils.isNullOrEmpty(secretId) - && !StringUtils.isNullOrEmpty(secretKey)) { - return new BasicCOSCredentials(secretId, secretKey); - } else { - throw new CosClientException( - "Unable to load COS credentials from environment variables" + - "(COS_SECRET_ID or COS_SECRET_KEY)"); - } - } - - @Override - public String toString() { - return "EnvironmentVariableCredentialProvider{}"; - } -} diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/EnvironmentVariableCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/EnvironmentVariableCredentialsProvider.java new file mode 100644 index 0000000000000..baa76908b6147 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/EnvironmentVariableCredentialsProvider.java @@ 
-0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.cosn.auth; + +import com.qcloud.cos.auth.BasicCOSCredentials; +import com.qcloud.cos.auth.COSCredentials; +import com.qcloud.cos.auth.COSCredentialsProvider; +import com.qcloud.cos.utils.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.cosn.Constants; + +import javax.annotation.Nullable; +import java.net.URI; + +/** + * The provider obtaining the cos credentials from the environment variables. + */ +public class EnvironmentVariableCredentialsProvider + extends AbstractCOSCredentialsProvider implements COSCredentialsProvider { + + public EnvironmentVariableCredentialsProvider(@Nullable URI uri, + Configuration conf) { + super(uri, conf); + } + + @Override + public COSCredentials getCredentials() { + String secretId = System.getenv(Constants.COSN_SECRET_ID_ENV); + String secretKey = System.getenv(Constants.COSN_SECRET_KEY_ENV); + + secretId = StringUtils.trim(secretId); + secretKey = StringUtils.trim(secretKey); + + if (!StringUtils.isNullOrEmpty(secretId) + && !StringUtils.isNullOrEmpty(secretKey)) { + return new BasicCOSCredentials(secretId, secretKey); + } + + return null; + } + + @Override + public void refresh() { + } + + @Override + public String toString() { + return String.format("EnvironmentVariableCredentialsProvider{%s, %s}", + Constants.COSN_SECRET_ID_ENV, + Constants.COSN_SECRET_KEY_ENV); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/SimpleCredentialProvider.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/SimpleCredentialProvider.java deleted file mode 100644 
index f0635fc0d00cf..0000000000000 --- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/SimpleCredentialProvider.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.cosn.auth; - -import com.qcloud.cos.auth.BasicCOSCredentials; -import com.qcloud.cos.auth.COSCredentials; -import com.qcloud.cos.auth.COSCredentialsProvider; -import com.qcloud.cos.exception.CosClientException; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.cosn.CosNConfigKeys; - -/** - * Get the credentials from the hadoop configuration. - */ -public class SimpleCredentialProvider implements COSCredentialsProvider { - private String secretId; - private String secretKey; - - public SimpleCredentialProvider(Configuration conf) { - this.secretId = conf.get( - CosNConfigKeys.COSN_SECRET_ID_KEY - ); - this.secretKey = conf.get( - CosNConfigKeys.COSN_SECRET_KEY_KEY - ); - } - - @Override - public COSCredentials getCredentials() { - if (!StringUtils.isEmpty(this.secretId) - && !StringUtils.isEmpty(this.secretKey)) { - return new BasicCOSCredentials(this.secretId, this.secretKey); - } - throw new CosClientException("secret id or secret key is unset"); - } - -} diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/SimpleCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/SimpleCredentialsProvider.java new file mode 100644 index 0000000000000..107574a87c3aa --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/SimpleCredentialsProvider.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.cosn.auth; + +import com.qcloud.cos.auth.BasicCOSCredentials; +import com.qcloud.cos.auth.COSCredentials; +import com.qcloud.cos.auth.COSCredentialsProvider; +import com.qcloud.cos.utils.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.cosn.CosNConfigKeys; + +import javax.annotation.Nullable; +import java.net.URI; + +/** + * Get the credentials from the hadoop configuration. + */ +public class SimpleCredentialsProvider + extends AbstractCOSCredentialsProvider implements COSCredentialsProvider { + private String secretId; + private String secretKey; + + public SimpleCredentialsProvider(@Nullable URI uri, Configuration conf) { + super(uri, conf); + if (null != conf) { + this.secretId = conf.get( + CosNConfigKeys.COSN_SECRET_ID_KEY); + this.secretKey = conf.get( + CosNConfigKeys.COSN_SECRET_KEY_KEY); + } + } + + @Override + public COSCredentials getCredentials() { + if (!StringUtils.isNullOrEmpty(this.secretId) + && !StringUtils.isNullOrEmpty(this.secretKey)) { + return new BasicCOSCredentials(this.secretId, this.secretKey); + } + return null; + } + + @Override + public void refresh() { + } +} diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md b/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md index d4f8728924348..9c96ac3659815 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md +++ b/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md @@ -130,20 +130,19 @@ Each user needs to properly configure the credentials ( User's secreteId and 
sec ```xml fs.cosn.credentials.provider - org.apache.hadoop.fs.auth.SimpleCredentialProvider + org.apache.hadoop.fs.auth.SimpleCredentialsProvider This option allows the user to specify how to get the credentials. Comma-separated class names of credential provider classes which implement com.qcloud.cos.auth.COSCredentialsProvider: - 1.org.apache.hadoop.fs.auth.SimpleCredentialProvider: Obtain the secret id and secret key - from fs.cosn.userinfo.secretId and fs.cosn.userinfo.secretKey in core-site.xml - 2.org.apache.hadoop.fs.auth.EnvironmentVariableCredentialProvider: Obtain the secret id and secret key from system environment variables named COS_SECRET_ID and COS_SECRET_KEY + 1.org.apache.hadoop.fs.auth.SimpleCredentialsProvider: Obtain the secret id and secret key from fs.cosn.userinfo.secretId and fs.cosn.userinfo.secretKey in core-site.xml + 2.org.apache.hadoop.fs.auth.EnvironmentVariableCredentialsProvider: Obtain the secret id and secret key from system environment variables named COS_SECRET_ID and COS_SECRET_KEY If unspecified, the default order of credential providers is: - 1. org.apache.hadoop.fs.auth.SimpleCredentialProvider - 2. org.apache.hadoop.fs.auth.EnvironmentVariableCredentialProvider + 1. org.apache.hadoop.fs.auth.SimpleCredentialsProvider + 2. org.apache.hadoop.fs.auth.EnvironmentVariableCredentialsProvider @@ -237,7 +236,7 @@ Hadoop-COS provides rich runtime properties to set, and most of these do not req | properties | description | default value | required | |:----------:|:-----------|:-------------:|:--------:| | fs.defaultFS | Configure the default file system used by Hadoop.| None | NO | -| fs.cosn.credentials.provider | This option allows the user to specify how to get the credentials. Comma-separated class names of credential provider classes which implement com.qcloud.cos.auth.COSCredentialsProvider:
1. org.apache.hadoop.fs.cos.auth.SimpleCredentialProvider: Obtain the secret id and secret key from `fs.cosn.userinfo.secretId` and `fs.cosn.userinfo.secretKey` in core-site.xml;
2. org.apache.hadoop.fs.auth.EnvironmentVariableCredentialProvider: Obtain the secret id and secret key from system environment variables named `COSN_SECRET_ID` and `COSN_SECRET_KEY`.

If unspecified, the default order of credential providers is:
1. org.apache.hadoop.fs.auth.SimpleCredentialProvider;
2. org.apache.hadoop.fs.auth.EnvironmentVariableCredentialProvider. | None | NO | +| fs.cosn.credentials.provider | This option allows the user to specify how to get the credentials. Comma-separated class names of credential provider classes which implement com.qcloud.cos.auth.COSCredentialsProvider:
1. org.apache.hadoop.fs.cosn.auth.SimpleCredentialsProvider: Obtain the secret id and secret key from `fs.cosn.userinfo.secretId` and `fs.cosn.userinfo.secretKey` in core-site.xml;
2. org.apache.hadoop.fs.cosn.auth.EnvironmentVariableCredentialsProvider: Obtain the secret id and secret key from system environment variables named `COSN_SECRET_ID` and `COSN_SECRET_KEY`.

If unspecified, the default order of credential providers is:
1. org.apache.hadoop.fs.cosn.auth.SimpleCredentialsProvider;
2. org.apache.hadoop.fs.auth.EnvironmentVariableCredentialsProvider. | None | NO | | fs.cosn.userinfo.secretId/secretKey | The API key information of your account | None | YES | | fs.cosn.bucket.region | The region where the bucket is located. | None | YES | | fs.cosn.impl | The implementation class of the CosN filesystem. | None | YES | diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/test/java/org/apache/hadoop/fs/cosn/TestCosCredentials.java b/hadoop-cloud-storage-project/hadoop-cos/src/test/java/org/apache/hadoop/fs/cosn/TestCosCredentials.java new file mode 100644 index 0000000000000..8b74f3639ddbd --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-cos/src/test/java/org/apache/hadoop/fs/cosn/TestCosCredentials.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.cosn; + +import com.qcloud.cos.auth.COSCredentials; +import com.qcloud.cos.auth.COSCredentialsProvider; +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +public class TestCosCredentials { + private static final Logger LOG = + LoggerFactory.getLogger(TestCosCredentials.class); + + private final URI fsUri; + + private final String testCosNSecretId = "secretId"; + private final String testCosNSecretKey = "secretKey"; + private final String testCosNEnvSecretId = "env_secretId"; + private final String testCosNEnvSecretKey = "env_secretKey"; + + public TestCosCredentials() throws URISyntaxException { + // A fake uri for tests. + this.fsUri = new URI("cosn://test-bucket-1250000000"); + } + + @Test + public void testSimpleCredentialsProvider() throws Throwable { + Configuration configuration = new Configuration(); + configuration.set(CosNConfigKeys.COSN_SECRET_ID_KEY, + testCosNSecretId); + configuration.set(CosNConfigKeys.COSN_SECRET_KEY_KEY, + testCosNSecretKey); + validateCredentials(this.fsUri, configuration); + } + + @Test + public void testEnvironmentCredentialsProvider() throws Throwable { + Configuration configuration = new Configuration(); + // Set EnvironmentVariableCredentialsProvider as the CosCredentials + // Provider. 
+ configuration.set(CosNConfigKeys.COSN_CREDENTIALS_PROVIDER, + "org.apache.hadoop.fs.cosn.EnvironmentVariableCredentialsProvider"); + // Set the environment variables storing the secret id and secret key. + System.setProperty(Constants.COSN_SECRET_ID_ENV, testCosNEnvSecretId); + System.setProperty(Constants.COSN_SECRET_KEY_ENV, testCosNEnvSecretKey); + validateCredentials(this.fsUri, configuration); + } + + private void validateCredentials(URI uri, Configuration configuration) + throws IOException { + if (null != configuration) { + COSCredentialsProvider credentialsProvider = + CosNUtils.createCosCredentialsProviderSet(uri, configuration); + COSCredentials cosCredentials = credentialsProvider.getCredentials(); + assertNotNull("The cos credentials obtained is null.", cosCredentials); + if (configuration.get( + CosNConfigKeys.COSN_CREDENTIALS_PROVIDER).compareToIgnoreCase( + "org.apache.hadoop.fs.cosn.EnvironmentVariableCredentialsProvider") + == 0) { + if (null == cosCredentials.getCOSAccessKeyId() + || cosCredentials.getCOSAccessKeyId().isEmpty() + || null == cosCredentials.getCOSSecretKey() + || cosCredentials.getCOSSecretKey().isEmpty()) { + String failMessage = String.format( + "Test EnvironmentVariableCredentialsProvider failed. The " + + "expected is [secretId: %s, secretKey: %s], but got null or" + + " empty.", testCosNEnvSecretId, testCosNEnvSecretKey); + fail(failMessage); + } + + if (cosCredentials.getCOSAccessKeyId() + .compareTo(testCosNEnvSecretId) != 0 + || cosCredentials.getCOSSecretKey() + .compareTo(testCosNEnvSecretKey) != 0) { + String failMessage = String.format("Test " + + "EnvironmentVariableCredentialsProvider failed. 
" + + "The expected is [secretId: %s, secretKey: %s], but got is " + + "[secretId:%s, secretKey:%s].", testCosNEnvSecretId, + testCosNEnvSecretKey, cosCredentials.getCOSAccessKeyId(), + cosCredentials.getCOSSecretKey()); + } + // expected + } else { + if (null == cosCredentials.getCOSAccessKeyId() + || cosCredentials.getCOSAccessKeyId().isEmpty() + || null == cosCredentials.getCOSSecretKey() + || cosCredentials.getCOSSecretKey().isEmpty()) { + String failMessage = String.format( + "Test COSCredentials failed. The " + + "expected is [secretId: %s, secretKey: %s], but got null or" + + " empty.", testCosNSecretId, testCosNSecretKey); + fail(failMessage); + } + if (cosCredentials.getCOSAccessKeyId() + .compareTo(testCosNSecretId) != 0 + || cosCredentials.getCOSSecretKey() + .compareTo(testCosNSecretKey) != 0) { + String failMessage = String.format("Test " + + "EnvironmentVariableCredentialsProvider failed. " + + "The expected is [secretId: %s, secretKey: %s], but got is " + + "[secretId:%s, secretKey:%s].", testCosNSecretId, + testCosNSecretKey, cosCredentials.getCOSAccessKeyId(), + cosCredentials.getCOSSecretKey()); + fail(failMessage); + } + // expected + } + } + } +} diff --git a/hadoop-cloud-storage-project/pom.xml b/hadoop-cloud-storage-project/pom.xml index f39e8c3aaf9f8..8a61a5356a1d6 100644 --- a/hadoop-cloud-storage-project/pom.xml +++ b/hadoop-cloud-storage-project/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../hadoop-project hadoop-cloud-storage-project - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Cloud Storage Project Apache Hadoop Cloud Storage Project pom diff --git a/hadoop-common-project/hadoop-annotations/pom.xml b/hadoop-common-project/hadoop-annotations/pom.xml index 738f0ada8f1e9..4d9feb2595238 100644 --- a/hadoop-common-project/hadoop-annotations/pom.xml +++ b/hadoop-common-project/hadoop-annotations/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project 
hadoop-annotations - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Annotations Apache Hadoop Annotations jar diff --git a/hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/VisibleForTesting.java b/hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/VisibleForTesting.java new file mode 100644 index 0000000000000..6b405ae972922 --- /dev/null +++ b/hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/VisibleForTesting.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.classification; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotates a program element that exists, or is more widely visible than + * otherwise necessary, specifically for use in test code. + * More precisely test code within the hadoop-* modules. + * Moreover, this gives the implicit scope and stability of: + *

+ *   {@link InterfaceAudience.Private}
+ *   {@link InterfaceStability.Unstable}
+ * 
+ * If external modules need to access/override these methods, then + * they MUST be re-scoped as public/limited private. + */ +@Retention(RetentionPolicy.CLASS) +@Target({ ElementType.TYPE, ElementType.METHOD, ElementType.FIELD, ElementType.CONSTRUCTOR }) +@Documented +public @interface VisibleForTesting { +} diff --git a/hadoop-common-project/hadoop-auth-examples/pom.xml b/hadoop-common-project/hadoop-auth-examples/pom.xml index fb904912999b8..6452b109fa94e 100644 --- a/hadoop-common-project/hadoop-auth-examples/pom.xml +++ b/hadoop-common-project/hadoop-auth-examples/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-auth-examples - 3.3.0-SNAPSHOT + 3.3.6 war Apache Hadoop Auth Examples @@ -47,13 +47,13 @@ compile - log4j - log4j + ch.qos.reload4j + reload4j runtime org.slf4j - slf4j-log4j12 + slf4j-reload4j runtime diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml index 20a3e7059b154..8a9e373af63d9 100644 --- a/hadoop-common-project/hadoop-auth/pom.xml +++ b/hadoop-common-project/hadoop-auth/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-auth - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop Auth @@ -82,13 +82,13 @@ compile - log4j - log4j + ch.qos.reload4j + reload4j runtime org.slf4j - slf4j-log4j12 + slf4j-reload4j runtime @@ -110,24 +110,21 @@ org.bouncycastle bcprov-jdk15on - - - net.minidev - json-smart - - - net.minidev - json-smart - org.apache.zookeeper zookeeper + + io.dropwizard.metrics + metrics-core + + + org.xerial.snappy + snappy-java + provided + org.apache.curator curator-framework @@ -176,6 +173,12 @@ apacheds-server-integ ${apacheds.version} test + + + log4j + log4j + + org.apache.directory.server @@ -183,12 +186,17 @@ ${apacheds.version} test + + org.apache.hadoop.thirdparty + hadoop-shaded-guava + compile + com.google.guava guava - compile + test - + @@ -228,8 +236,8 @@ - 
org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin ${basedir}/dev-support/findbugsExcludeFile.xml @@ -252,7 +260,7 @@ package - javadoc + javadoc-no-fork diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java index 488400647cf06..32f4edfbc5710 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java @@ -153,7 +153,6 @@ private synchronized void setAuthCookie(HttpCookie cookie) { cookieHeaders = new HashMap<>(); cookieHeaders.put("Cookie", Arrays.asList(cookie.toString())); } - LOG.trace("Setting token value to {} ({})", authCookie, oldCookie); } private void setAuthCookieValue(String value) { diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java index 64d43307ffc2d..3bfa349880c5a 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java @@ -13,7 +13,7 @@ */ package org.apache.hadoop.security.authentication.client; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.lang.reflect.Constructor; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.security.authentication.server.HttpConstants; diff --git 
a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java index 94d11f48cf2a9..3658bd8b8ec01 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java @@ -237,8 +237,8 @@ public static SignerSecretProvider constructSecretProvider( provider.init(config, ctx, validity); } catch (Exception e) { if (!disallowFallbackToRandomSecretProvider) { - LOG.info("Unable to initialize FileSignerSecretProvider, " + - "falling back to use random secrets."); + LOG.warn("Unable to initialize FileSignerSecretProvider, " + + "falling back to use random secrets. Reason: " + e.getMessage()); provider = new RandomSignerSecretProvider(); provider.init(config, ctx, validity); } else { @@ -619,11 +619,17 @@ && getMaxInactiveInterval() > 0) { KerberosAuthenticator.WWW_AUTHENTICATE))) { errCode = HttpServletResponse.SC_FORBIDDEN; } + // After Jetty 9.4.21, sendError() no longer allows a custom message. + // use setStatus() to set a custom message. 
+ String reason; if (authenticationEx == null) { - httpResponse.sendError(errCode, "Authentication required"); + reason = "Authentication required"; } else { - httpResponse.sendError(errCode, authenticationEx.getMessage()); + reason = authenticationEx.getMessage(); } + + httpResponse.setStatus(errCode, reason); + httpResponse.sendError(errCode, reason); } } } diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationHandlerUtil.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationHandlerUtil.java index d881c65893058..79739a487b431 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationHandlerUtil.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationHandlerUtil.java @@ -20,7 +20,7 @@ import java.util.Locale; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * This is a utility class designed to provide functionality related to diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java index 884398cb799ee..5e4b0e844275a 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java @@ -28,7 +28,7 @@ import java.security.interfaces.RSAPublicKey; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; 
import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.authentication.util.CertificateUtil; import org.slf4j.Logger; diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java index 50eeb2a965e27..703842f3e3915 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/KerberosAuthenticationHandler.java @@ -13,7 +13,7 @@ */ package org.apache.hadoop.security.authentication.server; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.authentication.client.KerberosAuthenticator; import org.apache.commons.codec.binary.Base64; diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/LdapAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/LdapAuthenticationHandler.java index 8cc8d03447a99..94ed5d44d2a68 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/LdapAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/LdapAuthenticationHandler.java @@ -38,8 +38,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The {@link LdapAuthenticationHandler} implements the BASIC authentication diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/MultiSchemeAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/MultiSchemeAuthenticationHandler.java index 58a0adb237e70..b2499ff734bbe 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/MultiSchemeAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/MultiSchemeAuthenticationHandler.java @@ -30,8 +30,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; -import com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; /** * The {@link MultiSchemeAuthenticationHandler} supports configuring multiple diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/FileSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/FileSignerSecretProvider.java index e8aa160a20877..2a8a712b595ba 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/FileSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/FileSignerSecretProvider.java @@ -13,15 +13,15 @@ */ package org.apache.hadoop.security.authentication.util; -import com.google.common.base.Charsets; import org.apache.hadoop.classification.InterfaceAudience; import 
org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.security.authentication.server.AuthenticationFilter; -import org.apache.hadoop.security.authentication.util.SignerSecretProvider; import javax.servlet.ServletContext; import java.io.*; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.Properties; /** @@ -43,29 +43,24 @@ public void init(Properties config, ServletContext servletContext, String signatureSecretFile = config.getProperty( AuthenticationFilter.SIGNATURE_SECRET_FILE, null); - Reader reader = null; if (signatureSecretFile != null) { - try { + try (Reader reader = new InputStreamReader(Files.newInputStream( + Paths.get(signatureSecretFile)), StandardCharsets.UTF_8)) { StringBuilder sb = new StringBuilder(); - reader = new InputStreamReader( - new FileInputStream(signatureSecretFile), Charsets.UTF_8); int c = reader.read(); while (c > -1) { sb.append((char) c); c = reader.read(); } - secret = sb.toString().getBytes(Charset.forName("UTF-8")); + + secret = sb.toString().getBytes(StandardCharsets.UTF_8); + if (secret.length == 0) { + throw new RuntimeException("No secret in signature secret file: " + + signatureSecretFile); + } } catch (IOException ex) { throw new RuntimeException("Could not read signature secret file: " + signatureSecretFile); - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - // nothing to do - } - } } } diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/JaasConfiguration.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/JaasConfiguration.java new file mode 100644 index 0000000000000..d03e630cedf7e --- /dev/null +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/JaasConfiguration.java @@ -0,0 +1,77 @@ +/** + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ +package org.apache.hadoop.security.authentication.util; + +import java.util.HashMap; +import java.util.Map; +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.Configuration; + + +/** + * Creates a programmatic version of a jaas.conf file. This can be used + * instead of writing a jaas.conf file and setting the system property, + * "java.security.auth.login.config", to point to that file. It is meant to be + * used for connecting to ZooKeeper. + */ +public class JaasConfiguration extends Configuration { + + private final javax.security.auth.login.Configuration baseConfig = + javax.security.auth.login.Configuration.getConfiguration(); + private final AppConfigurationEntry[] entry; + private final String entryName; + + /** + * Add an entry to the jaas configuration with the passed in name, + * principal, and keytab. The other necessary options will be set for you. + * + * @param entryName The name of the entry (e.g. 
"Client") + * @param principal The principal of the user + * @param keytab The location of the keytab + */ + public JaasConfiguration(String entryName, String principal, String keytab) { + this.entryName = entryName; + Map options = new HashMap<>(); + options.put("keyTab", keytab); + options.put("principal", principal); + options.put("useKeyTab", "true"); + options.put("storeKey", "true"); + options.put("useTicketCache", "false"); + options.put("refreshKrb5Config", "true"); + String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); + if ("true".equalsIgnoreCase(jaasEnvVar)) { + options.put("debug", "true"); + } + entry = new AppConfigurationEntry[]{ + new AppConfigurationEntry(getKrb5LoginModuleName(), + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, + options)}; + } + + @Override + public AppConfigurationEntry[] getAppConfigurationEntry(String name) { + return (entryName.equals(name)) ? entry : ((baseConfig != null) + ? baseConfig.getAppConfigurationEntry(name) : null); + } + + private String getKrb5LoginModuleName() { + String krb5LoginModuleName; + if (System.getProperty("java.vendor").contains("IBM")) { + krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule"; + } else { + krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule"; + } + return krb5LoginModuleName; + } +} diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java index 67c2c10237d49..a308cef190396 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java @@ -26,7 +26,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.slf4j.Logger; diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java index 4319aa5b0df98..fc6f957b9622e 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; import java.net.InetAddress; import java.net.UnknownHostException; @@ -73,21 +72,34 @@ private static Oid getNumericOidInstance(String oidName) { } } - public static Oid getOidInstance(String oidName) + /** + * Returns the Oid instance from string oidName. + * Use {@link #GSS_SPNEGO_MECH_OID}, {@link #GSS_KRB5_MECH_OID}, + * or {@link #NT_GSS_KRB5_PRINCIPAL_OID} instead. + * + * @return Oid instance + * @param oidName The oid Name + * @throws ClassNotFoundException for backward compatibility. + * @throws GSSException for backward compatibility. + * @throws NoSuchFieldException if the input is not supported. + * @throws IllegalAccessException for backward compatibility. 
+ * + */ + @Deprecated + public static Oid getOidInstance(String oidName) throws ClassNotFoundException, GSSException, NoSuchFieldException, IllegalAccessException { - Class oidClass; - if (IBM_JAVA) { - if ("NT_GSS_KRB5_PRINCIPAL".equals(oidName)) { - // IBM JDK GSSUtil class does not have field for krb5 principal oid - return new Oid("1.2.840.113554.1.2.2.1"); - } - oidClass = Class.forName("com.ibm.security.jgss.GSSUtil"); - } else { - oidClass = Class.forName("sun.security.jgss.GSSUtil"); + switch (oidName) { + case "GSS_SPNEGO_MECH_OID": + return GSS_SPNEGO_MECH_OID; + case "GSS_KRB5_MECH_OID": + return GSS_KRB5_MECH_OID; + case "NT_GSS_KRB5_PRINCIPAL": + return NT_GSS_KRB5_PRINCIPAL_OID; + default: + throw new NoSuchFieldException( + "oidName: " + oidName + " is not supported."); } - Field oidField = oidClass.getDeclaredField(oidName); - return (Oid)oidField.get(oidClass); } /** diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RandomSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RandomSignerSecretProvider.java index 9245887832102..a57b744c2be0d 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RandomSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RandomSignerSecretProvider.java @@ -13,7 +13,7 @@ */ package org.apache.hadoop.security.authentication.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.security.SecureRandom; import java.util.Random; diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RolloverSignerSecretProvider.java 
b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RolloverSignerSecretProvider.java index e516b5b76a05f..69a09c189be27 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RolloverSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/RolloverSignerSecretProvider.java @@ -18,7 +18,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import javax.servlet.ServletContext; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.slf4j.Logger; diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java index f0c350ed9594b..e2328780752a5 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java @@ -13,16 +13,13 @@ */ package org.apache.hadoop.security.authentication.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.nio.ByteBuffer; import java.security.SecureRandom; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Properties; import java.util.Random; -import javax.security.auth.login.AppConfigurationEntry; import javax.security.auth.login.Configuration; import 
javax.servlet.ServletContext; import org.apache.curator.RetryPolicy; @@ -429,62 +426,4 @@ public List getAclForPath(String path) { return saslACL; } } - - /** - * Creates a programmatic version of a jaas.conf file. This can be used - * instead of writing a jaas.conf file and setting the system property, - * "java.security.auth.login.config", to point to that file. It is meant to be - * used for connecting to ZooKeeper. - */ - @InterfaceAudience.Private - public static class JaasConfiguration extends Configuration { - - private final javax.security.auth.login.Configuration baseConfig = - javax.security.auth.login.Configuration.getConfiguration(); - private static AppConfigurationEntry[] entry; - private String entryName; - - /** - * Add an entry to the jaas configuration with the passed in name, - * principal, and keytab. The other necessary options will be set for you. - * - * @param entryName The name of the entry (e.g. "Client") - * @param principal The principal of the user - * @param keytab The location of the keytab - */ - public JaasConfiguration(String entryName, String principal, String keytab) { - this.entryName = entryName; - Map options = new HashMap(); - options.put("keyTab", keytab); - options.put("principal", principal); - options.put("useKeyTab", "true"); - options.put("storeKey", "true"); - options.put("useTicketCache", "false"); - options.put("refreshKrb5Config", "true"); - String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); - if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) { - options.put("debug", "true"); - } - entry = new AppConfigurationEntry[]{ - new AppConfigurationEntry(getKrb5LoginModuleName(), - AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, - options)}; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String name) { - return (entryName.equals(name)) ? entry : ((baseConfig != null) - ? 
baseConfig.getAppConfigurationEntry(name) : null); - } - - private String getKrb5LoginModuleName() { - String krb5LoginModuleName; - if (System.getProperty("java.vendor").contains("IBM")) { - krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule"; - } else { - krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule"; - } - return krb5LoginModuleName; - } - } } diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/util/PlatformName.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/util/PlatformName.java index eb52839b65ace..c52d5d2135106 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/util/PlatformName.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/util/PlatformName.java @@ -18,6 +18,10 @@ package org.apache.hadoop.util; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Arrays; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -33,21 +37,71 @@ public class PlatformName { * per the java-vm. */ public static final String PLATFORM_NAME = - (System.getProperty("os.name").startsWith("Windows") - ? System.getenv("os") : System.getProperty("os.name")) - + "-" + System.getProperty("os.arch") - + "-" + System.getProperty("sun.arch.data.model"); + (System.getProperty("os.name").startsWith("Windows") ? + System.getenv("os") : System.getProperty("os.name")) + + "-" + System.getProperty("os.arch") + "-" + + System.getProperty("sun.arch.data.model"); /** * The java vendor name used in this platform. */ public static final String JAVA_VENDOR_NAME = System.getProperty("java.vendor"); + /** + * Define a system class accessor that is open to changes in underlying implementations + * of the system class loader modules. 
+ */ + private static final class SystemClassAccessor extends ClassLoader { + public Class getSystemClass(String className) throws ClassNotFoundException { + return findSystemClass(className); + } + } + /** * A public static variable to indicate the current java vendor is - * IBM java or not. + * IBM and the type is Java Technology Edition which provides its + * own implementations of many security packages and Cipher suites. + * Note that these are not provided in Semeru runtimes: + * See https://developer.ibm.com/languages/java/semeru-runtimes for details. */ - public static final boolean IBM_JAVA = JAVA_VENDOR_NAME.contains("IBM"); + public static final boolean IBM_JAVA = JAVA_VENDOR_NAME.contains("IBM") && + hasIbmTechnologyEditionModules(); + + private static boolean hasIbmTechnologyEditionModules() { + return Arrays.asList( + "com.ibm.security.auth.module.JAASLoginModule", + "com.ibm.security.auth.module.Win64LoginModule", + "com.ibm.security.auth.module.NTLoginModule", + "com.ibm.security.auth.module.AIX64LoginModule", + "com.ibm.security.auth.module.LinuxLoginModule", + "com.ibm.security.auth.module.Krb5LoginModule" + ).stream().anyMatch((module) -> isSystemClassAvailable(module)); + } + + /** + * In rare cases where different behaviour is performed based on the JVM vendor + * this method should be used to test for a unique JVM class provided by the + * vendor rather than using the vendor method. For example, if one JVM provides a + * different Kerberos login module, testing for that login module being loadable + * before configuring to use it is preferable to using the vendor data. + * + * @param className the name of a class in the JVM to test for + * @return true if the class is available, false otherwise. 
+ */ + private static boolean isSystemClassAvailable(String className) { + return AccessController.doPrivileged((PrivilegedAction) () -> { + try { + // Using ClassLoader.findSystemClass() instead of + // Class.forName(className, false, null) because Class.forName with a null + // ClassLoader only looks at the boot ClassLoader with Java 9 and above + // which doesn't look at all the modules available to the findSystemClass. + new SystemClassAccessor().getSystemClass(className); + return true; + } catch (Exception ignored) { + return false; + } + }); + } public static void main(String[] args) { System.out.println(PLATFORM_NAME); diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAuthenticationFilter.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAuthenticationFilter.java index 20c0343f957b7..4f4a4521b2f0c 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAuthenticationFilter.java @@ -305,6 +305,34 @@ public byte[][] getAllSecrets() { filter.destroy(); } } + + @Test + public void testEmptySecretFileFallbacksToRandomSecret() throws Exception { + AuthenticationFilter filter = new AuthenticationFilter(); + try { + FilterConfig config = Mockito.mock(FilterConfig.class); + Mockito.when(config.getInitParameter( + AuthenticationFilter.AUTH_TYPE)).thenReturn("simple"); + File secretFile = File.createTempFile("test_empty_secret", ".txt"); + secretFile.deleteOnExit(); + Assert.assertTrue(secretFile.exists()); + Mockito.when(config.getInitParameter( + AuthenticationFilter.SIGNATURE_SECRET_FILE)) + .thenReturn(secretFile.getAbsolutePath()); + Mockito.when(config.getInitParameterNames()).thenReturn( + new Vector<>(Arrays.asList(AuthenticationFilter.AUTH_TYPE, + 
AuthenticationFilter.SIGNATURE_SECRET_FILE)).elements()); + ServletContext context = Mockito.mock(ServletContext.class); + Mockito.when(context.getAttribute( + AuthenticationFilter.SIGNER_SECRET_PROVIDER_ATTRIBUTE)) + .thenReturn(null); + Mockito.when(config.getServletContext()).thenReturn(context); + filter.init(config); + Assert.assertTrue(filter.isRandomSecret()); + } finally { + filter.destroy(); + } + } @Test public void testInitCaseSensitivity() throws Exception { diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java index 629b68bffbbd9..f10371b925758 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java @@ -301,11 +301,10 @@ public String call() throws Exception { GSSContext gssContext = null; try { String servicePrincipal = KerberosTestUtils.getServerPrincipal(); - Oid oid = - KerberosUtil.getOidInstance("NT_GSS_KRB5_PRINCIPAL"); + Oid oid = KerberosUtil.NT_GSS_KRB5_PRINCIPAL_OID; GSSName serviceName = gssManager.createName(servicePrincipal, oid); - oid = KerberosUtil.getOidInstance("GSS_KRB5_MECH_OID"); + oid = KerberosUtil.GSS_KRB5_MECH_OID; gssContext = gssManager.createContext(serviceName, oid, null, GSSContext.DEFAULT_LIFETIME); gssContext.requestCredDeleg(true); diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProvider.java index 7e5b10e641889..a7747398eec46 100644 --- 
a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProvider.java @@ -17,7 +17,7 @@ import java.util.Properties; import javax.servlet.ServletContext; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.security.authentication.server.AuthenticationFilter; diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProviderCreator.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProviderCreator.java index e567e7bfbafb3..cb59c2099fc2c 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProviderCreator.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/StringSignerSecretProviderCreator.java @@ -13,7 +13,7 @@ */ package org.apache.hadoop.security.authentication.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceStability; /** diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestFileSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestFileSignerSecretProvider.java index 1856410fd2943..5d4aabfc7c7a3 100644 --- 
a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestFileSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestFileSignerSecretProvider.java @@ -16,12 +16,16 @@ import org.apache.hadoop.security.authentication.server.AuthenticationFilter; import org.junit.Assert; import org.junit.Test; +import org.junit.function.ThrowingRunnable; import java.io.File; import java.io.FileWriter; import java.io.Writer; import java.util.Properties; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + public class TestFileSignerSecretProvider { @Test @@ -48,4 +52,27 @@ public void testGetSecrets() throws Exception { Assert.assertEquals(1, allSecrets.length); Assert.assertArrayEquals(secretValue.getBytes(), allSecrets[0]); } + + @Test + public void testEmptySecretFileThrows() throws Exception { + File secretFile = File.createTempFile("test_empty_secret", ".txt"); + assertTrue(secretFile.exists()); + + FileSignerSecretProvider secretProvider + = new FileSignerSecretProvider(); + Properties secretProviderProps = new Properties(); + secretProviderProps.setProperty( + AuthenticationFilter.SIGNATURE_SECRET_FILE, + secretFile.getAbsolutePath()); + + Exception exception = + assertThrows(RuntimeException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + secretProvider.init(secretProviderProps, null, -1); + } + }); + assertTrue(exception.getMessage().startsWith( + "No secret in signature secret file:")); + } } diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestJaasConfiguration.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestJaasConfiguration.java index 2b70135800be0..5de4122471f50 100644 --- 
a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestJaasConfiguration.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/util/TestJaasConfiguration.java @@ -32,8 +32,8 @@ public void test() throws Exception { krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule"; } - ZKSignerSecretProvider.JaasConfiguration jConf = - new ZKSignerSecretProvider.JaasConfiguration("foo", "foo/localhost", + JaasConfiguration jConf = + new JaasConfiguration("foo", "foo/localhost", "/some/location/foo.keytab"); AppConfigurationEntry[] entries = jConf.getAppConfigurationEntry("bar"); Assert.assertNull(entries); diff --git a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml index cf5c3874d1063..b885891af73d0 100644 --- a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml @@ -283,6 +283,10 @@ + + + + @@ -375,21 +379,6 @@ - - - - - - - - - - - - diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch b/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch index 8f87d4092bcd3..26eb53a990bac 100644 --- a/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch @@ -1,11 +1,12 @@ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java -index a277abd..ed7c709 100644 +index fef8c4b7e4b..8d2b9339706 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java -@@ 
-43,18 +43,6 @@ +@@ -42,18 +42,6 @@ + @InterfaceAudience.Private public abstract MetricsSystem init(String prefix); - /** +- /** - * Register a metrics source - * @param the actual type of the source object - * @param source object to register @@ -13,19 +14,15 @@ index a277abd..ed7c709 100644 - * the annotations of the source object.) - * @param desc the description of the source (or null. See above.) - * @return the source object -- * @exception MetricsException +- * @exception MetricsException Metrics Exception. - */ - public abstract T register(String name, String desc, T source); - -- /** + /** * Unregister a metrics source * @param name of the source. This is the name you use to call register() - */ -@@ -77,18 +65,19 @@ - */ - @InterfaceAudience.Private +@@ -79,16 +67,16 @@ public abstract MetricsSource getSource(String name); -+ /** - * Register a metrics sink @@ -41,7 +38,7 @@ index a277abd..ed7c709 100644 + * the annotations of the source object.) + * @param desc the description of the source (or null. See above.) + * @return the source object - * @exception MetricsException + * @exception MetricsException Metrics Exception. 
*/ - public abstract - T register(String name, String desc, T sink); @@ -50,10 +47,10 @@ index a277abd..ed7c709 100644 /** * Register a callback interface for JMX events diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java -index 6986edb..eeea81f 100644 +index a6edf08e5a7..5b87be1ec67 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java -@@ -270,27 +270,6 @@ void registerSource(String name, String desc, MetricsSource source) { +@@ -269,31 +269,6 @@ void registerSource(String name, String desc, MetricsSource source) { LOG.debug("Registered source "+ name); } @@ -61,7 +58,11 @@ index 6986edb..eeea81f 100644 - T register(final String name, final String description, final T sink) { - LOG.debug(name +", "+ description); - if (allSinks.containsKey(name)) { -- LOG.warn("Sink "+ name +" already exists!"); +- if(sinks.get(name) == null) { +- registerSink(name, description, sink); +- } else { +- LOG.warn("Sink "+ name +" already exists!"); +- } - return sink; - } - allSinks.put(name, sink); @@ -82,10 +83,10 @@ index 6986edb..eeea81f 100644 checkNotNull(config, "config"); MetricsConfig conf = sinkConfigs.get(name); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java -index c19d238..f8412f1 100644 +index 7bc772f062a..f012c877d18 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java -@@ 
-130,8 +130,8 @@ public void testTagsForPrefix() throws Exception { +@@ -139,8 +139,8 @@ public void testTagsForPrefix() throws Exception { GangliaMetricsTestHelper.setDatagramSocket(gsink31, mockds31); // register the sinks diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.2.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.2.xml new file mode 100644 index 0000000000000..40bea21f378fe --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.2.xml @@ -0,0 +1,35381 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key + @param newKeys + @param customMessage + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key + @param newKey + @param customMessage]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. 
+ + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. 
If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. + @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. + @param value property value.]]> + + + + + + + + value of the name property. 
If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. + + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. 
If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. 
+ @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value]]> + + + + + + + + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

+ This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. 
+ @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. 
+ @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. 
+ @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. + +

+ +

  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • +

    + +

  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • +

    + @param out the writer to write to.]]> + + + + + + + + + + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +

    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    + + +
  • + When propertyName is null or empty, it behaves same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, +
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "key2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "ke2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. +

    + + @param config the configuration + @param out the Writer to write to + @throws IOException]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define their own custom tags in hadoop.tags.custom property. + +

    For example, we can tag existing property as: +

    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException + @throws NoSuchAlgorithmException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supported.]]> + + + + + + + + + + + + default portn some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. 
+ @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + describing modifications, must include entries + for user, group, and others for compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOExceptionlockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866, host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block groups. Value offset is the offset of a block group in the file and + value length is the total length of a block group. Hosts of a BlockLocation + are the datanodes that holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, buf.position() will be advanced by the number + of bytes read and buf.limit() should be unchanged. +

    + In the case of an exception, the values of buf.position() and buf.limit() + are undefined, and callers should be prepared to recover from this + eventuality. +

    + Many implementations will throw {@link UnsupportedOperationException}, so + callers that are not confident in support for this method from the + underlying filesystem should be prepared to handle that exception. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + 
+ + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + 
core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + +

    + + Use the CreateFlag as follows: +

      +
    1. CREATE - to create a file if it does not exist, + else throw FileAlreadyExists.
    2. +
    3. APPEND - to append to a file if it exists, + else throw FileNotFoundException.
    4. +
    5. OVERWRITE - to truncate a file if it exists, + else throw FileNotFoundException.
    6. +
    7. CREATE|APPEND - to create a file if it does not exist, + else append to an existing file.
    8. +
    9. CREATE|OVERWRITE - to create a file if it does not exist, + else overwrite an existing file.
    10. +
    11. SYNC_BLOCK - to force closed blocks to the disk device. + In addition {@link Syncable#hsync()} should be called after each write, + if true synchronous behavior is required.
    12. +
    13. LAZY_PERSIST - Create the block on transient storage (RAM) if + available.
    14. +
    15. APPEND_NEWBLOCK - Append data to a new block instead of end of the last + partial block.
    16. +
    + + Following combinations are not valid and will result in + {@link HadoopIllegalArgumentException}: +
      +
    1. APPEND|OVERWRITE
    2. +
    3. CREATE|APPEND|OVERWRITE
    4. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + absOrFqPath is not supported. + @throws IOException If the file system for absOrFqPath could + not be instantiated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar";
    • +
    • absolute without scheme: "/foo/bar"
    • +
    • fully qualified with scheme: "xx://auth/foo/bar"
    • +
    +
    + Illegal WDs: +
      +
    • relative with scheme: "xx:foo/bar"
    • +
    • non existent directory
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f does not exist + @throws AccessControlException if access denied + @throws IOException If an IO Error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is not + to create parents +
  • The defaults for the following are the server-side defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - e.g. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • ChecksumParam - Checksum parameters. server default is used + if not specified. +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + + dir already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared 
exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + +
  • Fails if path is a directory. +
  • Fails if path does not exist. +
  • Fails if path is not closed. +
  • Fails if new size is greater than current size. + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details. +

    + + @param src path to be renamed + @param dst new path after rename + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Options.Rename#OVERWRITE} + option false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system 
for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException 
If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + <---X---> + fs://host/A/B/link + <-----Y-----> + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file system. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. 
Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file linkcode> already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws 
UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + describing modifications, must include entries + for user, group, and others for compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to remove extended attribute + @param name xattr name + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List of the XAttr names of the file or directory + @throws IOException]]> + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Path Names + + The Hadoop file system supports a URI namespace and URI names. This enables + multiple types of file systems to be referenced using fully-qualified URIs. + Two common Hadoop file system implementations are +

      +
    • the local file system: file:///path +
    • the HDFS file system: hdfs://nnAddress:nnPort/path +
    + + The Hadoop file system also supports additional naming schemes besides URIs. + Hadoop has the concept of a default file system, which implies a + default URI scheme and authority. This enables slash-relative names + relative to the default FS, which are more convenient for users and + application writers. The default FS is typically set by the user's + environment, though it can also be manually specified. +

    + + Hadoop also supports working-directory-relative names, which are paths + relative to the current working directory (similar to Unix). The working + directory can be in a different file system than the default FS. +

    + Thus, Hadoop path names can be specified as one of the following: +

      +
    • a fully-qualified URI: scheme://authority/path (e.g. + hdfs://nnAddress:nnPort/foo/bar) +
    • a slash-relative name: path relative to the default file system (e.g. + /foo/bar) +
    • a working-directory-relative name: path relative to the working dir (e.g. + foo/bar) +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    Role of FileContext and Configuration Defaults

    + + The FileContext is the analogue of per-process file-related state in Unix. It + contains two properties: + +
      +
    • the default file system (for resolving slash-relative names) +
    • the umask (for file permissions) +
    + In general, these properties are obtained from the default configuration file + in the user's environment (see {@link Configuration}). + + Further file system properties are specified on the server-side. File system + operations default to using these server-side defaults unless otherwise + specified. +

    + The file system related server-side defaults are: +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • encryptDataTransfer +
    • checksum option. (checksumType and bytesPerChecksum) +
    + +

    Example Usage

    + + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-default.xml in the release jar. +
      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path); +
    • myFContext.open (path, ...); +
    • ... +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext(); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG + Generally you should not need to use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou; +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
his implementation throws an UnsupportedOperationException. + + @return the protocol scheme for this FileSystem. + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • + If the configuration has the property + {@code "fs.$SCHEME.impl.disable.cache"} set to true, + a new instance will be created, initialized with the supplied URI and + configuration, then returned without being cached. +
  • +
  • + If there is a cached FS instance matching the same URI, it will + be returned. +
  • +
  • + Otherwise: a new FS instance will be created, initialized with the + configuration and URI, cached and returned to the caller. +
  • + + @throws IOException if the FileSystem cannot be instantiated.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if f == null : + result = null + elif f.getLen() <= start: + result = [] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] + + This call is most helpful with and distributed filesystem + where the hostnames of machines that contain blocks of the given file + can be determined. + + The default implementation returns an array containing one element: +
    + BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
    + 
    + + In HDFS, if file is three-replicated, the returned array contains + elements like: +
    + BlockLocation(offset: 0, length: BLOCK_SIZE,
    +   hosts: {"host1:9866", "host2:9866", "host3:9866"})
    + BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    +   hosts: {"host2:9866", "host3:9866", "host4:9866"})
    + 
    + + And if a file is erasure-coded, the returned BlockLocation are logical + block groups. + + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + there will be one BlockLocation returned, with 0 offset, actual file size + and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + 2. If the file size is less than one group size but greater than one + stripe size, then there will be one BlockLocation returned, with 0 offset, + actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + the actual blocks. + 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + for example, then the result will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    +   "host4:9866", "host5:9866"})
    + 
    + + @param file FileStatus to get data from + @param start offset into the given file + @param len length for which to get locations + @throws IOException IO failure]]> +
    +
mportant: the default implementation is not atomic + @param f path to use for create + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory.
  • +
  • Fails if src is a directory and dst is a file.
  • +
  • Fails if the parent of dst does not exist or is a file.
  • + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. + + @param src path to be renamed + @param dst new path after rename + @throws FileNotFoundException src path does not exist, or the parent + path of dst does not exist. + @throws FileAlreadyExistsException dest path exists and is a file + @throws ParentNotDirectoryException if the parent path of dest is not + a directory + @throws IOException on failure]]> + + + + + + + + +

  • Fails if path is a directory.
  • +
  • Fails if path does not exist.
  • +
  • Fails if path is not closed.
  • +
  • Fails if new size is greater than current size.
  • + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
  • Clean shutdown of the JVM cannot be guaranteed.
  • +
  • The time to shut down a FileSystem will depend on the number of + files to delete. For filesystems where the cost of checking + for the existence of a file/directory and the actual delete operation + (for example: object stores) is high, the time to shutdown the JVM can be + significantly extended by over-use of this feature.
  • +
  • Connectivity problems with a remote filesystem may delay shutdown + further, and may cause the files to not be deleted.
  • + + @param f the path to delete. + @return true if deleteOnExit is successful, otherwise false. + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. +

    + Will not return null. Expect IOException upon access error. + @param f given path + @return the statuses of the files/directories in the given patch + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param f + a path name + @param filter + the user-supplied path filter + @return an array of FileStatus objects for the files under the given path + after applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @return a list of statuses for the files under the given paths after + applying the filter default Path filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @param filter + the user-supplied path filter + @return a list of statuses for the files under the given paths after + applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a glob specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred]]> + + + + + + + + + f does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + p does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + + + If the path is a directory, + if recursive is false, returns files in the directory; + if recursive is true, return files in the subtree rooted at the path. + If the path is a file, return the file's status and block locations. + + @param f is the path + @param recursive if the subdirectories need to be traversed recursively + + @return an iterator that traverses statuses of the files + + @throws FileNotFoundException when the path does not exist; + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + undefined. + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. 
+ @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + describing modifications + @throws IOException if an ACL could not be modified + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List of the XAttr names of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is a default method which is intended to be overridden by + subclasses. The default implementation returns an empty storage statistics + object.

    + + @return The StorageStatistics for this FileSystem instance. + Will never be null.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object or its + successor, {@link FileContext}. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +

    + Notes +

      +
    1. The behaviour of the filesystem is + + specified in the Hadoop documentation. + However, the normative specification of the behavior of this class is + actually HDFS: if HDFS does not behave the way these Javadocs or + the specification in the Hadoop documentation defines, assume that + the documentation is incorrect. +
    2. +
    3. The term {@code FileSystem} refers to an instance of this class.
    4. +
    5. The acronym "FS" is used as an abbreviation of FileSystem.
    6. +
    7. The term {@code filesystem} refers to the distributed/local filesystem + itself, rather than the class used to interact with it.
    8. +
    9. The term "file" refers to a file in the remote filesystem, + rather than instances of {@code java.io.File}.
    10. +
    ]]> +
    +
caller's environment variables to use + for expansion + @return String[] with absolute path to new jar in position 0 and + unexpanded wild card entry path in position 1 + @throws IOException if there is an I/O error while writing the jar fileilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + if there is no more data because the end of the stream has been + reached]]> + + + + + + + + + + length bytes have been read. + + @param position position in the input stream to seek + @param buffer buffer into which data is read + @param offset offset into the buffer in which data is written + @param length the number of bytes to read + @throws IOException IO problems + @throws EOFException If the end of stream is reached while reading. + If an exception is thrown an undetermined number + of bytes in the buffer may have been writtenon't + if (fs instanceof FooFileSystem) { + FooFileSystem fs = (FooFileSystem) fs; + OutputStream out = dfs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build() + } else if (fs instanceof BarFileSystem) { + ... + } + + // Do + OutputStream out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + + + If the option is not related to the file system, the option will be ignored. 
+ If the option is must, but not supported by the file system, a + {@link IllegalArgumentException} will be thrown.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + and the scheme is null, and the authority + is null. + + @return whether the path is absolute and the URI has no scheme nor + authority parts]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @return actual number of bytes read; -1 means "none" + @throws IOException IO problems.]]> + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. 
+ @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completedote: Returned list is not sorted in any given order, + due to reliance on Java's {@link File#list()} API.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + XAttr is byte[], this class is to + covert byte[] to some kind of string representation or convert back. + String representation is convenient for display and input. 
For example + display in screen as shell response and json response, input as http + or shell parameter.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return ftp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + }o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. 
+ + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value + @param elementType]]> + + + + + value should not be null + or empty. + + @param value]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value + @param elementType]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. + + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. 
The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName - key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Uncompressed number of records in the block
      • +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every block. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. 
+ @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
    +         int thisValue = this.counter;
    +         int thatValue = o.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
    +         return result;
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The codec alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + aliases are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. 
+ + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. {@link #finished()} will be reset and will + return false when reset() is called

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks. The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Defaults + to 1MB. Values whose length is less than the chunk size are guaranteed to have + a known value length at read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers slightly better + compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream are already adequately buffered; or if applications + read/write keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; + byte[4]=(n>>8)&0xff; byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; + byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; + byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; + byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>56)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; + byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; + byte[7]=(n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
mplementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException]]> + + + + + + + + + + + + + + + + + + + + +
  • {@link MetricsSource} generate and update metrics information.
  • +
  • {@link MetricsSink} consume the metrics information
  • + + + {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} poll the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
aggregate). + Filter out entries that don't have at least minSamples. + + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters. + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=" + Where the and are the supplied parameters. + + @param serviceName + @param nameName + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. + @param conf]]> + + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
    + + + + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mapping + and mapping]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /host@realm. 
+ @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "jack" + + @param userName + @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifierextends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. 
+ @param doAs user to do the the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. 
+ @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. 
+ @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} valuelive. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration built up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)}. + After the service has been successfully started via {@link Service#start()}, + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit code.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte; e.g. 44, "not found", is the equivalent + of 404. The various 2XX HTTP status codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take significant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occurred and time-out the operation.

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. 
+ @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement on a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml new file mode 100644 index 0000000000000..10a4f0d5f16e5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml @@ -0,0 +1,35426 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key + @param newKeys + @param customMessage + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key + @param newKey + @param customMessage]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. 
+ + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. 
If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. + @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. + @param value property value.]]> + + + + + + + + value of the name property. 
If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. + + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. 
If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. 
+ @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value]]> + + + + + + + + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. 
+ @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. 
+ @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. 
+ @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. + +

    + +

  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • +

    + +

  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • +

    + @param out the writer to write to.]]> + + + + + + + + + + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +

    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    + + +
  • + When propertyName is null or empty, it behaves the same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, +
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "key2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "key2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. +

    + + @param config the configuration + @param out the Writer to write to + @throws IOException]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define their own custom tags in hadoop.tags.custom property. + +

    For example, we can tag existing property as: +

    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException + @throws NoSuchAlgorithmException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supported.]]> + + + + + + + + + + + + default portn some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. 
+ @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + describing modifications, must include entries + for user, group, and others for compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException]]> + + BlockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866", "host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block group. Value offset is the offset of a block group in the file and + value length is the total length of a block group. Hosts of a BlockLocation + are the datanodes holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, buf.position() will be advanced by the number + of bytes read and buf.limit() should be unchanged. +

    + In the case of an exception, the values of buf.position() and buf.limit() + are undefined, and callers should be prepared to recover from this + eventuality. +

    + Many implementations will throw {@link UnsupportedOperationException}, so + callers that are not confident in support for this method from the + underlying filesystem should be prepared to handle that exception. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + 
+ + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + 
core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + +

    + + Use the CreateFlag as follows: +

      +
    1. CREATE - to create a file if it does not exist, + else throw FileAlreadyExists.
    2. +
    3. APPEND - to append to a file if it exists, + else throw FileNotFoundException.
    4. +
    5. OVERWRITE - to truncate a file if it exists, + else throw FileNotFoundException.
    6. +
    7. CREATE|APPEND - to create a file if it does not exist, + else append to an existing file.
    8. +
    9. CREATE|OVERWRITE - to create a file if it does not exist, + else overwrite an existing file.
    10. +
    11. SYNC_BLOCK - to force closed blocks to the disk device. + In addition {@link Syncable#hsync()} should be called after each write, + if true synchronous behavior is required.
    12. +
    13. LAZY_PERSIST - Create the block on transient storage (RAM) if + available.
    14. +
    15. APPEND_NEWBLOCK - Append data to a new block instead of end of the last + partial block.
    16. +
    + + Following combinations are not valid and will result in + {@link HadoopIllegalArgumentException}: +
      +
    1. APPEND|OVERWRITE
    2. +
    3. CREATE|APPEND|OVERWRITE
    4. +
    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + absOrFqPath is not supported. + @throws IOException If the file system for absOrFqPath could + not be instantiated.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar";
    • +
    • absolute without scheme: "/foo/bar"
    • +
    • fully qualified with scheme: "xx://auth/foo/bar"
    • +
    +
    + Illegal WDs: +
      +
    • relative with scheme: "xx:foo/bar"
    • +
    • non existent directory
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f does not exist + @throws AccessControlException if access denied + @throws IOException If an IO Error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is not + to create parents +
  • The defaults for the following are the server-side (SS) defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - e.g. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • ChecksumParam - Checksum parameters. server default is used + if not specified. +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + + dir already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared 
exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + +
  • Fails if path is a directory. +
  • Fails if path does not exist. +
  • Fails if path is not closed. +
  • Fails if new size is greater than current size. + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details +

    + + @param src path to be renamed + @param dst new path after rename + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Options.Rename#OVERWRITE} + option false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system 
for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException 
If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + <---X---> + fs://host/A/B/link + <-----Y-----> + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file system. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. 
Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file linkcode> already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws 
UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + describing modifications, must include entries + for user, group, and others for compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to remove extended attribute + @param name xattr name + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List of the XAttr names of the file or directory + @throws IOException]]> + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Path Names + + The Hadoop file system supports a URI namespace and URI names. This enables + multiple types of file systems to be referenced using fully-qualified URIs. + Two common Hadoop file system implementations are +

      +
    • the local file system: file:///path +
    • the HDFS file system: hdfs://nnAddress:nnPort/path +
    + + The Hadoop file system also supports additional naming schemes besides URIs. + Hadoop has the concept of a default file system, which implies a + default URI scheme and authority. This enables slash-relative names + relative to the default FS, which are more convenient for users and + application writers. The default FS is typically set by the user's + environment, though it can also be manually specified. +

    + + Hadoop also supports working-directory-relative names, which are paths + relative to the current working directory (similar to Unix). The working + directory can be in a different file system than the default FS. +

    + Thus, Hadoop path names can be specified as one of the following: +

      +
    • a fully-qualified URI: scheme://authority/path (e.g. + hdfs://nnAddress:nnPort/foo/bar) +
    • a slash-relative name: path relative to the default file system (e.g. + /foo/bar) +
    • a working-directory-relative name: path relative to the working dir (e.g. + foo/bar) +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    Role of FileContext and Configuration Defaults

    + + The FileContext is the analogue of per-process file-related state in Unix. It + contains two properties: + +
      +
    • the default file system (for resolving slash-relative names) +
    • the umask (for file permissions) +
    + In general, these properties are obtained from the default configuration file + in the user's environment (see {@link Configuration}). + + Further file system properties are specified on the server-side. File system + operations default to using these server-side defaults unless otherwise + specified. +

    + The file system related server-side defaults are: +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • encryptDataTransfer +
    • checksum option. (checksumType and bytesPerChecksum) +
    + +

    Example Usage

    + + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-default.xml in the release jar. +
      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path); +
    • myFContext.open (path, ...); +
    • ... +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext(); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG + Generally you should not need to use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou; +
    • myFContext = FileContext.getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
his implementation throws an UnsupportedOperationException. + + @return the protocol scheme for this FileSystem. + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • + If the configuration has the property + {@code "fs.$SCHEME.impl.disable.cache"} set to true, + a new instance will be created, initialized with the supplied URI and + configuration, then returned without being cached. +
  • +
  • + If there is a cached FS instance matching the same URI, it will + be returned. +
  • +
  • + Otherwise: a new FS instance will be created, initialized with the + configuration and URI, cached and returned to the caller. +
  • + + @throws IOException if the FileSystem cannot be instantiated.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if f == null : + result = null + elif f.getLen() <= start: + result = [] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] + + This call is most helpful with and distributed filesystem + where the hostnames of machines that contain blocks of the given file + can be determined. + + The default implementation returns an array containing one element: +
    + BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
    + 
    + + In HDFS, if file is three-replicated, the returned array contains + elements like: +
    + BlockLocation(offset: 0, length: BLOCK_SIZE,
    +   hosts: {"host1:9866", "host2:9866", "host3:9866"})
    + BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    +   hosts: {"host2:9866", "host3:9866", "host4:9866"})
    + 
    + + And if a file is erasure-coded, the returned BlockLocations are logical + block groups. + + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + there will be one BlockLocation returned, with 0 offset, actual file size + and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + 2. If the file size is less than one group size but greater than one + stripe size, then there will be one BlockLocation returned, with 0 offset, + actual file size and 5 hosts (3 data blocks and 2 parity blocks) hosting + the actual blocks. + 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + for example, then the result will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    +   "host4:9866", "host5:9866"})
    + 
    + + @param file FilesStatus to get data from + @param start offset into the given file + @param len length for which to get locations for + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: the default implementation is not atomic + @param f path to use for create + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory.
  • +
  • Fails if src is a directory and dst is a file.
  • +
  • Fails if the parent of dst does not exist or is a file.
  • + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. + + @param src path to be renamed + @param dst new path after rename + @throws FileNotFoundException src path does not exist, or the parent + path of dst does not exist. + @throws FileAlreadyExistsException dest path exists and is a file + @throws ParentNotDirectoryException if the parent path of dest is not + a directory + @throws IOException on failure]]> + + + + + + + + +

  • Fails if path is a directory.
  • +
  • Fails if path does not exist.
  • +
  • Fails if path is not closed.
  • +
  • Fails if new size is greater than current size.
  • + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
  • Clean shutdown of the JVM cannot be guaranteed.
  • +
  • The time to shut down a FileSystem will depend on the number of + files to delete. For filesystems where the cost of checking + for the existence of a file/directory and the actual delete operation + (for example: object stores) is high, the time to shut down the JVM can be + significantly extended by over-use of this feature.
  • +
  • Connectivity problems with a remote filesystem may delay shutdown + further, and may cause the files to not be deleted.
  • + + @param f the path to delete. + @return true if deleteOnExit is successful, otherwise false. + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. +

    + Will not return null. Expect IOException upon access error. + @param f given path + @return the statuses of the files/directories in the given patch + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param f + a path name + @param filter + the user-supplied path filter + @return an array of FileStatus objects for the files under the given path + after applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @return a list of statuses for the files under the given paths after + applying the filter default Path filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @param filter + the user-supplied path filter + @return a list of statuses for the files under the given paths after + applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a glob specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred]]> + + + + + + + + + f does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + p does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + + + If the path is a directory, + if recursive is false, returns files in the directory; + if recursive is true, return files in the subtree rooted at the path. + If the path is a file, return the file's status and block locations. + + @param f is the path + @param recursive if the subdirectories need to be traversed recursively + + @return an iterator that traverses statuses of the files + + @throws FileNotFoundException when the path does not exist; + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + undefined. + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. 
+ @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + describing modifications + @throws IOException if an ACL could not be modified + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List of the XAttr names of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is a default method which is intended to be overridden by + subclasses. The default implementation returns an empty storage statistics + object.

    + + @return The StorageStatistics for this FileSystem instance. + Will never be null.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object or its + successor, {@link FileContext}. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +

    + Notes +

      +
    1. The behavior of the filesystem is + + specified in the Hadoop documentation. + However, the normative specification of the behavior of this class is + actually HDFS: if HDFS does not behave the way these Javadocs or + the specification in the Hadoop documentation defines, assume that + the documentation is incorrect. +
    2. +
    3. The term {@code FileSystem} refers to an instance of this class.
    4. +
    5. The acronym "FS" is used as an abbreviation of FileSystem.
    6. +
    7. The term {@code filesystem} refers to the distributed/local filesystem + itself, rather than the class used to interact with it.
    8. +
    9. The term "file" refers to a file in the remote filesystem, + rather than instances of {@code java.io.File}.
    10. +
    ]]> +
caller's environment variables to use + for expansion + @return String[] with absolute path to new jar in position 0 and + unexpanded wild card entry path in position 1 + @throws IOException if there is an I/O error while writing the jar fileilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + if there is no more data because the end of the stream has been + reached]]> + + + + + + + + + + length bytes have been read. + + @param position position in the input stream to seek + @param buffer buffer into which data is read + @param offset offset into the buffer in which data is written + @param length the number of bytes to read + @throws IOException IO problems + @throws EOFException If the end of stream is reached while reading. + If an exception is thrown an undetermined number + of bytes in the buffer may have been writtenon't + if (fs instanceof FooFileSystem) { + FooFileSystem fs = (FooFileSystem) fs; + OutputStream out = dfs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build() + } else if (fs instanceof BarFileSystem) { + ... + } + + // Do + OutputStream out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + + + If the option is not related to the file system, the option will be ignored. 
+ If the option is must, but not supported by the file system, a + {@link IllegalArgumentException} will be thrown.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + and the scheme is null, and the authority + is null. + + @return whether the path is absolute and the URI has no scheme nor + authority parts]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @return actual number of bytes read; -1 means "none" + @throws IOException IO problems.]]> + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. 
+ @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completedote: Returned list is not sorted in any given order, + due to reliance on Java's {@link File#list()}ttr is byte[], this class is to + covert byte[] to some kind of string representation or convert back. + String representation is convenient for display and input. For example + display in screen as shell response and json response, input as http + or shell parameter.]]> + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + @return ftp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + }o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. 
+ + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value + @param elementType]]> + + + + + value should not be null + or empty. + + @param value]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value + @param elementType]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. + + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. 
The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName - key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Uncompressed number of records in the block
      • +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every block. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. 
+ @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
    +         int thisValue = this.value;
    +         int thatValue = o.value;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
    +         return result;
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The codec alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example, for the 'GzipCodec' codec class name the + aliases are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. 
+ + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. {@link #finished()} will be reset and will + return false when reset() is called

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks. The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values whose length is less than the chunk size are guaranteed to have + a known value length at read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB and 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there is more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers slightly better + compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering. If the underlying FSDataInputStream and + FSDataOutputStream are already adequately buffered, or if applications + read/write keys and values in large buffers, we can reduce the sizes of + input/output buffering in the TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 128): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; + byte[4]=(n>>8)&0xff; byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; + byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; + byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; + byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>56)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; + byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; + byte[7]=(n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
mplementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException]]> + + + + + + + + + + + + + + + + + + + + +
  • {@link MetricsSource} generates and updates metrics information.
  • +
  • {@link MetricsSink} consumes the metrics information.
  • + + + {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} poll the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
aggregate). + Filter out entries that don't have at least minSamples. + + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + + + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters. + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=" + Where the and are the supplied parameters. + + @param serviceName + @param nameName + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. + @param conf]]> + + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
    + + + + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mapping + and mapping]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /host@realm. 
+ @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "jack" + + @param userName + @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifierextends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. 
+ @param doAs user to do the the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. 
+ @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. 
+ @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} valuelive. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration build up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)}. + After the service has been successfully started via {@link Service#start()}, + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit code.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte, e.g. 44, "not found", is the equivalent + of 404. The various 2XX HTTP error codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take significant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occurred and time-out the operation.

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. 
+ @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of an s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.3.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.3.xml new file mode 100644 index 0000000000000..448df9ddd686b --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.3.xml @@ -0,0 +1,39037 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @param customMessage depcrication message + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. 
+ + @param key to be deprecated + @param newKey key that take up the values of deprecated key + @param customMessage deprecation message]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. 
After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + As a side effect get loads the properties from the sources if called for + the first time as a lazy init. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. 
+ @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. + @param value property value.]]> + + + + + + + + value of the name property. If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. 
+ + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. 
+ @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value + @param enumeration type]]> + + + + + + + enumeration type + @throws IllegalArgumentException If mapping is illegal for the type + provided + @return enumeration type]]> + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. 
+ If no such property is specified then an empty collection is returned. + 

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. 
+ @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the conf key name. + @param defaultValue default value. 
+ @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the conf key name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. 
+ @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. +
  • + +
  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • + +
  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • + + @param out the writer to write to.]]> +
    + + + + + + + + +
  • + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +
    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    +
  • + +
  • + When propertyName is null or empty, it behaves the same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, + 
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "key2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • + +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "key2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. + 

    + + @param config the configuration + @param out the Writer to write to + @throws IOException]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + } with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    
    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    
    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define their own custom tags in hadoop.tags.custom property. + 

    For example, we can tag existing property as: +

    
    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException + @throws NoSuchAlgorithmException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supportedn some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. 
+ @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing modifications, must + include entries for user, group, and others for compatibility with + permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOExceptionlockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866, host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block groups. Value offset is the offset of a block group in the file and + value length is the total length of a block group. Hosts of a BlockLocation + are the datanodes that holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. +

    + This does not change the current offset of a file, and is thread-safe. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if reached + end-of-stream + @throws IOException if there is some error performing the read]]> + + + + + + + + + This operation provides similar semantics to + {@link #read(long, ByteBuffer)}, the difference is that this method is + guaranteed to read data until the {@link ByteBuffer} is full, or until + the end of the data stream is reached. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @throws IOException if there is some error performing the read + @throws EOFException the end of the data was reached before + the read operation completed + @see #read(long, ByteBuffer)]]> + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + +

    + A higher number here does not necessarily improve performance, especially + for object stores, where multiple threads may be attempting to create an FS + instance for the same URI. +

    + Default value: {@value}.]]> +
    + + + +

    + Default value: {@value}.]]> +
    +
    + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + 
core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + +

    + + Use the CreateFlag as follows: +

      +
    1. CREATE - to create a file if it does not exist, + else throw FileAlreadyExists.
    2. +
    3. APPEND - to append to a file if it exists, + else throw FileNotFoundException.
    4. +
    5. OVERWRITE - to truncate a file if it exists, + else throw FileNotFoundException.
    6. +
    7. CREATE|APPEND - to create a file if it does not exist, + else append to an existing file.
    8. +
    9. CREATE|OVERWRITE - to create a file if it does not exist, + else overwrite an existing file.
    10. +
    11. SYNC_BLOCK - to force closed blocks to the disk device. + In addition {@link Syncable#hsync()} should be called after each write, + if true synchronous behavior is required.
    12. +
    13. LAZY_PERSIST - Create the block on transient storage (RAM) if + available.
    14. +
    15. APPEND_NEWBLOCK - Append data to a new block instead of the end of the last + partial block.
    16. +
    + + The following combinations are not valid and will result in + {@link HadoopIllegalArgumentException}: + 
      +
    1. APPEND|OVERWRITE
    2. +
    3. CREATE|APPEND|OVERWRITE
    4. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + absOrFqPath is not supported. + @throws IOException If the file system for absOrFqPath could + not be instantiated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar";
    • +
    • absolute without scheme: "/foo/bar"
    • +
    • fully qualified with scheme: "xx://auth/foo/bar"
    • +
    +
    + Illegal WDs: +
      +
    • relative with scheme: "xx:foo/bar"
    • +
    • non-existent directory
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f does not exist + @throws AccessControlException if access denied + @throws IOException If an IO Error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is not + to create parents +
  • The defaults for the following are the server-side (SS) defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - e.g. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • ChecksumParam - Checksum parameters. server default is used + if not specified. +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + + dir already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared 
exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + +
  • Fails if path is a directory. +
  • Fails if path does not exist. +
  • Fails if path is not closed. +
  • Fails if new size is greater than current size. + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details. +

    + + @param src path to be renamed + @param dst new path after rename + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Options.Rename#OVERWRITE} + option false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system 
for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException 
If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + {@literal <---}X{@literal --->} + fs://host/A/B/link + {@literal <-----}Y{@literal ----->} + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file system. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. 
Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file link already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException 
If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing + modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries + to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing + modifications, must include entries for user, group, and others for + compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns + each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to remove extended attribute + @param name xattr name + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal <}String{@literal >} of the XAttr names of the + file or directory + @throws IOException]]> + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Path Names + + The Hadoop file system supports a URI namespace and URI names. 
This enables + multiple types of file systems to be referenced using fully-qualified URIs. + Two common Hadoop file system implementations are +

      +
    • the local file system: file:///path +
    • the HDFS file system: hdfs://nnAddress:nnPort/path +
    + + The Hadoop file system also supports additional naming schemes besides URIs. + Hadoop has the concept of a default file system, which implies a + default URI scheme and authority. This enables slash-relative names + relative to the default FS, which are more convenient for users and + application writers. The default FS is typically set by the user's + environment, though it can also be manually specified. +

    + + Hadoop also supports working-directory-relative names, which are paths + relative to the current working directory (similar to Unix). The working + directory can be in a different file system than the default FS. +

    + Thus, Hadoop path names can be specified as one of the following: +

      +
    • a fully-qualified URI: scheme://authority/path (e.g. + hdfs://nnAddress:nnPort/foo/bar) +
    • a slash-relative name: path relative to the default file system (e.g. + /foo/bar) +
    • a working-directory-relative name: path relative to the working dir (e.g. + foo/bar) +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    Role of FileContext and Configuration Defaults

    + + The FileContext is the analogue of per-process file-related state in Unix. It + contains two properties: + +
      +
    • the default file system (for resolving slash-relative names) +
    • the umask (for file permissions) +
    + In general, these properties are obtained from the default configuration file + in the user's environment (see {@link Configuration}). + + Further file system properties are specified on the server-side. File system + operations default to using these server-side defaults unless otherwise + specified. +

    + The file system related server-side defaults are: +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • encryptDataTransfer +
    • checksum option. (checksumType and bytesPerChecksum) +
    + +

    Example Usage

    + + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-defaults.xml in the release jar. +
      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path); +
    • myFContext.open (path, ...); +
    • ... +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext(); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG + Generally you should not need to use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou; +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
his implementation throws an UnsupportedOperationException. + + @return the protocol scheme for this FileSystem. + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • + If the configuration has the property + {@code "fs.$SCHEME.impl.disable.cache"} set to true, + a new instance will be created, initialized with the supplied URI and + configuration, then returned without being cached. +
  • +
  • + If there is a cached FS instance matching the same URI, it will + be returned. +
  • +
  • + Otherwise: a new FS instance will be created, initialized with the + configuration and URI, cached and returned to the caller. +
  • + + @throws IOException if the FileSystem cannot be instantiated.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if f == null : + result = null + elif f.getLen() {@literal <=} start: + result = [] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] + + This call is most helpful with and distributed filesystem + where the hostnames of machines that contain blocks of the given file + can be determined. + + The default implementation returns an array containing one element: +
    + BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
    + 
    + + In HDFS, if file is three-replicated, the returned array contains + elements like: +
    + BlockLocation(offset: 0, length: BLOCK_SIZE,
    +   hosts: {"host1:9866", "host2:9866", "host3:9866"})
    + BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    +   hosts: {"host2:9866", "host3:9866", "host4:9866"})
    + 
    + + And if a file is erasure-coded, the returned BlockLocations are logical + block groups. + + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + there will be one BlockLocation returned, with 0 offset, actual file size + and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + 2. If the file size is less than one group size but greater than one + stripe size, then there will be one BlockLocation returned, with 0 offset, + actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + the actual blocks. + 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + for example, then the result will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    +   "host4:9866", "host5:9866"})
    + 
    + + @param file FileStatus to get data from + @param start offset into the given file + @param len length for which to get locations + @throws IOException IO failure]]> +
    +
mportant: the default implementation is not atomic + @param f path to use for create + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory.
  • +
  • Fails if src is a directory and dst is a file.
  • +
  • Fails if the parent of dst does not exist or is a file.
  • + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. + + @param src path to be renamed + @param dst new path after rename + @throws FileNotFoundException src path does not exist, or the parent + path of dst does not exist. + @throws FileAlreadyExistsException dest path exists and is a file + @throws ParentNotDirectoryException if the parent path of dest is not + a directory + @throws IOException on failure]]> + + + + + + + + +

  • Fails if path is a directory.
  • +
  • Fails if path does not exist.
  • +
  • Fails if path is not closed.
  • +
  • Fails if new size is greater than current size.
  • + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
  • Clean shutdown of the JVM cannot be guaranteed.
  • +
  • The time to shut down a FileSystem will depend on the number of + files to delete. For filesystems where the cost of checking + for the existence of a file/directory and the actual delete operation + (for example: object stores) is high, the time to shut down the JVM can be + significantly extended by over-use of this feature.
  • +
  • Connectivity problems with a remote filesystem may delay shutdown + further, and may cause the files to not be deleted.
  • + + @param f the path to delete. + @return true if deleteOnExit is successful, otherwise false. + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. +

    + Will not return null. Expect IOException upon access error. + @param f given path + @return the statuses of the files/directories in the given patch + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param f + a path name + @param filter + the user-supplied path filter + @return an array of FileStatus objects for the files under the given path + after applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @return a list of statuses for the files under the given paths after + applying the filter default Path filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @param filter + the user-supplied path filter + @return a list of statuses for the files under the given paths after + applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +
    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a glob specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred]]> + + + + + + + + + f does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + p does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + + + If the path is a directory, + if recursive is false, returns files in the directory; + if recursive is true, return files in the subtree rooted at the path. + If the path is a file, return the file's status and block locations. + + @param f is the path + @param recursive if the subdirectories need to be traversed recursively + + @return an iterator that traverses statuses of the files + + @throws FileNotFoundException when the path does not exist; + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + undefined. + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal } of the XAttr names of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to remove extended attribute + @param name xattr name + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is a default method which is intended to be overridden by + subclasses. The default implementation returns an empty storage statistics + object.

    + + @return The StorageStatistics for this FileSystem instance. + Will never be null.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object or its + successor, {@link FileContext}. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +

    + Notes +

      +
    1. The behaviour of the filesystem is + + specified in the Hadoop documentation. + However, the normative specification of the behavior of this class is + actually HDFS: if HDFS does not behave the way these Javadocs or + the specification in the Hadoop documentation defines, assume that + the documentation is incorrect. +
    2. +
    3. The term {@code FileSystem} refers to an instance of this class.
    4. +
    5. The acronym "FS" is used as an abbreviation of FileSystem.
    6. +
    7. The term {@code filesystem} refers to the distributed/local filesystem + itself, rather than the class used to interact with it.
    8. +
    9. The term "file" refers to a file in the remote filesystem, + rather than instances of {@code java.io.File}.
    10. +
    + + This is a carefully evolving class. + New methods may be marked as Unstable or Evolving for their initial release, + as a warning that they are new and may change based on the + experience of use in applications. +

    + Important note for developers +

    + If you are making changes here to the public API or protected methods, + you must review the following subclasses and make sure that + they are filtering/passing through new methods as appropriate. +

    + + {@link FilterFileSystem}: methods are passed through. If not, + then {@code TestFilterFileSystem.MustNotImplement} must be + updated with the unsupported interface. + Furthermore, if the new API's support is probed for via + {@link #hasPathCapability(Path, String)} then + {@link FilterFileSystem#hasPathCapability(Path, String)} + must return false, always. +

    + {@link ChecksumFileSystem}: checksums are created and + verified. +

    + {@code TestHarFileSystem} will need its {@code MustNotImplement} + interface updated. +

    + + There are some external places your changes will break things. + Do co-ordinate changes here. +

    + + HBase: HBoss +

    + Hive: HiveShim23 + {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}]]> +
    +
} caller's + environment variables to use for expansion + @return String[] with absolute path to new jar in position 0 and + unexpanded wild card entry path in position 1 + @throws IOException if there is an I/O error while writing the jar fileilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return type on the {@link #build()} call. + @param type of builder itself.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + if there is no more data because the end of the stream has been + reached]]> + + + + + + + + + + length bytes have been read. + + @param position position in the input stream to seek + @param buffer buffer into which data is read + @param offset offset into the buffer in which data is written + @param length the number of bytes to read + @throws IOException IO problems + @throws EOFException If the end of stream is reached while reading. 
+ If an exception is thrown an undetermined number + of bytes in the buffer may have been written.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // Don't + if (fs instanceof FooFileSystem) { + FooFileSystem fs = (FooFileSystem) fs; + OutputStream out = dfs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build() + } else if (fs instanceof BarFileSystem) { + ... + } + + // Do + OutputStream out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + + + If the option is not related to the file system, the option will be ignored. 
+ If the option is must, but not supported by the file system, a + {@link IllegalArgumentException} will be thrown.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + The interface extends {@link IOStatisticsSource} so that there is no + need to cast an instance to see if it is a source of statistics. + However, implementations MAY return null for their actual statistics.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ListingBatch behaves similar to a Future, in that getting the result via + {@link #get()} will throw an Exception if there was a failure.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + and the scheme is null, and the authority + is null. + + @return whether the path is absolute and the URI has no scheme nor + authority parts]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @return actual number of bytes read; -1 means "none" + @throws IOException IO problems.]]> + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @throws IOException IO problems. 
+ @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note
    : Returned list is not sorted in any given order, + due to reliance on Java's {@link File#list()} API.)]]> + +
ttr is byte[], this class is to + covert byte[] to some kind of string representation or convert back. + String representation is convenient for display and input. For example + display in screen as shell response and json response, input as http + or shell parameter.]]> + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return ftp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is for reporting and testing.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + These are low-cost per-instance statistics provided by any Hadoop + I/O class instance. +

    + Consult the filesystem specification document for the requirements + of an implementation of this interface.]]> + + + + + + + + + + + + + + + + + + + + + + + Exceptions are caught and downgraded to debug logging. + @param source source of statistics. + @return a string for logging.]]> + + + + + + + + + + + + + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is designed to be affordable to use in log statements. + @param source source of statistics -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is for use in log statements where for the cost of creation + of this entry is low; it is affordable to use in log statements. + @param statistics statistics to stringify -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It is serializable so that frameworks which can use java serialization + to propagate data (Spark, Flink...) can send the statistics + back. For this reason, TreeMaps are explicitly used as field types, + even though IDEs can recommend use of Map instead. + For security reasons, untrusted java object streams should never be + deserialized. If for some reason this is required, use + {@link #requiredSerializationClasses()} to get the list of classes + used when deserializing instances of this object. +

    +

    + It is annotated for correct serializations with jackson2. +

    ]]> +
    + + + + + + + + + This is not an atomic option. +

    + The instance can be serialized, and its + {@code toString()} method lists all the values. + @param statistics statistics + @return a snapshot of the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used to accrue values so as to dynamically update + the mean. If so, know that there is no synchronization + on the methods. +

    +

    + If a statistic has 0 samples then it is considered to be empty. +

    +

    + All 'empty' statistics are equivalent, independent of the sum value. +

    +

    + For non-empty statistics, sum and sample values must match + for equality. +

    +

    + It is serializable and annotated for correct serializations with jackson2. +

    +

    + Thread safety. The operations to add/copy sample data are thread safe. +

    +
      +
    1. {@link #add(MeanStatistic)}
    2. +
    3. {@link #addSample(long)}
    4. +
    5. {@link #clear()}
    6. +
    7. {@link #setSamplesAndSum(long, long)}
    8. +
    9. {@link #set(MeanStatistic)}
    10. +
    11. {@link #setSamples(long)} and {@link #setSum(long)}
    12. +
    +

    + So is the {@link #mean()} method. This ensures that when + used to aggregate statistics, the aggregate value and sample + count are set and evaluated consistently. +

    +

    + Other methods are marked as synchronized because Findbugs overreacts + to the idea that some operations to update sum and sample count + are synchronized, but that things like equals are not. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + names)}: {@value}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention: +

    +
      +
    • the names of the constants are uppercase, words separated by + underscores.
    • +
    • the values of the constants are the lowercase form of the constant names.
    • +
    ]]> +
    +
When adding new common statistic name constants, please make them unique. + By convention, they are implicitly unique: +
      +
    • + The names of the constants are uppercase, words separated by + underscores. +
    • +
    • + The values of the constants are the lowercase form of the constant names. +
    • +
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + }o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. 
+ + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value + @param elementType]]> + + + + + value should not be null + or empty. + + @param value]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value + @param elementType]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implements the abstract method getTypes(), defines + the classes which will be wrapped in GenericObject in application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. + + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. 
The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName - key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Uncompressed number of records in the block
      • +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every block. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. 
+ @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +
    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 
    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +
    +     public class MyWritableComparable implements
    +      WritableComparable{@literal <MyWritableComparable>} {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
    +         int thisValue = this.counter;
    +         int thatValue = o.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
    +         return result;
    +       }
    +     }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) 
+ + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. + + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. 
{@link #finished()} will be reset and will + return false when reset() is called.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec + io.compress.passthrough.extension = .gz + + + Note: this is not a Splittable codec: it doesn't know the + capabilities of the passed in stream. It should be possible to + extend this in a subclass: the inner classes are marked as protected + to enable this. Do not retrofit splitting to this class..]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks. The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values of length less than the chunk size are guaranteed to have + known value length at read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers slightly better + compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream are already adequately buffered; or if applications + read/write keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; + byte[3]=n&0xff. + Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; + byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; + byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff; + byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; + byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff; + byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; + byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]= + (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>56)&0xff; byte[2] = (n>>48)&0xff; + byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff; + byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]= + (n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + + (NB[0]&0xff)<<8 + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff) + <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param <T> + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
mplementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException]]> + + + + + + + + + + + + + + + + + + + + +
  • {@link MetricsSource} generate and update metrics information.
  • +
  • {@link MetricsSink} consume the metrics information
  • + + + {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} polls the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
} (aggregate). + Filter out entries that don't have at least minSamples. + + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + + + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName + @param nameName + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. + @param conf]]> + + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } mapping and {@literal <}groupId, groupName{@literal >} + mapping.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + }/host@realm. 
+ @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } "jack" + + @param userName + @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifierextends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. 
+ @param doAs user to do the the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. 
+ @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. 
+ @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} valuelive. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration built up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)} + After the service has been successfully started via {@link Service#start()} + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit option.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte. e.g. 44, "not found", is the equivalent + of 404. The various 2XX HTTP status codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. + @param exitCode exit code + @param cause inner cause + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + This will be 0 until a call + to {@link #finished()} has been made. + @return the currently recorded duration.]]> +
    + + + + + + + + + +
    + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take a significant amount of time since, + in lieu of the reported progress, the framework has to assume that an error + has occurred and time out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. 
+ @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> + + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of an s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr {@literal <=} n + (n is the cardinality of the set A to record in + the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown]]> +
    + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown + @throws TimeoutException the future timed out.]]> +
    +
    + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + +
  • If it is an IOE: Return.
  • +
  • If it is a {@link UncheckedIOException}: return the cause
  • +
  • Completion/Execution Exceptions: extract and repeat
  • +
  • If it is an RTE or Error: throw.
  • +
  • Any other type: wrap in an IOE
  • + + + Recursively handles wrapped Execution and Completion Exceptions in + case something very complicated has happened. + @param e exception. + @return an IOException extracted or built from the cause. + @throws RuntimeException if that is the inner cause. + @throws Error if that is the inner cause.]]> +
    +
    + + + Contains methods promoted from + {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they + are a key part of integrating async IO in application code. +

    +

    + One key feature is that the {@link #awaitFuture(Future)} and + {@link #awaitFuture(Future, long, TimeUnit)} calls will + extract and rethrow exceptions raised in the future's execution, + including extracting the inner IOException of any + {@code UncheckedIOException} raised in the future. + This makes it somewhat easier to execute IOException-raising + code inside futures. +

    ]]> +
    +
    + + + + + + + type + @return a remote iterator]]> + + + + + + type + @return a remote iterator]]> + + + + + + type + @return a remote iterator]]> + + + + + + type + @return a remote iterator]]> + + + + + + + source type + @param result type + @param iterator source + @param mapper transformation + @return a remote iterator]]> + + + + + + source type + @param result type + @param iterator source + @return a remote iterator]]> + + + + + + +

    + Elements are filtered in the hasNext() method; if not used + the filtering will be done on demand in the {@code next()} + call. + @param type + @param iterator source + @param filter filter + @return a remote iterator]]> +
    +
    + + + + + source type. + @return a new iterator]]> + + + + + + + type + @return a list of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + type + @return an array of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + +

    + If the iterator is an IOStatisticsSource returning a non-null + set of statistics, and this class's log is set to DEBUG, + then the statistics of the operation are evaluated and logged at + debug. +

    + The number of entries processed is returned, as it is useful to + know this, especially during tests or when reporting values + to users. +

    + This does not close the iterator afterwards. + @param source iterator source + @param consumer consumer of the values. + @return the number of elements processed + @param type of source + @throws IOException if the source RemoteIterator or the consumer raise one.]]> +
    +
    + + + + type of source]]> + + + +

    + This aims to make it straightforward to use lambda-expressions to + transform the results of an iterator, without losing the statistics + in the process, and to chain the operations together. +

    + The closeable operation will be passed through RemoteIterators which + wrap other RemoteIterators. This is to support any iterator which + can be closed to release held connections, file handles etc. + Unless client code is written to assume that RemoteIterator instances + may be closed, this is not likely to be broadly used. It is added + to make it possible to adopt this feature in a managed way. +

    + One notable feature is that the + {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will + LOG at debug any IOStatistics provided by the iterator, if such + statistics are provided. There's no attempt at retrieval and logging + if the LOG is not set to debug, so it is a zero cost feature unless + the logger {@code org.apache.hadoop.fs.functional.RemoteIterators} + is at DEBUG. +

    + Based on the S3A Listing code, and some work on moving other code + to using iterative listings so as to pick up the statistics.]]> +
    +
    + +
    + + + + diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml new file mode 100644 index 0000000000000..62a0e09f121af --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml @@ -0,0 +1,39037 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @param customMessage depcrication message + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKey key that take up the values of deprecated key + @param customMessage deprecation message]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. 
+ + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. 
If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + As a side effect get loads the properties from the sources if called for + the first time as a lazy init. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. + @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. 
+ @param value property value.]]> + + + + + + + + value of the name property. If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. + + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. 
+ @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. 
+ @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value + @param enumeration type]]> + + + + + + + enumeration type + @throws IllegalArgumentException If mapping is illegal for the type + provided + @return enumeration type]]> + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. 
+ @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the conf key name. + @param defaultValue default value. 
+ @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the conf key name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. 
+ @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. +
  • + +
  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • + +
  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • + + @param out the writer to write to.]]> +
    +
    + + + + + + + +
  • + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +
    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    +
  • + +
  • + When propertyName is null or empty, it behaves same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, +
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "key2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • + +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "key2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. +

    + + @param config the configuration + @param out the Writer to write to + @throws IOException]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + } with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    
    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    
    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define their own custom tags in hadoop.tags.custom property. + +

    For example, we can tag existing property as: +

    
    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException + @throws NoSuchAlgorithmException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supportedn some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. 
+ @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing modifications, must + include entries for user, group, and others for compatibility with + permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOExceptionlockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866, host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block groups. Value offset is the offset of a block group in the file and + value length is the total length of a block group. Hosts of a BlockLocation + are the datanodes that holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. +

    + This does not change the current offset of a file, and is thread-safe. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if reached + end-of-stream + @throws IOException if there is some error performing the read]]> + + + + + + + + + This operation provides similar semantics to + {@link #read(long, ByteBuffer)}, the difference is that this method is + guaranteed to read data until the {@link ByteBuffer} is full, or until + the end of the data stream is reached. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @throws IOException if there is some error performing the read + @throws EOFException the end of the data was reached before + the read operation completed + @see #read(long, ByteBuffer)]]> + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + +

    + A higher number here does not necessarily improve performance, especially + for object stores, where multiple threads may be attempting to create an FS + instance for the same URI. +

    + Default value: {@value}.]]> +
    + + + +

    + Default value: {@value}.]]> +
    +
    + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + 
core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + +

    + + Use the CreateFlag as follows: +

      +
    1. CREATE - to create a file if it does not exist, + else throw FileAlreadyExists.
    2. +
    3. APPEND - to append to a file if it exists, + else throw FileNotFoundException.
    4. +
    5. OVERWRITE - to truncate a file if it exists, + else throw FileNotFoundException.
    6. +
    7. CREATE|APPEND - to create a file if it does not exist, + else append to an existing file.
    8. +
    9. CREATE|OVERWRITE - to create a file if it does not exist, + else overwrite an existing file.
    10. +
    11. SYNC_BLOCK - to force closed blocks to the disk device. + In addition {@link Syncable#hsync()} should be called after each write, + if true synchronous behavior is required.
    12. +
    13. LAZY_PERSIST - Create the block on transient storage (RAM) if + available.
    14. +
    15. APPEND_NEWBLOCK - Append data to a new block instead of end of the last + partial block.
    16. +
    + + Following combinations are not valid and will result in + {@link HadoopIllegalArgumentException}: +
      +
    1. APPEND|OVERWRITE
    2. +
    3. CREATE|APPEND|OVERWRITE
    4. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + absOrFqPath is not supported. + @throws IOException If the file system for absOrFqPath could + not be instantiated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar";
    • +
    • absolute without scheme: "/foo/bar"
    • +
    • fully qualified with scheme: "xx://auth/foo/bar"
    • +
    +
    + Illegal WDs: +
      +
    • relative with scheme: "xx:foo/bar"
    • +
    • non existent directory
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f does not exist + @throws AccessControlException if access denied + @throws IOException If an IO Error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is not + to create parents +
  • The defaults for the following are server-side (SS) defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - e.g. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • ChecksumParam - Checksum parameters. server default is used + if not specified. +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + + dir already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared 
exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + +
  • Fails if path is a directory. +
  • Fails if path does not exist. +
  • Fails if path is not closed. +
  • Fails if new size is greater than current size. + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details. +

    + + @param src path to be renamed + @param dst new path after rename + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Options.Rename#OVERWRITE} + option false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system 
for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException 
If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + {@literal <---}X{@literal --->} + fs://host/A/B/link + {@literal <-----}Y{@literal ----->} + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file system. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. 
Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file link already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException 
If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing + modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries + to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing + modifications, must include entries for user, group, and others for + compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns + each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to remove extended attribute + @param name xattr name + @throws IOException]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal <}String{@literal >} of the XAttr names of the + file or directory + @throws IOException]]> + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Path Names + + The Hadoop file system supports a URI namespace and URI names. 
This enables + multiple types of file systems to be referenced using fully-qualified URIs. + Two common Hadoop file system implementations are +

      +
    • the local file system: file:///path +
    • the HDFS file system: hdfs://nnAddress:nnPort/path +
    + + The Hadoop file system also supports additional naming schemes besides URIs. + Hadoop has the concept of a default file system, which implies a + default URI scheme and authority. This enables slash-relative names + relative to the default FS, which are more convenient for users and + application writers. The default FS is typically set by the user's + environment, though it can also be manually specified. +

    + + Hadoop also supports working-directory-relative names, which are paths + relative to the current working directory (similar to Unix). The working + directory can be in a different file system than the default FS. +

    + Thus, Hadoop path names can be specified as one of the following: +

      +
    • a fully-qualified URI: scheme://authority/path (e.g. + hdfs://nnAddress:nnPort/foo/bar) +
    • a slash-relative name: path relative to the default file system (e.g. + /foo/bar) +
    • a working-directory-relative name: path relative to the working dir (e.g. + foo/bar) +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    Role of FileContext and Configuration Defaults

    + + The FileContext is the analogue of per-process file-related state in Unix. It + contains two properties: + +
      +
    • the default file system (for resolving slash-relative names) +
    • the umask (for file permissions) +
    + In general, these properties are obtained from the default configuration file + in the user's environment (see {@link Configuration}). + + Further file system properties are specified on the server-side. File system + operations default to using these server-side defaults unless otherwise + specified. +

    + The file system related server-side defaults are: +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • encryptDataTransfer +
    • checksum option. (checksumType and bytesPerChecksum) +
    + +

    Example Usage

    + + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-defaults.xml in the release jar. +
      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path); +
    • myFContext.open (path, ...); +
    • ... +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext(); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG + Generally you should not need to use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou; +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
his implementation throws an UnsupportedOperationException. + + @return the protocol scheme for this FileSystem. + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • + If the configuration has the property + {@code "fs.$SCHEME.impl.disable.cache"} set to true, + a new instance will be created, initialized with the supplied URI and + configuration, then returned without being cached. +
  • +
  • + If there is a cached FS instance matching the same URI, it will + be returned. +
  • +
  • + Otherwise: a new FS instance will be created, initialized with the + configuration and URI, cached and returned to the caller. +
  • + + @throws IOException if the FileSystem cannot be instantiated.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if f == null : + result = null + elif f.getLen() {@literal <=} start: + result = [] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] + + This call is most helpful with and distributed filesystem + where the hostnames of machines that contain blocks of the given file + can be determined. + + The default implementation returns an array containing one element: +
    + BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
    + 
    + + In HDFS, if file is three-replicated, the returned array contains + elements like: +
    + BlockLocation(offset: 0, length: BLOCK_SIZE,
    +   hosts: {"host1:9866", "host2:9866", "host3:9866"})
    + BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    +   hosts: {"host2:9866", "host3:9866", "host4:9866"})
    + 
    + + And if a file is erasure-coded, the returned BlockLocations are logical + block groups. + + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + there will be one BlockLocation returned, with 0 offset, actual file size + and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + 2. If the file size is less than one group size but greater than one + stripe size, then there will be one BlockLocation returned, with 0 offset, + actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + the actual blocks. + 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + for example, then the result will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    +   "host4:9866", "host5:9866"})
    + 
    + + @param file FilesStatus to get data from + @param start offset into the given file + @param len length for which to get locations for + @throws IOException IO failure]]> +
    +
mportant: the default implementation is not atomic + @param f path to use for create + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory.
  • +
  • Fails if src is a directory and dst is a file.
  • +
  • Fails if the parent of dst does not exist or is a file.
  • + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. + + @param src path to be renamed + @param dst new path after rename + @throws FileNotFoundException src path does not exist, or the parent + path of dst does not exist. + @throws FileAlreadyExistsException dest path exists and is a file + @throws ParentNotDirectoryException if the parent path of dest is not + a directory + @throws IOException on failure]]> + + + + + + + + +

  • Fails if path is a directory.
  • +
  • Fails if path does not exist.
  • +
  • Fails if path is not closed.
  • +
  • Fails if new size is greater than current size.
  • + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
  • Clean shutdown of the JVM cannot be guaranteed.
  • +
  • The time to shut down a FileSystem will depend on the number of + files to delete. For filesystems where the cost of checking + for the existence of a file/directory and the actual delete operation + (for example: object stores) is high, the time to shut down the JVM can be + significantly extended by over-use of this feature.
  • +
  • Connectivity problems with a remote filesystem may delay shutdown + further, and may cause the files to not be deleted.
  • + + @param f the path to delete. + @return true if deleteOnExit is successful, otherwise false. + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. +

    + Will not return null. Expect IOException upon access error. + @param f given path + @return the statuses of the files/directories in the given patch + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param f + a path name + @param filter + the user-supplied path filter + @return an array of FileStatus objects for the files under the given path + after applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @return a list of statuses for the files under the given paths after + applying the filter default Path filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @param filter + the user-supplied path filter + @return a list of statuses for the files under the given paths after + applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +
    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a glob specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred]]> + + + + + + + + + f does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + p does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + + + If the path is a directory, + if recursive is false, returns files in the directory; + if recursive is true, return files in the subtree rooted at the path. + If the path is a file, return the file's status and block locations. + + @param f is the path + @param recursive if the subdirectories need to be traversed recursively + + @return an iterator that traverses statuses of the files + + @throws FileNotFoundException when the path does not exist; + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + undefined. + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException + @throws UnsupportedOperationException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal } of the XAttr names of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to remove extended attribute + @param name xattr name + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is a default method which is intended to be overridden by + subclasses. The default implementation returns an empty storage statistics + object.

    + + @return The StorageStatistics for this FileSystem instance. + Will never be null.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object or its + successor, {@link FileContext}. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +

    + Notes +

      +
    1. The behaviour of the filesystem is + + specified in the Hadoop documentation. + However, the normative specification of the behavior of this class is + actually HDFS: if HDFS does not behave the way these Javadocs or + the specification in the Hadoop documentation defines, assume that + the documentation is incorrect. +
    2. +
    3. The term {@code FileSystem} refers to an instance of this class.
    4. +
    5. The acronym "FS" is used as an abbreviation of FileSystem.
    6. +
    7. The term {@code filesystem} refers to the distributed/local filesystem + itself, rather than the class used to interact with it.
    8. +
    9. The term "file" refers to a file in the remote filesystem, + rather than instances of {@code java.io.File}.
    10. +
    + + This is a carefully evolving class. + New methods may be marked as Unstable or Evolving for their initial release, + as a warning that they are new and may change based on the + experience of use in applications. +

    + Important note for developers +

    + If you are making changes here to the public API or protected methods, + you must review the following subclasses and make sure that + they are filtering/passing through new methods as appropriate. +

    + + {@link FilterFileSystem}: methods are passed through. If not, + then {@code TestFilterFileSystem.MustNotImplement} must be + updated with the unsupported interface. + Furthermore, if the new API's support is probed for via + {@link #hasPathCapability(Path, String)} then + {@link FilterFileSystem#hasPathCapability(Path, String)} + must return false, always. +

    + {@link ChecksumFileSystem}: checksums are created and + verified. +

    + {@code TestHarFileSystem} will need its {@code MustNotImplement} + interface updated. +

    + + There are some external places your changes will break things. + Do co-ordinate changes here. +

    + + HBase: HBoss +

    + Hive: HiveShim23 + {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}]]> +
    +
} caller's + environment variables to use for expansion + @return String[] with absolute path to new jar in position 0 and + unexpanded wild card entry path in position 1 + @throws IOException if there is an I/O error while writing the jar fileilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return type on the {@link #build()} call. + @param type of builder itself.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + if there is no more data because the end of the stream has been + reached]]> + + + + + + + + + + length bytes have been read. + + @param position position in the input stream to seek + @param buffer buffer into which data is read + @param offset offset into the buffer in which data is written + @param length the number of bytes to read + @throws IOException IO problems + @throws EOFException If the end of stream is reached while reading. 
+ If an exception is thrown an undetermined number + of bytes in the buffer may have been written.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // Don't + if (fs instanceof FooFileSystem) { + FooFileSystem fs = (FooFileSystem) fs; + OutputStream out = dfs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build() + } else if (fs instanceof BarFileSystem) { + ... + } + + // Do + OutputStream out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + + + If the option is not related to the file system, the option will be ignored. 
+ If the option is must, but not supported by the file system, a + {@link IllegalArgumentException} will be thrown.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + The interface extends {@link IOStatisticsSource} so that there is no + need to cast an instance to see if it is a source of statistics. + However, implementations MAY return null for their actual statistics.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ListingBatch behaves similar to a Future, in that getting the result via + {@link #get()} will throw an Exception if there was a failure.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + and the scheme is null, and the authority + is null. + + @return whether the path is absolute and the URI has no scheme nor + authority parts]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @return actual number of bytes read; -1 means "none" + @throws IOException IO problems.]]> + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @throws IOException IO problems. 
+ @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note
    : Returned list is not sorted in any given order, + due to reliance on Java's {@link File#list()} API.)]]> + +
ttr is byte[], this class is to + covert byte[] to some kind of string representation or convert back. + String representation is convenient for display and input. For example + display in screen as shell response and json response, input as http + or shell parameter.]]> + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return ftp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is for reporting and testing.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + These are low-cost per-instance statistics provided by any Hadoop + I/O class instance. +

    + Consult the filesystem specification document for the requirements + of an implementation of this interface.]]> + + + + + + + + + + + + + + + + + + + + + + + Exceptions are caught and downgraded to debug logging. + @param source source of statistics. + @return a string for logging.]]> + + + + + + + + + + + + + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is designed to be affordable to use in log statements. + @param source source of statistics -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is for use in log statements where for the cost of creation + of this entry is low; it is affordable to use in log statements. + @param statistics statistics to stringify -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It is serializable so that frameworks which can use java serialization + to propagate data (Spark, Flink...) can send the statistics + back. For this reason, TreeMaps are explicitly used as field types, + even though IDEs can recommend use of Map instead. + For security reasons, untrusted java object streams should never be + deserialized. If for some reason this is required, use + {@link #requiredSerializationClasses()} to get the list of classes + used when deserializing instances of this object. +

    +

    + It is annotated for correct serializations with jackson2. +

    ]]> +
    + + + + + + + + + This is not an atomic option. +

    + The instance can be serialized, and its + {@code toString()} method lists all the values. + @param statistics statistics + @return a snapshot of the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used to accrue values so as to dynamically update + the mean. If so, know that there is no synchronization + on the methods. +

    +

    + If a statistic has 0 samples then it is considered to be empty. +

    +

    + All 'empty' statistics are equivalent, independent of the sum value. +

    +

    + For non-empty statistics, sum and sample values must match + for equality. +

    +

    + It is serializable and annotated for correct serializations with jackson2. +

    +

    + Thread safety. The operations to add/copy sample data are thread safe. +

    +
      +
    1. {@link #add(MeanStatistic)}
    2. +
    3. {@link #addSample(long)}
    4. +
    5. {@link #clear()}
    6. +
    7. {@link #setSamplesAndSum(long, long)}
    8. +
    9. {@link #set(MeanStatistic)}
    10. +
    11. {@link #setSamples(long)} and {@link #setSum(long)}
    12. +
    +

    + So is the {@link #mean()} method. This ensures that when + used to aggregate statistics, the aggregate value and sample + count are set and evaluated consistently. +

    +

    + Other methods are marked as synchronized because Findbugs overreacts + to the idea that some operations to update sum and sample count + are synchronized, but that things like equals are not. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + names)}: {@value}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention: +

    +
      +
    • the names of the constants are uppercase, words separated by + underscores.
    • +
    • the values of the constants are the lowercase form of the constant names.
    • +
    ]]> +
    +
When adding new common statistic name constants, please make them unique. + By convention, they are implicitly unique: +
      +
    • + The name of the constants are uppercase, words separated by + underscores. +
    • +
    • + The value of the constants are lowercase of the constant names. +
    • +
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + }o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. 
+ + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value + @param elementType]]> + + + + + value should not be null + or empty. + + @param value]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value + @param elementType]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. + + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. 
The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName - key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Uncompressed number of records in the block
      • +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every block. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. 
+ @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +
    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 
    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +
    +     public class MyWritableComparable implements
    +      WritableComparable{@literal <MyWritableComparable>} {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
    +         int thisValue = this.counter;
    +         int thatValue = o.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
    +         return result;
    +       }
    +     }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The codec alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + aliases are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) 
+ + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. + + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. 
{@link #finished()} will be reset and will + return false when reset() is called.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec + io.compress.passthrough.extension = .gz + + + Note: this is not a Splittable codec: it doesn't know the + capabilities of the passed in stream. It should be possible to + extend this in a subclass: the inner classes are marked as protected + to enable this. Do not retrofit splitting to this class..]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks.The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values whose length is less than the chunk size are guaranteed to have + a known value length at read time (see + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers a slightly better + compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering. If the underlying FSDataInputStream and + FSDataOutputStream are already adequately buffered, or if applications + read/write keys and values in large buffers, we can reduce the sizes of + input/output buffering in the TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; + byte[3]=n&0xff. + Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; + byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; + byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff; + byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; + byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff; + byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; + byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]= + (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>56)&0xff; byte[2] = (n>>48)&0xff; + byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff; + byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]= + (n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + + (NB[0]&0xff)<<8 + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff) + <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
mplementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException]]> + + + + + + + + + + + + + + + + + + + + +
  • {@link MetricsSource} generates and updates metrics information.
  • +
  • {@link MetricsSink} consumes the metrics information
  • + + + {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} poll the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
} (aggregate). + Filter out entries that don't have at least minSamples. + + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + + + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName + @param nameName + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. + @param conf]]> + + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } mapping and {@literal <}groupId, groupName{@literal >} + mapping.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + }/host@realm. 
+ @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } "jack" + + @param userName + @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifierextends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. 
+ @param doAs user to do the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. 
+ @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. 
+ @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} valuelive. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration built up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)} + After the service has been successfully started via {@link Service#start()} + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit option.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte, e.g. 44, "not found", is the equivalent + of 404. The various 2XX HTTP status codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. + @param exitCode exit code + @param cause inner cause + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + This will be 0 until a call + to {@link #finished()} has been made. + @return the currently recorded duration.]]> +
    + + + + + + + + + +
    + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take a significant amount of time since, + in lieu of the reported progress, the framework has to assume that an error + has occurred and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. 
+ @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> + + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr {@literal <=} n + (n is the cardinality of the set A to record in + the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown]]> +
    + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown + @throws TimeoutException the future timed out.]]> +
    +
    + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + +
  • If it is an IOE: Return.
  • +
  • If it is a {@link UncheckedIOException}: return the cause
  • +
  • Completion/Execution Exceptions: extract and repeat
  • +
  • If it is an RTE or Error: throw.
  • +
  • Any other type: wrap in an IOE
  • + + + Recursively handles wrapped Execution and Completion Exceptions in + case something very complicated has happened. + @param e exception. + @return an IOException extracted or built from the cause. + @throws RuntimeException if that is the inner cause. + @throws Error if that is the inner cause.]]> +
    +
    + + + Contains methods promoted from + {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they + are a key part of integrating async IO in application code. +

    +

    + One key feature is that the {@link #awaitFuture(Future)} and + {@link #awaitFuture(Future, long, TimeUnit)} calls will + extract and rethrow exceptions raised in the future's execution, + including extracting the inner IOException of any + {@code UncheckedIOException} raised in the future. + This makes it somewhat easier to execute IOException-raising + code inside futures. +

    ]]> +
    +
    + + + + + + + type + @return a remote iterator]]> + + + + + + type + @return a remote iterator]]> + + + + + + type + @return a remote iterator]]> + + + + + + type + @return a remote iterator]]> + + + + + + + source type + @param result type + @param iterator source + @param mapper transformation + @return a remote iterator]]> + + + + + + source type + @param result type + @param iterator source + @return a remote iterator]]> + + + + + + +

    + Elements are filtered in the hasNext() method; if not used + the filtering will be done on demand in the {@code next()} + call. + @param type + @param iterator source + @param filter filter + @return a remote iterator]]> +
    +
    + + + + + source type. + @return a new iterator]]> + + + + + + + type + @return a list of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + type + @return an array of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + +

    + If the iterator is an IOStatisticsSource returning a non-null + set of statistics, and this class's log is set to DEBUG, + then the statistics of the operation are evaluated and logged at + debug. +

    + The number of entries processed is returned, as it is useful to + know this, especially during tests or when reporting values + to users. +

    + This does not close the iterator afterwards. + @param source iterator source + @param consumer consumer of the values. + @return the number of elements processed + @param type of source + @throws IOException if the source RemoteIterator or the consumer raise one.]]> +
    +
    + + + + type of source]]> + + + +

    + This aims to make it straightforward to use lambda-expressions to + transform the results of an iterator, without losing the statistics + in the process, and to chain the operations together. +

    + The closeable operation will be passed through RemoteIterators which + wrap other RemoteIterators. This is to support any iterator which + can be closed to release held connections, file handles etc. + Unless client code is written to assume that RemoteIterator instances + may be closed, this is not likely to be broadly used. It is added + to make it possible to adopt this feature in a managed way. +

    + One notable feature is that the + {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will + LOG at debug any IOStatistics provided by the iterator, if such + statistics are provided. There's no attempt at retrieval and logging + if the LOG is not set to debug, so it is a zero cost feature unless + the logger {@code org.apache.hadoop.fs.functional.RemoteIterators} + is at DEBUG. +

    + Based on the S3A Listing code, and some work on moving other code + to using iterative listings so as to pick up the statistics.]]> +
    +
    + +
    + + + +
    diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml new file mode 100644 index 0000000000000..b788b4497fea0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml @@ -0,0 +1,40640 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @param customMessage depcrication message + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKey key that take up the values of deprecated key + @param customMessage deprecation message]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. 
+ + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. 
If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + As a side effect get loads the properties from the sources if called for + the first time as a lazy init. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. + @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. 
+ @param value property value.]]> + + + + + + + + value of the name property. If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. + + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. 
+ @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. 
+ @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value + @param enumeration type]]> + + + + + + + enumeration type + @throws IllegalArgumentException If mapping is illegal for the type + provided + @return enumeration type]]> + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. 
+ @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port. + @param name property name. + @param addr inetSocketAddress addr.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the conf key name. 
+ @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the conf key name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @param Interface class type. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @param Interface class type. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. 
+ @param path file-path. + @return local file under the directory with the given path. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. +
  • + +
  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • + +
  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • + + @param propertyName xml property name. + @param out the writer to write to. + @throws IOException raised on errors performing I/O.]]> +
    +
    + + + + + + + +
  • + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +
    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    +
  • + +
  • + When propertyName is null or empty, it behaves the same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, +
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "key2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • + +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException raised on errors performing I/O. + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "key2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. +

    + + @param config the configuration + @param out the Writer to write to + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + } with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    
    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    
    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define their own custom tags in hadoop.tags.custom property. + +

    For example, we can tag existing property as: +

    
    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException raised on errors performing I/O. + @throws NoSuchAlgorithmException no such algorithm exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException raised on errors performing I/O. + @throws NoSuchAlgorithmException This exception is thrown when a particular + cryptographic algorithm is requested + but is not available in the environment.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. 
+ + @throws UnsupportedFileSystemException if the file system for + uri is not supportedn some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException raised on errors performing I/O. + @throws UnsupportedOperationException Unsupported Operation Exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing modifications, must + include entries for user, group, and others for compatibility with + permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException raised on errors performinglockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866, host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block groups. Value offset is the offset of a block group in the file and + value length is the total length of a block group. Hosts of a BlockLocation + are the datanodes that holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. +

    + This does not change the current offset of a file, and is thread-safe. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if reached + end-of-stream + @throws IOException if there is some error performing the read]]> + + + + + + + + + This operation provides similar semantics to + {@link #read(long, ByteBuffer)}, the difference is that this method is + guaranteed to read data until the {@link ByteBuffer} is full, or until + the end of the data stream is reached. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @throws IOException if there is some error performing the read + @throws EOFException the end of the data was reached before + the read operation completed + @see #read(long, ByteBuffer)]]> + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException if an I/O error occurs. + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + A higher number here does not necessarily improve performance, especially + for object stores, where multiple threads may be attempting to create an FS + instance for the same URI. +

    + Default value: {@value}.]]> +
    + + + + + Default value: {@value}. +

    ]]> +
    +
    + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + 
+ core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + +

    + + Use the CreateFlag as follows: +

      +
    1. CREATE - to create a file if it does not exist, + else throw FileAlreadyExists.
    2. +
    3. APPEND - to append to a file if it exists, + else throw FileNotFoundException.
    4. +
    5. OVERWRITE - to truncate a file if it exists, + else throw FileNotFoundException.
    6. +
    7. CREATE|APPEND - to create a file if it does not exist, + else append to an existing file.
    8. +
    9. CREATE|OVERWRITE - to create a file if it does not exist, + else overwrite an existing file.
    10. +
    11. SYNC_BLOCK - to force closed blocks to the disk device. + In addition {@link Syncable#hsync()} should be called after each write, + if true synchronous behavior is required.
    12. +
    13. LAZY_PERSIST - Create the block on transient storage (RAM) if + available.
    14. +
    15. APPEND_NEWBLOCK - Append data to a new block instead of end of the last + partial block.
    16. +
    + + Following combinations are not valid and will result in + {@link HadoopIllegalArgumentException}: +
      +
    1. APPEND|OVERWRITE
    2. +
    3. CREATE|APPEND|OVERWRITE
    4. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + absOrFqPath is not supported. + @throws IOException If the file system for absOrFqPath could + not be instantiated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar";
    • +
    • absolute without scheme: "/foo/bar"
    • +
    • fully qualified with scheme: "xx://auth/foo/bar"
    • +
    +
    + Illegal WDs: +
      +
    • relative with scheme: "xx:foo/bar"
    • +
    • non existent directory
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f does not exist + @throws AccessControlException if access denied + @throws IOException If an IO Error occurred + @throws UnresolvedLinkException If unresolved link occurred. + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is not + to create parents +
  • The defaults for the following are server-side (SS) defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - e.g. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • ChecksumParam - Checksum parameters. server default is used + if not specified. +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + + dir already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared 
exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid + + @return if delete success true, not false.]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + @return input stream.]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + @return output stream.]]> + + + + + + + + + + + +
  • Fails if path is a directory. +
  • Fails if path does not exist. +
  • Fails if path is not closed. +
  • Fails if new size is greater than current size. + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details. +

    + + @param src path to be renamed + @param dst new path after rename + @param options rename options. + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Options.Rename#OVERWRITE} + option false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws 
UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + f is + not supported.]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + 
+ + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + {@literal <---}X{@literal --->} + fs://host/A/B/link + {@literal <-----}Y{@literal ----->} + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file system. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. 
Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file link already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException 
If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist. + @throws UnresolvedLinkException If unresolved link occurred. + @throws AccessControlException If access is denied. + @throws IOException If an I/O error occurred. + @return resolve path.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing + modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries + to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing + modifications, must include entries for user, group, and others for + compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns + each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException If an I/O error occurred.]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException If an I/O error occurred.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException If an I/O error occurred.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException If an I/O error occurred.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. 
+ + @param path Path to get extended attributes + @param names XAttr names. + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException If an I/O error occurred.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException If an I/O error occurred.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal <}String{@literal >} of the XAttr names of the + file or directory + @throws IOException If an I/O error occurred.]]> + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared 
exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Path Names + + The Hadoop file system supports a URI namespace and URI names. This enables + multiple types of file systems to be referenced using fully-qualified URIs. + Two common Hadoop file system implementations are +

      +
    • the local file system: file:///path +
    • the HDFS file system: hdfs://nnAddress:nnPort/path +
    + + The Hadoop file system also supports additional naming schemes besides URIs. + Hadoop has the concept of a default file system, which implies a + default URI scheme and authority. This enables slash-relative names + relative to the default FS, which are more convenient for users and + application writers. The default FS is typically set by the user's + environment, though it can also be manually specified. +

    + + Hadoop also supports working-directory-relative names, which are paths + relative to the current working directory (similar to Unix). The working + directory can be in a different file system than the default FS. +

    + Thus, Hadoop path names can be specified as one of the following: +

      +
    • a fully-qualified URI: scheme://authority/path (e.g. + hdfs://nnAddress:nnPort/foo/bar) +
    • a slash-relative name: path relative to the default file system (e.g. + /foo/bar) +
    • a working-directory-relative name: path relative to the working dir (e.g. + foo/bar) +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    Role of FileContext and Configuration Defaults

    + + The FileContext is the analogue of per-process file-related state in Unix. It + contains two properties: + +
      +
    • the default file system (for resolving slash-relative names) +
    • the umask (for file permissions) +
    + In general, these properties are obtained from the default configuration file + in the user's environment (see {@link Configuration}). + + Further file system properties are specified on the server-side. File system + operations default to using these server-side defaults unless otherwise + specified. +

    + The file system related server-side defaults are: +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • encryptDataTransfer +
    • checksum option. (checksumType and bytesPerChecksum) +
    + +

    Example Usage

    + + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-defaults.xml in the release jar. +
      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path); +
    • myFContext.open (path, ...); +
    • ... +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext(); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG. + Generally you should not need to use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou; +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
his implementation throws an UnsupportedOperationException. + + @return the protocol scheme for this FileSystem. + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • + If the configuration has the property + {@code "fs.$SCHEME.impl.disable.cache"} set to true, + a new instance will be created, initialized with the supplied URI and + configuration, then returned without being cached. +
  • +
  • + If there is a cached FS instance matching the same URI, it will + be returned. +
  • +
  • + Otherwise: a new FS instance will be created, initialized with the + configuration and URI, cached and returned to the caller. +
  • + + @param uri uri of the filesystem. + @param conf configuration. + @return filesystem instance. + @throws IOException if the FileSystem cannot be instantiated.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if f == null : + result = null + elif f.getLen() {@literal <=} start: + result = [] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] + + This call is most helpful with and distributed filesystem + where the hostnames of machines that contain blocks of the given file + can be determined. + + The default implementation returns an array containing one element: +
    + BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
    + 
    + + In HDFS, if file is three-replicated, the returned array contains + elements like: +
    + BlockLocation(offset: 0, length: BLOCK_SIZE,
    +   hosts: {"host1:9866", "host2:9866", "host3:9866"})
    + BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    +   hosts: {"host2:9866", "host3:9866", "host4:9866"})
    + 
    + + And if a file is erasure-coded, the returned BlockLocations are logical + block groups. + + Suppose we have an RS_3_2 coded file (3 data units and 2 parity units). + 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + there will be one BlockLocation returned, with 0 offset, actual file size + and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + 2. If the file size is less than one group size but greater than one + stripe size, then there will be one BlockLocation returned, with 0 offset, + actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + the actual blocks. + 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + for example, then the result will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    +   "host4:9866", "host5:9866"})
    + 
    + + @param file FileStatus to get data from + @param start offset into the given file + @param len length for which to get locations + @throws IOException IO failure + @return block location array.]]> +
    +
mportant: the default implementation is not atomic + @param f path to use for create + @throws IOException IO failure + @return if create new file success true,not false.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory.
  • +
  • Fails if src is a directory and dst is a file.
  • +
  • Fails if the parent of dst does not exist or is a file.
  • + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non-atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. +

    + + @param src path to be renamed + @param dst new path after rename + @param options rename options. + @throws FileNotFoundException src path does not exist, or the parent + path of dst does not exist. + @throws FileAlreadyExistsException dest path exists and is a file + @throws ParentNotDirectoryException if the parent path of dest is not + a directory + @throws IOException on failure]]> +
    +
    + + + + + + +
  • Fails if path is a directory.
  • +
  • Fails if path does not exist.
  • +
  • Fails if path is not closed.
  • +
  • Fails if new size is greater than current size.
  • + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
  • Clean shutdown of the JVM cannot be guaranteed.
  • +
  • The time to shut down a FileSystem will depend on the number of + files to delete. For filesystems where the cost of checking + for the existence of a file/directory and the actual delete operation + (for example: object stores) is high, the time to shut down the JVM can be + significantly extended by over-use of this feature.
  • +
  • Connectivity problems with a remote filesystem may delay shutdown + further, and may cause the files to not be deleted.
  • + + @param f the path to delete. + @return true if deleteOnExit is successful, otherwise false. + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. +

    + Will not return null. Expect IOException upon access error. + @param f given path + @return the statuses of the files/directories in the given patch + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param f + a path name + @param filter + the user-supplied path filter + @return an array of FileStatus objects for the files under the given path + after applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @return a list of statuses for the files under the given paths after + applying the filter default Path filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @param filter + the user-supplied path filter + @return a list of statuses for the files under the given paths after + applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +
    ? +
    Matches any single character. + +
    * +
    Matches zero or more characters. + +
    [abc] +
    Matches a single character from character set + {a,b,c}. + +
    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +
    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +
    \c +
    Removes (escapes) any special meaning of character c. + +
    {ab,cd} +
    Matches a string from the string set {ab, cd} + +
    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a glob specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred]]> + + + + + + + + + f does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + p does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + + + If the path is a directory, + if recursive is false, returns files in the directory; + if recursive is true, return files in the subtree rooted at the path. + If the path is a file, return the file's status and block locations. + + @param f is the path + @param recursive if the subdirectories need to be traversed recursively + + @return an iterator that traverses statuses of the files + + @throws FileNotFoundException when the path does not exist; + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + undefined. + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException If an I/O error occurred. 
+ @throws UnsupportedOperationException if the operation is unsupported.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. 
+ @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal } of the XAttr names of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is a default method which is intended to be overridden by + subclasses. The default implementation returns an empty storage statistics + object.

    + + @return The StorageStatistics for this FileSystem instance. + Will never be null.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object or its + successor, {@link FileContext}. +

    +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +

    + Notes +
      +
    1. The behaviour of the filesystem is + + specified in the Hadoop documentation. + However, the normative specification of the behavior of this class is + actually HDFS: if HDFS does not behave the way these Javadocs or + the specification in the Hadoop documentation defines, assume that + the documentation is incorrect. +
    2. +
    3. The term {@code FileSystem} refers to an instance of this class.
    4. +
    5. The acronym "FS" is used as an abbreviation of FileSystem.
    6. +
    7. The term {@code filesystem} refers to the distributed/local filesystem + itself, rather than the class used to interact with it.
    8. +
    9. The term "file" refers to a file in the remote filesystem, + rather than instances of {@code java.io.File}.
    10. +
    + + This is a carefully evolving class. + New methods may be marked as Unstable or Evolving for their initial release, + as a warning that they are new and may change based on the + experience of use in applications. +

    + Important note for developers +

    + If you are making changes here to the public API or protected methods, + you must review the following subclasses and make sure that + they are filtering/passing through new methods as appropriate. + + {@link FilterFileSystem}: methods are passed through. If not, + then {@code TestFilterFileSystem.MustNotImplement} must be + updated with the unsupported interface. + Furthermore, if the new API's support is probed for via + {@link #hasPathCapability(Path, String)} then + {@link FilterFileSystem#hasPathCapability(Path, String)} + must return false, always. +

    + {@link ChecksumFileSystem}: checksums are created and + verified. +

    + {@code TestHarFileSystem} will need its {@code MustNotImplement} + interface updated. + +

    + There are some external places your changes will break things. + Do co-ordinate changes here. +

    + + HBase: HBoss +

    + Hive: HiveShim23 +

    + {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + returns true if the operation succeeded. When deleteSource is true, + this means "after the copy, delete(source) returned true" + If the destination is a directory, and mkdirs (dest) fails, + the operation will return false rather than raise any exception. +

    + The overwrite flag is about overwriting files; it has no effect on + handling an attempt to copy a file atop a directory (expect an IOException), + or a directory over a path which contains a file (mkdir will fail, so + "false"). +

    + The operation is recursive, and the deleteSource operation takes place + as each subdirectory is copied. Therefore, if an operation fails partway + through, the source tree may be partially deleted. +

    + @param srcFS source filesystem + @param srcStatus status of source + @param dstFS destination filesystem + @param dst path of destination + @param deleteSource delete the source? + @param overwrite overwrite files at destination? + @param conf configuration to use when opening files + @return true if the operation succeeded. + @throws IOException failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } caller's + environment variables to use for expansion + @return String[] with absolute path to new jar in position 0 and + unexpanded wild card entry path in position 1 + @throws IOException if there is an I/O error while writing the jar file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return type on the {@link #build()} call. + @param type of builder itself.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + if there is no more data because the end of the stream has been + reached]]> + + + + + + + + + + length bytes have been read. + + @param position position in the input stream to seek + @param buffer buffer into which data is read + @param offset offset into the buffer in which data is written + @param length the number of bytes to read + @throws IOException IO problems + @throws EOFException If the end of stream is reached while reading. + If an exception is thrown an undetermined number + of bytes in the buffer may have been writtenon't + if (fs instanceof FooFileSystem) { + FooFileSystem fs = (FooFileSystem) fs; + OutputStream out = dfs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build() + } else if (fs instanceof BarFileSystem) { + ... 
+ } + + // Do + OutputStream out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + + + If the option is not related to the file system, the option will be ignored. + If the option is must, but not supported by the file system, a + {@link IllegalArgumentException} will be thrown.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The interface extends {@link IOStatisticsSource} so that there is no + need to cast an instance to see if is a source of statistics. + However, implementations MAY return null for their actual statistics. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A partial listing of the children of a parent directory. Since it is a + partial listing, multiple PartialListing may need to be combined to obtain + the full listing of a parent directory. +

    + ListingBatch behaves similar to a Future, in that getting the result via + {@link #get()} will throw an Exception if there was a failure.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + and the scheme is null, and the authority + is null. + + @return whether the path is absolute and the URI has no scheme nor + authority parts]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @return actual number of bytes read; -1 means "none" + @throws IOException IO problems.]]> + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + + + + + + + + + + + + The position returned by getPos() after readVectored() is undefined. +

    +

    + If a file is changed while the readVectored() operation is in progress, the output is + undefined. Some ranges may have old data, some may have new and some may have both. +

    +

    + While a readVectored() operation is in progress, normal read api calls may block. +

    + @param ranges the byte ranges to read + @param allocate the function to allocate ByteBuffer + @throws IOException any IOE.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note
    : Returned list is not sorted in any given order, + due to reliance on Java's {@link File#list()} API.)]]> + +
ttr is byte[], this class is to + covert byte[] to some kind of string representation or convert back. + String representation is convenient for display and input. For example + display in screen as shell response and json response, input as http + or shell parameter.]]> + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + MUST NOT be part of/refer to + any object instance of significant memory size. + Applications SHOULD remove references when they are + no longer needed. + When logged at TRACE, prints the key and stack trace of the caller, + to allow for debugging of any problems. + @param key key + @param value new value + @return old value or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return ftp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is for reporting and testing.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + These are low-cost per-instance statistics provided by any Hadoop + I/O class instance. +

    + Consult the filesystem specification document for the requirements + of an implementation of this interface.]]> + + + + + + + + + + + + + + + + + + + + + + + Exceptions are caught and downgraded to debug logging. + @param source source of statistics. + @return a string for logging.]]> + + + + + + + + + + + + + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is designed to be affordable to use in log statements. + @param source source of statistics -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is for use in log statements where for the cost of creation + of this entry is low; it is affordable to use in log statements. + @param statistics statistics to stringify -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It is serializable so that frameworks which can use java serialization + to propagate data (Spark, Flink...) can send the statistics + back. For this reason, TreeMaps are explicitly used as field types, + even though IDEs can recommend use of Map instead. + For security reasons, untrusted java object streams should never be + deserialized. If for some reason this is required, use + {@link #requiredSerializationClasses()} to get the list of classes + used when deserializing instances of this object. +

    +

    + It is annotated for correct serializations with jackson2. +

    ]]> +
    + + + + + + + + + This is not an atomic option. +

    + The instance can be serialized, and its + {@code toString()} method lists all the values. + @param statistics statistics + @return a snapshot of the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used to accrue values so as to dynamically update + the mean. If so, know that there is no synchronization + on the methods. +

    +

    + If a statistic has 0 samples then it is considered to be empty. +

    +

    + All 'empty' statistics are equivalent, independent of the sum value. +

    +

    + For non-empty statistics, sum and sample values must match + for equality. +

    +

    + It is serializable and annotated for correct serializations with jackson2. +

    +

    + Thread safety. The operations to add/copy sample data, are thread safe. +

    +
      +
    1. {@link #add(MeanStatistic)}
    2. +
    3. {@link #addSample(long)}
    4. +
    5. {@link #clear()}
    6. +
    7. {@link #setSamplesAndSum(long, long)}
    8. +
    9. {@link #set(MeanStatistic)}
    10. +
    11. {@link #setSamples(long)} and {@link #setSum(long)}
    12. +
    +

    + So is the {@link #mean()} method. This ensures that when + used to aggregate statistics, the aggregate value and sample + count are set and evaluated consistently. +

    +

    + Other methods are marked as synchronized because Findbugs overreacts + to the idea that some operations to update sum and sample count + are synchronized, but that things like equals are not. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + names)}: {@value}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention: +

    +
      +
    • the names of the constants are uppercase, words separated by + underscores.
    • +
    • the values of the constants are the lowercase form of the constant names.
    • +
    ]]> +
    +
When adding new common statistic name constants, please make them unique. + By convention, they are implicitly unique: +
      +
    • + The names of the constants are uppercase, words separated by + underscores. +
    • +
    • + The values of the constants are the lowercase form of the constant names. +
    • +
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus + @return HAServiceStatus.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + }o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. 
+ + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value enumSet value. + @param elementType elementType.]]> + + + + + value should not be null + or empty. + + @param value enumSet value.]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value enumSet Value. + @param elementType elementType.]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. 
+ + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly. + + @param conf configuration. + @param className classname. + @return Class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param generic type. + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName - key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Uncompressed number of records in the block
      • +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every block. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation. + + @param position input position. + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte.]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + + @param what input what. + @param start input start. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + + @param utf8 input utf8. + @param start input start. + @param length input length. + @param replace input replace. 
+ @throws CharacterCodingException a character encoding or + decoding error occurs.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + + @param string input string. + @param replace input replace. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit() + @throws CharacterCodingException a character encoding or decoding error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException any other problem for write.]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deserialize this object from. + @throws IOException any other problem for readFields.]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +
    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 
    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +
    +     public class MyWritableComparable implements
    +      WritableComparable{@literal <MyWritableComparable>} {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
    +         int thisValue = this.counter;
    +         int thatValue = o.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
    +         return result;
    +       }
    +     }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type T. + @param orig The object to copy + @param conf input Configuration. + @return The copied object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + 
+ Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) 
+ + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. + + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. 
{@link #finished()} will be reset and will + return false when reset() is called.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec + io.compress.passthrough.extension = .gz + + + Note: this is not a Splittable codec: it doesn't know the + capabilities of the passed in stream. It should be possible to + extend this in a subclass: the inner classes are marked as protected + to enable this. Do not retrofit splitting to this class..]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks. The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values of length less than the chunk size are guaranteed to have + known value length at read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers slightly better + compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream is already adequately buffered; or if applications + reads/writes keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException raised on errors performing I/O. + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; + byte[3]=n&0xff. + Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; + byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; + byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff; + byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; + byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff; + byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; + byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]= + (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>56)&0xff; byte[2] = (n>>48)&0xff; + byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff; + byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]= + (n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException raised on errors performing I/O.]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException raised on errors performing I/O. + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + + (NB[0]&0xff)<<8 + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff) + <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param generic type. + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException Metrics Exception.]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException Metrics Exception.]]> + + + + + + + + + + + + + + + + + + + + +
  • {@link MetricsSource} generate and update metrics information.
  • +
  • {@link MetricsSink} consume the metrics information
  • + + + {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} polls the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
} (aggregate). + Filter out entries that don't have at least minSamples. + + @param minSamples input minSamples. + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + + + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName serviceName. + @param nameName nameName. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName serviceName. + @param nameName nameName. + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. +

    + @param conf input Configuration.]]> +
    + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } mapping and {@literal <}groupId, groupName{@literal >} + mapping.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + }/host@realm. + @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generic type T. + @return generic type T.]]> + + + + + + + Generics Type T. + @return the result of the action + @throws IOException in the event of error]]> + + + + + + + generic type T. + @return the result of the action + @throws IOException in the event of error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } "jack" + + @param userName input userName. 
+ @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). 
(A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifierextends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. 
+ @param doAs user to do the the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. 
+ @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return abstract delegation token identifier.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return abstract delegation token identifier.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred. 
+ @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param dToken abstract delegation token identifier. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} valuelive. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration built up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)} + After the service has been successfully started via {@link Service#start()} + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit option.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte; e.g. 44, "not found", is the equivalent + of 404. The various 2XX HTTP status codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. + @param exitCode exit code + @param cause inner cause + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This will be 0 until a call + to {@link #finished()} has been made. +

    + @return the currently recorded duration.]]> +
    + + + + + + + + + +
    + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take a significant amount of time since, + in lieu of the reported progress, the framework has to assume that an error + has occurred and time out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type. + @param theClass class of which an object is created + @param conf Configuration + @return a new object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type T + @param o object whose correctly-typed Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + Generics Type. + @param conf input Configuration. + @param src the object to copy from + @param dst the object to copy into, which is destroyed + @return dst param (the copy) + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command. 
+ @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects. + + @return all shells set.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +
    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 
    + + @see GenericOptionsParser + @see ToolRunner]]> +
    + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method. + @throws Exception Exception.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method. + @throws Exception exception.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counting Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr {@literal <=} n + (n is the cardinality of the set A to record in + the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown]]> +
    + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param timeout timeout to wait + @param unit time unit. + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown + @throws TimeoutException the future timed out.]]> +
    +
    + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + +
  • If it is an IOE: Return.
  • +
  • If it is a {@link UncheckedIOException}: return the cause
  • +
  • Completion/Execution Exceptions: extract and repeat
  • +
  • If it is an RTE or Error: throw.
  • +
  • Any other type: wrap in an IOE
  • + + + Recursively handles wrapped Execution and Completion Exceptions in + case something very complicated has happened. + @param e exception. + @return an IOException extracted or built from the cause. + @throws RuntimeException if that is the inner cause. + @throws Error if that is the inner cause.]]> +
    +
    + + + + + + + type of result + @param type of builder + @return the builder passed in.]]> + + + + + + + + + + fs.example.s3a.option becomes "s3a.option" + fs.example.fs.io.policy becomes "fs.io.policy" + fs.example.something becomes "something" + + @param builder builder to modify + @param conf configuration to read + @param prefix prefix to scan/strip + @param mandatory are the options to be mandatory or optional?]]> + + + + + + Return type. + @return the evaluated result. + @throws UnsupportedOperationException fail fast if unsupported + @throws IllegalArgumentException invalid argument]]> + + + + + Contains methods promoted from + {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they + are a key part of integrating async IO in application code. +

    +

    + One key feature is that the {@link #awaitFuture(Future)} and + {@link #awaitFuture(Future, long, TimeUnit)} calls will + extract and rethrow exceptions raised in the future's execution, + including extracting the inner IOException of any + {@code UncheckedIOException} raised in the future. + This makes it somewhat easier to execute IOException-raising + code inside futures. +

    ]]> +
    +
    + + + + + + + type + @return a remote iterator]]> + + + + + + type + @param iterator iterator. + @return a remote iterator]]> + + + + + + type + @param iterable iterable. + @return a remote iterator]]> + + + + + + type + @param array array. + @return a remote iterator]]> + + + + + + + source type + @param result type + @param iterator source + @param mapper transformation + @return a remote iterator]]> + + + + + + source type + @param result type + @param iterator source + @return a remote iterator]]> + + + + + + + + Elements are filtered in the hasNext() method; if not used + the filtering will be done on demand in the {@code next()} + call. +

    + @param type + @param iterator source + @param filter filter + @return a remote iterator]]> +
    +
    + + + + + source type. + @return a new iterator]]> + + + + + + + type + @return a list of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + type + @return an array of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + and this class's log is set to DEBUG, + then the statistics of the operation are evaluated and logged at + debug. +

    + The number of entries processed is returned, as it is useful to + know this, especially during tests or when reporting values + to users. +

    + This does not close the iterator afterwards. + @param source iterator source + @param consumer consumer of the values. + @return the number of elements processed + @param type of source + @throws IOException if the source RemoteIterator or the consumer raise one.]]> +
    +
    + + + + type of source]]> + + + + + This aims to make it straightforward to use lambda-expressions to + transform the results of an iterator, without losing the statistics + in the process, and to chain the operations together. +

    + The closeable operation will be passed through RemoteIterators which + wrap other RemoteIterators. This is to support any iterator which + can be closed to release held connections, file handles etc. + Unless client code is written to assume that RemoteIterator instances + may be closed, this is not likely to be broadly used. It is added + to make it possible to adopt this feature in a managed way. +

    + One notable feature is that the + {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will + LOG at debug any IOStatistics provided by the iterator, if such + statistics are provided. There's no attempt at retrieval and logging + if the LOG is not set to debug, so it is a zero cost feature unless + the logger {@code org.apache.hadoop.fs.functional.RemoteIterators} + is at DEBUG. +

    + Based on the S3A Listing code, and some work on moving other code + to using iterative listings so as to pick up the statistics.]]> +
    +
    + +
    + + + +
    diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 737db05635c74..9c7657b53afc2 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project-dist hadoop-common - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Common Apache Hadoop Common jar @@ -37,7 +37,6 @@ wsce-site.xml - org.apache.hadoop.thirdparty @@ -48,6 +47,11 @@ hadoop-annotations compile + + org.apache.hadoop.thirdparty + hadoop-shaded-guava + + com.google.guava guava @@ -94,8 +98,8 @@ compile - javax.activation - javax.activation-api + jakarta.activation + jakarta.activation-api runtime @@ -137,12 +141,39 @@ com.sun.jersey jersey-servlet compile + + + javax.enterprise + cdi-api + + + javax.servlet + servlet-api + + + ch.qos.cal10n + cal10n-api + + - - com.sun.jersey + com.github.pjfanning jersey-json compile + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + + com.sun.jersey @@ -155,8 +186,8 @@ compile - log4j - log4j + ch.qos.reload4j + reload4j compile @@ -169,11 +200,6 @@ assertj-core test - - org.glassfish.grizzly - grizzly-http-servlet - test - commons-beanutils commons-beanutils @@ -201,7 +227,7 @@ org.slf4j - slf4j-log4j12 + slf4j-reload4j compile @@ -276,11 +302,12 @@ sshd-core test - - org.apache.htrace - htrace-core4 + org.apache.ftpserver + ftpserver-core + test + org.apache.zookeeper zookeeper @@ -304,6 +331,10 @@ + + io.dropwizard.metrics + metrics-core + org.apache.zookeeper zookeeper @@ -357,6 +388,16 @@ wildfly-openssl-java provided + + org.xerial.snappy + snappy-java + compile + + + org.lz4 + lz4-java + provided + @@ -390,11 +431,21 @@ src-compile-protoc - false + + false + + ProtobufRpcEngine.proto + + src-test-compile-protoc - false + + false + + *legacy.proto + +
    @@ -406,24 +457,43 @@ replace-generated-sources false + + **/ProtobufRpcEngineProtos.java + replace-generated-test-sources false + + **/TestProtosLegacy.java + **/TestRpcServiceProtosLegacy.java + replace-sources false + + + **/ProtobufHelper.java + **/RpcWritable.java + **/ProtobufRpcEngineCallback.java + **/ProtobufRpcEngine.java + **/ProtobufRpcEngine2.java + **/ProtobufRpcEngineProtos.java + replace-test-sources false + + **/TestProtoBufRpc.java + @@ -547,11 +617,6 @@ src/main/native/m4/* src/test/empty-file src/test/all-tests - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc_encoder.h src/main/native/gtest/**/* src/test/resources/test-untar.tgz src/test/resources/test.har/_SUCCESS @@ -620,10 +685,6 @@ false - - - - false @@ -677,11 +738,7 @@ ${project.build.directory}/native/javah ${sun.arch.data.model} ${require.bzip2} - ${require.snappy} ${require.zstd} - ${snappy.prefix} - ${snappy.lib} - ${snappy.include} ${zstd.prefix} ${zstd.lib} ${zstd.include} @@ -736,14 +793,9 @@ - - - false - false - true @@ -843,10 +895,6 @@ /nologo /p:Configuration=Release /p:OutDir=${project.build.directory}/bin/ - /p:CustomSnappyPrefix=${snappy.prefix} - /p:CustomSnappyLib=${snappy.lib} - /p:CustomSnappyInclude=${snappy.include} - /p:RequireSnappy=${require.snappy} /p:CustomZstdPrefix=${zstd.prefix} /p:CustomZstdLib=${zstd.lib} /p:CustomZstdInclude=${zstd.include} @@ -890,7 +938,6 @@ org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} ${testsThreadCount} false ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true @@ -931,7 +978,6 @@ ${basedir}/../../dev-support/bin/releasedocmaker - true --index --license @@ -1010,7 +1056,123 @@
    - + + + aarch64 + + false + + aarch64 + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source-legacy-protobuf + generate-sources + + add-source + + + + ${basedir}/src/main/arm-java + + + + + add-test-source-legacy-protobuf + generate-test-sources + + add-test-source + + + + ${basedir}/src/test/arm-java + + + + + + + + + + + x86_64 + + false + + !aarch64 + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + + + src-compile-protoc-legacy + generate-sources + + compile + + + false + + + com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + + false + ${basedir}/src/main/proto + ${project.build.directory}/generated-sources/java + false + + ProtobufRpcEngine.proto + + + + + src-test-compile-protoc-legacy + generate-test-sources + + test-compile + + + false + + + com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + + false + ${basedir}/src/test/proto + ${project.build.directory}/generated-test-sources/java + false + + test_legacy.proto + test_rpc_service_legacy.proto + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + **/FSProtos.java + + *.proto:*.tracing:*.protobuf + + + + + diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index 10591f6ce2aa8..d2ef03645a4ae 100644 --- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -67,33 +67,6 @@ else() endif() set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) -# Require snappy. 
-set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) -hadoop_set_find_shared_library_version("1") -find_library(SNAPPY_LIBRARY - NAMES snappy - PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/lib - ${CUSTOM_SNAPPY_PREFIX}/lib64 ${CUSTOM_SNAPPY_LIB}) -set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) -find_path(SNAPPY_INCLUDE_DIR - NAMES snappy.h - PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/include - ${CUSTOM_SNAPPY_INCLUDE}) -if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR) - get_filename_component(HADOOP_SNAPPY_LIBRARY ${SNAPPY_LIBRARY} NAME) - set(SNAPPY_SOURCE_FILES - "${SRC}/io/compress/snappy/SnappyCompressor.c" - "${SRC}/io/compress/snappy/SnappyDecompressor.c") - set(REQUIRE_SNAPPY ${REQUIRE_SNAPPY}) # Stop warning about unused variable. - message(STATUS "Found Snappy: ${SNAPPY_LIBRARY}") -else() - set(SNAPPY_INCLUDE_DIR "") - set(SNAPPY_SOURCE_FILES "") - if(REQUIRE_SNAPPY) - message(FATAL_ERROR "Required snappy library could not be found. 
SNAPPY_LIBRARY=${SNAPPY_LIBRARY}, SNAPPY_INCLUDE_DIR=${SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_INCLUDE_DIR=${CUSTOM_SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_PREFIX=${CUSTOM_SNAPPY_PREFIX}, CUSTOM_SNAPPY_INCLUDE=${CUSTOM_SNAPPY_INCLUDE}") - endif() -endif() - # Require zstandard SET(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) hadoop_set_find_shared_library_version("1") @@ -253,7 +226,6 @@ include_directories( ${JNI_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} ${BZIP2_INCLUDE_DIR} - ${SNAPPY_INCLUDE_DIR} ${ISAL_INCLUDE_DIR} ${ZSTD_INCLUDE_DIR} ${OPENSSL_INCLUDE_DIR} @@ -264,12 +236,7 @@ configure_file(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) hadoop_add_dual_library(hadoop main/native/src/exception.c - ${SRC}/io/compress/lz4/Lz4Compressor.c - ${SRC}/io/compress/lz4/Lz4Decompressor.c - ${SRC}/io/compress/lz4/lz4.c - ${SRC}/io/compress/lz4/lz4hc.c ${ISAL_SOURCE_FILES} - ${SNAPPY_SOURCE_FILES} ${ZSTD_SOURCE_FILES} ${OPENSSL_SOURCE_FILES} ${SRC}/io/compress/zlib/ZlibCompressor.c diff --git a/hadoop-common-project/hadoop-common/src/main/arm-java/org/apache/hadoop/ipc/protobuf/ProtobufRpcEngineProtos.java b/hadoop-common-project/hadoop-common/src/main/arm-java/org/apache/hadoop/ipc/protobuf/ProtobufRpcEngineProtos.java new file mode 100644 index 0000000000000..28e28bf633784 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/arm-java/org/apache/hadoop/ipc/protobuf/ProtobufRpcEngineProtos.java @@ -0,0 +1,1163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// This is class is added to source because for arm protoc 2.5.0 executable +// is not available to generate the same code. +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: ProtobufRpcEngine.proto +package org.apache.hadoop.ipc.protobuf; + +public final class ProtobufRpcEngineProtos { + private ProtobufRpcEngineProtos() {} + public static void registerAllExtensions( + com.google.protobuf.ExtensionRegistry registry) { + } + public interface RequestHeaderProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required string methodName = 1; + /** + * required string methodName = 1; + * + *
    +     ** Name of the RPC method
    +     * 
    + */ + boolean hasMethodName(); + /** + * required string methodName = 1; + * + *
    +     ** Name of the RPC method
    +     * 
    + */ + java.lang.String getMethodName(); + /** + * required string methodName = 1; + * + *
    +     ** Name of the RPC method
    +     * 
    + */ + com.google.protobuf.ByteString + getMethodNameBytes(); + + // required string declaringClassProtocolName = 2; + /** + * required string declaringClassProtocolName = 2; + * + *
    +     **
    +     * RPCs for a particular interface (ie protocol) are done using a
    +     * IPC connection that is setup using rpcProxy.
    +     * The rpcProxy's has a declared protocol name that is
    +     * sent form client to server at connection time.
    +     *
    +     * Each Rpc call also sends a protocol name
    +     * (called declaringClassprotocolName). This name is usually the same
    +     * as the connection protocol name except in some cases.
    +     * For example metaProtocols such ProtocolInfoProto which get metainfo
    +     * about the protocol reuse the connection but need to indicate that
    +     * the actual protocol is different (i.e. the protocol is
    +     * ProtocolInfoProto) since they reuse the connection; in this case
    +     * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +     * 
    + */ + boolean hasDeclaringClassProtocolName(); + /** + * required string declaringClassProtocolName = 2; + * + *
    +     **
    +     * RPCs for a particular interface (ie protocol) are done using a
    +     * IPC connection that is setup using rpcProxy.
    +     * The rpcProxy's has a declared protocol name that is
    +     * sent form client to server at connection time.
    +     *
    +     * Each Rpc call also sends a protocol name
    +     * (called declaringClassprotocolName). This name is usually the same
    +     * as the connection protocol name except in some cases.
    +     * For example metaProtocols such ProtocolInfoProto which get metainfo
    +     * about the protocol reuse the connection but need to indicate that
    +     * the actual protocol is different (i.e. the protocol is
    +     * ProtocolInfoProto) since they reuse the connection; in this case
    +     * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +     * 
    + */ + java.lang.String getDeclaringClassProtocolName(); + /** + * required string declaringClassProtocolName = 2; + * + *
    +     **
    +     * RPCs for a particular interface (ie protocol) are done using a
    +     * IPC connection that is setup using rpcProxy.
    +     * The rpcProxy's has a declared protocol name that is
    +     * sent form client to server at connection time.
    +     *
    +     * Each Rpc call also sends a protocol name
    +     * (called declaringClassprotocolName). This name is usually the same
    +     * as the connection protocol name except in some cases.
    +     * For example metaProtocols such ProtocolInfoProto which get metainfo
    +     * about the protocol reuse the connection but need to indicate that
    +     * the actual protocol is different (i.e. the protocol is
    +     * ProtocolInfoProto) since they reuse the connection; in this case
    +     * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +     * 
    + */ + com.google.protobuf.ByteString + getDeclaringClassProtocolNameBytes(); + + // required uint64 clientProtocolVersion = 3; + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +     ** protocol version of class declaring the called method
    +     * 
    + */ + boolean hasClientProtocolVersion(); + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +     ** protocol version of class declaring the called method
    +     * 
    + */ + long getClientProtocolVersion(); + } + /** + * Protobuf type {@code hadoop.common.RequestHeaderProto} + * + *
    +   **
    +   * This message is the header for the Protobuf Rpc Engine
    +   * when sending a RPC request from  RPC client to the RPC server.
    +   * The actual request (serialized as protobuf) follows this request.
    +   *
    +   * No special header is needed for the Rpc Response for Protobuf Rpc Engine.
    +   * The normal RPC response header (see RpcHeader.proto) are sufficient.
    +   * 
    + */ + public static final class RequestHeaderProto extends + com.google.protobuf.GeneratedMessage + implements RequestHeaderProtoOrBuilder { + // Use RequestHeaderProto.newBuilder() to construct. + private RequestHeaderProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private RequestHeaderProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final RequestHeaderProto defaultInstance; + public static RequestHeaderProto getDefaultInstance() { + return defaultInstance; + } + + public RequestHeaderProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private RequestHeaderProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + methodName_ = input.readBytes(); + break; + } + case 18: { + bitField0_ |= 0x00000002; + declaringClassProtocolName_ = input.readBytes(); + break; + } + case 24: { + bitField0_ |= 0x00000004; + clientProtocolVersion_ = input.readUInt64(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new 
com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.internal_static_hadoop_common_RequestHeaderProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.internal_static_hadoop_common_RequestHeaderProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.class, org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public RequestHeaderProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new RequestHeaderProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required string methodName = 1; + public static final int METHODNAME_FIELD_NUMBER = 1; + private java.lang.Object methodName_; + /** + * required string methodName = 1; + * + *
    +     ** Name of the RPC method
    +     * 
    + */ + public boolean hasMethodName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string methodName = 1; + * + *
    +     ** Name of the RPC method
    +     * 
    + */ + public java.lang.String getMethodName() { + java.lang.Object ref = methodName_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + methodName_ = s; + } + return s; + } + } + /** + * required string methodName = 1; + * + *
    +     ** Name of the RPC method
    +     * 
    + */ + public com.google.protobuf.ByteString + getMethodNameBytes() { + java.lang.Object ref = methodName_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + methodName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // required string declaringClassProtocolName = 2; + public static final int DECLARINGCLASSPROTOCOLNAME_FIELD_NUMBER = 2; + private java.lang.Object declaringClassProtocolName_; + /** + * required string declaringClassProtocolName = 2; + * + *
    +     **
    +     * RPCs for a particular interface (i.e. protocol) are done using an
    +     * IPC connection that is set up using rpcProxy.
    +     * The rpcProxy has a declared protocol name that is
    +     * sent from client to server at connection time.
    +     *
    +     * Each Rpc call also sends a protocol name
    +     * (called declaringClassProtocolName). This name is usually the same
    +     * as the connection protocol name except in some cases.
    +     * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +     * about the protocol reuse the connection but need to indicate that
    +     * the actual protocol is different (i.e. the protocol is
    +     * ProtocolInfoProto) since they reuse the connection; in this case
    +     * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +     * 
    + */ + public boolean hasDeclaringClassProtocolName() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +     **
    +     * RPCs for a particular interface (i.e. protocol) are done using an
    +     * IPC connection that is set up using rpcProxy.
    +     * The rpcProxy has a declared protocol name that is
    +     * sent from client to server at connection time.
    +     *
    +     * Each Rpc call also sends a protocol name
    +     * (called declaringClassProtocolName). This name is usually the same
    +     * as the connection protocol name except in some cases.
    +     * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +     * about the protocol reuse the connection but need to indicate that
    +     * the actual protocol is different (i.e. the protocol is
    +     * ProtocolInfoProto) since they reuse the connection; in this case
    +     * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +     * 
    + */ + public java.lang.String getDeclaringClassProtocolName() { + java.lang.Object ref = declaringClassProtocolName_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + declaringClassProtocolName_ = s; + } + return s; + } + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +     **
    +     * RPCs for a particular interface (i.e. protocol) are done using an
    +     * IPC connection that is set up using rpcProxy.
    +     * The rpcProxy has a declared protocol name that is
    +     * sent from client to server at connection time.
    +     *
    +     * Each Rpc call also sends a protocol name
    +     * (called declaringClassProtocolName). This name is usually the same
    +     * as the connection protocol name except in some cases.
    +     * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +     * about the protocol reuse the connection but need to indicate that
    +     * the actual protocol is different (i.e. the protocol is
    +     * ProtocolInfoProto) since they reuse the connection; in this case
    +     * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +     * 
    + */ + public com.google.protobuf.ByteString + getDeclaringClassProtocolNameBytes() { + java.lang.Object ref = declaringClassProtocolName_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + declaringClassProtocolName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // required uint64 clientProtocolVersion = 3; + public static final int CLIENTPROTOCOLVERSION_FIELD_NUMBER = 3; + private long clientProtocolVersion_; + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +     ** protocol version of class declaring the called method
    +     * 
    + */ + public boolean hasClientProtocolVersion() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +     ** protocol version of class declaring the called method
    +     * 
    + */ + public long getClientProtocolVersion() { + return clientProtocolVersion_; + } + + private void initFields() { + methodName_ = ""; + declaringClassProtocolName_ = ""; + clientProtocolVersion_ = 0L; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasMethodName()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasDeclaringClassProtocolName()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasClientProtocolVersion()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getMethodNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBytes(2, getDeclaringClassProtocolNameBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeUInt64(3, clientProtocolVersion_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getMethodNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(2, getDeclaringClassProtocolNameBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeUInt64Size(3, clientProtocolVersion_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + 
protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto other = (org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto) obj; + + boolean result = true; + result = result && (hasMethodName() == other.hasMethodName()); + if (hasMethodName()) { + result = result && getMethodName() + .equals(other.getMethodName()); + } + result = result && (hasDeclaringClassProtocolName() == other.hasDeclaringClassProtocolName()); + if (hasDeclaringClassProtocolName()) { + result = result && getDeclaringClassProtocolName() + .equals(other.getDeclaringClassProtocolName()); + } + result = result && (hasClientProtocolVersion() == other.hasClientProtocolVersion()); + if (hasClientProtocolVersion()) { + result = result && (getClientProtocolVersion() + == other.getClientProtocolVersion()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasMethodName()) { + hash = (37 * hash) + METHODNAME_FIELD_NUMBER; + hash = (53 * hash) + getMethodName().hashCode(); + } + if (hasDeclaringClassProtocolName()) { + hash = (37 * hash) + DECLARINGCLASSPROTOCOLNAME_FIELD_NUMBER; + hash = (53 * hash) + getDeclaringClassProtocolName().hashCode(); + } + if (hasClientProtocolVersion()) { + hash = (37 * hash) + CLIENTPROTOCOLVERSION_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getClientProtocolVersion()); + } + hash = (29 * hash) + 
getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseDelimitedFrom( + java.io.InputStream input, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.RequestHeaderProto} + * + *
    +     **
    +     * This message is the header for the Protobuf Rpc Engine
    +     * when sending an RPC request from the RPC client to the RPC server.
    +     * The actual request (serialized as protobuf) follows this header.
    +     *
    +     * No special header is needed for the Rpc Response for Protobuf Rpc Engine.
    +     * The normal RPC response header (see RpcHeader.proto) is sufficient.
    +     * 
    + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.internal_static_hadoop_common_RequestHeaderProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.internal_static_hadoop_common_RequestHeaderProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.class, org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + methodName_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + declaringClassProtocolName_ = ""; + bitField0_ = (bitField0_ & ~0x00000002); + clientProtocolVersion_ = 0L; + bitField0_ = (bitField0_ & ~0x00000004); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.internal_static_hadoop_common_RequestHeaderProto_descriptor; + } + + public 
org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto build() { + org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto buildPartial() { + org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto result = new org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.methodName_ = methodName_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.declaringClassProtocolName_ = declaringClassProtocolName_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.clientProtocolVersion_ = clientProtocolVersion_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto other) { + if (other == org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto.getDefaultInstance()) return this; + if (other.hasMethodName()) { + bitField0_ |= 0x00000001; + methodName_ = 
other.methodName_; + onChanged(); + } + if (other.hasDeclaringClassProtocolName()) { + bitField0_ |= 0x00000002; + declaringClassProtocolName_ = other.declaringClassProtocolName_; + onChanged(); + } + if (other.hasClientProtocolVersion()) { + setClientProtocolVersion(other.getClientProtocolVersion()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasMethodName()) { + + return false; + } + if (!hasDeclaringClassProtocolName()) { + + return false; + } + if (!hasClientProtocolVersion()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required string methodName = 1; + private java.lang.Object methodName_ = ""; + /** + * required string methodName = 1; + * + *
    +       ** Name of the RPC method
    +       * 
    + */ + public boolean hasMethodName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string methodName = 1; + * + *
    +       ** Name of the RPC method
    +       * 
    + */ + public java.lang.String getMethodName() { + java.lang.Object ref = methodName_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + methodName_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * required string methodName = 1; + * + *
    +       ** Name of the RPC method
    +       * 
    + */ + public com.google.protobuf.ByteString + getMethodNameBytes() { + java.lang.Object ref = methodName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + methodName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * required string methodName = 1; + * + *
    +       ** Name of the RPC method
    +       * 
    + */ + public Builder setMethodName( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + methodName_ = value; + onChanged(); + return this; + } + /** + * required string methodName = 1; + * + *
    +       ** Name of the RPC method
    +       * 
    + */ + public Builder clearMethodName() { + bitField0_ = (bitField0_ & ~0x00000001); + methodName_ = getDefaultInstance().getMethodName(); + onChanged(); + return this; + } + /** + * required string methodName = 1; + * + *
    +       ** Name of the RPC method
    +       * 
    + */ + public Builder setMethodNameBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + methodName_ = value; + onChanged(); + return this; + } + + // required string declaringClassProtocolName = 2; + private java.lang.Object declaringClassProtocolName_ = ""; + /** + * required string declaringClassProtocolName = 2; + * + *
    +       **
    +       * RPCs for a particular interface (i.e. protocol) are done using an
    +       * IPC connection that is set up using rpcProxy.
    +       * The rpcProxy has a declared protocol name that is
    +       * sent from client to server at connection time.
    +       *
    +       * Each Rpc call also sends a protocol name
    +       * (called declaringClassProtocolName). This name is usually the same
    +       * as the connection protocol name except in some cases.
    +       * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +       * about the protocol reuse the connection but need to indicate that
    +       * the actual protocol is different (i.e. the protocol is
    +       * ProtocolInfoProto) since they reuse the connection; in this case
    +       * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +       * 
    + */ + public boolean hasDeclaringClassProtocolName() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +       **
    +       * RPCs for a particular interface (i.e. protocol) are done using an
    +       * IPC connection that is set up using rpcProxy.
    +       * The rpcProxy has a declared protocol name that is
    +       * sent from client to server at connection time.
    +       *
    +       * Each Rpc call also sends a protocol name
    +       * (called declaringClassProtocolName). This name is usually the same
    +       * as the connection protocol name except in some cases.
    +       * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +       * about the protocol reuse the connection but need to indicate that
    +       * the actual protocol is different (i.e. the protocol is
    +       * ProtocolInfoProto) since they reuse the connection; in this case
    +       * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +       * 
    + */ + public java.lang.String getDeclaringClassProtocolName() { + java.lang.Object ref = declaringClassProtocolName_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + declaringClassProtocolName_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +       **
    +       * RPCs for a particular interface (i.e. protocol) are done using an
    +       * IPC connection that is set up using rpcProxy.
    +       * The rpcProxy has a declared protocol name that is
    +       * sent from client to server at connection time.
    +       *
    +       * Each Rpc call also sends a protocol name
    +       * (called declaringClassProtocolName). This name is usually the same
    +       * as the connection protocol name except in some cases.
    +       * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +       * about the protocol reuse the connection but need to indicate that
    +       * the actual protocol is different (i.e. the protocol is
    +       * ProtocolInfoProto) since they reuse the connection; in this case
    +       * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +       * 
    + */ + public com.google.protobuf.ByteString + getDeclaringClassProtocolNameBytes() { + java.lang.Object ref = declaringClassProtocolName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + declaringClassProtocolName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +       **
    +       * RPCs for a particular interface (i.e. protocol) are done using an
    +       * IPC connection that is set up using rpcProxy.
    +       * The rpcProxy has a declared protocol name that is
    +       * sent from client to server at connection time.
    +       *
    +       * Each Rpc call also sends a protocol name
    +       * (called declaringClassProtocolName). This name is usually the same
    +       * as the connection protocol name except in some cases.
    +       * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +       * about the protocol reuse the connection but need to indicate that
    +       * the actual protocol is different (i.e. the protocol is
    +       * ProtocolInfoProto) since they reuse the connection; in this case
    +       * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +       * 
    + */ + public Builder setDeclaringClassProtocolName( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + declaringClassProtocolName_ = value; + onChanged(); + return this; + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +       **
    +       * RPCs for a particular interface (i.e. protocol) are done using an
    +       * IPC connection that is set up using rpcProxy.
    +       * The rpcProxy has a declared protocol name that is
    +       * sent from client to server at connection time.
    +       *
    +       * Each Rpc call also sends a protocol name
    +       * (called declaringClassProtocolName). This name is usually the same
    +       * as the connection protocol name except in some cases.
    +       * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +       * about the protocol reuse the connection but need to indicate that
    +       * the actual protocol is different (i.e. the protocol is
    +       * ProtocolInfoProto) since they reuse the connection; in this case
    +       * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +       * 
    + */ + public Builder clearDeclaringClassProtocolName() { + bitField0_ = (bitField0_ & ~0x00000002); + declaringClassProtocolName_ = getDefaultInstance().getDeclaringClassProtocolName(); + onChanged(); + return this; + } + /** + * required string declaringClassProtocolName = 2; + * + *
    +       **
    +       * RPCs for a particular interface (i.e. protocol) are done using an
    +       * IPC connection that is set up using rpcProxy.
    +       * The rpcProxy has a declared protocol name that is
    +       * sent from client to server at connection time.
    +       *
    +       * Each Rpc call also sends a protocol name
    +       * (called declaringClassProtocolName). This name is usually the same
    +       * as the connection protocol name except in some cases.
    +       * For example metaProtocols such as ProtocolInfoProto which get metainfo
    +       * about the protocol reuse the connection but need to indicate that
    +       * the actual protocol is different (i.e. the protocol is
    +       * ProtocolInfoProto) since they reuse the connection; in this case
    +       * the declaringClassProtocolName field is set to the ProtocolInfoProto
    +       * 
    + */ + public Builder setDeclaringClassProtocolNameBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + declaringClassProtocolName_ = value; + onChanged(); + return this; + } + + // required uint64 clientProtocolVersion = 3; + private long clientProtocolVersion_ ; + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +       ** protocol version of class declaring the called method
    +       * 
    + */ + public boolean hasClientProtocolVersion() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +       ** protocol version of class declaring the called method
    +       * 
    + */ + public long getClientProtocolVersion() { + return clientProtocolVersion_; + } + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +       ** protocol version of class declaring the called method
    +       * 
    + */ + public Builder setClientProtocolVersion(long value) { + bitField0_ |= 0x00000004; + clientProtocolVersion_ = value; + onChanged(); + return this; + } + /** + * required uint64 clientProtocolVersion = 3; + * + *
    +       ** protocol version of class declaring the called method
    +       * 
    + */ + public Builder clearClientProtocolVersion() { + bitField0_ = (bitField0_ & ~0x00000004); + clientProtocolVersion_ = 0L; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.RequestHeaderProto) + } + + static { + defaultInstance = new RequestHeaderProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.RequestHeaderProto) + } + + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_RequestHeaderProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_RequestHeaderProto_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return descriptor; + } + private static com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] descriptorData = { + "\n\027ProtobufRpcEngine.proto\022\rhadoop.common" + + "\"k\n\022RequestHeaderProto\022\022\n\nmethodName\030\001 \002" + + "(\t\022\"\n\032declaringClassProtocolName\030\002 \002(\t\022\035" + + "\n\025clientProtocolVersion\030\003 \002(\004B<\n\036org.apa" + + "che.hadoop.ipc.protobufB\027ProtobufRpcEngi" + + "neProtos\240\001\001" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_hadoop_common_RequestHeaderProto_descriptor = + getDescriptor().getMessageTypes().get(0); + internal_static_hadoop_common_RequestHeaderProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_RequestHeaderProto_descriptor, + new java.lang.String[] { "MethodName", "DeclaringClassProtocolName", "ClientProtocolVersion", 
}); + return null; + } + }; + com.google.protobuf.Descriptors.FileDescriptor + .internalBuildGeneratedFileFrom(descriptorData, + new com.google.protobuf.Descriptors.FileDescriptor[] { + }, assigner); + } + + // @@protoc_insertion_point(outer_class_scope) +} diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index 7d9ffc69bc503..ab7cadd7d8ad7 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -26,9 +26,9 @@ MYNAME="${BASH_SOURCE-$0}" function hadoop_usage { hadoop_add_option "buildpaths" "attempt to add class files from build tree" - hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode" + hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in worker mode" hadoop_add_option "loglevel level" "set the log4j level for this command" - hadoop_add_option "hosts filename" "list of hosts to use in slave mode" + hadoop_add_option "hosts filename" "list of hosts to use in worker mode" hadoop_add_option "workers" "turn on worker mode" hadoop_add_subcommand "checknative" client "check native Hadoop and compression libraries availability" diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh index 55304916ad1f7..1d8096b4baae1 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh @@ -16,7 +16,7 @@ # limitations under the License. -# Run a Hadoop command on all slave hosts. +# Run a Hadoop command on all worker hosts. 
function hadoop_usage { diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index eb7285fb4e667..197dea5828376 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -596,11 +596,6 @@ function hadoop_bootstrap YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"} MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"} MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"} - HDDS_DIR=${HDDS_DIR:-"share/hadoop/hdds"} - HDDS_LIB_JARS_DIR=${HDDS_LIB_JARS_DIR:-"share/hadoop/hdds/lib"} - OZONE_DIR=${OZONE_DIR:-"share/hadoop/ozone"} - OZONE_LIB_JARS_DIR=${OZONE_LIB_JARS_DIR:-"share/hadoop/ozone/lib"} - OZONEFS_DIR=${OZONEFS_DIR:-"share/hadoop/ozonefs"} HADOOP_TOOLS_HOME=${HADOOP_TOOLS_HOME:-${HADOOP_HOME}} HADOOP_TOOLS_DIR=${HADOOP_TOOLS_DIR:-"share/hadoop/tools"} @@ -1342,7 +1337,7 @@ function hadoop_add_to_classpath_tools # shellcheck disable=SC1090 . "${HADOOP_LIBEXEC_DIR}/tools/${module}.sh" else - hadoop_error "ERROR: Tools helper ${HADOOP_LIBEXEC_DIR}/tools/${module}.sh was not found." + hadoop_debug "Tools helper ${HADOOP_LIBEXEC_DIR}/tools/${module}.sh was not found." fi if declare -f hadoop_classpath_tools_${module} >/dev/null 2>&1; then @@ -2210,7 +2205,7 @@ function hadoop_daemon_handler hadoop_verify_logdir hadoop_status_daemon "${daemon_pidfile}" if [[ $? == 0 ]]; then - hadoop_error "${daemonname} is running as process $(cat "${daemon_pidfile}"). Stop it first." + hadoop_error "${daemonname} is running as process $(cat "${daemon_pidfile}"). Stop it first and ensure ${daemon_pidfile} file is empty before retry." exit 1 else # stale pid file, so just remove it and continue on @@ -2271,7 +2266,7 @@ function hadoop_secure_daemon_handler hadoop_verify_logdir hadoop_status_daemon "${daemon_pidfile}" if [[ $? 
== 0 ]]; then - hadoop_error "${daemonname} is running as process $(cat "${daemon_pidfile}"). Stop it first." + hadoop_error "${daemonname} is running as process $(cat "${daemon_pidfile}"). Stop it first and ensure ${daemon_pidfile} file is empty before retry." exit 1 else # stale pid file, so just remove it and continue on diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index e43cd95b047ee..f4625f5999b1c 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -390,15 +390,6 @@ export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)} # # export HDFS_DFSROUTER_OPTS="" -### -# Ozone Manager specific parameters -### -# Specify the JVM options to be used when starting the Ozone Manager. -# These options will be appended to the options specified as HADOOP_OPTS -# and therefore may override any similar flags set in HADOOP_OPTS -# -# export HDFS_OM_OPTS="" - ### # HDFS StorageContainerManager specific parameters ### diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties index 7f9ea462679b3..52d2c1ff038e6 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties @@ -282,13 +282,6 @@ log4j.appender.NMAUDIT.MaxBackupIndex=${nm.audit.log.maxbackupindex} #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log #log4j.appender.nodemanagerrequestlog.RetainDays=3 -#Http Server request logs for Ozone S3Gateway -log4j.logger.http.requests.s3gateway=INFO,s3gatewayrequestlog -log4j.appender.s3gatewayrequestlog=org.apache.hadoop.http.HttpRequestLogAppender -log4j.appender.s3gatewayrequestlog.Filename=${hadoop.log.dir}/jetty-s3gateway-yyyy_mm_dd.log 
-log4j.appender.s3gatewayrequestlog.RetainDays=3 - - # WebHdfs request log on datanodes # Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to # direct the log to a separate file. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java index cce744e5076f5..836f3819f8baf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java @@ -30,7 +30,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.http.HttpServer2; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A servlet to print out the running configuration data. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java index 5b2d1449f9c86..881a2ce811bbe 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java @@ -57,8 +57,8 @@ public ConfigRedactor(Configuration conf) { * Given a key / value pair, decides whether or not to redact and returns * either the original value or text indicating it has been redacted. * - * @param key - * @param value + * @param key param key. + * @param value param value, will return if conditions permit. 
* @return Original value, or text indicating it has been redacted */ public String redact(String key, String value) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 9a8841e701b96..d0c7cce75e943 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -24,7 +24,6 @@ import com.ctc.wstx.stax.WstxInputFactory; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; -import com.google.common.annotations.VisibleForTesting; import java.io.BufferedInputStream; import java.io.DataInput; @@ -40,6 +39,8 @@ import java.lang.ref.WeakReference; import java.net.InetSocketAddress; import java.net.JarURLConnection; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; import java.nio.file.Files; @@ -81,10 +82,11 @@ import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.commons.collections.map.UnmodifiableMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -96,18 +98,19 @@ import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProvider.CredentialEntry; import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import 
org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.XMLUtils; + import org.codehaus.stax2.XMLStreamReader2; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; - import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; @@ -315,7 +318,7 @@ private static boolean getRestrictParserDefault(Object resource) { private boolean loadDefaults = true; /** - * Configuration objects + * Configuration objects. */ private static final WeakHashMap REGISTRY = new WeakHashMap(); @@ -1025,11 +1028,11 @@ public synchronized void reloadConfiguration() { properties = null; // trigger reload finalParameters.clear(); // clear site-limits } - + private synchronized void addResourceObject(Resource resource) { resources.add(resource); // add to resources restrictSystemProps |= resource.isParserRestricted(); - reloadConfiguration(); + loadProps(properties, resources.size() - 1, false); } private static final int MAX_SUBST = 20; @@ -1137,36 +1140,37 @@ private String substituteVars(String expr) { final String var = eval.substring(varBounds[SUB_START_IDX], varBounds[SUB_END_IDX]); String val = null; - if (!restrictSystemProps) { - try { - if (var.startsWith("env.") && 4 < var.length()) { - String v = var.substring(4); - int i = 0; - for (; i < v.length(); i++) { - char c = v.charAt(i); - if (c == ':' && i < v.length() - 1 && v.charAt(i + 1) == '-') { - val = getenv(v.substring(0, i)); - if (val == null || val.length() == 0) { - val = v.substring(i + 2); - } - break; - } else if (c == '-') { - val = getenv(v.substring(0, i)); - if (val == null) { - val = v.substring(i + 1); - } - break; + try { + // evaluate system properties or 
environment variables even when + // the configuration is restricted -the restrictions are enforced + // in the getenv/getProperty calls + if (var.startsWith("env.") && 4 < var.length()) { + String v = var.substring(4); + int i = 0; + for (; i < v.length(); i++) { + char c = v.charAt(i); + if (c == ':' && i < v.length() - 1 && v.charAt(i + 1) == '-') { + val = getenv(v.substring(0, i)); + if (val == null || val.length() == 0) { + val = v.substring(i + 2); } + break; + } else if (c == '-') { + val = getenv(v.substring(0, i)); + if (val == null) { + val = v.substring(i + 1); + } + break; } - if (i == v.length()) { - val = getenv(v); - } - } else { - val = getProperty(var); } - } catch (SecurityException se) { - LOG.warn("Unexpected SecurityException in Configuration", se); + if (i == v.length()) { + val = getenv(v); + } + } else { + val = getProperty(var); } + } catch (SecurityException se) { + LOG.warn("Unexpected SecurityException in Configuration", se); } if (val == null) { val = getRaw(var); @@ -1192,13 +1196,33 @@ private String substituteVars(String expr) { throw new IllegalStateException("Variable substitution depth too large: " + MAX_SUBST + " " + expr); } - + + /** + * Get the environment variable value if + * {@link #restrictSystemProps} does not block this. + * @param name environment variable name. + * @return the value or null if either it is unset or access forbidden. + */ String getenv(String name) { - return System.getenv(name); + if (!restrictSystemProps) { + return System.getenv(name); + } else { + return null; + } } + /** + * Get a system property value if + * {@link #restrictSystemProps} does not block this. + * @param key property key + * @return the value or null if either it is unset or access forbidden. 
+ */ String getProperty(String key) { - return System.getProperty(key); + if (!restrictSystemProps) { + return System.getProperty(key); + } else { + return null; + } } /** @@ -1863,6 +1887,7 @@ public long getTimeDuration(String name, String defaultValue, * @param name Property name * @param vStr The string value with time unit suffix to be converted. * @param unit Unit to convert the stored property, if it exists. + * @return time duration in given time unit. */ public long getTimeDurationHelper(String name, String vStr, TimeUnit unit) { return getTimeDurationHelper(name, vStr, unit, unit); @@ -1877,6 +1902,7 @@ public long getTimeDurationHelper(String name, String vStr, TimeUnit unit) { * @param vStr The string value with time unit suffix to be converted. * @param defaultUnit Unit to convert the stored property, if it exists. * @param returnUnit Unit for the returned value. + * @return time duration in given time unit. */ private long getTimeDurationHelper(String name, String vStr, TimeUnit defaultUnit, TimeUnit returnUnit) { @@ -2161,7 +2187,7 @@ private static int convertToInt(String value, int defaultValue) { } /** - * Is the given value in the set of ranges + * Is the given value in the set of ranges. * @param value the value to check * @return is the value in the ranges? */ @@ -2218,7 +2244,7 @@ public Iterator iterator() { } /** - * Parse the given attribute as a set of integer ranges + * Parse the given attribute as a set of integer ranges. * @param name the attribute name * @param defaultValue the default value if it is not set * @return a new set of ranges from the configured value @@ -2437,7 +2463,7 @@ public char[] getPasswordFromCredentialProviders(String name) /** * Fallback to clear text passwords in configuration. - * @param name + * @param name the property name. 
* @return clear text password or null */ protected char[] getPasswordFromConfig(String name) { @@ -2502,6 +2528,8 @@ public InetSocketAddress getSocketAddr( /** * Set the socket address for the name property as * a host:port. + * @param name property name. + * @param addr inetSocketAddress addr. */ public void setSocketAddr(String name, InetSocketAddress addr) { set(name, NetUtils.getHostPortString(addr)); @@ -2679,6 +2707,7 @@ public Class getClass(String name, Class defaultValue) { * @param name the conf key name. * @param defaultValue default value. * @param xface the interface implemented by the named class. + * @param Interface class type. * @return property value as a Class, * or defaultValue. */ @@ -2708,6 +2737,7 @@ else if (theClass != null) * @param name the property name. * @param xface the interface implemented by the classes named by * name. + * @param Interface class type. * @return a List of objects implementing xface. */ @SuppressWarnings("unchecked") @@ -2740,15 +2770,16 @@ public void setClass(String name, Class theClass, Class xface) { set(name, theClass.getName()); } - /** + /** * Get a local file under a directory named by dirsProp with * the given path. If dirsProp contains multiple directories, * then one is chosen based on path's hash code. If the selected * directory does not exist, an attempt is made to create it. - * + * * @param dirsProp directory in which to locate the file. * @param path file-path. * @return local file under the directory with the given path. + * @throws IOException raised on errors performing I/O. */ public Path getLocalPath(String dirsProp, String path) throws IOException { @@ -2772,15 +2803,16 @@ public Path getLocalPath(String dirsProp, String path) throw new IOException("No valid local directories in property: "+dirsProp); } - /** + /** * Get a local file name under a directory named in dirsProp with * the given path. If dirsProp contains multiple directories, * then one is chosen based on path's hash code. 
If the selected * directory does not exist, an attempt is made to create it. - * + * * @param dirsProp directory in which to locate the file. * @param path file-path. * @return local file under the directory with the given path. + * @throws IOException raised on errors performing I/O. */ public File getFile(String dirsProp, String path) throws IOException { @@ -2870,12 +2902,27 @@ public Set getFinalParameters() { protected synchronized Properties getProps() { if (properties == null) { properties = new Properties(); - Map backup = updatingResource != null ? - new ConcurrentHashMap(updatingResource) : null; - loadResources(properties, resources, quietmode); + loadProps(properties, 0, true); + } + return properties; + } + /** + * Loads the resource at a given index into the properties. + * @param props the object containing the loaded properties. + * @param startIdx the index where the new resource has been added. + * @param fullReload flag whether we do complete reload of the conf instead + * of just loading the new resource. + */ + private synchronized void loadProps(final Properties props, + final int startIdx, final boolean fullReload) { + if (props != null) { + Map backup = + updatingResource != null + ? new ConcurrentHashMap<>(updatingResource) : null; + loadResources(props, resources, startIdx, fullReload, quietmode); if (overlay != null) { - properties.putAll(overlay); + props.putAll(overlay); if (backup != null) { for (Map.Entry item : overlay.entrySet()) { String key = (String) item.getKey(); @@ -2887,7 +2934,6 @@ protected synchronized Properties getProps() { } } } - return properties; } /** @@ -2919,11 +2965,13 @@ public Iterator> iterator() { // methods that allow non-strings to be put into configurations are removed, // we could replace properties with a Map and get rid of this // code. 
- Map result = new HashMap(); - for(Map.Entry item: getProps().entrySet()) { - if (item.getKey() instanceof String && - item.getValue() instanceof String) { + Properties props = getProps(); + Map result = new HashMap<>(); + synchronized (props) { + for (Map.Entry item : props.entrySet()) { + if (item.getKey() instanceof String && item.getValue() instanceof String) { result.put((String) item.getKey(), (String) item.getValue()); + } } } return result.entrySet().iterator(); @@ -2989,14 +3037,16 @@ private XMLStreamReader parse(InputStream is, String systemIdStr, private void loadResources(Properties properties, ArrayList resources, + int startIdx, + boolean fullReload, boolean quiet) { - if(loadDefaults) { + if(loadDefaults && fullReload) { for (String resource : defaultResources) { loadResource(properties, new Resource(resource, false), quiet); } } - for (int i = 0; i < resources.size(); i++) { + for (int i = startIdx; i < resources.size(); i++) { Resource ret = loadResource(properties, resources.get(i), quiet); if (ret != null) { resources.set(i, ret); @@ -3243,7 +3293,15 @@ private void handleInclude() throws XMLStreamException, IOException { File href = new File(confInclude); if (!href.isAbsolute()) { // Included resources are relative to the current resource - File baseFile = new File(name).getParentFile(); + File baseFile; + + try { + baseFile = new File(new URI(name)); + } catch (IllegalArgumentException | URISyntaxException e) { + baseFile = new File(name); + } + + baseFile = baseFile.getParentFile(); href = new File(baseFile, href.getPath()); } if (!href.exists()) { @@ -3366,7 +3424,7 @@ void parseNext() throws IOException, XMLStreamException { /** * Add tags defined in HADOOP_TAGS_SYSTEM, HADOOP_TAGS_CUSTOM. - * @param prop + * @param prop properties. 
*/ public void addTags(Properties prop) { // Get all system tags @@ -3467,7 +3525,7 @@ private void loadProperty(Properties properties, String name, String attr, /** * Print a warning if a property with a given name already exists with a - * different value + * different value. */ private void checkForOverride(Properties properties, String name, String attr, String value) { String propertyValue = properties.getProperty(attr); @@ -3477,11 +3535,12 @@ private void checkForOverride(Properties properties, String name, String attr, S } } - /** + /** * Write out the non-default properties in this configuration to the given * {@link OutputStream} using UTF-8 encoding. - * + * * @param out the output stream to write to. + * @throws IOException raised on errors performing I/O. */ public void writeXml(OutputStream out) throws IOException { writeXml(new OutputStreamWriter(out, "UTF-8")); @@ -3511,7 +3570,9 @@ public void writeXml(Writer out) throws IOException { * the configuration, this method throws an {@link IllegalArgumentException}. * * + * @param propertyName xml property name. * @param out the writer to write to. + * @throws IOException raised on errors performing I/O. 
*/ public void writeXml(@Nullable String propertyName, Writer out) throws IOException, IllegalArgumentException { @@ -3520,7 +3581,7 @@ public void writeXml(@Nullable String propertyName, Writer out) try { DOMSource source = new DOMSource(doc); StreamResult result = new StreamResult(out); - TransformerFactory transFactory = TransformerFactory.newInstance(); + TransformerFactory transFactory = XMLUtils.newSecureTransformerFactory(); Transformer transformer = transFactory.newTransformer(); // Important to not hold Configuration log while writing result, since @@ -3665,7 +3726,7 @@ private synchronized void appendXMLProperty(Document doc, Element conf, * @param config the configuration * @param propertyName property name * @param out the Writer to write to - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws IllegalArgumentException when property name is not * empty and the property is not found in configuration **/ @@ -3712,7 +3773,7 @@ public static void dumpConfiguration(Configuration config, * * @param config the configuration * @param out the Writer to write to - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void dumpConfiguration(Configuration config, Writer out) throws IOException { @@ -3741,7 +3802,7 @@ public static void dumpConfiguration(Configuration config, * @param jsonGen json writer * @param config configuration * @param name property name - * @throws IOException + * @throws IOException raised on errors performing I/O. */ private static void appendJSONProperty(JsonGenerator jsonGen, Configuration config, String name, ConfigRedactor redactor) @@ -3823,7 +3884,10 @@ synchronized boolean getQuietMode() { return this.quietmode; } - /** For debugging. List non-default properties to the terminal and exit. */ + /** For debugging. List non-default properties to the terminal and exit. + * @param args the argument to be parsed. + * @throws Exception exception. 
+ */ public static void main(String[] args) throws Exception { new Configuration().writeXml(System.out); } @@ -3857,14 +3921,15 @@ public void write(DataOutput out) throws IOException { } /** - * get keys matching the the regex - * @param regex + * get keys matching the the regex. + * @param regex the regex to match against. * @return {@literal Map} with matching keys */ public Map getValByRegex(String regex) { Pattern p = Pattern.compile(regex); Map result = new HashMap(); + List resultKeys = new ArrayList<>(); Matcher m; for(Map.Entry item: getProps().entrySet()) { @@ -3872,11 +3937,12 @@ public Map getValByRegex(String regex) { item.getValue() instanceof String) { m = p.matcher((String)item.getKey()); if(m.find()) { // match - result.put((String) item.getKey(), - substituteVars(getProps().getProperty((String) item.getKey()))); + resultKeys.add((String) item.getKey()); } } } + resultKeys.forEach(item -> + result.put(item, substituteVars(getProps().getProperty(item)))); return result; } @@ -3901,6 +3967,8 @@ public static void dumpDeprecatedKeys() { /** * Returns whether or not a deprecated name has been warned. If the name is not * deprecated then always return false + * @param name proprties. + * @return true if name is a warned deprecation. */ public static boolean hasWarnedDeprecation(String name) { DeprecationContext deprecations = deprecationContext.get(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java index f06af2b98df14..77a7117d19665 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java @@ -33,7 +33,9 @@ public Configured() { this(null); } - /** Construct a Configured. */ + /** Construct a Configured. + * @param conf the Configuration object. 
+ */ public Configured(Configuration conf) { setConf(conf); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java index c93dc31a881a9..915faf4c237ad 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java @@ -33,6 +33,9 @@ public interface Reconfigurable extends Configurable { * (or null if it was not previously set). If newVal is null, set the property * to its default value; * + * @param property property name. + * @param newVal new value. + * @throws ReconfigurationException if there was an error applying newVal. * If the property cannot be changed, throw a * {@link ReconfigurationException}. */ @@ -45,11 +48,14 @@ void reconfigureProperty(String property, String newVal) * If isPropertyReconfigurable returns true for a property, * then changeConf should not throw an exception when changing * this property. + * @param property property name. + * @return true if property reconfigurable; false if not. */ boolean isPropertyReconfigurable(String property); /** * Return all the properties that can be changed at run time. + * @return reconfigurable propertys. 
*/ Collection getReconfigurableProperties(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java index 8cacbdcdac039..1ba19ec0001b7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java @@ -18,9 +18,9 @@ package org.apache.hadoop.conf; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.util.Time; import org.apache.hadoop.conf.ReconfigurationUtil.PropertyChange; import org.slf4j.Logger; @@ -79,6 +79,7 @@ public ReconfigurableBase() { /** * Construct a ReconfigurableBase with the {@link Configuration} * conf. + * @param conf configuration. */ public ReconfigurableBase(Configuration conf) { super((conf == null) ? new Configuration() : conf); @@ -91,6 +92,7 @@ public void setReconfigurationUtil(ReconfigurationUtil ru) { /** * Create a new configuration. + * @return configuration. */ protected abstract Configuration getNewConf(); @@ -162,6 +164,7 @@ public void run() { /** * Start a reconfiguration task to reload configuration in background. + * @throws IOException raised on errors performing I/O. 
*/ public void startReconfigurationTask() throws IOException { synchronized (reconfigLock) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java index 0935bf025fd30..b22af76c9eb6b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java @@ -59,6 +59,10 @@ public ReconfigurationException() { /** * Create a new instance of {@link ReconfigurationException}. + * @param property property name. + * @param newVal new value. + * @param oldVal old value. + * @param cause original exception. */ public ReconfigurationException(String property, String newVal, String oldVal, @@ -71,6 +75,9 @@ public ReconfigurationException(String property, /** * Create a new instance of {@link ReconfigurationException}. + * @param property property name. + * @param newVal new value. + * @param oldVal old value. */ public ReconfigurationException(String property, String newVal, String oldVal) { @@ -82,6 +89,7 @@ public ReconfigurationException(String property, /** * Get property that cannot be changed. + * @return property info. */ public String getProperty() { return property; @@ -89,6 +97,7 @@ public String getProperty() { /** * Get value to which property was supposed to be changed. + * @return new value. */ public String getNewValue() { return newVal; @@ -96,6 +105,7 @@ public String getNewValue() { /** * Get old value of property that cannot be changed. + * @return old value. 
*/ public String getOldValue() { return oldVal; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java index 05ec90758e5fa..ca9ddb61566ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java @@ -42,7 +42,8 @@ public ReconfigurationTaskStatus(long startTime, long endTime, /** * Return true if * - A reconfiguration task has finished or - * - an active reconfiguration task is running + * - an active reconfiguration task is running. + * @return true if startTime > 0; false if not. */ public boolean hasTask() { return startTime > 0; @@ -51,6 +52,7 @@ public boolean hasTask() { /** * Return true if the latest reconfiguration task has finished and there is * no another active task running. + * @return true if endTime > 0; false if not. 
*/ public boolean stopped() { return endTime > 0; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/AesCtrCryptoCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/AesCtrCryptoCodec.java index 3e52560259638..a1ddca6e20967 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/AesCtrCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/AesCtrCryptoCodec.java @@ -20,7 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.io.IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java index bcf4a65ec24d4..f73a1857f1297 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java @@ -31,8 +31,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_CRYPTO_CODEC_CLASSES_KEY_PREFIX; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_CRYPTO_CIPHER_SUITE_KEY; @@ -141,14 +141,18 @@ private static List> getCodecClasses( public abstract CipherSuite getCipherSuite(); /** - * Create a {@link org.apache.hadoop.crypto.Encryptor}. 
- * @return Encryptor the encryptor + * Create a {@link org.apache.hadoop.crypto.Encryptor}. + * + * @return Encryptor the encryptor. + * @throws GeneralSecurityException thrown if create encryptor error. */ public abstract Encryptor createEncryptor() throws GeneralSecurityException; - + /** * Create a {@link org.apache.hadoop.crypto.Decryptor}. + * * @return Decryptor the decryptor + * @throws GeneralSecurityException thrown if create decryptor error. */ public abstract Decryptor createDecryptor() throws GeneralSecurityException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java index 9e601e26cf944..21e06f26c31d2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java @@ -30,7 +30,7 @@ import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.ByteBufferPositionedReadable; @@ -46,9 +46,13 @@ import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.StreamCapabilitiesPolicy; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; import org.apache.hadoop.io.ByteBufferPool; import org.apache.hadoop.util.StringUtils; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; + /** * CryptoInputStream decrypts data. It is not thread-safe. 
AES CTR mode is * required in order to ensure that the plain text and cipher text have a 1:1 @@ -66,7 +70,7 @@ public class CryptoInputStream extends FilterInputStream implements Seekable, PositionedReadable, ByteBufferReadable, HasFileDescriptor, CanSetDropBehind, CanSetReadahead, HasEnhancedByteBufferAccess, ReadableByteChannel, CanUnbuffer, StreamCapabilities, - ByteBufferPositionedReadable { + ByteBufferPositionedReadable, IOStatisticsSource { private final byte[] oneByteBuf = new byte[1]; private final CryptoCodec codec; private final Decryptor decryptor; @@ -153,7 +157,7 @@ public InputStream getWrappedStream() { * @param off the buffer offset. * @param len the maximum number of decrypted data bytes to read. * @return int the total number of decrypted data bytes read into the buffer. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public int read(byte[] b, int off, int len) throws IOException { @@ -313,7 +317,7 @@ private void resetStreamOffset(long offset) throws IOException { } @Override - public void close() throws IOException { + public synchronized void close() throws IOException { if (closed) { return; } @@ -867,8 +871,16 @@ public boolean hasCapability(String capability) { + " does not expose its stream capabilities."); } return ((StreamCapabilities) in).hasCapability(capability); + case StreamCapabilities.IOSTATISTICS: + return (in instanceof StreamCapabilities) + && ((StreamCapabilities) in).hasCapability(capability); default: return false; } } + + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(in); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java index 8d11043937612..df36bd6fe698e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java @@ -28,8 +28,13 @@ import org.apache.hadoop.fs.CanSetDropBehind; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; /** * CryptoOutputStream encrypts data. It is not thread-safe. AES CTR mode is @@ -48,7 +53,7 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class CryptoOutputStream extends FilterOutputStream implements - Syncable, CanSetDropBehind, StreamCapabilities { + Syncable, CanSetDropBehind, StreamCapabilities, IOStatisticsSource { private final byte[] oneByteBuf = new byte[1]; private final CryptoCodec codec; private final Encryptor encryptor; @@ -142,7 +147,7 @@ public OutputStream getWrappedStream() { * @param b the data. * @param off the start offset in the data. * @param len the number of bytes to write. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public synchronized void write(byte[] b, int off, int len) throws IOException { @@ -237,12 +242,15 @@ public synchronized void close() throws IOException { return; } try { - flush(); - if (closeOutputStream) { - super.close(); - codec.close(); + try { + flush(); + } finally { + if (closeOutputStream) { + super.close(); + codec.close(); + } + freeBuffers(); } - freeBuffers(); } finally { closed = true; } @@ -308,9 +316,11 @@ private void freeBuffers() { @Override public boolean hasCapability(String capability) { - if (out instanceof StreamCapabilities) { - return ((StreamCapabilities) out).hasCapability(capability); - } - return false; + return StoreImplementationUtils.hasCapability(out, capability); + } + + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(out); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java index b55f84226d3cd..5bf66c7c4a601 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java @@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.util.CleanerUtil; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,7 +39,10 @@ public class CryptoStreamUtils { private static final Logger LOG = LoggerFactory.getLogger(CryptoStreamUtils.class); - /** Forcibly free the direct buffer. */ + /** + * Forcibly free the direct buffer. + * @param buffer input buffer. 
+ */ public static void freeDB(ByteBuffer buffer) { if (CleanerUtil.UNMAP_SUPPORTED) { try { @@ -52,20 +55,31 @@ public static void freeDB(ByteBuffer buffer) { } } - /** Read crypto buffer size */ + /** + * @return Read crypto buffer size. + * @param conf input Configuration. + */ public static int getBufferSize(Configuration conf) { return conf.getInt(HADOOP_SECURITY_CRYPTO_BUFFER_SIZE_KEY, HADOOP_SECURITY_CRYPTO_BUFFER_SIZE_DEFAULT); } - /** AES/CTR/NoPadding is required */ + /** + * AES/CTR/NoPadding is required. + * @param codec input code. + */ public static void checkCodec(CryptoCodec codec) { if (codec.getCipherSuite() != CipherSuite.AES_CTR_NOPADDING) { throw new UnsupportedCodecException("AES/CTR/NoPadding is required"); } } - /** Check and floor buffer size */ + /** + * @return Check and floor buffer size. + * + * @param codec input code. + * @param bufferSize input bufferSize. + */ public static int checkBufferSize(CryptoCodec codec, int bufferSize) { Preconditions.checkArgument(bufferSize >= MIN_BUFFER_SIZE, "Minimum value of buffer size is " + MIN_BUFFER_SIZE + "."); @@ -74,8 +88,10 @@ public static int checkBufferSize(CryptoCodec codec, int bufferSize) { } /** - * If input stream is {@link org.apache.hadoop.fs.Seekable}, return it's - * current position, otherwise return 0; + * @return If input stream is {@link org.apache.hadoop.fs.Seekable}, return it's + * current position, otherwise return 0. + * @param in input in. + * @throws IOException raised on errors performing I/O. 
*/ public static long getInputStreamOffset(InputStream in) throws IOException { if (in instanceof Seekable) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/JceAesCtrCryptoCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/JceAesCtrCryptoCodec.java index de0e5dd6268f2..1c670f76f4859 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/JceAesCtrCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/JceAesCtrCryptoCodec.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslAesCtrCryptoCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslAesCtrCryptoCodec.java index a127925a7a538..0963cb6005ed8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslAesCtrCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslAesCtrCryptoCodec.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.crypto.random.OpensslSecureRandom; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java index 0a2ba52e555e5..fcb3730ca5b30 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.util.NativeCodeLoader; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.PerformanceAdvisory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -214,34 +214,33 @@ public int update(ByteBuffer input, ByteBuffer output) output.position(output.position() + len); return len; } - + /** * Finishes a multiple-part operation. The data is encrypted or decrypted, * depending on how this cipher was initialized. *

    - * * The result is stored in the output buffer. Upon return, the output buffer's * position will have advanced by n, where n is the value returned by this * method; the output buffer's limit will not have changed. - *

    - * + *

    * If output.remaining() bytes are insufficient to hold the result, * a ShortBufferException is thrown. *

    - * * Upon finishing, this method resets this cipher object to the state it was * in when previously initialized. That is, the object is available to encrypt * or decrypt more data. - *

    - * - * If any exception is thrown, this cipher object need to be reset before it + *

    + * If any exception is thrown, this cipher object need to be reset before it * can be used again. - * + * * @param output the output ByteBuffer * @return int number of bytes stored in output - * @throws ShortBufferException - * @throws IllegalBlockSizeException - * @throws BadPaddingException + * @throws ShortBufferException if there is insufficient space in the output buffer. + * @throws IllegalBlockSizeException This exception is thrown when the length + * of data provided to a block cipher is incorrect. + * @throws BadPaddingException This exception is thrown when a particular + * padding mechanism is expected for the input + * data but the data is not padded properly. */ public int doFinal(ByteBuffer output) throws ShortBufferException, IllegalBlockSizeException, BadPaddingException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/CachingKeyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/CachingKeyProvider.java index 7a66e1e4bab48..4f456e54a4d61 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/CachingKeyProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/CachingKeyProvider.java @@ -22,9 +22,9 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; /** * A KeyProviderExtension implementation providing a short lived diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java index 7951af56bc8f9..3c3099e113567 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java @@ -18,7 +18,7 @@ package org.apache.hadoop.crypto.key; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -32,7 +32,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import javax.crypto.spec.SecretKeySpec; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java index a8c283ab649cc..13aa383c1901b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java @@ -245,7 +245,7 @@ protected int addVersion() { /** * Serialize the metadata to a set of bytes. * @return the serialized bytes - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected byte[] serialize() throws IOException { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); @@ -284,7 +284,7 @@ protected byte[] serialize() throws IOException { /** * Deserialize a new metadata object from a set of bytes. * @param bytes the serialized metadata - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ protected Metadata(byte[] bytes) throws IOException { String cipher = null; @@ -449,7 +449,7 @@ public boolean isTransient() { * when decrypting data. * @param versionName the name of a specific version of the key * @return the key material - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract KeyVersion getKeyVersion(String versionName ) throws IOException; @@ -457,14 +457,15 @@ public abstract KeyVersion getKeyVersion(String versionName /** * Get the key names for all keys. * @return the list of key names - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract List getKeys() throws IOException; /** * Get key metadata in bulk. * @param names the names of the keys to get - * @throws IOException + * @throws IOException raised on errors performing I/O. + * @return Metadata Array. */ public Metadata[] getKeysMetadata(String... names) throws IOException { Metadata[] result = new Metadata[names.length]; @@ -476,8 +477,10 @@ public Metadata[] getKeysMetadata(String... names) throws IOException { /** * Get the key material for all versions of a specific key name. + * + * @param name the base name of the key. * @return the list of key material - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract List getKeyVersions(String name) throws IOException; @@ -487,7 +490,7 @@ public Metadata[] getKeysMetadata(String... names) throws IOException { * @param name the base name of the key * @return the version name of the current version of the key or null if the * key version doesn't exist - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public KeyVersion getCurrentKey(String name) throws IOException { Metadata meta = getMetadata(name); @@ -501,7 +504,7 @@ public KeyVersion getCurrentKey(String name) throws IOException { * Get metadata about the key. 
* @param name the basename of the key * @return the key's metadata or null if the key doesn't exist - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract Metadata getMetadata(String name) throws IOException; @@ -511,7 +514,7 @@ public KeyVersion getCurrentKey(String name) throws IOException { * @param material the key material for the first version of the key. * @param options the options for the new key. * @return the version name of the first version of the key. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract KeyVersion createKey(String name, byte[] material, Options options) throws IOException; @@ -536,7 +539,7 @@ private String getAlgorithm(String cipher) { * @param size length of the key. * @param algorithm algorithm to use for generating the key. * @return the generated key. - * @throws NoSuchAlgorithmException + * @throws NoSuchAlgorithmException no such algorithm exception. */ protected byte[] generateKey(int size, String algorithm) throws NoSuchAlgorithmException { @@ -557,8 +560,8 @@ protected byte[] generateKey(int size, String algorithm) * @param name the base name of the key * @param options the options for the new key. * @return the version name of the first version of the key. - * @throws IOException - * @throws NoSuchAlgorithmException + * @throws IOException raised on errors performing I/O. + * @throws NoSuchAlgorithmException no such algorithm exception. */ public KeyVersion createKey(String name, Options options) throws NoSuchAlgorithmException, IOException { @@ -569,7 +572,7 @@ public KeyVersion createKey(String name, Options options) /** * Delete the given key. * @param name the name of the key to delete - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public abstract void deleteKey(String name) throws IOException; @@ -578,7 +581,7 @@ public KeyVersion createKey(String name, Options options) * @param name the basename of the key * @param material the new key material * @return the name of the new version of the key - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract KeyVersion rollNewVersion(String name, byte[] material @@ -600,7 +603,10 @@ public void close() throws IOException { * * @param name the basename of the key * @return the name of the new version of the key - * @throws IOException + * @throws IOException raised on errors performing I/O. + * @throws NoSuchAlgorithmException This exception is thrown when a particular + * cryptographic algorithm is requested + * but is not available in the environment. */ public KeyVersion rollNewVersion(String name) throws NoSuchAlgorithmException, IOException { @@ -619,7 +625,7 @@ public KeyVersion rollNewVersion(String name) throws NoSuchAlgorithmException, * version of the given key. * * @param name the basename of the key - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void invalidateCache(String name) throws IOException { // NOP @@ -627,7 +633,7 @@ public void invalidateCache(String name) throws IOException { /** * Ensures that any changes to the keys are written to persistent store. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract void flush() throws IOException; @@ -636,7 +642,7 @@ public void invalidateCache(String name) throws IOException { * "/aaa/bbb". * @param versionName the version name to split * @return the base name of the key - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public static String getBaseName(String versionName) throws IOException { int div = versionName.lastIndexOf('@'); @@ -659,9 +665,11 @@ protected static String buildVersionName(String name, int version) { /** * Find the provider with the given key. + * * @param providerList the list of providers - * @param keyName the key name we are looking for + * @param keyName the key name we are looking for. * @return the KeyProvider that has the key + * @throws IOException raised on errors performing I/O. */ public static KeyProvider findProvider(List providerList, String keyName) throws IOException { @@ -679,7 +687,7 @@ public static KeyProvider findProvider(List providerList, * means. If true, the password should be provided by the caller using * setPassword(). * @return Whether or not the provider requires a password - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public boolean needsPassword() throws IOException { return false; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java index 00d7a7dfce0f7..27e5f87432001 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java @@ -29,7 +29,7 @@ import javax.crypto.spec.IvParameterSpec; import javax.crypto.spec.SecretKeySpec; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.crypto.CryptoCodec; @@ -179,6 +179,7 @@ public interface CryptoExtension extends KeyProviderExtension.Extension { * Calls to this method allows the underlying KeyProvider to warm-up any * implementation 
specific caches used to store the Encrypted Keys. * @param keyNames Array of Key Names + * @throws IOException thrown if the key material could not be encrypted. */ public void warmUpEncryptedKeys(String... keyNames) throws IOException; @@ -475,8 +476,9 @@ public void drain(String keyName) { /** * This constructor is to be used by sub classes that provide * delegating/proxying functionality to the {@link KeyProviderCryptoExtension} - * @param keyProvider - * @param extension + * + * @param keyProvider key provider. + * @param extension crypto extension. */ protected KeyProviderCryptoExtension(KeyProvider keyProvider, CryptoExtension extension) { @@ -487,6 +489,7 @@ protected KeyProviderCryptoExtension(KeyProvider keyProvider, * Notifies the Underlying CryptoExtension implementation to warm up any * implementation specific caches for the specified KeyVersions * @param keyNames Arrays of key Names + * @throws IOException raised on errors performing I/O. */ public void warmUpEncryptedKeys(String... keyNames) throws IOException { @@ -558,7 +561,7 @@ public EncryptedKeyVersion reencryptEncryptedKey(EncryptedKeyVersion ekv) * Calls {@link CryptoExtension#drain(String)} for the given key name on the * underlying {@link CryptoExtension}. * - * @param keyName + * @param keyName key name. 
*/ public void drain(String keyName) { getExtension().drain(keyName); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java index 05d99ed0810fc..16139244bfc42 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.crypto.key; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.security.Credentials; @@ -48,14 +48,14 @@ public interface DelegationTokenExtension * Renews the given token. * @param token The token to be renewed. * @return The token's lifetime after renewal, or 0 if it can't be renewed. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ long renewDelegationToken(final Token token) throws IOException; /** * Cancels the given token. * @param token The token to be cancelled. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ Void cancelDelegationToken(final Token token) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java index f2f6f1801c2dd..2c6d1acb16237 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java @@ -25,7 +25,7 @@ import java.util.List; import java.util.Map; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; @@ -75,7 +75,7 @@ public class KeyShell extends CommandShell { * * @param args Command line arguments. * @return 0 on success, 1 on failure. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override protected int init(String[] args) throws IOException { @@ -547,7 +547,7 @@ private String prettifyException(Exception e) { * success and 1 for failure. * * @param args Command line arguments. - * @throws Exception + * @throws Exception raised on errors performing I/O. 
*/ public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new KeyShell(), args); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index 71ed4557b357b..bc56f0e28676b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -79,9 +79,9 @@ import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import static org.apache.hadoop.util.KMSUtil.checkNotEmpty; import static org.apache.hadoop.util.KMSUtil.checkNotNull; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java index ee2295cff77f3..4d19ea32e7fc6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java @@ -50,8 +50,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A simple LoadBalancing KMSClientProvider that round-robins requests diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java index 7d26acbf21a03..7162d77d3b8d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java @@ -33,11 +33,11 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import com.google.common.base.Preconditions; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.classification.InterfaceAudience; /** @@ -63,7 +63,7 @@ public interface QueueRefiller { * @param keyName Key name * @param keyQueue Queue that needs to be filled * @param numValues number of Values to be added to the queue. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void fillQueueForKey(String keyName, Queue keyQueue, int numValues) throws IOException; @@ -268,7 +268,7 @@ public ValueQueue(final int numValues, final float lowWaterMark, long expiry, * Initializes the Value Queues for the provided keys by calling the * fill Method with "numInitValues" values * @param keyNames Array of key Names - * @throws ExecutionException + * @throws ExecutionException executionException. */ public void initializeQueuesForKeys(String... keyNames) throws ExecutionException { @@ -285,8 +285,8 @@ public void initializeQueuesForKeys(String... keyNames) * function to add 1 value to Queue and then drain it. * @param keyName String key name * @return E the next value in the Queue - * @throws IOException - * @throws ExecutionException + * @throws IOException raised on errors performing I/O. + * @throws ExecutionException executionException. */ public E getNext(String keyName) throws IOException, ExecutionException { @@ -345,8 +345,8 @@ public int getSize(String keyName) { * @param keyName String key name * @param num Minimum number of values to return. * @return {@literal List} values returned - * @throws IOException - * @throws ExecutionException + * @throws IOException raised on errors performing I/O. + * @throws ExecutionException execution exception. 
*/ public List getAtMost(String keyName, int num) throws IOException, ExecutionException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/random/OpensslSecureRandom.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/random/OpensslSecureRandom.java index 1863f5ec2035f..a7a609ce440b6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/random/OpensslSecureRandom.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/random/OpensslSecureRandom.java @@ -22,7 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.util.NativeCodeLoader; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.PerformanceAdvisory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Abortable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Abortable.java new file mode 100644 index 0000000000000..d2fd174795831 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Abortable.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Abort data being written to a stream, so that close() does + * not write the data. It is implemented by output streams in + * some object stores, and passed through {@link FSDataOutputStream}. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface Abortable { + + /** + * Abort the active operation without the output becoming visible. + * + * This is to provide ability to cancel the write on stream; once + * a stream is aborted, the write MUST NOT become visible. + * + * @throws UnsupportedOperationException if the operation is not supported. + * @return the result. + */ + AbortableResult abort(); + + /** + * Interface for the result of aborts; allows subclasses to extend + * (IOStatistics etc) or for future enhancements if ever needed. + */ + interface AbortableResult { + + /** + * Was the stream already closed/aborted? + * @return true if a close/abort operation had already + * taken place. + */ + boolean alreadyClosed(); + + /** + * Any exception caught during cleanup operations, + * exceptions whose raising/catching does not change + * the semantics of the abort. + * @return an exception or null. 
+ */ + IOException anyCleanupException(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index 1df68b647c99a..e2972f90a0517 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -56,7 +56,7 @@ import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -273,7 +273,7 @@ public static AbstractFileSystem get(final URI uri, final Configuration conf) * @param supportedScheme the scheme supported by the implementor * @param authorityNeeded if true then theURI must have authority, if false * then the URI must have null authority. - * + * @param defaultPort default port to use if port is not specified in the URI. * @throws URISyntaxException uri has syntax error */ public AbstractFileSystem(final URI uri, final String supportedScheme, @@ -282,11 +282,12 @@ public AbstractFileSystem(final URI uri, final String supportedScheme, myUri = getUri(uri, supportedScheme, authorityNeeded, defaultPort); statistics = getStatistics(uri); } - + /** - * Check that the Uri's scheme matches - * @param uri - * @param supportedScheme + * Check that the Uri's scheme matches. + * + * @param uri name URI of the FS. + * @param supportedScheme supported scheme. */ public void checkScheme(URI uri, String supportedScheme) { String scheme = uri.getScheme(); @@ -363,7 +364,7 @@ public URI getUri() { * If the path is fully qualified URI, then its scheme and authority * matches that of this file system. Otherwise the path must be * slash-relative name. 
- * + * @param path the path. * @throws InvalidPathException if the path is invalid */ public void checkPath(Path path) { @@ -432,7 +433,7 @@ public String getUriPath(final Path p) { /** * Make the path fully qualified to this file system - * @param path + * @param path the path. * @return the qualified path */ public Path makeQualified(Path path) { @@ -497,9 +498,9 @@ public FsServerDefaults getServerDefaults(final Path f) throws IOException { * through any internal symlinks or mount point * @param p path to be resolved * @return fully qualified path - * @throws FileNotFoundException - * @throws AccessControlException - * @throws IOException + * @throws FileNotFoundException if the file is not found. + * @throws AccessControlException on an access control error. + * @throws IOException raised on errors performing I/O. * @throws UnresolvedLinkException if symbolic link on path cannot be * resolved internally */ @@ -514,6 +515,18 @@ public Path resolvePath(final Path p) throws FileNotFoundException, * {@link FileContext#create(Path, EnumSet, Options.CreateOpts...)} except * that the Path f must be fully qualified and the permission is absolute * (i.e. umask has been applied). + * + * @param f the path. + * @param createFlag create flag. + * @param opts create opts. + * @throws AccessControlException access control exception. + * @throws FileAlreadyExistsException file already exists exception. + * @throws FileNotFoundException file not found exception. + * @throws ParentNotDirectoryException parent not directory exception. + * @throws UnsupportedFileSystemException unsupported file system exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return output stream. */ public final FSDataOutputStream create(final Path f, final EnumSet createFlag, Options.CreateOpts...
opts) @@ -631,6 +644,24 @@ public final FSDataOutputStream create(final Path f, * The specification of this method matches that of * {@link #create(Path, EnumSet, Options.CreateOpts...)} except that the opts * have been declared explicitly. + * + * @param f the path. + * @param flag create flag. + * @param absolutePermission absolute permission. + * @param bufferSize buffer size. + * @param replication replications. + * @param blockSize block size. + * @param progress progress. + * @param checksumOpt check sum opt. + * @param createParent create parent. + * @throws AccessControlException access control exception. + * @throws FileAlreadyExistsException file already exists exception. + * @throws FileNotFoundException file not found exception. + * @throws ParentNotDirectoryException parent not directory exception. + * @throws UnsupportedFileSystemException unsupported filesystem exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return output stream. */ public abstract FSDataOutputStream createInternal(Path f, EnumSet flag, FsPermission absolutePermission, @@ -645,6 +676,14 @@ public abstract FSDataOutputStream createInternal(Path f, * {@link FileContext#mkdir(Path, FsPermission, boolean)} except that the Path * f must be fully qualified and the permission is absolute (i.e. * umask has been applied). + * @param dir directory. + * @param permission permission. + * @param createParent create parent flag. + * @throws AccessControlException access control exception. + * @throws FileAlreadyExistsException file already exists exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. 
*/ public abstract void mkdir(final Path dir, final FsPermission permission, final boolean createParent) throws AccessControlException, @@ -655,6 +694,14 @@ public abstract void mkdir(final Path dir, final FsPermission permission, * The specification of this method matches that of * {@link FileContext#delete(Path, boolean)} except that Path f must be for * this file system. + * + * @param f the path. + * @param recursive recursive flag. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return if successfully deleted success true, not false. */ public abstract boolean delete(final Path f, final boolean recursive) throws AccessControlException, FileNotFoundException, @@ -664,6 +711,13 @@ public abstract boolean delete(final Path f, final boolean recursive) * The specification of this method matches that of * {@link FileContext#open(Path)} except that Path f must be for this * file system. + * + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return input stream. */ public FSDataInputStream open(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { @@ -674,6 +728,14 @@ public FSDataInputStream open(final Path f) throws AccessControlException, * The specification of this method matches that of * {@link FileContext#open(Path, int)} except that Path f must be for this * file system. + * + * @param f the path. + * @param bufferSize buffer size. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. 
+ * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return input stream. */ public abstract FSDataInputStream open(final Path f, int bufferSize) throws AccessControlException, FileNotFoundException, @@ -683,6 +745,14 @@ public abstract FSDataInputStream open(final Path f, int bufferSize) * The specification of this method matches that of * {@link FileContext#truncate(Path, long)} except that Path f must be for * this file system. + * + * @param f the path. + * @param newLength new length. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return true if the truncate succeeded, false otherwise. */ public boolean truncate(Path f, long newLength) throws AccessControlException, FileNotFoundException, @@ -695,6 +765,14 @@ public boolean truncate(Path f, long newLength) * The specification of this method matches that of * {@link FileContext#setReplication(Path, short)} except that Path f must be * for this file system. + * + * @param f the path. + * @param replication replication. + * @return true if the replication was set successfully, false otherwise. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ public abstract boolean setReplication(final Path f, final short replication) throws AccessControlException, @@ -704,6 +782,16 @@ public abstract boolean setReplication(final Path f, * The specification of this method matches that of * {@link FileContext#rename(Path, Path, Options.Rename...)} except that Path * f must be for this file system. + * + * @param src src. + * @param dst dst.
+ * @param options options. + * @throws AccessControlException access control exception. + * @throws FileAlreadyExistsException file already exists exception. + * @throws FileNotFoundException file not found exception. + * @throws ParentNotDirectoryException parent not directory exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ public final void rename(final Path src, final Path dst, final Options.Rename... options) throws AccessControlException, @@ -728,6 +816,15 @@ public final void rename(final Path src, final Path dst, * File systems that do not have a built in overwrite need implement only this * method and can take advantage of the default impl of the other * {@link #renameInternal(Path, Path, boolean)} + * + * @param src src. + * @param dst dst. + * @throws AccessControlException access control exception. + * @throws FileAlreadyExistsException file already exists exception. + * @throws FileNotFoundException file not found exception. + * @throws ParentNotDirectoryException parent not directory exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ public abstract void renameInternal(final Path src, final Path dst) throws AccessControlException, FileAlreadyExistsException, @@ -738,6 +835,16 @@ public abstract void renameInternal(final Path src, final Path dst) * The specification of this method matches that of * {@link FileContext#rename(Path, Path, Options.Rename...)} except that Path * f must be for this file system. + * + * @param src src. + * @param dst dst. + * @param overwrite overwrite flag. + * @throws AccessControlException access control exception. + * @throws FileAlreadyExistsException file already exists exception. + * @throws FileNotFoundException file not found exception. + * @throws ParentNotDirectoryException parent not directory exception. 
+ * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ public void renameInternal(final Path src, final Path dst, boolean overwrite) throws AccessControlException, @@ -801,6 +908,12 @@ public boolean supportsSymlinks() { /** * The specification of this method matches that of * {@link FileContext#createSymlink(Path, Path, boolean)}; + * + * @param target target. + * @param link link. + * @param createParent create parent. + * @throws IOException raised on errors performing I/O. + * @throws UnresolvedLinkException unresolved link exception. */ public void createSymlink(final Path target, final Path link, final boolean createParent) throws IOException, UnresolvedLinkException { @@ -811,6 +924,8 @@ public void createSymlink(final Path target, final Path link, * Partially resolves the path. This is used during symlink resolution in * {@link FSLinkResolver}, and differs from the similarly named method * {@link FileContext#getLinkTarget(Path)}. + * @param f the path. + * @return target path. * @throws IOException subclass implementations may throw IOException */ public Path getLinkTarget(final Path f) throws IOException { @@ -823,6 +938,13 @@ public Path getLinkTarget(final Path f) throws IOException { * The specification of this method matches that of * {@link FileContext#setPermission(Path, FsPermission)} except that Path f * must be for this file system. + * + * @param f the path. + * @param permission permission. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. 
*/ public abstract void setPermission(final Path f, final FsPermission permission) throws AccessControlException, @@ -832,6 +954,14 @@ public abstract void setPermission(final Path f, * The specification of this method matches that of * {@link FileContext#setOwner(Path, String, String)} except that Path f must * be for this file system. + * + * @param f the path. + * @param username username. + * @param groupname groupname. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ public abstract void setOwner(final Path f, final String username, final String groupname) throws AccessControlException, @@ -841,6 +971,14 @@ public abstract void setOwner(final Path f, final String username, * The specification of this method matches that of * {@link FileContext#setTimes(Path, long, long)} except that Path f must be * for this file system. + * + * @param f the path. + * @param mtime modify time. + * @param atime access time. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ public abstract void setTimes(final Path f, final long mtime, final long atime) throws AccessControlException, FileNotFoundException, @@ -850,6 +988,13 @@ public abstract void setTimes(final Path f, final long mtime, * The specification of this method matches that of * {@link FileContext#getFileChecksum(Path)} except that Path f must be for * this file system. + * + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. 
+ * @return File Check sum. */ public abstract FileChecksum getFileChecksum(final Path f) throws AccessControlException, FileNotFoundException, @@ -860,16 +1005,44 @@ public abstract FileChecksum getFileChecksum(final Path f) * {@link FileContext#getFileStatus(Path)} * except that an UnresolvedLinkException may be thrown if a symlink is * encountered in the path. + * + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return File Status */ public abstract FileStatus getFileStatus(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; + /** + * Synchronize client metadata state. + *

    + * In some FileSystem implementations such as HDFS metadata + * synchronization is essential to guarantee consistency of read requests + * particularly in HA setting. + * @throws IOException raised on errors performing I/O. + * @throws UnsupportedOperationException Unsupported Operation Exception. + */ + public void msync() throws IOException, UnsupportedOperationException { + throw new UnsupportedOperationException(getClass().getCanonicalName() + + " does not support method msync"); + } + /** * The specification of this method matches that of * {@link FileContext#access(Path, FsAction)} * except that an UnresolvedLinkException may be thrown if a symlink is * encountered in the path. + * + * @param path the path. + * @param mode fsaction mode. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. */ @InterfaceAudience.LimitedPrivate({"HDFS", "Hive"}) public void access(Path path, FsAction mode) throws AccessControlException, @@ -884,6 +1057,13 @@ public void access(Path path, FsAction mode) throws AccessControlException, * encountered in the path leading up to the final path component. * If the file system does not support symlinks then the behavior is * equivalent to {@link AbstractFileSystem#getFileStatus(Path)}. + * + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnsupportedFileSystemException UnSupported File System Exception. + * @throws IOException raised on errors performing I/O. + * @return file status. 
*/ public FileStatus getFileLinkStatus(final Path f) throws AccessControlException, FileNotFoundException, @@ -895,6 +1075,15 @@ public FileStatus getFileLinkStatus(final Path f) * The specification of this method matches that of * {@link FileContext#getFileBlockLocations(Path, long, long)} except that * Path f must be for this file system. + * + * @param f the path. + * @param start start. + * @param len length. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return BlockLocation Array. */ public abstract BlockLocation[] getFileBlockLocations(final Path f, final long start, final long len) throws AccessControlException, @@ -904,6 +1093,13 @@ public abstract BlockLocation[] getFileBlockLocations(final Path f, * The specification of this method matches that of * {@link FileContext#getFsStatus(Path)} except that Path f must be for this * file system. + * + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return Fs Status. */ public FsStatus getFsStatus(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { @@ -914,6 +1110,11 @@ public FsStatus getFsStatus(final Path f) throws AccessControlException, /** * The specification of this method matches that of * {@link FileContext#getFsStatus(Path)}. + * + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws IOException raised on errors performing I/O. + * @return Fs Status. 
*/ public abstract FsStatus getFsStatus() throws AccessControlException, FileNotFoundException, IOException; @@ -922,6 +1123,13 @@ public abstract FsStatus getFsStatus() throws AccessControlException, * The specification of this method matches that of * {@link FileContext#listStatus(Path)} except that Path f must be for this * file system. + * + * @param f path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return FileStatus Iterator. */ public RemoteIterator listStatusIterator(final Path f) throws AccessControlException, FileNotFoundException, @@ -954,6 +1162,13 @@ public FileStatus next() { * will have different formats for replicated and erasure coded file. Please * refer to {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} * for more details. + * + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return LocatedFileStatus Iterator. */ public RemoteIterator listLocatedStatus(final Path f) throws AccessControlException, FileNotFoundException, @@ -986,6 +1201,12 @@ public LocatedFileStatus next() throws IOException { * The specification of this method matches that of * {@link FileContext.Util#listStatus(Path)} except that Path f must be * for this file system. + * @param f the path. + * @throws AccessControlException access control exception. + * @throws FileNotFoundException file not found exception. + * @throws UnresolvedLinkException unresolved link exception. + * @throws IOException raised on errors performing I/O. + * @return FileStatus array.
*/ public abstract FileStatus[] listStatus(final Path f) throws AccessControlException, FileNotFoundException, @@ -994,7 +1215,8 @@ public abstract FileStatus[] listStatus(final Path f) /** * @return an iterator over the corrupt files under the given path * (may contain duplicates if a file has more than one corrupt block) - * @throws IOException + * @param path the path. + * @throws IOException raised on errors performing I/O. */ public RemoteIterator listCorruptFileBlocks(Path path) throws IOException { @@ -1007,6 +1229,10 @@ public RemoteIterator listCorruptFileBlocks(Path path) * The specification of this method matches that of * {@link FileContext#setVerifyChecksum(boolean, Path)} except that Path f * must be for this file system. + * + * @param verifyChecksum verify check sum flag. + * @throws AccessControlException access control exception. + * @throws IOException raised on errors performing I/O. */ public abstract void setVerifyChecksum(final boolean verifyChecksum) throws AccessControlException, IOException; @@ -1028,7 +1254,7 @@ public String getCanonicalServiceName() { * @param renewer the account name that is allowed to renew the token. * @return List of delegation tokens. * If delegation tokens not supported then return a list of size zero. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" }) public List> getDelegationTokens(String renewer) throws IOException { @@ -1128,7 +1354,7 @@ public AclStatus getAclStatus(Path path) throws IOException { * @param path Path to modify * @param name xattr name. * @param value xattr value. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void setXAttr(Path path, String name, byte[] value) throws IOException { @@ -1147,7 +1373,7 @@ public void setXAttr(Path path, String name, byte[] value) * @param name xattr name. * @param value xattr value. 
* @param flag xattr set flag - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void setXAttr(Path path, String name, byte[] value, EnumSet flag) throws IOException { @@ -1165,7 +1391,7 @@ public void setXAttr(Path path, String name, byte[] value, * @param path Path to get extended attribute * @param name xattr name. * @return byte[] xattr value. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public byte[] getXAttr(Path path, String name) throws IOException { throw new UnsupportedOperationException(getClass().getSimpleName() @@ -1183,7 +1409,7 @@ public byte[] getXAttr(Path path, String name) throws IOException { * * @return {@literal Map} describing the XAttrs of the file * or directory - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Map getXAttrs(Path path) throws IOException { throw new UnsupportedOperationException(getClass().getSimpleName() @@ -1201,7 +1427,7 @@ public Map getXAttrs(Path path) throws IOException { * @param names XAttr names. * @return {@literal Map} describing the XAttrs of the file * or directory - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Map getXAttrs(Path path, List names) throws IOException { @@ -1219,7 +1445,7 @@ public Map getXAttrs(Path path, List names) * @param path Path to get extended attributes * @return {@literal Map} describing the XAttrs of the file * or directory - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public List listXAttrs(Path path) throws IOException { @@ -1236,7 +1462,7 @@ public List listXAttrs(Path path) * * @param path Path to remove extended attribute * @param name xattr name - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void removeXAttr(Path path, String name) throws IOException { throw new UnsupportedOperationException(getClass().getSimpleName() @@ -1246,6 +1472,11 @@ public void removeXAttr(Path path, String name) throws IOException { /** * The specification of this method matches that of * {@link FileContext#createSnapshot(Path, String)}. + * + * @param path the path. + * @param snapshotName snapshot name. + * @throws IOException raised on errors performing I/O. + * @return path. */ public Path createSnapshot(final Path path, final String snapshotName) throws IOException { @@ -1256,6 +1487,11 @@ public Path createSnapshot(final Path path, final String snapshotName) /** * The specification of this method matches that of * {@link FileContext#renameSnapshot(Path, String, String)}. + * + * @param path the path. + * @param snapshotOldName snapshot old name. + * @param snapshotNewName snapshot new name. + * @throws IOException raised on errors performing I/O. */ public void renameSnapshot(final Path path, final String snapshotOldName, final String snapshotNewName) throws IOException { @@ -1266,6 +1502,10 @@ public void renameSnapshot(final Path path, final String snapshotOldName, /** * The specification of this method matches that of * {@link FileContext#deleteSnapshot(Path, String)}. + * + * @param snapshotDir snapshot dir. + * @param snapshotName snapshot name. + * @throws IOException raised on errors performing I/O. */ public void deleteSnapshot(final Path snapshotDir, final String snapshotName) throws IOException { @@ -1276,7 +1516,7 @@ public void deleteSnapshot(final Path snapshotDir, final String snapshotName) /** * Set the source path to satisfy storage policy. * @param path The source path referring to either a directory or a file. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void satisfyStoragePolicy(final Path path) throws IOException { throw new UnsupportedOperationException( @@ -1290,6 +1530,7 @@ public void satisfyStoragePolicy(final Path path) throws IOException { * @param policyName the name of the target storage policy. The list * of supported Storage policies can be retrieved * via {@link #getAllStoragePolicies}. + * @throws IOException raised on errors performing I/O. */ public void setStoragePolicy(final Path path, final String policyName) throws IOException { @@ -1301,7 +1542,7 @@ public void setStoragePolicy(final Path path, final String policyName) /** * Unset the storage policy set for a given file or directory. * @param src file or directory path. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void unsetStoragePolicy(final Path src) throws IOException { throw new UnsupportedOperationException(getClass().getSimpleName() @@ -1313,7 +1554,7 @@ public void unsetStoragePolicy(final Path src) throws IOException { * * @param src file or directory path. * @return storage policy for give file. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public BlockStoragePolicySpi getStoragePolicy(final Path src) throws IOException { @@ -1325,7 +1566,7 @@ public BlockStoragePolicySpi getStoragePolicy(final Path src) * Retrieve all the storage policies supported by this file system. * * @return all storage policies supported by this filesystem. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Collection getAllStoragePolicies() throws IOException { @@ -1383,4 +1624,34 @@ public boolean hasPathCapability(final Path path, return false; } } + + /** + * Create a multipart uploader. + * @param basePath file path under which all files are uploaded + * @return a MultipartUploaderBuilder object to build the uploader + * @throws IOException if some early checks cause IO failures. 
+ * @throws UnsupportedOperationException if support is checked early. + */ + @InterfaceStability.Unstable + public MultipartUploaderBuilder createMultipartUploader(Path basePath) + throws IOException { + methodNotSupported(); + return null; + } + + /** + * Helper method that throws an {@link UnsupportedOperationException} for the + * current {@link AbstractFileSystem} method being called. + */ + protected final void methodNotSupported() { + // The order of the stacktrace elements is (from top to bottom): + // - java.lang.Thread.getStackTrace + // - org.apache.hadoop.fs.AbstractFileSystem.methodNotSupported + // - the AbstractFileSystem method currently being called + // therefore, to find out the current method name, we use the element at + // index 2. + String name = Thread.currentThread().getStackTrace()[2].getMethodName(); + throw new UnsupportedOperationException(getClass().getCanonicalName() + + " does not support method " + name); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java index b4a4a85674dfa..155381de949ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java @@ -25,6 +25,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; + /** Adapts an {@link FSDataInputStream} to Avro's SeekableInput interface.
*/ @InterfaceAudience.Public @InterfaceStability.Stable @@ -32,17 +36,30 @@ public class AvroFSInput implements Closeable, SeekableInput { private final FSDataInputStream stream; private final long len; - /** Construct given an {@link FSDataInputStream} and its length. */ + /** + * Construct given an {@link FSDataInputStream} and its length. + * + * @param in inputstream. + * @param len len. + */ public AvroFSInput(final FSDataInputStream in, final long len) { this.stream = in; this.len = len; } - /** Construct given a {@link FileContext} and a {@link Path}. */ + /** Construct given a {@link FileContext} and a {@link Path}. + * @param fc filecontext. + * @param p the path. + * @throws IOException If an I/O error occurred. + * */ public AvroFSInput(final FileContext fc, final Path p) throws IOException { FileStatus status = fc.getFileStatus(p); this.len = status.getLen(); - this.stream = fc.open(p); + this.stream = awaitFuture(fc.openFile(p) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL) + .withFileStatus(status) + .build()); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java index 607fffbcc701a..e693bcbfe89fc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java @@ -68,6 +68,7 @@ public BatchedRemoteIterator(K prevKey) { * * @param prevKey The key to send. * @return A list of replies. + * @throws IOException If an I/O error occurred. */ public abstract BatchedEntries makeRequest(K prevKey) throws IOException; @@ -102,6 +103,8 @@ public boolean hasNext() throws IOException { /** * Return the next list key associated with an element. + * @param element element. + * @return K Generics Type. 
*/ public abstract K elementToPrevKey(E element); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java index c6dde52d83dd1..67687c1f0e04c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java @@ -74,7 +74,7 @@ public class BlockLocation implements Serializable { private static final String[] EMPTY_STR_ARRAY = new String[0]; private static final StorageType[] EMPTY_STORAGE_TYPE_ARRAY = - new StorageType[0]; + StorageType.EMPTY_ARRAY; /** * Default Constructor. @@ -85,6 +85,7 @@ public BlockLocation() { /** * Copy constructor. + * @param that blocklocation. */ public BlockLocation(BlockLocation that) { this.hosts = that.hosts; @@ -100,6 +101,10 @@ public BlockLocation(BlockLocation that) { /** * Constructor with host, name, offset and length. + * @param names names array. + * @param hosts host array. + * @param offset offset. + * @param length length. */ public BlockLocation(String[] names, String[] hosts, long offset, long length) { @@ -108,6 +113,11 @@ public BlockLocation(String[] names, String[] hosts, long offset, /** * Constructor with host, name, offset, length and corrupt flag. + * @param names names. + * @param hosts hosts. + * @param offset offset. + * @param length length. + * @param corrupt corrupt. */ public BlockLocation(String[] names, String[] hosts, long offset, long length, boolean corrupt) { @@ -116,6 +126,11 @@ public BlockLocation(String[] names, String[] hosts, long offset, /** * Constructor with host, name, network topology, offset and length. + * @param names names. + * @param hosts hosts. + * @param topologyPaths topologyPaths. + * @param offset offset. + * @param length length. 
*/ public BlockLocation(String[] names, String[] hosts, String[] topologyPaths, long offset, long length) { @@ -125,6 +140,12 @@ public BlockLocation(String[] names, String[] hosts, String[] topologyPaths, /** * Constructor with host, name, network topology, offset, length * and corrupt flag. + * @param names names. + * @param hosts hosts. + * @param topologyPaths topologyPaths. + * @param offset offset. + * @param length length. + * @param corrupt corrupt. */ public BlockLocation(String[] names, String[] hosts, String[] topologyPaths, long offset, long length, boolean corrupt) { @@ -177,6 +198,8 @@ public BlockLocation(String[] names, String[] hosts, String[] cachedHosts, /** * Get the list of hosts (hostname) hosting this block. + * @return hosts array. + * @throws IOException If an I/O error occurred. */ public String[] getHosts() throws IOException { return hosts; @@ -184,6 +207,7 @@ public String[] getHosts() throws IOException { /** * Get the list of hosts (hostname) hosting a cached replica of the block. + * @return cached hosts. */ public String[] getCachedHosts() { return cachedHosts; @@ -191,6 +215,8 @@ public String[] getCachedHosts() { /** * Get the list of names (IP:xferPort) hosting this block. + * @return names array. + * @throws IOException If an I/O error occurred. */ public String[] getNames() throws IOException { return names; @@ -199,6 +225,8 @@ public String[] getNames() throws IOException { /** * Get the list of network topology paths for each of the hosts. * The last component of the path is the "name" (IP:xferPort). + * @return topology paths. + * @throws IOException If an I/O error occurred. */ public String[] getTopologyPaths() throws IOException { return topologyPaths; @@ -206,6 +234,7 @@ public String[] getTopologyPaths() throws IOException { /** * Get the storageID of each replica of the block. + * @return storage ids. 
*/ public String[] getStorageIds() { return storageIds; @@ -213,6 +242,7 @@ public String[] getStorageIds() { /** * Get the storage type of each replica of the block. + * @return storage type of each replica of the block. */ public StorageType[] getStorageTypes() { return storageTypes; @@ -220,6 +250,7 @@ public StorageType[] getStorageTypes() { /** * Get the start offset of file associated with this block. + * @return start offset of file associated with this block. */ public long getOffset() { return offset; @@ -227,6 +258,7 @@ public long getOffset() { /** * Get the length of the block. + * @return length of the block. */ public long getLength() { return length; @@ -234,6 +266,7 @@ public long getLength() { /** * Get the corrupt flag. + * @return corrupt flag. */ public boolean isCorrupt() { return corrupt; @@ -241,6 +274,7 @@ public boolean isCorrupt() { /** * Return true if the block is striped (erasure coded). + * @return true if the block is striped, false otherwise. */ public boolean isStriped() { return false; @@ -248,6 +282,7 @@ public boolean isStriped() { /** * Set the start offset of file associated with this block. + * @param offset start offset. */ public void setOffset(long offset) { this.offset = offset; @@ -255,6 +290,7 @@ public void setOffset(long offset) { /** * Set the length of block. + * @param length length of block. */ public void setLength(long length) { this.length = length; @@ -262,6 +298,7 @@ public void setLength(long length) { /** * Set the corrupt flag. + * @param corrupt corrupt flag. */ public void setCorrupt(boolean corrupt) { this.corrupt = corrupt; @@ -269,6 +306,8 @@ public void setCorrupt(boolean corrupt) { /** * Set the hosts hosting this block. + * @param hosts hosts array. + * @throws IOException If an I/O error occurred.
*/ public void setHosts(String[] hosts) throws IOException { if (hosts == null) { @@ -280,6 +319,7 @@ public void setHosts(String[] hosts) throws IOException { /** * Set the hosts hosting a cached replica of this block. + * @param cachedHosts cached hosts. */ public void setCachedHosts(String[] cachedHosts) { if (cachedHosts == null) { @@ -291,6 +331,8 @@ public void setCachedHosts(String[] cachedHosts) { /** * Set the names (host:port) hosting this block. + * @param names names. + * @throws IOException If an I/O error occurred. */ public void setNames(String[] names) throws IOException { if (names == null) { @@ -302,6 +344,9 @@ public void setNames(String[] names) throws IOException { /** * Set the network topology paths of the hosts. + * + * @param topologyPaths topology paths. + * @throws IOException If an I/O error occurred. */ public void setTopologyPaths(String[] topologyPaths) throws IOException { if (topologyPaths == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java index 973b136bb3ab2..7f3171235c8f4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -21,9 +21,17 @@ import java.io.EOFException; import java.io.FileDescriptor; import java.io.IOException; +import java.util.StringJoiner; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.IntFunction; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; /** @@ -33,7 +41,8 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class BufferedFSInputStream extends BufferedInputStream -implements Seekable, PositionedReadable, HasFileDescriptor { + implements Seekable, PositionedReadable, HasFileDescriptor, + IOStatisticsSource, StreamCapabilities { /** * Creates a BufferedFSInputStream * with the specified buffer size, @@ -126,4 +135,50 @@ public FileDescriptor getFileDescriptor() throws IOException { return null; } } + + /** + * If the inner stream supports {@link StreamCapabilities}, + * forward the probe to it. + * Otherwise: return false. + * + * @param capability string to query the stream support for. + * @return true if a capability is known to be supported. 
+ */ + @Override + public boolean hasCapability(final String capability) { + if (in instanceof StreamCapabilities) { + return ((StreamCapabilities) in).hasCapability(capability); + } else { + return false; + } + } + + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(in); + } + + @Override + public String toString() { + return new StringJoiner(", ", + BufferedFSInputStream.class.getSimpleName() + "[", "]") + .add("in=" + in) + .toString(); + } + + @Override + public int minSeekForVectorReads() { + return ((PositionedReadable) in).minSeekForVectorReads(); + } + + @Override + public int maxReadSizeForVectorReads() { + return ((PositionedReadable) in).maxReadSizeForVectorReads(); + } + + @Override + public void readVectored(List ranges, + IntFunction allocate) throws IOException { + ((PositionedReadable) in).readVectored(ranges, allocate); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java index c31c29b5b6d31..ab052029eeb93 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java @@ -26,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.io.ByteBufferPool; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; @InterfaceAudience.Private @InterfaceStability.Evolving @@ -48,6 +48,12 @@ private static boolean streamHasByteBufferRead(InputStream stream) { /** * Perform a fallback read. + * + * @param stream input stream. + * @param bufferPool bufferPool. + * @param maxLength maxLength. + * @throws IOException raised on errors performing I/O. + * @return byte buffer. 
*/ public static ByteBuffer fallbackRead( InputStream stream, ByteBufferPool bufferPool, int maxLength) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java index 58dc82d2efb2d..17c0da0c8fc34 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java @@ -19,6 +19,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +53,9 @@ public abstract class CachingGetSpaceUsed implements Closeable, GetSpaceUsed { /** * This is the constructor used by the builder. * All overriding classes should implement this. + * + * @param builder builder. + * @throws IOException raised on errors performing I/O. */ public CachingGetSpaceUsed(CachingGetSpaceUsed.Builder builder) throws IOException { @@ -139,6 +143,8 @@ public String getDirPath() { /** * Increment the cached value of used space. + * + * @param value dfs used value. */ public void incDfsUsed(long value) { used.addAndGet(value); @@ -153,11 +159,25 @@ boolean running() { /** * How long in between runs of the background refresh. + * + * @return refresh interval. */ - long getRefreshInterval() { + @VisibleForTesting + public long getRefreshInterval() { return refreshInterval; } + /** + * Randomize the refresh interval timing by this amount, the actual interval will be chosen + * uniformly between {@code interval-jitter} and {@code interval+jitter}. + * + * @return between interval-jitter and interval+jitter. + */ + @VisibleForTesting + public long getJitter() { + return jitter; + } + /** * Reset the current used data amount. 
This should be called * when the cached value is re-computed. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java index 2e2d98b9c5462..0077838920a9e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java @@ -36,6 +36,6 @@ public interface CanSetDropBehind { * UnsupportedOperationException If this stream doesn't support * setting the drop-behind. */ - public void setDropBehind(Boolean dropCache) + void setDropBehind(Boolean dropCache) throws IOException, UnsupportedOperationException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java index cc9c284c9fa55..8ec2a1c67b2c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java @@ -22,27 +22,39 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; import java.nio.channels.ClosedChannelException; +import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.EnumSet; import java.util.List; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.function.IntFunction; +import java.util.zip.CRC32; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; 
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; +import org.apache.hadoop.fs.impl.CombinedFileRange; import org.apache.hadoop.fs.impl.FutureDataInputStreamBuilderImpl; import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.IOStatisticsSupport; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import static org.apache.hadoop.fs.impl.StoreImplementationUtils.isProbeForSyncable; +import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges; /**************************************************************** * Abstract Checksumed FileSystem. @@ -62,7 +74,7 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { public static double getApproxChkSumLength(long size) { return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size; } - + public ChecksumFileSystem(FileSystem fs) { super(fs); } @@ -78,7 +90,7 @@ public void setConf(Configuration conf) { bytesPerChecksum); } } - + /** * Set whether to verify checksum. */ @@ -91,32 +103,51 @@ public void setVerifyChecksum(boolean verifyChecksum) { public void setWriteChecksum(boolean writeChecksum) { this.writeChecksum = writeChecksum; } - + /** get the raw file system */ @Override public FileSystem getRawFileSystem() { return fs; } - /** Return the name of the checksum file associated with a file.*/ + /** + * Return the name of the checksum file associated with a file. + * + * @param file the file path. 
+ * @return name of the checksum file associated with a file. + */ public Path getChecksumFile(Path file) { return new Path(file.getParent(), "." + file.getName() + ".crc"); } - /** Return true iff file is a checksum file name.*/ + /** + * Return true if file is a checksum file name. + * + * @param file the file path. + * @return true if file is a checksum file, false otherwise. + */ public static boolean isChecksumFile(Path file) { String name = file.getName(); return name.startsWith(".") && name.endsWith(".crc"); } - /** Return the length of the checksum file given the size of the + /** + * Return the length of the checksum file given the size of the * actual file. - **/ + * + * @param file the file path. + * @param fileSize file size. + * @return checksum length. + */ public long getChecksumFileLength(Path file, long fileSize) { return getChecksumLength(fileSize, getBytesPerSum()); } - /** Return the bytes Per Checksum */ + /** + * Return the bytes Per Checksum. + * + * @return bytes per check sum. + */ public int getBytesPerSum() { return bytesPerChecksum; } @@ -134,22 +165,24 @@ private int getSumBufferSize(int bytesPerSum, int bufferSize) { * For open()'s FSInputStream * It verifies that data matches checksums.
*******************************************************/ - private static class ChecksumFSInputChecker extends FSInputChecker { + private static class ChecksumFSInputChecker extends FSInputChecker implements + IOStatisticsSource, StreamCapabilities { private ChecksumFileSystem fs; private FSDataInputStream datas; private FSDataInputStream sums; - + private static final int HEADER_LENGTH = 8; - + private int bytesPerSum = 1; - + private long fileLen = -1L; + public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file) throws IOException { this(fs, file, fs.getConf().getInt( - LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY, + LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY, LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT)); } - + public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize) throws IOException { super( file, fs.getFileStatus(file).getReplication() ); @@ -165,7 +198,8 @@ public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize) if (!Arrays.equals(version, CHECKSUM_VERSION)) throw new IOException("Not a checksum file: "+sumFile); this.bytesPerSum = sums.readInt(); - set(fs.verifyChecksum, DataChecksum.newCrc32(), bytesPerSum, 4); + set(fs.verifyChecksum, DataChecksum.newCrc32(), bytesPerSum, + FSInputChecker.CHECKSUM_SIZE); } catch (IOException e) { // mincing the message is terrible, but java throws permission // exceptions as FNF because that's all the method signatures allow! 
@@ -177,21 +211,21 @@ public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize) set(fs.verifyChecksum, null, 1, 0); } } - + private long getChecksumFilePos( long dataPos ) { - return HEADER_LENGTH + 4*(dataPos/bytesPerSum); + return HEADER_LENGTH + FSInputChecker.CHECKSUM_SIZE*(dataPos/bytesPerSum); } - + @Override protected long getChunkPosition( long dataPos ) { return dataPos/bytesPerSum*bytesPerSum; } - + @Override public int available() throws IOException { return datas.available() + super.available(); } - + @Override public int read(long position, byte[] b, int off, int len) throws IOException { @@ -209,7 +243,7 @@ public int read(long position, byte[] b, int off, int len) } return nread; } - + @Override public void close() throws IOException { datas.close(); @@ -218,7 +252,7 @@ public void close() throws IOException { } set(fs.verifyChecksum, null, 1, 0); } - + @Override public boolean seekToNewSource(long targetPos) throws IOException { @@ -241,7 +275,7 @@ protected int readChunk(long pos, byte[] buf, int offset, int len, final int checksumsToRead = Math.min( len/bytesPerSum, // number of checksums based on len to read checksum.length / CHECKSUM_SIZE); // size of checksum buffer - long checksumPos = getChecksumFilePos(pos); + long checksumPos = getChecksumFilePos(pos); if(checksumPos != sums.getPos()) { sums.seek(checksumPos); } @@ -270,8 +304,198 @@ protected int readChunk(long pos, byte[] buf, int offset, int len, } return nread; } + + /** + * Get the IO Statistics of the nested stream, falling back to + * null if the stream does not implement the interface + * {@link IOStatisticsSource}. 
+ * @return an IOStatistics instance or null + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(datas); + } + + public static long findChecksumOffset(long dataOffset, + int bytesPerSum) { + return HEADER_LENGTH + (dataOffset/bytesPerSum) * FSInputChecker.CHECKSUM_SIZE; + } + + /** + * Calculate length of file if not already cached. + * @return file length. + * @throws IOException any IOE. + */ + private long getFileLength() throws IOException { + if (fileLen == -1L) { + fileLen = fs.getFileStatus(file).getLen(); + } + return fileLen; + } + + /** + * Find the checksum ranges that correspond to the given data ranges. + * @param dataRanges the input data ranges, which are assumed to be sorted + * and non-overlapping + * @return a list of AsyncReaderUtils.CombinedFileRange that correspond to + * the checksum ranges + */ + public static List findChecksumRanges( + List dataRanges, + int bytesPerSum, + int minSeek, + int maxSize) { + List result = new ArrayList<>(); + CombinedFileRange currentCrc = null; + for(FileRange range: dataRanges) { + long crcOffset = findChecksumOffset(range.getOffset(), bytesPerSum); + long crcEnd = findChecksumOffset(range.getOffset() + range.getLength() + + bytesPerSum - 1, bytesPerSum); + if (currentCrc == null || + !currentCrc.merge(crcOffset, crcEnd, range, minSeek, maxSize)) { + currentCrc = new CombinedFileRange(crcOffset, crcEnd, range); + result.add(currentCrc); + } + } + return result; + } + + /** + * Check the data against the checksums. 
+ * @param sumsBytes the checksum data + * @param sumsOffset where from the checksum file this buffer started + * @param data the file data + * @param dataOffset where the file data started (must be a multiple of + * bytesPerSum) + * @param bytesPerSum how many bytes per a checksum + * @param file the path of the filename + * @return the data buffer + * @throws CompletionException if the checksums don't match + */ + static ByteBuffer checkBytes(ByteBuffer sumsBytes, + long sumsOffset, + ByteBuffer data, + long dataOffset, + int bytesPerSum, + Path file) { + // determine how many bytes we need to skip at the start of the sums + int offset = + (int) (findChecksumOffset(dataOffset, bytesPerSum) - sumsOffset); + IntBuffer sums = sumsBytes.asIntBuffer(); + sums.position(offset / FSInputChecker.CHECKSUM_SIZE); + ByteBuffer current = data.duplicate(); + int numFullChunks = data.remaining() / bytesPerSum; + boolean partialChunk = ((data.remaining() % bytesPerSum) != 0); + int totalChunks = numFullChunks; + if (partialChunk) { + totalChunks++; + } + CRC32 crc = new CRC32(); + // check each chunk to ensure they match + for(int c = 0; c < totalChunks; ++c) { + // set the buffer position to the start of every chunk. + current.position(c * bytesPerSum); + + if (c == numFullChunks) { + // During last chunk, there may be less than chunk size + // data present, so setting the limit accordingly. + int lastIncompleteChunk = data.remaining() % bytesPerSum; + current.limit((c * bytesPerSum) + lastIncompleteChunk); + } else { + // set the buffer limit to end of every chunk.
+ current.limit((c + 1) * bytesPerSum); + } + + // compute the crc + crc.reset(); + crc.update(current); + int expected = sums.get(); + int calculated = (int) crc.getValue(); + + if (calculated != expected) { + // cast of c added to silence findbugs + long errPosn = dataOffset + (long) c * bytesPerSum; + throw new CompletionException(new ChecksumException( + "Checksum error: " + file + " at " + errPosn + + " exp: " + expected + " got: " + calculated, errPosn)); + } + } + // if everything matches, we return the data + return data; + } + + /** + * Validates range parameters. + * In case of CheckSum FS, we already have calculated + * fileLength so failing fast here. + * @param ranges requested ranges. + * @param fileLength length of file. + * @throws EOFException end of file exception. + */ + private void validateRangeRequest(List ranges, + final long fileLength) throws EOFException { + for (FileRange range : ranges) { + VectoredReadUtils.validateRangeRequest(range); + if (range.getOffset() + range.getLength() > fileLength) { + final String errMsg = String.format("Requested range [%d, %d) is beyond EOF for path %s", + range.getOffset(), range.getLength(), file); + LOG.warn(errMsg); + throw new EOFException(errMsg); + } + } + } + + @Override + public void readVectored(List ranges, + IntFunction allocate) throws IOException { + final long length = getFileLength(); + validateRangeRequest(ranges, length); + + // If the stream doesn't have checksums, just delegate. 
+ if (sums == null) { + datas.readVectored(ranges, allocate); + return; + } + int minSeek = minSeekForVectorReads(); + int maxSize = maxReadSizeForVectorReads(); + List dataRanges = + VectoredReadUtils.mergeSortedRanges(Arrays.asList(sortRanges(ranges)), bytesPerSum, + minSeek, maxReadSizeForVectorReads()); + // While merging the ranges above, they are rounded up based on the value of bytesPerSum + // which leads to some ranges crossing the EOF thus they need to be fixed else it will + // cause EOFException during actual reads. + for (CombinedFileRange range : dataRanges) { + if (range.getOffset() + range.getLength() > length) { + range.setLength((int) (length - range.getOffset())); + } + } + List checksumRanges = findChecksumRanges(dataRanges, + bytesPerSum, minSeek, maxSize); + sums.readVectored(checksumRanges, allocate); + datas.readVectored(dataRanges, allocate); + for(CombinedFileRange checksumRange: checksumRanges) { + for(FileRange dataRange: checksumRange.getUnderlying()) { + // when we have both the ranges, validate the checksum + CompletableFuture result = + checksumRange.getData().thenCombineAsync(dataRange.getData(), + (sumBuffer, dataBuffer) -> + checkBytes(sumBuffer, checksumRange.getOffset(), + dataBuffer, dataRange.getOffset(), bytesPerSum, file)); + // Now, slice the read data range to the user's ranges + for(FileRange original: ((CombinedFileRange) dataRange).getUnderlying()) { + original.setData(result.thenApply( + (b) -> VectoredReadUtils.sliceTo(b, dataRange.getOffset(), original))); + } + } + } + } + + @Override + public boolean hasCapability(String capability) { + return datas.hasCapability(capability); + } } - + private static class FSDataBoundedInputStream extends FSDataInputStream { private FileSystem fs; private Path file; @@ -282,12 +506,12 @@ private static class FSDataBoundedInputStream extends FSDataInputStream { this.fs = fs; this.file = file; } - + @Override public boolean markSupported() { return false; } - + /* Return the file 
length */ private long getFileLength() throws IOException { if( fileLen==-1L ) { @@ -295,7 +519,7 @@ private long getFileLength() throws IOException { } return fileLen; } - + /** * Skips over and discards n bytes of data from the * input stream. @@ -319,11 +543,11 @@ public synchronized long skip(long n) throws IOException { } return super.skip(n); } - + /** * Seek to the given position in the stream. * The next read() will be from that position. - * + * *

    This method does not allow seek past the end of the file. * This produces IOException. * @@ -346,6 +570,7 @@ public synchronized void seek(long pos) throws IOException { * Opens an FSDataInputStream at the indicated Path. * @param f the file name to open * @param bufferSize the size of the buffer to be used. + * @throws IOException if an I/O error occurs. */ @Override public FSDataInputStream open(Path f, int bufferSize) throws IOException { @@ -388,21 +613,22 @@ public void concat(final Path f, final Path[] psrcs) throws IOException { */ public static long getChecksumLength(long size, int bytesPerSum) { //the checksum length is equal to size passed divided by bytesPerSum + - //bytes written in the beginning of the checksum file. - return ((size + bytesPerSum - 1) / bytesPerSum) * 4 + - CHECKSUM_VERSION.length + 4; + //bytes written in the beginning of the checksum file. + return ((size + bytesPerSum - 1) / bytesPerSum) * FSInputChecker.CHECKSUM_SIZE + + ChecksumFSInputChecker.HEADER_LENGTH; } /** This class provides an output stream for a checksummed file. * It generates checksums for data. 
*/ - private static class ChecksumFSOutputSummer extends FSOutputSummer { - private FSDataOutputStream datas; + private static class ChecksumFSOutputSummer extends FSOutputSummer + implements IOStatisticsSource, StreamCapabilities { + private FSDataOutputStream datas; private FSDataOutputStream sums; private static final float CHKSUM_AS_FRACTION = 0.01f; private boolean isClosed = false; - - public ChecksumFSOutputSummer(ChecksumFileSystem fs, - Path file, + + ChecksumFSOutputSummer(ChecksumFileSystem fs, + Path file, boolean overwrite, int bufferSize, short replication, @@ -423,7 +649,7 @@ public ChecksumFSOutputSummer(ChecksumFileSystem fs, sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length); sums.writeInt(bytesPerSum); } - + @Override public void close() throws IOException { try { @@ -434,7 +660,7 @@ public void close() throws IOException { isClosed = true; } } - + @Override protected void writeChunk(byte[] b, int offset, int len, byte[] checksum, int ckoff, int cklen) @@ -449,6 +675,31 @@ protected void checkClosed() throws IOException { throw new ClosedChannelException(); } } + + /** + * Get the IO Statistics of the nested stream, falling back to + * null if the stream does not implement the interface + * {@link IOStatisticsSource}. + * @return an IOStatistics instance or null + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(datas); + } + + /** + * Probe the inner stream for a capability. + * Syncable operations are rejected before being passed down. + * @param capability string to query the stream support for. + * @return true if a capability is known to be supported. 
+ */ + @Override + public boolean hasCapability(final String capability) { + if (isProbeForSyncable(capability)) { + return false; + } + return datas.hasCapability(capability); + } } @Override @@ -627,7 +878,7 @@ boolean apply(Path p) throws IOException { * Implement the abstract setReplication of FileSystem * @param src file name * @param replication new replication - * @throws IOException + * @throws IOException if an I/O error occurs. * @return true if successful; * false if file does not exist or is a directory */ @@ -665,7 +916,7 @@ public boolean rename(Path src, Path dst) throws IOException { value = fs.rename(srcCheckFile, dstCheckFile); } else if (fs.exists(dstCheckFile)) { // no src checksum, so remove dst checksum - value = fs.delete(dstCheckFile, true); + value = fs.delete(dstCheckFile, true); } return value; @@ -697,7 +948,7 @@ public boolean delete(Path f, boolean recursive) throws IOException{ return fs.delete(f, true); } } - + final private static PathFilter DEFAULT_FILTER = new PathFilter() { @Override public boolean accept(Path file) { @@ -708,11 +959,11 @@ public boolean accept(Path file) { /** * List the statuses of the files/directories in the given path if the path is * a directory. - * + * * @param f * given path * @return the statuses of the files/directories in the given path - * @throws IOException + * @throws IOException if an I/O error occurs. */ @Override public FileStatus[] listStatus(Path f) throws IOException { @@ -729,18 +980,18 @@ public RemoteIterator listStatusIterator(final Path p) /** * List the statuses of the files/directories in the given path if the path is * a directory. - * + * * @param f * given path * @return the statuses of the files/directories in the given patch - * @throws IOException + * @throws IOException if an I/O error occurs. 
*/ @Override public RemoteIterator listLocatedStatus(Path f) throws IOException { return fs.listLocatedStatus(f, DEFAULT_FILTER); } - + @Override public boolean mkdirs(Path f) throws IOException { return fs.mkdirs(f); @@ -769,6 +1020,10 @@ public void copyToLocalFile(boolean delSrc, Path src, Path dst) * Copy it from FS control to the local dst name. * If src and dst are directories, the copyCrc parameter * determines whether to copy CRC files. + * @param src src path. + * @param dst dst path. + * @param copyCrc copy csc flag. + * @throws IOException if an I/O error occurs. */ @SuppressWarnings("deprecation") public void copyToLocalFile(Path src, Path dst, boolean copyCrc) @@ -790,7 +1045,7 @@ public void copyToLocalFile(Path src, Path dst, boolean copyCrc) } else { FileStatus[] srcs = listStatus(src); for (FileStatus srcFile : srcs) { - copyToLocalFile(srcFile.getPath(), + copyToLocalFile(srcFile.getPath(), new Path(dst, srcFile.getPath().getName()), copyCrc); } } @@ -847,7 +1102,7 @@ protected CompletableFuture openFileWithOptions( final OpenFileParameters parameters) throws IOException { AbstractFSBuilderImpl.rejectUnknownMandatoryKeys( parameters.getMandatoryKeys(), - Collections.emptySet(), + FS_OPTION_OPENFILE_STANDARD_OPTIONS, "for " + path); return LambdaUtils.eval( new CompletableFuture<>(), diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java index bc1122c56a2bd..4820c5c3045d7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java @@ -70,30 +70,53 @@ public void setVerifyChecksum(boolean inVerifyChecksum) { this.verifyChecksum = inVerifyChecksum; } - /** get the raw file system. */ + /** + * get the raw file system. + * + * @return abstract file system. 
+ */ public AbstractFileSystem getRawFs() { return getMyFs(); } - /** Return the name of the checksum file associated with a file.*/ + /** + * Return the name of the checksum file associated with a file. + * + * @param file the file path. + * @return the checksum file associated with a file. + */ public Path getChecksumFile(Path file) { return new Path(file.getParent(), "." + file.getName() + ".crc"); } - /** Return true iff file is a checksum file name.*/ + /** + * Return true iff file is a checksum file name. + * + * @param file the file path. + * @return if is checksum file true,not false. + */ public static boolean isChecksumFile(Path file) { String name = file.getName(); return name.startsWith(".") && name.endsWith(".crc"); } - /** Return the length of the checksum file given the size of the + /** + * Return the length of the checksum file given the size of the * actual file. - **/ + * + * @param file the file path. + * @param fileSize file size. + * @return check sum file length. + */ public long getChecksumFileLength(Path file, long fileSize) { return getChecksumLength(fileSize, getBytesPerSum()); } - /** Return the bytes Per Checksum. */ + /** + * Return the bytes Per Checksum. + * + * @return bytes per sum. + */ public int getBytesPerSum() { return defaultBytesPerChecksum; } @@ -433,7 +456,7 @@ private boolean isDirectory(Path f) * Implement the abstract setReplication of FileSystem * @param src file name * @param replication new replication - * @throws IOException + * @throws IOException if an I/O error occurs. 
* @return true if successful; * false if file does not exist or is a directory */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index 40ddfba3c0898..8d4d352a665b7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -378,7 +378,9 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { public static final boolean RPC_METRICS_QUANTILE_ENABLE_DEFAULT = false; public static final String RPC_METRICS_PERCENTILES_INTERVALS_KEY = "rpc.metrics.percentiles.intervals"; - + + public static final String RPC_METRICS_TIME_UNIT = "rpc.metrics.timeunit"; + /** Allowed hosts for nfs exports */ public static final String NFS_EXPORTS_ALLOWED_HOSTS_SEPARATOR = ";"; public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY = "nfs.exports.allowed.hosts"; @@ -394,6 +396,12 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { public static final String ZK_ACL_DEFAULT = "world:anyone:rwcda"; /** Authentication for the ZooKeeper ensemble. */ public static final String ZK_AUTH = ZK_PREFIX + "auth"; + /** Principal name for zookeeper servers. */ + public static final String ZK_SERVER_PRINCIPAL = ZK_PREFIX + "server.principal"; + /** Kerberos principal name for zookeeper connection. */ + public static final String ZK_KERBEROS_PRINCIPAL = ZK_PREFIX + "kerberos.principal"; + /** Kerberos keytab for zookeeper connection. */ + public static final String ZK_KERBEROS_KEYTAB = ZK_PREFIX + "kerberos.keytab"; /** Address of the ZooKeeper ensemble. 
*/ public static final String ZK_ADDRESS = ZK_PREFIX + "address"; @@ -435,4 +443,50 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { "hadoop.metrics.jvm.use-thread-mxbean"; public static final boolean HADOOP_METRICS_JVM_USE_THREAD_MXBEAN_DEFAULT = false; + + /** logging level for IOStatistics (debug or info). */ + public static final String IOSTATISTICS_LOGGING_LEVEL + = "fs.iostatistics.logging.level"; + + /** DEBUG logging level for IOStatistics logging. */ + public static final String IOSTATISTICS_LOGGING_LEVEL_DEBUG + = "debug"; + + /** WARN logging level for IOStatistics logging. */ + public static final String IOSTATISTICS_LOGGING_LEVEL_WARN + = "warn"; + + /** ERROR logging level for IOStatistics logging. */ + public static final String IOSTATISTICS_LOGGING_LEVEL_ERROR + = "error"; + + /** INFO logging level for IOStatistics logging. */ + public static final String IOSTATISTICS_LOGGING_LEVEL_INFO + = "info"; + + /** Default value for IOStatistics logging level. */ + public static final String IOSTATISTICS_LOGGING_LEVEL_DEFAULT + = IOSTATISTICS_LOGGING_LEVEL_DEBUG; + + /** + * default hadoop temp dir on local system: {@value}. + */ + public static final String HADOOP_TMP_DIR = "hadoop.tmp.dir"; + + /** + * Thread-level IOStats Support. + * {@value} + */ + public static final String IOSTATISTICS_THREAD_LEVEL_ENABLED = + "fs.iostatistics.thread.level.enabled"; + + /** + * Default value for Thread-level IOStats Support is true. 
+ */ + public static final boolean IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT = + true; + + public static final String HADOOP_SECURITY_RESOLVER_IMPL = + "hadoop.security.resolver.impl"; + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index a68012b06d2bc..ce5ab9f5010a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -164,6 +164,28 @@ public class CommonConfigurationKeysPublic { public static final String FS_AUTOMATIC_CLOSE_KEY = "fs.automatic.close"; /** Default value for FS_AUTOMATIC_CLOSE_KEY */ public static final boolean FS_AUTOMATIC_CLOSE_DEFAULT = true; + + /** + * Number of filesystems instances can be created in parallel. + *

    + * A higher number here does not necessarily improve performance, especially + * for object stores, where multiple threads may be attempting to create an FS + * instance for the same URI. + *

    + * Default value: {@value}. + */ + public static final String FS_CREATION_PARALLEL_COUNT = + "fs.creation.parallel.count"; + + /** + * Default value for {@link #FS_CREATION_PARALLEL_COUNT}. + *

    + * Default value: {@value}. + *

    + */ + public static final int FS_CREATION_PARALLEL_COUNT_DEFAULT = + 64; + /** * @see * @@ -350,6 +372,9 @@ public class CommonConfigurationKeysPublic { "hadoop.caller.context.signature.max.size"; public static final int HADOOP_CALLER_CONTEXT_SIGNATURE_MAX_SIZE_DEFAULT = 40; + public static final String HADOOP_CALLER_CONTEXT_SEPARATOR_KEY = + "hadoop.caller.context.separator"; + public static final String HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT = ","; /** * @see @@ -468,6 +493,10 @@ public class CommonConfigurationKeysPublic { "ipc.server.log.slow.rpc"; public static final boolean IPC_SERVER_LOG_SLOW_RPC_DEFAULT = false; + public static final String IPC_SERVER_PURGE_INTERVAL_MINUTES_KEY = + "ipc.server.purge.interval"; + public static final int IPC_SERVER_PURGE_INTERVAL_MINUTES_DEFAULT = 15; + /** * @see * @@ -948,6 +977,8 @@ public class CommonConfigurationKeysPublic { "ssl.keystore.pass$", "fs.s3.*[Ss]ecret.?[Kk]ey", "fs.s3a.*.server-side-encryption.key", + "fs.s3a.encryption.algorithm", + "fs.s3a.encryption.key", "fs.azure\\.account.key.*", "credential$", "oauth.*secret", @@ -988,5 +1019,22 @@ public class CommonConfigurationKeysPublic { public static final String HADOOP_PROMETHEUS_ENABLED = "hadoop.prometheus.endpoint.enabled"; public static final boolean HADOOP_PROMETHEUS_ENABLED_DEFAULT = false; + + /** + * @see + * + * core-default.xml + */ + public static final String HADOOP_HTTP_IDLE_TIMEOUT_MS_KEY = + "hadoop.http.idle_timeout.ms"; + public static final int HADOOP_HTTP_IDLE_TIMEOUT_MS_DEFAULT = 60000; + + /** + * To configure scheduling of server metrics update thread. This config is used to indicate + * initial delay and delay between each execution of the metric update runnable thread. 
+ */ + public static final String IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL = + "ipc.server.metrics.update.runner.interval"; + public static final int IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL_DEFAULT = 5000; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java index fb46ef81e36fa..b898fde43cc83 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java @@ -131,4 +131,43 @@ private CommonPathCapabilities() { @InterfaceStability.Unstable public static final String FS_EXPERIMENTAL_BATCH_LISTING = "fs.capability.batch.listing"; + + /** + * Does the store support multipart uploading? + * Value: {@value}. + */ + public static final String FS_MULTIPART_UPLOADER = + "fs.capability.multipart.uploader"; + + + /** + * Stream abort() capability implemented by {@link Abortable#abort()}. + * Value: {@value}. + */ + public static final String ABORTABLE_STREAM = + "fs.capability.outputstream.abortable"; + + /** + * Does this FS support etags? + * That is: will FileStatus entries from listing/getFileStatus + * probes support EtagSource and return real values. + */ + public static final String ETAGS_AVAILABLE = + "fs.capability.etags.available"; + + /** + * Are etags guaranteed to be preserved across rename() operations.. + * FileSystems MUST NOT declare support for this feature + * unless this holds. + */ + public static final String ETAGS_PRESERVED_IN_RENAME = + "fs.capability.etags.preserved.in.rename"; + + /** + * Does this Filesystem support lease recovery operations such as + * {@link LeaseRecoverable#recoverLease(Path)} and {@link LeaseRecoverable#isFileClosed(Path)}}? + * Value: {@value}. 
+ */ + public static final String LEASE_RECOVERABLE = "fs.capability.lease.recoverable"; + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java index e1ed5cbcfcaa6..bdbc8f3a33f4b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java @@ -37,7 +37,13 @@ public class CompositeCrcFileChecksum extends FileChecksum { private DataChecksum.Type crcType; private int bytesPerCrc; - /** Create a CompositeCrcFileChecksum. */ + /** + * Create a CompositeCrcFileChecksum. + * + * @param crc crc. + * @param crcType crcType. + * @param bytesPerCrc bytesPerCrc. + */ public CompositeCrcFileChecksum( int crc, DataChecksum.Type crcType, int bytesPerCrc) { this.crc = crc; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java index cdbd10f636dd3..4759ffea7d038 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java @@ -149,17 +149,31 @@ public ContentSummary build() { @Deprecated public ContentSummary() {} - /** Constructor, deprecated by ContentSummary.Builder + /** + * Constructor, deprecated by ContentSummary.Builder * This constructor implicitly set spaceConsumed the same as length. * spaceConsumed and length must be set explicitly with - * ContentSummary.Builder + * ContentSummary.Builder. + * + * @param length length. + * @param fileCount file count. + * @param directoryCount directory count. 
* */ @Deprecated public ContentSummary(long length, long fileCount, long directoryCount) { this(length, fileCount, directoryCount, -1L, length, -1L); } - /** Constructor, deprecated by ContentSummary.Builder */ + /** + * Constructor, deprecated by ContentSummary.Builder. + * + * @param length length. + * @param fileCount file count. + * @param directoryCount directory count. + * @param quota quota. + * @param spaceConsumed space consumed. + * @param spaceQuota space quota. + * */ @Deprecated public ContentSummary( long length, long fileCount, long directoryCount, long quota, @@ -172,7 +186,11 @@ public ContentSummary( setSpaceQuota(spaceQuota); } - /** Constructor for ContentSummary.Builder*/ + /** + * Constructor for ContentSummary.Builder. + * + * @param builder builder. + */ private ContentSummary(Builder builder) { super(builder); this.length = builder.length; @@ -281,6 +299,21 @@ public int hashCode() { private static final String ALL_HEADER = QUOTA_HEADER + SUMMARY_HEADER; + /** + * Output format:<-------18-------> <----------24----------> + * <----------24---------->. <-------------28------------> SNAPSHOT_LENGTH + * SNAPSHOT_FILE_COUNT SNAPSHOT_DIR_COUNT SNAPSHOT_SPACE_CONSUMED + */ + private static final String SNAPSHOT_FORMAT = "%18s %24s %24s %28s "; + + private static final String[] SNAPSHOT_HEADER_FIELDS = + new String[] {"SNAPSHOT_LENGTH", "SNAPSHOT_FILE_COUNT", + "SNAPSHOT_DIR_COUNT", "SNAPSHOT_SPACE_CONSUMED"}; + + /** The header string. */ + private static final String SNAPSHOT_HEADER = + String.format(SNAPSHOT_FORMAT, (Object[]) SNAPSHOT_HEADER_FIELDS); + /** Return the header of the output. * if qOption is false, output directory count, file count, and content size; @@ -293,7 +326,9 @@ public static String getHeader(boolean qOption) { return qOption ? ALL_HEADER : SUMMARY_HEADER; } - + public static String getSnapshotHeader() { + return SNAPSHOT_HEADER; + } /** * Returns the names of the fields from the summary header. 
@@ -416,7 +451,7 @@ public String toString(boolean qOption, boolean hOption, boolean tOption, } /** - * Formats a size to be human readable or in bytes + * Formats a size to be human readable or in bytes. * @param size value to be formatted * @param humanReadable flag indicating human readable or not * @return String representation of the size @@ -426,4 +461,17 @@ private String formatSize(long size, boolean humanReadable) { ? StringUtils.TraditionalBinaryPrefix.long2String(size, "", 1) : String.valueOf(size); } + + /** + * Return the string representation of the snapshot counts in the output + * format. + * @param hOption flag indicating human readable or not + * @return String representation of the snapshot counts + */ + public String toSnapshot(boolean hOption) { + return String.format(SNAPSHOT_FORMAT, formatSize(snapshotLength, hOption), + formatSize(snapshotFileCount, hOption), + formatSize(snapshotDirectoryCount, hOption), + formatSize(snapshotSpaceConsumed, hOption)); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java index 71993713ad2eb..ca008e536931d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java @@ -189,6 +189,8 @@ public static void validate(Object path, boolean pathExists, /** * Validate the CreateFlag for the append operation. The flag must contain * APPEND, and cannot contain OVERWRITE. + * + * @param flag enum set flag. 
*/ public static void validateForAppend(EnumSet flag) { validate(flag); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java index 7a94088e4062d..94f21502c6894 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java @@ -30,7 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** Filesystem disk space usage statistics. * Uses the unix 'df' program to get mount points, and java.io.File for @@ -65,7 +65,10 @@ public String getDirPath() { return dirPath; } - /** @return a string indicating which filesystem volume we're checking. */ + /** + * @return a string indicating which filesystem volume we're checking. + * @throws IOException raised on errors performing I/O. + */ public String getFilesystem() throws IOException { if (Shell.WINDOWS) { this.filesystem = dirFile.getCanonicalPath().substring(0, 2); @@ -100,7 +103,10 @@ public int getPercentUsed() { return (int) (used * 100.0 / cap); } - /** @return the filesystem mount point for the indicated volume */ + /** + * @return the filesystem mount point for the indicated volume. + * @throws IOException raised on errors performing I/O. 
+ */ public String getMount() throws IOException { // Abort early if specified path does not exist if (!dirFile.exists()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java index 6e374c97c3eda..89ac7c3e7cbbe 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.fs; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java index 2feb9375255c3..fb839fa1b9d5c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.lang.ref.WeakReference; @@ -47,7 +47,11 @@ public interface Renewable { /** @return the renew token. */ public Token getRenewToken(); - /** Set delegation token. */ + /** + * Set delegation token. + * @param generic type T. + * @param token token. 
+ */ public void setDelegationToken(Token token); } @@ -172,7 +176,11 @@ public String toString() { /** Queue to maintain the RenewActions to be processed by the {@link #run()} */ private volatile DelayQueue> queue = new DelayQueue>(); - /** For testing purposes */ + /** + * For testing purposes. + * + * @return renew queue length. + */ @VisibleForTesting protected int getRenewQueueLength() { return queue.size(); @@ -211,7 +219,13 @@ static synchronized void reset() { } } - /** Add a renew action to the queue. */ + /** + * Add a renew action to the queue. + * + * @param generic type T. + * @param fs file system. + * @return renew action. + * */ @SuppressWarnings("static-access") public RenewAction addRenewAction(final T fs) { synchronized (this) { @@ -230,8 +244,10 @@ public RenewAction addRenewAction(final T /** * Remove the associated renew action from the queue - * - * @throws IOException + * + * @param generic type T. + * @param fs file system. + * @throws IOException raised on errors performing I/O. */ public void removeRenewAction( final T fs) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/EtagSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/EtagSource.java new file mode 100644 index 0000000000000..d7efdc705d8e5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/EtagSource.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +/** + * An optional interface for {@link FileStatus} subclasses to implement + * to provide access to etags. + * If available FS SHOULD also implement the matching PathCapabilities + * -- etag supported: {@link CommonPathCapabilities#ETAGS_AVAILABLE}. + * -- etag consistent over rename: + * {@link CommonPathCapabilities#ETAGS_PRESERVED_IN_RENAME}. + */ +public interface EtagSource { + + /** + * Return an etag of this file status. + * A return value of null or "" means "no etag" + * @return a possibly null or empty etag. + */ + String getEtag(); + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java index b7757a62e28ad..175df15a543c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java @@ -28,6 +28,34 @@ * The base interface which various FileSystem FileContext Builder * interfaces can extend, and which underlying implementations * will then implement. + *

    + * HADOOP-16202 expanded the opt() and must() arguments with + * operator overloading, but HADOOP-18724 identified mapping problems: + * passing a long value in to {@code opt()} could end up invoking + * {@code opt(string, double)}, which could then trigger parse failures. + *

    + * To fix this without forcing existing code to break/be recompiled: + *

      + *
    1. A new method to explicitly set a long value is added: + * {@link #optLong(String, long)} + *
    2. + *
    3. A new method to explicitly set a double value is added: + * {@link #optDouble(String, double)} + *
    4. + *
    5. + * All of {@link #opt(String, long)}, {@link #opt(String, float)} and + * {@link #opt(String, double)} invoke {@link #optLong(String, long)}. + *
    6. + *
    7. + * The same changes have been applied to {@code must()} methods. + *
    8. + *
    + * The forwarding of existing double/float setters to the long setters ensure + * that existing code will link, but are guaranteed to always set a long value. + * If you need to write code which works correctly with all hadoop releases, + * covert the option to a string explicitly and then call {@link #opt(String, String)} + * or {@link #must(String, String)} as appropriate. + * * @param Return type on the {@link #build()} call. * @param type of builder itself. */ @@ -37,87 +65,225 @@ public interface FSBuilder> { /** * Set optional Builder parameter. + * @param key key. + * @param value value. + * @return generic type B. */ B opt(@Nonnull String key, @Nonnull String value); /** * Set optional boolean parameter for the Builder. - * + * @param key key. + * @param value value. + * @return generic type B. * @see #opt(String, String) */ - B opt(@Nonnull String key, boolean value); + default B opt(@Nonnull String key, boolean value) { + return opt(key, Boolean.toString(value)); + } /** * Set optional int parameter for the Builder. * + * @param key key. + * @param value value. + * @return generic type B. * @see #opt(String, String) */ - B opt(@Nonnull String key, int value); + default B opt(@Nonnull String key, int value) { + return optLong(key, value); + } /** - * Set optional float parameter for the Builder. + * This parameter is converted to a long and passed + * to {@link #optLong(String, long)} -all + * decimal precision is lost. * + * @param key key. + * @param value value. + * @return generic type B. * @see #opt(String, String) + * @deprecated use {@link #optDouble(String, double)} */ - B opt(@Nonnull String key, float value); + @Deprecated + default B opt(@Nonnull String key, float value) { + return optLong(key, (long) value); + } /** - * Set optional double parameter for the Builder. + * Set optional long parameter for the Builder. * + * @param key key. + * @param value value. + * @return generic type B. 
+ * @deprecated use {@link #optLong(String, long)} where possible. + */ + default B opt(@Nonnull String key, long value) { + return optLong(key, value); + } + + /** + * Pass an optional double parameter for the Builder. + * This parameter is converted to a long and passed + * to {@link #optLong(String, long)} -all + * decimal precision is lost. + * @param key key. + * @param value value. + * @return generic type B. * @see #opt(String, String) + * @deprecated use {@link #optDouble(String, double)} */ - B opt(@Nonnull String key, double value); + @Deprecated + default B opt(@Nonnull String key, double value) { + return optLong(key, (long) value); + } /** * Set an array of string values as optional parameter for the Builder. * + * @param key key. + * @param values values. + * @return generic type B. * @see #opt(String, String) */ B opt(@Nonnull String key, @Nonnull String... values); + /** + * Set optional long parameter for the Builder. + * + * @param key key. + * @param value value. + * @return generic type B. + * @see #opt(String, String) + */ + default B optLong(@Nonnull String key, long value) { + return opt(key, Long.toString(value)); + } + + /** + * Set optional double parameter for the Builder. + * + * @param key key. + * @param value value. + * @return generic type B. + * @see #opt(String, String) + */ + default B optDouble(@Nonnull String key, double value) { + return opt(key, Double.toString(value)); + } + /** * Set mandatory option to the Builder. * * If the option is not supported or unavailable, * the client should expect {@link #build()} throws IllegalArgumentException. + * + * @param key key. + * @param value value. + * @return generic type B. */ B must(@Nonnull String key, @Nonnull String value); /** * Set mandatory boolean option. * + * @param key key. + * @param value value. + * @return generic type B. 
* @see #must(String, String) */ - B must(@Nonnull String key, boolean value); + default B must(@Nonnull String key, boolean value) { + return must(key, Boolean.toString(value)); + } /** * Set mandatory int option. * + * @param key key. + * @param value value. + * @return generic type B. * @see #must(String, String) */ - B must(@Nonnull String key, int value); + default B must(@Nonnull String key, int value) { + return mustLong(key, value); + } + + /** + * This parameter is converted to a long and passed + * to {@link #mustLong(String, long)} -all + * decimal precision is lost. + * + * @param key key. + * @param value value. + * @return generic type B. + * @deprecated use {@link #mustDouble(String, double)} to set floating point. + */ + @Deprecated + default B must(@Nonnull String key, float value) { + return mustLong(key, (long) value); + } /** - * Set mandatory float option. + * Set mandatory long option. * + * @param key key. + * @param value value. + * @return generic type B. * @see #must(String, String) */ - B must(@Nonnull String key, float value); + @Deprecated + default B must(@Nonnull String key, long value) { + return mustLong(key, (long) value); + } /** - * Set mandatory double option. + * Set mandatory long option, despite passing in a floating + * point value. * + * @param key key. + * @param value value. + * @return generic type B. * @see #must(String, String) */ - B must(@Nonnull String key, double value); + @Deprecated + default B must(@Nonnull String key, double value) { + return mustLong(key, (long) value); + } /** * Set a string array as mandatory option. * + * @param key key. + * @param values values. + * @return generic type B. * @see #must(String, String) */ B must(@Nonnull String key, @Nonnull String... values); + /** + * Set mandatory long parameter for the Builder. + * + * @param key key. + * @param value value. + * @return generic type B. 
+ * @see #must(String, String) + */ + default B mustLong(@Nonnull String key, long value) { + return must(key, Long.toString(value)); + } + + /** + * Set mandatory double parameter for the Builder. + * + * @param key key. + * @param value value. + * @return generic type B. + * @see #must(String, String) + */ + default B mustDouble(@Nonnull String key, double value) { + return must(key, Double.toString(value)); + } + /** * Instantiate the object which was being built. * @@ -125,6 +291,7 @@ public interface FSBuilder> { * @throws UnsupportedOperationException if the filesystem does not support * the specific operation. * @throws IOException on filesystem IO errors. + * @return generic type S. */ S build() throws IllegalArgumentException, UnsupportedOperationException, IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java index 31f82975899e1..cca6c28da11a3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.
See the NOTICE file @@ -26,9 +26,15 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.util.EnumSet; +import java.util.List; +import java.util.function.IntFunction; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.IOStatisticsSupport; import org.apache.hadoop.io.ByteBufferPool; import org.apache.hadoop.util.IdentityHashStore; @@ -40,14 +46,14 @@ public class FSDataInputStream extends DataInputStream implements Seekable, PositionedReadable, ByteBufferReadable, HasFileDescriptor, CanSetDropBehind, CanSetReadahead, HasEnhancedByteBufferAccess, CanUnbuffer, StreamCapabilities, - ByteBufferPositionedReadable { + ByteBufferPositionedReadable, IOStatisticsSource { /** * Map ByteBuffers that we have handed out to readers to ByteBufferPool * objects */ private final IdentityHashStore extendedReadBuffers - = new IdentityHashStore(0); + = new IdentityHashStore<>(0); public FSDataInputStream(InputStream in) { super(in); @@ -138,7 +144,8 @@ public boolean seekToNewSource(long targetPos) throws IOException { * * @return the underlying input stream */ - @InterfaceAudience.LimitedPrivate({"HDFS"}) + @InterfaceAudience.Public + @InterfaceStability.Stable public InputStream getWrappedStream() { return in; } @@ -234,10 +241,7 @@ public void unbuffer() { @Override public boolean hasCapability(String capability) { - if (in instanceof StreamCapabilities) { - return ((StreamCapabilities) in).hasCapability(capability); - } - return false; + return StoreImplementationUtils.hasCapability(in, capability); } /** @@ -267,4 +271,31 @@ public void readFully(long position, ByteBuffer buf) throws IOException { "unsupported by " + in.getClass().getCanonicalName()); } } + + /** + * Get the IO 
Statistics of the nested stream, falling back to + * null if the stream does not implement the interface + * {@link IOStatisticsSource}. + * @return an IOStatistics instance or null + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(in); + } + + @Override + public int minSeekForVectorReads() { + return ((PositionedReadable) in).minSeekForVectorReads(); + } + + @Override + public int maxReadSizeForVectorReads() { + return ((PositionedReadable) in).maxReadSizeForVectorReads(); + } + + @Override + public void readVectored(List ranges, + IntFunction allocate) throws IOException { + ((PositionedReadable) in).readVectored(ranges, allocate); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java index 5b604e58e2360..94c56b713c1eb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java @@ -24,13 +24,18 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.IOStatisticsSupport; /** Utility that wraps a {@link OutputStream} in a {@link DataOutputStream}. 
*/ @InterfaceAudience.Public @InterfaceStability.Stable public class FSDataOutputStream extends DataOutputStream - implements Syncable, CanSetDropBehind, StreamCapabilities { + implements Syncable, CanSetDropBehind, StreamCapabilities, + IOStatisticsSource, Abortable { private final OutputStream wrappedStream; private static class PositionCache extends FilterOutputStream { @@ -122,10 +127,7 @@ public OutputStream getWrappedStream() { @Override public boolean hasCapability(String capability) { - if (wrappedStream instanceof StreamCapabilities) { - return ((StreamCapabilities) wrappedStream).hasCapability(capability); - } - return false; + return StoreImplementationUtils.hasCapability(wrappedStream, capability); } @Override // Syncable @@ -155,4 +157,32 @@ public void setDropBehind(Boolean dropBehind) throws IOException { "not support setting the drop-behind caching setting."); } } + + /** + * Get the IO Statistics of the nested stream, falling back to + * empty statistics if the stream does not implement the interface + * {@link IOStatisticsSource}. + * @return an IOStatistics instance. + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(wrappedStream); + } + + /** + * Invoke {@code abort()} on the wrapped stream if it + * is Abortable, otherwise raise an + * {@code UnsupportedOperationException}. + * @throws UnsupportedOperationException if not available. + * @return the result. 
+ */ + @Override + public AbortableResult abort() { + if (wrappedStream instanceof Abortable) { + return ((Abortable) wrappedStream).abort(); + } else { + throw new UnsupportedOperationException( + FSExceptionMessages.ABORTABLE_UNSUPPORTED); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java index 62a3182dfba20..3c3870723e652 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java @@ -28,7 +28,7 @@ import java.io.IOException; import java.util.EnumSet; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; @@ -123,6 +123,9 @@ public abstract class FSDataOutputStreamBuilder /** * Constructor. + * + * @param fileSystem file system. + * @param p the path. */ protected FSDataOutputStreamBuilder(@Nonnull FileSystem fileSystem, @Nonnull Path p) { @@ -149,6 +152,9 @@ protected FsPermission getPermission() { /** * Set permission for the file. + * + * @param perm permission. + * @return B Generics Type. */ public B permission(@Nonnull final FsPermission perm) { checkNotNull(perm); @@ -162,6 +168,9 @@ protected int getBufferSize() { /** * Set the size of the buffer to be used. + * + * @param bufSize buffer size. + * @return Generics Type B. */ public B bufferSize(int bufSize) { bufferSize = bufSize; @@ -174,6 +183,9 @@ protected short getReplication() { /** * Set replication factor. + * + * @param replica replica. + * @return Generics Type B. 
*/ public B replication(short replica) { replication = replica; @@ -186,6 +198,9 @@ protected long getBlockSize() { /** * Set block size. + * + * @param blkSize block size. + * @return B Generics Type. */ public B blockSize(long blkSize) { blockSize = blkSize; @@ -194,6 +209,8 @@ public B blockSize(long blkSize) { /** * Return true to create the parent directories if they do not exist. + * + * @return true if the parent directories are to be created when they do not exist, false otherwise. */ protected boolean isRecursive() { return recursive; @@ -201,6 +218,8 @@ protected boolean isRecursive() { /** * Create the parent directory if they do not exist. + * + * @return B Generics Type. */ public B recursive() { recursive = true; @@ -213,6 +232,9 @@ protected Progressable getProgress() { /** * Set the facility of reporting progress. + * + * @param prog progress. + * @return B Generics Type. */ public B progress(@Nonnull final Progressable prog) { checkNotNull(prog); @@ -226,6 +248,8 @@ protected EnumSet getFlags() { /** * Create an FSDataOutputStream at the specified path. + * + * @return Generics Type B. */ public B create() { flags.add(CreateFlag.CREATE); @@ -236,6 +260,9 @@ public B create() { * Set to true to overwrite the existing file. * Set it to false, an exception will be thrown when calling {@link #build()} * if the file exists. + * + * @param overwrite true to overwrite the existing file. + * @return Generics Type B. */ public B overwrite(boolean overwrite) { if (overwrite) { @@ -248,6 +275,8 @@ public B overwrite(boolean overwrite) { /** * Append to an existing file (optional operation). + * + * @return Generics Type B. */ public B append() { flags.add(CreateFlag.APPEND); @@ -260,6 +289,9 @@ protected ChecksumOpt getChecksumOpt() { /** * Set checksum opt. + * + * @param chksumOpt check sum opt. + * @return Generics Type B.
*/ public B checksumOpt(@Nonnull final ChecksumOpt chksumOpt) { checkNotNull(chksumOpt); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java index a8e7b71bb119c..f4616f1d72bc7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java @@ -51,4 +51,10 @@ public class FSExceptionMessages { public static final String PERMISSION_DENIED_BY_STICKY_BIT = "Permission denied by sticky bit"; + + /** + * A call was made to abort(), but it is not supported. + */ + public static final String ABORTABLE_UNSUPPORTED = + "Abortable.abort() is not supported"; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java index de66eab713ab6..ee16ca8a2cd50 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java @@ -82,6 +82,7 @@ protected FSInputChecker( Path file, int numOfRetries) { * @param sum the type of Checksum engine * @param chunkSize maximun chunk size * @param checksumSize the number byte of each checksum + * @param verifyChecksum verify check sum. */ protected FSInputChecker( Path file, int numOfRetries, boolean verifyChecksum, Checksum sum, int chunkSize, int checksumSize ) { @@ -118,6 +119,7 @@ protected FSInputChecker( Path file, int numOfRetries, * @param len maximum number of bytes to read * @param checksum the data buffer into which to write checksums * @return number of bytes read + * @throws IOException raised on errors performing I/O. 
*/ abstract protected int readChunk(long pos, byte[] buf, int offset, int len, byte[] checksum) throws IOException; @@ -129,7 +131,10 @@ abstract protected int readChunk(long pos, byte[] buf, int offset, int len, */ abstract protected long getChunkPosition(long pos); - /** Return true if there is a need for checksum verification */ + /** + * Return true if there is a need for checksum verification. + * @return if there is a need for checksum verification true, not false. + */ protected synchronized boolean needChecksum() { return verifyChecksum && sum != null; } @@ -357,6 +362,9 @@ private void verifySums(final byte b[], final int off, int read) * Convert a checksum byte array to a long * This is deprecated since 0.22 since it is no longer in use * by this class. + * + * @param checksum check sum. + * @return crc. */ @Deprecated static public long checksum2long(byte[] checksum) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputStream.java index 672ab15f16c3b..ad2642f7db963 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputStream.java @@ -21,9 +21,12 @@ import java.io.IOException; import java.io.InputStream; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.IOStatisticsLogging; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -134,4 +137,23 @@ public void readFully(long position, byte[] buffer) throws IOException { readFully(position, buffer, 0, buffer.length); } + + /** + * toString method returns the 
superclass toString, but if the subclass + * implements {@link IOStatisticsSource} then those statistics are + * extracted and included in the output. + * That is: statistics of subclasses are automatically reported. + * @return a string value. + */ + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(super.toString()); + sb.append('{'); + if (this instanceof IOStatisticsSource) { + sb.append(IOStatisticsLogging.ioStatisticsSourceToString( + (IOStatisticsSource) this)); + } + sb.append('}'); + return sb.toString(); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java index ffe4b34ca5fdb..f85cf7a858152 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java @@ -74,7 +74,7 @@ abstract public T next(final AbstractFileSystem fs, final Path p) * @param fc FileContext used to access file systems. * @param path The path to resolve symlinks on. * @return Generic type determined by the implementation of next. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public T resolve(final FileContext fc, final Path path) throws IOException { int count = 0; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java index 2458b2f40d8d7..4ef512dc257a3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java @@ -21,7 +21,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.core.TraceScope; +import org.apache.hadoop.tracing.TraceScope; import java.io.IOException; import java.io.OutputStream; @@ -33,7 +33,8 @@ */ @InterfaceAudience.LimitedPrivate({"HDFS"}) @InterfaceStability.Unstable -abstract public class FSOutputSummer extends OutputStream { +abstract public class FSOutputSummer extends OutputStream implements + StreamCapabilities { // data checksum private final DataChecksum sum; // internal buffer for storing data before it is checksumed @@ -185,6 +186,8 @@ public void flush() throws IOException { /** * Return the number of valid bytes currently in the buffer. + * + * @return buffer data size. */ protected synchronized int getBufferedDataSize() { return count; @@ -226,6 +229,10 @@ private void writeChecksumChunks(byte b[], int off, int len) /** * Converts a checksum integer value to a byte stream + * + * @param sum check sum. + * @param checksumSize check sum size. + * @return byte stream. */ static public byte[] convertToByteStream(Checksum sum, int checksumSize) { return int2byte((int)sum.getValue(), new byte[checksumSize]); @@ -244,6 +251,8 @@ static byte[] int2byte(int integer, byte[] bytes) { /** * Resets existing buffer with a new one of the specified size. + * + * @param size size. 
*/ protected synchronized void setChecksumBufSize(int size) { this.buf = new byte[size]; @@ -254,4 +263,9 @@ protected synchronized void setChecksumBufSize(int size) { protected synchronized void resetChecksumBufSize() { setChecksumBufSize(sum.getBytesPerChecksum() * BUFFER_NUM_CHUNKS); } + + @Override + public boolean hasCapability(String capability) { + return false; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java index 62f1a9b3f486e..088ad6ebc36ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java @@ -28,20 +28,37 @@ @InterfaceAudience.Public @InterfaceStability.Stable public abstract class FileChecksum implements Writable { - /** The checksum algorithm name */ + /** + * The checksum algorithm name. + * + * @return algorithm name. + */ public abstract String getAlgorithmName(); - /** The length of the checksum in bytes */ + /** + * The length of the checksum in bytes. + * + * @return length. + */ public abstract int getLength(); - /** The value of the checksum in bytes */ + /** + * The value of the checksum in bytes. + * + * @return byte array. + */ public abstract byte[] getBytes(); public ChecksumOpt getChecksumOpt() { return null; } - /** Return true if both the algorithms and the values are the same. */ + /** + * Return true if both the algorithms and the values are the same. + * + * @param other other. + * @return if equal true, not false. 
+ */ @Override public boolean equals(Object other) { if (other == this) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index df93e89750ee0..5601f166abec8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -65,12 +65,18 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ShutdownHookManager; -import com.google.common.base.Preconditions; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; /** * The FileContext class provides an interface for users of the Hadoop @@ -366,8 +372,8 @@ public AbstractFileSystem run() throws UnsupportedFileSystemException { * Create a FileContext with specified FS as default using the specified * config. * - * @param defFS - * @param aConf + * @param defFS default fs. + * @param aConf configutration. * @return new FileContext with specified FS as default. 
*/ public static FileContext getFileContext(final AbstractFileSystem defFS, @@ -378,7 +384,7 @@ public static FileContext getFileContext(final AbstractFileSystem defFS, /** * Create a FileContext for specified file system using the default config. * - * @param defaultFS + * @param defaultFS default fs. * @return a FileContext with the specified AbstractFileSystem * as the default FS. */ @@ -411,6 +417,7 @@ protected static FileContext getFileContext( * * @throws UnsupportedFileSystemException If the file system from the default * configuration is not supported + * @return file context. */ public static FileContext getFileContext() throws UnsupportedFileSystemException { @@ -430,7 +437,7 @@ public static FileContext getLocalFSFileContext() /** * Create a FileContext for specified URI using the default config. * - * @param defaultFsUri + * @param defaultFsUri defaultFsUri. * @return a FileContext with the specified URI as the default FS. * * @throws UnsupportedFileSystemException If the file system for @@ -444,8 +451,8 @@ public static FileContext getFileContext(final URI defaultFsUri) /** * Create a FileContext for specified default URI using the specified config. * - * @param defaultFsUri - * @param aConf + * @param defaultFsUri defaultFsUri. + * @param aConf configuration. * @return new FileContext for specified uri * @throws UnsupportedFileSystemException If the file system with specified is * not supported @@ -476,7 +483,7 @@ public static FileContext getFileContext(final URI defaultFsUri, * {@link #getFileContext(URI, Configuration)} instead of this one. * * - * @param aConf + * @param aConf configuration. * @return new FileContext * @throws UnsupportedFileSystemException If file system in the config * is not supported @@ -507,10 +514,9 @@ public static FileContext getLocalFSFileContext(final Configuration aConf) return getFileContext(FsConstants.LOCAL_FS_URI, aConf); } - /* This method is needed for tests.
*/ + @VisibleForTesting @InterfaceAudience.Private - @InterfaceStability.Unstable /* return type will change to AFS once - HADOOP-6223 is completed */ + @InterfaceStability.Unstable public AbstractFileSystem getDefaultFileSystem() { return defaultFS; } @@ -555,6 +561,7 @@ public void setWorkingDirectory(final Path newWDir) throws IOException { /** * Gets the working directory for wd-relative names (such a "foo/bar"). + * @return the path. */ public Path getWorkingDirectory() { return workingDir; @@ -601,13 +608,14 @@ public void setUMask(final FsPermission newUmask) { * @throws FileNotFoundException If f does not exist * @throws AccessControlException if access denied * @throws IOException If an IO Error occurred - * + * @throws UnresolvedLinkException If unresolved link occurred. + * * Exceptions applicable to file systems accessed over RPC: * @throws RpcClientException If an exception occurred in the RPC client * @throws RpcServerException If an exception occurred in the RPC server * @throws UnexpectedServerException If server implementation throws * undeclared exception to RPC server - * + * * RuntimeExceptions: * @throws InvalidPathException If path f is not valid */ @@ -621,7 +629,7 @@ public Path resolvePath(final Path f) throws FileNotFoundException, * A Fully-qualified path has scheme and authority specified and an absolute * path. * Use the default file system and working dir in this FileContext to qualify. - * @param path + * @param path the path. * @return qualified path */ public Path makeQualified(final Path path) { @@ -760,6 +768,7 @@ public FSDataOutputStream build() throws IOException { * * Client should expect {@link FSDataOutputStreamBuilder#build()} throw the * same exceptions as create(Path, EnumSet, CreateOpts...). + * @throws IOException If an I/O error occurred. 
*/ public FSDataOutputStreamBuilder create(final Path f) throws IOException { @@ -833,6 +842,8 @@ public Void next(final AbstractFileSystem fs, final Path p) * * RuntimeExceptions: * @throws InvalidPathException If path f is invalid + * + * @return true if the delete succeeded, false otherwise. */ public boolean delete(final Path f, final boolean recursive) throws AccessControlException, FileNotFoundException, @@ -863,6 +874,7 @@ public Boolean next(final AbstractFileSystem fs, final Path p) * @throws RpcServerException If an exception occurred in the RPC server * @throws UnexpectedServerException If server implementation throws * undeclared exception to RPC server + * @return input stream. */ public FSDataInputStream open(final Path f) throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException, IOException { @@ -893,6 +905,7 @@ public FSDataInputStream next(final AbstractFileSystem fs, final Path p) * @throws RpcServerException If an exception occurred in the RPC server * @throws UnexpectedServerException If server implementation throws * undeclared exception to RPC server + * @return input stream. */ public FSDataInputStream open(final Path f, final int bufferSize) throws AccessControlException, FileNotFoundException, @@ -1002,6 +1015,7 @@ public Boolean next(final AbstractFileSystem fs, final Path p) * * @param src path to be renamed * @param dst new path after rename + * @param options rename options. * * @throws AccessControlException If access is denied * @throws FileAlreadyExistsException If dst already exists and @@ -1053,7 +1067,7 @@ public Void next(final AbstractFileSystem fs, final Path p) /** * Set permission of a path. - * @param f + * @param f the path.
* @param permission - the new absolute permission (umask is not applied) * * @throws AccessControlException If access is denied @@ -1197,7 +1211,7 @@ public FileChecksum next(final AbstractFileSystem fs, final Path p) * Set the verify checksum flag for the file system denoted by the path. * This is only applicable if the * corresponding FileSystem supports checksum. By default doesn't do anything. - * @param verifyChecksum + * @param verifyChecksum verify check sum. * @param f set the verifyChecksum for the Filesystem containing this path * * @throws AccessControlException If access is denied @@ -1249,6 +1263,17 @@ public FileStatus next(final AbstractFileSystem fs, final Path p) }.resolve(this, absF); } + /** + * Synchronize client metadata state. + * + * @throws IOException If an I/O error occurred. + * @throws UnsupportedOperationException If file system for f is + * not supported. + */ + public void msync() throws IOException, UnsupportedOperationException { + defaultFS.msync(); + } + /** * Checks if the user can access a path. The mode specifies which access * checks to perform. If the requested permissions are granted, then the @@ -1604,9 +1629,12 @@ public RemoteIterator next( } /** + * List CorruptFile Blocks. + * + * @param path the path. * @return an iterator over the corrupt files under the given path * (may contain duplicates if a file has more than one corrupt block) - * @throws IOException + * @throws IOException If an I/O error occurred. */ public RemoteIterator listCorruptFileBlocks(Path path) throws IOException { @@ -1730,6 +1758,7 @@ public class Util { * @throws RpcServerException If an exception occurred in the RPC server * @throws UnexpectedServerException If server implementation throws * undeclared exception to RPC server + * @return if f exists true, not false. 
*/ public boolean exists(final Path f) throws AccessControlException, UnsupportedFileSystemException, IOException { @@ -1790,6 +1819,12 @@ public ContentSummary getContentSummary(Path f) /** * See {@link #listStatus(Path[], PathFilter)} + * + * @param files files. + * @throws AccessControlException If access is denied. + * @throws FileNotFoundException If files does not exist. + * @throws IOException If an I/O error occurred. + * @return file status array. */ public FileStatus[] listStatus(Path[] files) throws AccessControlException, FileNotFoundException, IOException { @@ -2045,36 +2080,29 @@ public LocatedFileStatus next() throws IOException { *
    ? *
    Matches any single character. * - *

    *

    * *
    Matches zero or more characters. * - *

    *

    [abc] *
    Matches a single character from character set * {a,b,c}. * - *

    *

    [a-b] *
    Matches a single character from the character range * {a...b}. Note: character a must be * lexicographically less than or equal to character b. * - *

    *

    [^a] *
    Matches a single char that is not from character set or range * {a}. Note that the ^ character must occur * immediately to the right of the opening bracket. * - *

    *

    \c *
    Removes (escapes) any special meaning of character c. * - *

    *

    {ab,cd} *
    Matches a string from the string set {ab, cd} - * - *

    + * *

    {ab,c{de,fh}} *
    Matches a string from string set {ab, cde, cfh} * @@ -2135,6 +2163,18 @@ public FileStatus[] globStatus(final Path pathPattern, /** * Copy file from src to dest. See * {@link #copy(Path, Path, boolean, boolean)} + * + * @param src src. + * @param dst dst. + * @throws AccessControlException If access is denied. + * @throws FileAlreadyExistsException If file src already exists. + * @throws FileNotFoundException if next file does not exist any more. + * @throws ParentNotDirectoryException If parent of src is not a + * directory. + * @throws UnsupportedFileSystemException If file system for + * src/dst is not supported. + * @throws IOException If an I/O error occurred. + * @return if success copy true, not false. */ public boolean copy(final Path src, final Path dst) throws AccessControlException, FileAlreadyExistsException, @@ -2145,8 +2185,8 @@ public boolean copy(final Path src, final Path dst) /** * Copy from src to dst, optionally deleting src and overwriting dst. - * @param src - * @param dst + * @param src src. + * @param dst dst. * @param deleteSource - delete src if true * @param overwrite overwrite dst if true; throw IOException if dst exists * and overwrite is false. @@ -2194,7 +2234,12 @@ public boolean copy(final Path src, final Path dst, boolean deleteSource, EnumSet createFlag = overwrite ? EnumSet.of( CreateFlag.CREATE, CreateFlag.OVERWRITE) : EnumSet.of(CreateFlag.CREATE); - InputStream in = open(qSrc); + InputStream in = awaitFuture(openFile(qSrc) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .optLong(FS_OPTION_OPENFILE_LENGTH, + fs.getLen()) // file length hint for object stores + .build()); try (OutputStream out = create(qDst, createFlag)) { IOUtils.copyBytes(in, out, conf, true); } finally { @@ -2262,7 +2307,7 @@ private static void checkDependencies(Path qualSrc, Path qualDst) * Are qualSrc and qualDst of the same file system? 
* @param qualPath1 - fully qualified path * @param qualPath2 - fully qualified path - * @return + * @return is same fs true,not false. */ private static boolean isSameFS(Path qualPath1, Path qualPath2) { URI srcUri = qualPath1.toUri(); @@ -2285,6 +2330,13 @@ public synchronized void run() { /** * Resolves all symbolic links in the specified path. * Returns the new path object. + * + * @param f the path. + * @throws FileNotFoundException If f does not exist. + * @throws UnresolvedLinkException If unresolved link occurred. + * @throws AccessControlException If access is denied. + * @throws IOException If an I/O error occurred. + * @return resolve path. */ protected Path resolve(final Path f) throws FileNotFoundException, UnresolvedLinkException, AccessControlException, IOException { @@ -2302,6 +2354,7 @@ public Path next(final AbstractFileSystem fs, final Path p) * to, but not including the final path component. * @param f path to resolve * @return the new path object. + * @throws IOException If an I/O error occurred. */ protected Path resolveIntermediate(final Path f) throws IOException { return new FSLinkResolver() { @@ -2320,7 +2373,7 @@ public FileStatus next(final AbstractFileSystem fs, final Path p) * @param f * Path which needs to be resolved * @return List of AbstractFileSystems accessed in the path - * @throws IOException + * @throws IOException If an I/O error occurred. */ Set resolveAbstractFileSystems(final Path f) throws IOException { @@ -2381,7 +2434,7 @@ public static Map getAllStatistics() { * @param p Path for which delegations tokens are requested. * @param renewer the account name that is allowed to renew the token. * @return List of delegation tokens. - * @throws IOException + * @throws IOException If an I/O error occurred. 
*/ @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" }) public List> getDelegationTokens( @@ -2533,7 +2586,7 @@ public AclStatus next(final AbstractFileSystem fs, final Path p) * @param path Path to modify * @param name xattr name. * @param value xattr value. - * @throws IOException + * @throws IOException If an I/O error occurred. */ public void setXAttr(Path path, String name, byte[] value) throws IOException { @@ -2552,7 +2605,7 @@ public void setXAttr(Path path, String name, byte[] value) * @param name xattr name. * @param value xattr value. * @param flag xattr set flag - * @throws IOException + * @throws IOException If an I/O error occurred. */ public void setXAttr(Path path, final String name, final byte[] value, final EnumSet flag) throws IOException { @@ -2577,7 +2630,7 @@ public Void next(final AbstractFileSystem fs, final Path p) * @param path Path to get extended attribute * @param name xattr name. * @return byte[] xattr value. - * @throws IOException + * @throws IOException If an I/O error occurred. */ public byte[] getXAttr(Path path, final String name) throws IOException { final Path absF = fixRelativePart(path); @@ -2600,7 +2653,7 @@ public byte[] next(final AbstractFileSystem fs, final Path p) * @param path Path to get extended attributes * @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs * of the file or directory - * @throws IOException + * @throws IOException If an I/O error occurred. */ public Map getXAttrs(Path path) throws IOException { final Path absF = fixRelativePart(path); @@ -2624,7 +2677,7 @@ public Map next(final AbstractFileSystem fs, final Path p) * @param names XAttr names. * @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs * of the file or directory - * @throws IOException + * @throws IOException If an I/O error occurred. 
*/ public Map getXAttrs(Path path, final List names) throws IOException { @@ -2647,7 +2700,7 @@ public Map next(final AbstractFileSystem fs, final Path p) * * @param path Path to remove extended attribute * @param name xattr name - * @throws IOException + * @throws IOException If an I/O error occurred. */ public void removeXAttr(Path path, final String name) throws IOException { final Path absF = fixRelativePart(path); @@ -2671,7 +2724,7 @@ public Void next(final AbstractFileSystem fs, final Path p) * @param path Path to get extended attributes * @return List{@literal <}String{@literal >} of the XAttr names of the * file or directory - * @throws IOException + * @throws IOException If an I/O error occurred. */ public List listXAttrs(Path path) throws IOException { final Path absF = fixRelativePart(path); @@ -2788,7 +2841,7 @@ public Void next(final AbstractFileSystem fs, final Path p) /** * Set the source path to satisfy storage policy. * @param path The source path referring to either a directory or a file. - * @throws IOException + * @throws IOException If an I/O error occurred. */ public void satisfyStoragePolicy(final Path path) throws IOException { @@ -2810,6 +2863,7 @@ public Void next(final AbstractFileSystem fs, final Path p) * @param policyName the name of the target storage policy. The list * of supported Storage policies can be retrieved * via {@link #getAllStoragePolicies}. + * @throws IOException If an I/O error occurred. */ public void setStoragePolicy(final Path path, final String policyName) throws IOException { @@ -2827,7 +2881,7 @@ public Void next(final AbstractFileSystem fs, final Path p) /** * Unset the storage policy set for a given file or directory. * @param src file or directory path. - * @throws IOException + * @throws IOException If an I/O error occurred. 
*/ public void unsetStoragePolicy(final Path src) throws IOException { final Path absF = fixRelativePart(src); @@ -2846,7 +2900,7 @@ public Void next(final AbstractFileSystem fs, final Path p) * * @param path file or directory path. * @return storage policy for give file. - * @throws IOException + * @throws IOException If an I/O error occurred. */ public BlockStoragePolicySpi getStoragePolicy(Path path) throws IOException { final Path absF = fixRelativePart(path); @@ -2864,7 +2918,7 @@ public BlockStoragePolicySpi next(final AbstractFileSystem fs, * Retrieve all the storage policies supported by this file system. * * @return all storage policies supported by this filesystem. - * @throws IOException + * @throws IOException If an I/O error occurred. */ public Collection getAllStoragePolicies() throws IOException { @@ -2926,9 +2980,11 @@ public CompletableFuture build() throws IOException { final Path absF = fixRelativePart(getPath()); OpenFileParameters parameters = new OpenFileParameters() .withMandatoryKeys(getMandatoryKeys()) + .withOptionalKeys(getOptionalKeys()) .withOptions(getOptions()) - .withBufferSize(getBufferSize()) - .withStatus(getStatus()); + .withStatus(getStatus()) + .withBufferSize( + getOptions().getInt(FS_OPTION_OPENFILE_BUFFER_SIZE, getBufferSize())); return new FSLinkResolver>() { @Override public CompletableFuture next( @@ -2957,4 +3013,31 @@ public boolean hasPathCapability(Path path, String capability) (fs, p) -> fs.hasPathCapability(p, capability)); } + /** + * Return a set of server default configuration values based on path. + * @param path path to fetch server defaults + * @return server default configuration values for path + * @throws IOException an I/O error occurred + */ + public FsServerDefaults getServerDefaults(final Path path) + throws IOException { + return FsLinkResolution.resolve(this, + fixRelativePart(path), + (fs, p) -> fs.getServerDefaults(p)); + } + + /** + * Create a multipart uploader. 
+ * @param basePath file path under which all files are uploaded + * @return a MultipartUploaderBuilder object to build the uploader + * @throws IOException if some early checks cause IO failures. + * @throws UnsupportedOperationException if support is checked early. + */ + @InterfaceStability.Unstable + public MultipartUploaderBuilder createMultipartUploader(Path basePath) + throws IOException { + return FsLinkResolution.resolve(this, + fixRelativePart(basePath), + (fs, p) -> fs.createMultipartUploader(p)); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java index 4cfce2eed6c2c..059ec4fd63d1a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java @@ -24,8 +24,8 @@ import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.crypto.CryptoProtocolVersion; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** * FileEncryptionInfo encapsulates all the encryption-related information for @@ -52,6 +52,7 @@ public class FileEncryptionInfo implements Serializable { * @param keyName name of the key used for the encryption zone * @param ezKeyVersionName name of the KeyVersion used to encrypt the * encrypted data encryption key. + * @param version version. */ public FileEncryptionInfo(final CipherSuite suite, final CryptoProtocolVersion version, final byte[] edek, @@ -134,6 +135,8 @@ public String toString() { * * NOTE: * Currently this method is used by CLI for backward compatibility. 
+ * + * @return stable string. */ public String toStringStable() { StringBuilder builder = new StringBuilder("{") diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java new file mode 100644 index 0000000000000..97da65585d6d2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.nio.ByteBuffer; +import java.util.concurrent.CompletableFuture; + +import org.apache.hadoop.fs.impl.FileRangeImpl; + +/** + * A byte range of a file. + * This is used for the asynchronous gather read API of + * {@link PositionedReadable#readVectored}. + */ +public interface FileRange { + + /** + * Get the starting offset of the range. + * @return the byte offset of the start + */ + long getOffset(); + + /** + * Get the length of the range. + * @return the number of bytes in the range. + */ + int getLength(); + + /** + * Get the future data for this range. 
+ * @return the future for the {@link ByteBuffer} that contains the data + */ + CompletableFuture getData(); + + /** + * Set a future for this range's data. + * This method is called by {@link PositionedReadable#readVectored} to store the + * data for the user to pick up later via {@link #getData}. + * @param data the future of the ByteBuffer that will have the data + */ + void setData(CompletableFuture data); + + /** + * Get any reference passed in to the file range constructor. + * This is not used by any implementation code; it is to help + * bind this API to libraries retrieving multiple stripes of + * data in parallel. + * @return a reference or null. + */ + Object getReference(); + + /** + * Factory method to create a FileRange object. + * @param offset starting offset of the range. + * @param length length of the range. + * @return a new instance of FileRangeImpl. + */ + static FileRange createFileRange(long offset, int length) { + return new FileRangeImpl(offset, length, null); + } + + /** + * Factory method to create a FileRange object. + * @param offset starting offset of the range. + * @param length length of the range. + * @param reference nullable reference to store in the range. + * @return a new instance of FileRangeImpl. 
+ */ + static FileRange createFileRange(long offset, int length, Object reference) { + return new FileRangeImpl(offset, length, reference); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java index d7ca8f172f8e2..fcef578b072f1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java @@ -116,6 +116,17 @@ public FileStatus(long length, boolean isdir, int block_replication, /** * Constructor for file systems on which symbolic links are not supported + * + * @param length length. + * @param isdir isdir. + * @param block_replication block replication. + * @param blocksize block size. + * @param modification_time modification time. + * @param access_time access_time. + * @param permission permission. + * @param owner owner. + * @param group group. + * @param path the path. */ public FileStatus(long length, boolean isdir, int block_replication, @@ -182,6 +193,7 @@ public FileStatus(long length, boolean isdir, int block_replication, * Copy constructor. * * @param other FileStatus to copy + * @throws IOException raised on errors performing I/O. */ public FileStatus(FileStatus other) throws IOException { // It's important to call the getters here instead of directly accessing the @@ -375,6 +387,8 @@ protected void setGroup(String group) { /** * @return The contents of the symbolic link. + * + * @throws IOException raised on errors performing I/O. 
*/ public Path getSymlink() throws IOException { if (!isSymlink()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 358db744e65be..6caf1e7167ac2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -44,6 +44,7 @@ import java.util.Stack; import java.util.TreeSet; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.logging.Log; @@ -75,20 +76,22 @@ import org.apache.hadoop.security.token.DelegationTokenIssuer; import org.apache.hadoop.util.ClassUtil; import org.apache.hadoop.util.DataChecksum; +import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; -import org.apache.htrace.core.Tracer; -import org.apache.htrace.core.TraceScope; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceScope; -import com.google.common.base.Preconditions; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE; +import static org.apache.hadoop.util.Preconditions.checkArgument; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.*; import static 
org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; @@ -103,13 +106,13 @@ * All user code that may potentially use the Hadoop Distributed * File System should be written to use a FileSystem object or its * successor, {@link FileContext}. - * + *

    *

    * The local implementation is {@link LocalFileSystem} and distributed * implementation is DistributedFileSystem. There are other implementations * for object stores and (outside the Apache Hadoop codebase), * third party filesystems. - *

    + *

    * Notes *
      *
    1. The behaviour of the filesystem is @@ -132,23 +135,36 @@ * New methods may be marked as Unstable or Evolving for their initial release, * as a warning that they are new and may change based on the * experience of use in applications. + *

      * Important note for developers - * - * If you're making changes here to the public API or protected methods, + *

      + * If you are making changes here to the public API or protected methods, * you must review the following subclasses and make sure that * they are filtering/passing through new methods as appropriate. * - * {@link FilterFileSystem}: methods are passed through. + * {@link FilterFileSystem}: methods are passed through. If not, + * then {@code TestFilterFileSystem.MustNotImplement} must be + * updated with the unsupported interface. + * Furthermore, if the new API's support is probed for via + * {@link #hasPathCapability(Path, String)} then + * {@link FilterFileSystem#hasPathCapability(Path, String)} + * must return false, always. + *

      * {@link ChecksumFileSystem}: checksums are created and * verified. + *

      * {@code TestHarFileSystem} will need its {@code MustNotImplement} * interface updated. * + *

      * There are some external places your changes will break things. * Do co-ordinate changes here. + *

      * * HBase: HBoss + *

      * Hive: HiveShim23 + *

      * {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java} * *****************************************************************/ @@ -187,7 +203,7 @@ public abstract class FileSystem extends Configured public static final String USER_HOME_PREFIX = "/user"; /** FileSystem cache. */ - static final Cache CACHE = new Cache(); + static final Cache CACHE = new Cache(new Configuration()); /** The key this instance is stored under in the cache. */ private Cache.Key key; @@ -267,6 +283,8 @@ public FileSystem run() throws IOException { /** * Returns the configured FileSystem implementation. * @param conf the configuration to use + * @return FileSystem. + * @throws IOException If an I/O error occurred. */ public static FileSystem get(Configuration conf) throws IOException { return get(getDefaultUri(conf), conf); @@ -360,6 +378,7 @@ public String getScheme() { * implement that method. * * @see #canonicalizeUri(URI) + * @return the URI of this filesystem. */ protected URI getCanonicalUri() { return canonicalizeUri(getUri()); @@ -376,6 +395,7 @@ protected URI getCanonicalUri() { * not specified and if {@link #getDefaultPort()} returns a * default port. * + * @param uri url. * @return URI * @see NetUtils#getCanonicalUri(URI, int) */ @@ -439,11 +459,21 @@ public String getCanonicalServiceName() { : null; } - /** @deprecated call {@link #getUri()} instead.*/ + /** + * @return uri to string. + * @deprecated call {@link #getUri()} instead. + */ @Deprecated public String getName() { return getUri().toString(); } - /** @deprecated call {@link #get(URI, Configuration)} instead. */ + /** + * @deprecated call {@link #get(URI, Configuration)} instead. + * + * @param name name. + * @param conf configuration. + * @return file system. + * @throws IOException If an I/O error occurred. 
+ */ @Deprecated public static FileSystem getNamed(String name, Configuration conf) throws IOException { @@ -498,6 +528,9 @@ public static LocalFileSystem getLocal(Configuration conf) * configuration and URI, cached and returned to the caller. *
    2. *
    + @param uri uri of the filesystem. + * @param conf the configuration to use. + * @return filesystem instance. * @throws IOException if the FileSystem cannot be instantiated. */ public static FileSystem get(URI uri, Configuration conf) throws IOException { @@ -527,7 +560,7 @@ public static FileSystem get(URI uri, Configuration conf) throws IOException { /** * Returns the FileSystem for this URI's scheme and authority and the * given user. Internally invokes {@link #newInstance(URI, Configuration)} - * @param uri of the filesystem + * @param uri uri of the filesystem. * @param conf the configuration to use * @param user to perform the get as * @return filesystem instance @@ -607,6 +640,7 @@ public static LocalFileSystem newInstanceLocal(Configuration conf) * @throws IOException a problem arose closing one or more filesystem. */ public static void closeAll() throws IOException { + debugLogFileSystemClose("closeAll", ""); CACHE.closeAll(); } @@ -617,10 +651,24 @@ public static void closeAll() throws IOException { * @throws IOException a problem arose closing one or more filesystem. */ public static void closeAllForUGI(UserGroupInformation ugi) - throws IOException { + throws IOException { + debugLogFileSystemClose("closeAllForUGI", "UGI: " + ugi); CACHE.closeAll(ugi); } + private static void debugLogFileSystemClose(String methodName, + String additionalInfo) { + if (LOGGER.isDebugEnabled()) { + Throwable throwable = new Throwable().fillInStackTrace(); + LOGGER.debug("FileSystem.{}() by method: {}); {}", methodName, + throwable.getStackTrace()[2], additionalInfo); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("FileSystem.{}() full stack trace:", methodName, + throwable); + } + } + } + /** * Qualify a path to one which uses this FileSystem and, if relative, * made absolute. 
@@ -830,6 +878,7 @@ protected void checkPath(Path path) { * @param start offset into the given file * @param len length for which to get locations for * @throws IOException IO failure + * @return block location array. */ public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { @@ -870,6 +919,7 @@ public BlockLocation[] getFileBlockLocations(FileStatus file, * @param len length for which to get locations for * @throws FileNotFoundException when the path does not exist * @throws IOException IO failure + * @return block location array. */ public BlockLocation[] getFileBlockLocations(Path p, long start, long len) throws IOException { @@ -932,6 +982,7 @@ public Path resolvePath(final Path p) throws IOException { * @param f the file name to open * @param bufferSize the size of the buffer to be used. * @throws IOException IO failure + * @return input stream. */ public abstract FSDataInputStream open(Path f, int bufferSize) throws IOException; @@ -940,6 +991,7 @@ public abstract FSDataInputStream open(Path f, int bufferSize) * Opens an FSDataInputStream at the indicated Path. * @param f the file to open * @throws IOException IO failure + * @return input stream. */ public FSDataInputStream open(Path f) throws IOException { return open(f, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY, @@ -957,6 +1009,7 @@ public FSDataInputStream open(Path f) throws IOException { * @throws IOException IO failure * @throws UnsupportedOperationException If {@link #open(PathHandle, int)} * not overridden by subclass + * @return input stream. */ public FSDataInputStream open(PathHandle fd) throws IOException { return open(fd, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY, @@ -974,6 +1027,7 @@ public FSDataInputStream open(PathHandle fd) throws IOException { * not satisfied * @throws IOException IO failure * @throws UnsupportedOperationException If not overridden by subclass + * @return input stream. 
*/ public FSDataInputStream open(PathHandle fd, int bufferSize) throws IOException { @@ -991,6 +1045,7 @@ public FSDataInputStream open(PathHandle fd, int bufferSize) * not overridden by subclass. * @throws UnsupportedOperationException If this FileSystem cannot enforce * the specified constraints. + * @return path handle. */ public final PathHandle getPathHandle(FileStatus stat, HandleOpt... opt) { // method is final with a default so clients calling getPathHandle(stat) @@ -1006,6 +1061,7 @@ public final PathHandle getPathHandle(FileStatus stat, HandleOpt... opt) { * @param stat Referent in the target FileSystem * @param opt Constraints that determine the validity of the * {@link PathHandle} reference. + * @return path handle. */ protected PathHandle createPathHandle(FileStatus stat, HandleOpt... opt) { throw new UnsupportedOperationException(); @@ -1016,6 +1072,7 @@ protected PathHandle createPathHandle(FileStatus stat, HandleOpt... opt) { * Files are overwritten by default. * @param f the file to create * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f) throws IOException { return create(f, true); @@ -1027,6 +1084,7 @@ public FSDataOutputStream create(Path f) throws IOException { * @param overwrite if a file with this name already exists, then if true, * the file will be overwritten, and if false an exception will be thrown. * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f, boolean overwrite) throws IOException { @@ -1044,6 +1102,7 @@ public FSDataOutputStream create(Path f, boolean overwrite) * @param f the file to create * @param progress to report progress * @throws IOException IO failure + * @return output stream. 
*/ public FSDataOutputStream create(Path f, Progressable progress) throws IOException { @@ -1060,6 +1119,7 @@ public FSDataOutputStream create(Path f, Progressable progress) * @param f the file to create * @param replication the replication factor * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f, short replication) throws IOException { @@ -1078,6 +1138,7 @@ public FSDataOutputStream create(Path f, short replication) * @param replication the replication factor * @param progress to report progress * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f, short replication, Progressable progress) throws IOException { @@ -1095,6 +1156,7 @@ public FSDataOutputStream create(Path f, short replication, * the file will be overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used. * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f, boolean overwrite, @@ -1114,7 +1176,9 @@ public FSDataOutputStream create(Path f, * @param overwrite if a file with this name already exists, then if true, * the file will be overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used. + * @param progress to report progress. * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f, boolean overwrite, @@ -1134,7 +1198,9 @@ public FSDataOutputStream create(Path f, * the file will be overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used. * @param replication required block replication for the file. + * @param blockSize block size. * @throws IOException IO failure + * @return output stream. 
*/ public FSDataOutputStream create(Path f, boolean overwrite, @@ -1152,7 +1218,10 @@ public FSDataOutputStream create(Path f, * the file will be overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used. * @param replication required block replication for the file. + * @param blockSize block size. + * @param progress to report progress. * @throws IOException IO failure + * @return output stream. */ public FSDataOutputStream create(Path f, boolean overwrite, @@ -1179,6 +1248,7 @@ public FSDataOutputStream create(Path f, * @param progress the progress reporter * @throws IOException IO failure * @see #setPermission(Path, FsPermission) + * @return output stream. */ public abstract FSDataOutputStream create(Path f, FsPermission permission, @@ -1200,6 +1270,7 @@ public abstract FSDataOutputStream create(Path f, * @param progress the progress reporter * @throws IOException IO failure * @see #setPermission(Path, FsPermission) + * @return output stream. */ public FSDataOutputStream create(Path f, FsPermission permission, @@ -1226,6 +1297,7 @@ public FSDataOutputStream create(Path f, * found in conf will be used. * @throws IOException IO failure * @see #setPermission(Path, FsPermission) + * @return output stream. */ public FSDataOutputStream create(Path f, FsPermission permission, @@ -1247,6 +1319,16 @@ public FSDataOutputStream create(Path f, * the permission with umask before calling this method. * This a temporary method added to support the transition from FileSystem * to FileContext for user applications. + * + * @param f path. + * @param absolutePermission permission. + * @param flag create flag. + * @param bufferSize buffer size. + * @param replication replication. + * @param blockSize block size. + * @param progress progress. + * @param checksumOpt check sum opt. + * @return output stream. 
* @throws IOException IO failure */ @Deprecated @@ -1301,6 +1383,11 @@ protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) * with umask before calling this method. * This a temporary method added to support the transition from FileSystem * to FileContext for user applications. + * + * @param f the path. + * @param absolutePermission permission. + * @param createParent create parent. + * @throws IOException IO failure. */ @Deprecated protected void primitiveMkdir(Path f, FsPermission absolutePermission, @@ -1340,6 +1427,7 @@ protected void primitiveMkdir(Path f, FsPermission absolutePermission, * @param progress the progress reporter * @throws IOException IO failure * @see #setPermission(Path, FsPermission) + * @return output stream. */ public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, @@ -1363,6 +1451,7 @@ public FSDataOutputStream createNonRecursive(Path f, * @param progress the progress reporter * @throws IOException IO failure * @see #setPermission(Path, FsPermission) + * @return output stream. */ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, @@ -1386,6 +1475,7 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, * @param progress the progress reporter * @throws IOException IO failure * @see #setPermission(Path, FsPermission) + * @return output stream. */ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, long blockSize, @@ -1400,6 +1490,7 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, * Important: the default implementation is not atomic * @param f path to use for create * @throws IOException IO failure + * @return if create new file success true,not false. 
*/ public boolean createNewFile(Path f) throws IOException { if (exists(f)) { @@ -1420,6 +1511,7 @@ public boolean createNewFile(Path f) throws IOException { * @throws IOException IO failure * @throws UnsupportedOperationException if the operation is unsupported * (default). + * @return output stream. */ public FSDataOutputStream append(Path f) throws IOException { return append(f, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY, @@ -1434,6 +1526,7 @@ public FSDataOutputStream append(Path f) throws IOException { * @throws IOException IO failure * @throws UnsupportedOperationException if the operation is unsupported * (default). + * @return output stream. */ public FSDataOutputStream append(Path f, int bufferSize) throws IOException { return append(f, bufferSize, null); @@ -1447,10 +1540,44 @@ public FSDataOutputStream append(Path f, int bufferSize) throws IOException { * @throws IOException IO failure * @throws UnsupportedOperationException if the operation is unsupported * (default). + * @return output stream. */ public abstract FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException; + /** + * Append to an existing file (optional operation). + * @param f the existing file to be appended. + * @param appendToNewBlock whether to append data to a new block + * instead of the end of the last partial block + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default). + * @return output stream. + */ + public FSDataOutputStream append(Path f, boolean appendToNewBlock) throws IOException { + return append(f, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY, + IO_FILE_BUFFER_SIZE_DEFAULT), null, appendToNewBlock); + } + + /** + * Append to an existing file (optional operation). + * This function is used for being overridden by some FileSystem like DistributedFileSystem + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. 
+ * @param progress for reporting progress if it is not null. + * @param appendToNewBlock whether to append data to a new block + * instead of the end of the last partial block + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default). + * @return output stream. + */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress, boolean appendToNewBlock) throws IOException { + return append(f, bufferSize, progress); + } + /** * Concat existing files together. * @param trg the path to the target destination. @@ -1485,7 +1612,7 @@ public short getReplication(Path src) throws IOException { * This is the default behavior. * @param src file name * @param replication new replication - * @throws IOException + * @throws IOException an IO failure. * @return true if successful, or the feature in unsupported; * false if replication is supported but the file does not exist, * or is a directory @@ -1514,11 +1641,12 @@ public boolean setReplication(Path src, short replication) *

    * If OVERWRITE option is not passed as an argument, rename fails * if the dst already exists. + *

    *

    * If OVERWRITE option is passed as an argument, rename overwrites * the dst if it is a file or an empty directory. Rename fails if dst is * a non-empty directory. - *

    + *

    * Note that atomicity of rename is dependent on the file system * implementation. Please refer to the file system documentation for * details. This default implementation is non atomic. @@ -1526,9 +1654,11 @@ public boolean setReplication(Path src, short replication) * This method is deprecated since it is a temporary method added to * support the transition from FileSystem to FileContext for user * applications. + *

    * * @param src path to be renamed * @param dst new path after rename + * @param options rename options. * @throws FileNotFoundException src path does not exist, or the parent * path of dst does not exist. * @throws FileAlreadyExistsException dest path exists and is a file @@ -1623,6 +1753,9 @@ public boolean truncate(Path f, long newLength) throws IOException { /** * Delete a file/directory. + * @param f the path. + * @throws IOException IO failure. + * @return if delete success true, not false. * @deprecated Use {@link #delete(Path, boolean)} instead. */ @Deprecated @@ -1739,6 +1872,7 @@ public boolean exists(Path f) throws IOException { * @param f path to check * @throws IOException IO failure * @deprecated Use {@link #getFileStatus(Path)} instead + * @return if f is directory true, not false. */ @Deprecated public boolean isDirectory(Path f) throws IOException { @@ -1756,6 +1890,7 @@ public boolean isDirectory(Path f) throws IOException { * @param f path to check * @throws IOException IO failure * @deprecated Use {@link #getFileStatus(Path)} instead + * @return if f is file true, not false. */ @Deprecated public boolean isFile(Path f) throws IOException { @@ -1768,6 +1903,7 @@ public boolean isFile(Path f) throws IOException { /** * The number of bytes in a file. + * @param f the path. * @return the number of bytes; 0 for a directory * @deprecated Use {@link #getFileStatus(Path)} instead. * @throws FileNotFoundException if the path does not resolve @@ -1782,6 +1918,7 @@ public long getLength(Path f) throws IOException { * @param f path to use * @throws FileNotFoundException if the path does not resolve * @throws IOException IO failure + * @return content summary. */ public ContentSummary getContentSummary(Path f) throws IOException { FileStatus status = getFileStatus(f); @@ -1916,9 +2053,9 @@ public boolean hasMore() { * @param f Path to list * @param token opaque iteration token returned by previous call, or null * if this is the first call. 
- * @return - * @throws FileNotFoundException - * @throws IOException + * @return directory entries. + * @throws FileNotFoundException when the path does not exist. + * @throws IOException If an I/O error occurred. */ @InterfaceAudience.Private protected DirectoryEntries listStatusBatch(Path f, byte[] token) throws @@ -1949,6 +2086,8 @@ private void listStatus(ArrayList results, Path f, /** * List corrupted file blocks. + * + * @param path the path. * @return an iterator over the corrupt files under the given path * (may contain duplicates if a file has more than one corrupt block) * @throws UnsupportedOperationException if the operation is unsupported @@ -2042,36 +2181,29 @@ public FileStatus[] listStatus(Path[] files, PathFilter filter) *
    ? *
    Matches any single character. * - *

    *

    * *
    Matches zero or more characters. * - *

    *

    [abc] *
    Matches a single character from character set * {a,b,c}. * - *

    *

    [a-b] *
    Matches a single character from the character range * {a...b}. Note that character a must be * lexicographically less than or equal to character b. * - *

    *

    [^a] *
    Matches a single character that is not from character set or range * {a}. Note that the ^ character must occur * immediately to the right of the opening bracket. * - *

    *

    \c *
    Removes (escapes) any special meaning of character c. * - *

    *

    {ab,cd} *
    Matches a string from the string set {ab, cd} * - *

    *

    {ab,c{de,fh}} *
    Matches a string from the string set {ab, cde, cfh} * @@ -2200,7 +2332,9 @@ private void fetchMore() throws IOException { @Override @SuppressWarnings("unchecked") public T next() throws IOException { - Preconditions.checkState(hasNext(), "No more items in iterator"); + if (!hasNext()) { + throw new NoSuchElementException("No more items in iterator"); + } if (i == entries.getEntries().length) { fetchMore(); } @@ -2281,8 +2415,14 @@ private void handleFileStat(LocatedFileStatus stat) throws IOException { if (stat.isFile()) { // file curFile = stat; } else if (recursive) { // directory - itors.push(curItor); - curItor = listLocatedStatus(stat.getPath()); + try { + RemoteIterator newDirItor = listLocatedStatus(stat.getPath()); + itors.push(curItor); + curItor = newDirItor; + } catch (FileNotFoundException ignored) { + LOGGER.debug("Directory {} deleted while attempting for recursive listing", + stat.getPath()); + } } } @@ -2300,6 +2440,7 @@ public LocatedFileStatus next() throws IOException { /** Return the current user's home directory in this FileSystem. * The default implementation returns {@code "/user/$USER/"}. + * @return the path. */ public Path getHomeDirectory() { String username; @@ -2362,6 +2503,7 @@ public boolean mkdirs(Path f) throws IOException { * @param f path to create * @param permission to apply to f * @throws IOException IO failure + * @return if mkdir success true, not false. */ public abstract boolean mkdirs(Path f, FsPermission permission ) throws IOException; @@ -2409,6 +2551,7 @@ public void moveFromLocalFile(Path src, Path dst) * @param delSrc whether to delete the src * @param src path * @param dst path + * @throws IOException IO failure. 
*/ public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException { @@ -2523,6 +2666,7 @@ public void copyToLocalFile(boolean delSrc, Path src, Path dst, * @param fsOutputFile path of output file * @param tmpLocalFile path of local tmp file * @throws IOException IO failure + * @return the path. */ public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException { @@ -2556,14 +2700,21 @@ public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) */ @Override public void close() throws IOException { + debugLogFileSystemClose("close", "Key: " + key + "; URI: " + getUri() + + "; Object Identity Hash: " + + Integer.toHexString(System.identityHashCode(this))); // delete all files that were marked as delete-on-exit. - processDeleteOnExit(); - CACHE.remove(this.key, this); + try { + processDeleteOnExit(); + } finally { + CACHE.remove(this.key, this); + } } /** * Return the total size of all files in the filesystem. * @throws IOException IO failure + * @return the number of path used. */ public long getUsed() throws IOException { Path path = new Path("/"); @@ -2572,7 +2723,9 @@ public long getUsed() throws IOException { /** * Return the total size of all files from a specified path. + * @param path the path. * @throws IOException IO failure + * @return the number of path content summary. */ public long getUsed(Path path) throws IOException { return getContentSummary(path).getLength(); @@ -2595,6 +2748,7 @@ public long getBlockSize(Path f) throws IOException { * Return the number of bytes that large input files should be optimally * be split into to minimize I/O time. * @deprecated use {@link #getDefaultBlockSize(Path)} instead + * @return default block size. */ @Deprecated public long getDefaultBlockSize() { @@ -2641,6 +2795,20 @@ public short getDefaultReplication(Path path) { */ public abstract FileStatus getFileStatus(Path f) throws IOException; + /** + * Synchronize client metadata state. + *

    + * In some FileSystem implementations such as HDFS metadata + * synchronization is essential to guarantee consistency of read requests + * particularly in HA setting. + * @throws IOException If an I/O error occurred. + * @throws UnsupportedOperationException if the operation is unsupported. + */ + public void msync() throws IOException, UnsupportedOperationException { + throw new UnsupportedOperationException(getClass().getCanonicalName() + + " does not support method msync"); + } + /** * Checks if the user can access a path. The mode specifies which access * checks to perform. If the requested permissions are granted, then the @@ -2710,6 +2878,8 @@ static void checkAccessPermissions(FileStatus stat, FsAction mode) /** * See {@link FileContext#fixRelativePart}. + * @param p the path. + * @return relative part. */ protected Path fixRelativePart(Path p) { if (p.isUriPathAbsolute()) { @@ -2721,6 +2891,18 @@ protected Path fixRelativePart(Path p) { /** * See {@link FileContext#createSymlink(Path, Path, boolean)}. + * + * @param target target path. + * @param link link. + * @param createParent create parent. + * @throws AccessControlException if access is denied. + * @throws FileAlreadyExistsException when the path already exists. + * @throws FileNotFoundException when the path does not exist. + * @throws ParentNotDirectoryException if the parent path of dest is not + * a directory. + * @throws UnsupportedFileSystemException if there was no known implementation + * for the scheme. + * @throws IOException raised on errors performing I/O. */ public void createSymlink(final Path target, final Path link, final boolean createParent) throws AccessControlException, @@ -2734,8 +2916,14 @@ public void createSymlink(final Path target, final Path link, /** * See {@link FileContext#getFileLinkStatus(Path)}. - * @throws FileNotFoundException when the path does not exist - * @throws IOException see specific implementation + * + * @param f the path. 
+ * @throws AccessControlException if access is denied. + * @throws FileNotFoundException when the path does not exist. + * @throws IOException raised on errors performing I/O. + * @throws UnsupportedFileSystemException if there was no known implementation + * for the scheme. + * @return file status */ public FileStatus getFileLinkStatus(final Path f) throws AccessControlException, FileNotFoundException, @@ -2746,6 +2934,7 @@ public FileStatus getFileLinkStatus(final Path f) /** * See {@link AbstractFileSystem#supportsSymlinks()}. + * @return true if symlinks are supported, false otherwise. */ public boolean supportsSymlinks() { return false; @@ -2753,8 +2942,11 @@ public boolean supportsSymlinks() { /** * See {@link FileContext#getLinkTarget(Path)}. + * @param f the path. * @throws UnsupportedOperationException if the operation is unsupported * (default outcome). + * @throws IOException IO failure. + * @return the path. */ public Path getLinkTarget(Path f) throws IOException { // Supporting filesystems should override this method @@ -2764,8 +2956,11 @@ public Path getLinkTarget(Path f) throws IOException { /** * See {@link AbstractFileSystem#getLinkTarget(Path)}. + * @param f the path. * @throws UnsupportedOperationException if the operation is unsupported * (default outcome). + * @throws IOException IO failure. + * @return the path. */ protected Path resolveLink(Path f) throws IOException { // Supporting filesystems should override this method @@ -3169,7 +3364,7 @@ public void removeXAttr(Path path, String name) throws IOException { /** * Set the source path to satisfy storage policy. * @param path The source path referring to either a directory or a file. - * @throws IOException + * @throws IOException If an I/O error occurred. 
*/ public void satisfyStoragePolicy(final Path path) throws IOException { throw new UnsupportedOperationException( @@ -3405,7 +3600,9 @@ public static Class getFileSystemClass(String scheme, private static FileSystem createFileSystem(URI uri, Configuration conf) throws IOException { Tracer tracer = FsTracer.get(conf); - try(TraceScope scope = tracer.newScope("FileSystem#createFileSystem")) { + try(TraceScope scope = tracer.newScope("FileSystem#createFileSystem"); + DurationInfo ignored = + new DurationInfo(LOGGER, false, "Creating FS %s", uri)) { scope.addKVAnnotation("scheme", uri.getScheme()); Class clazz = getFileSystemClass(uri.getScheme(), conf); @@ -3415,9 +3612,9 @@ private static FileSystem createFileSystem(URI uri, Configuration conf) } catch (IOException | RuntimeException e) { // exception raised during initialization. // log summary at warn and full stack at debug - LOGGER.warn("Failed to initialize fileystem {}: {}", + LOGGER.warn("Failed to initialize filesystem {}: {}", uri, e.toString()); - LOGGER.debug("Failed to initialize fileystem", e); + LOGGER.debug("Failed to initialize filesystem", e); // then (robustly) close the FS, so as to invoke any // cleanup code. IOUtils.cleanupWithLogger(LOGGER, fs); @@ -3428,15 +3625,39 @@ private static FileSystem createFileSystem(URI uri, Configuration conf) } /** Caching FileSystem objects. */ - static class Cache { + static final class Cache { private final ClientFinalizer clientFinalizer = new ClientFinalizer(); private final Map map = new HashMap<>(); private final Set toAutoClose = new HashSet<>(); + /** Semaphore used to serialize creation of new FS instances. */ + private final Semaphore creatorPermits; + + /** + * Counter of the number of discarded filesystem instances + * in this cache. Primarily for testing, but it could possibly + * be made visible as some kind of metric. + */ + private final AtomicLong discardedInstances = new AtomicLong(0); + /** A variable that makes all objects in the cache unique. 
*/ private static AtomicLong unique = new AtomicLong(1); + /** + * Instantiate. The configuration is used to read the + * count of permits issued for concurrent creation + * of filesystem instances. + * @param conf configuration + */ + Cache(final Configuration conf) { + int permits = conf.getInt(FS_CREATION_PARALLEL_COUNT, + FS_CREATION_PARALLEL_COUNT_DEFAULT); + checkArgument(permits > 0, "Invalid value of %s: %s", + FS_CREATION_PARALLEL_COUNT, permits); + creatorPermits = new Semaphore(permits); + } + FileSystem get(URI uri, Configuration conf) throws IOException{ Key key = new Key(uri, conf); return getInternal(uri, conf, key); @@ -3459,7 +3680,7 @@ FileSystem getUnique(URI uri, Configuration conf) throws IOException{ * @param conf configuration * @param key key to store/retrieve this FileSystem in the cache * @return a cached or newly instantiated FileSystem. - * @throws IOException + * @throws IOException If an I/O error occurred. */ private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ @@ -3470,33 +3691,82 @@ private FileSystem getInternal(URI uri, Configuration conf, Key key) if (fs != null) { return fs; } - - fs = createFileSystem(uri, conf); - final long timeout = conf.getTimeDuration(SERVICE_SHUTDOWN_TIMEOUT, - SERVICE_SHUTDOWN_TIMEOUT_DEFAULT, - ShutdownHookManager.TIME_UNIT_DEFAULT); - synchronized (this) { // refetch the lock again - FileSystem oldfs = map.get(key); - if (oldfs != null) { // a file system is created while lock is releasing - fs.close(); // close the new file system - return oldfs; // return the old file system - } - - // now insert the new file system into the map - if (map.isEmpty() - && !ShutdownHookManager.get().isShutdownInProgress()) { - ShutdownHookManager.get().addShutdownHook(clientFinalizer, - SHUTDOWN_HOOK_PRIORITY, timeout, - ShutdownHookManager.TIME_UNIT_DEFAULT); + // fs not yet created, acquire lock + // to construct an instance. 
+ try (DurationInfo d = new DurationInfo(LOGGER, false, + "Acquiring creator semaphore for %s", uri)) { + creatorPermits.acquireUninterruptibly(); + } + FileSystem fsToClose = null; + try { + // See if FS was instantiated by another thread while waiting + // for the permit. + synchronized (this) { + fs = map.get(key); } - fs.key = key; - map.put(key, fs); - if (conf.getBoolean( - FS_AUTOMATIC_CLOSE_KEY, FS_AUTOMATIC_CLOSE_DEFAULT)) { - toAutoClose.add(key); + if (fs != null) { + LOGGER.debug("Filesystem {} created while awaiting semaphore", uri); + return fs; } - return fs; + // create the filesystem + fs = createFileSystem(uri, conf); + final long timeout = conf.getTimeDuration(SERVICE_SHUTDOWN_TIMEOUT, + SERVICE_SHUTDOWN_TIMEOUT_DEFAULT, + ShutdownHookManager.TIME_UNIT_DEFAULT); + // any FS to close outside of the synchronized section + synchronized (this) { // lock on the Cache object + + // see if there is now an entry for the FS, which happens + // if another thread's creation overlapped with this one. + FileSystem oldfs = map.get(key); + if (oldfs != null) { + // a file system was created in a separate thread. + // save the FS reference to close outside all locks, + // and switch to returning the oldFS + fsToClose = fs; + fs = oldfs; + } else { + // register the clientFinalizer if needed and shutdown isn't + // already active + if (map.isEmpty() + && !ShutdownHookManager.get().isShutdownInProgress()) { + ShutdownHookManager.get().addShutdownHook(clientFinalizer, + SHUTDOWN_HOOK_PRIORITY, timeout, + ShutdownHookManager.TIME_UNIT_DEFAULT); + } + // insert the new file system into the map + fs.key = key; + map.put(key, fs); + if (conf.getBoolean( + FS_AUTOMATIC_CLOSE_KEY, FS_AUTOMATIC_CLOSE_DEFAULT)) { + toAutoClose.add(key); + } + } + } // end of synchronized block + } finally { + // release the creator permit. 
+ creatorPermits.release(); + } + if (fsToClose != null) { + LOGGER.debug("Duplicate FS created for {}; discarding {}", + uri, fs); + discardedInstances.incrementAndGet(); + // close the new file system + // note this will briefly remove and reinstate "fsToClose" from + // the map. It is done in a synchronized block so will not be + // visible to others. + IOUtils.cleanupWithLogger(LOGGER, fsToClose); } + return fs; + } + + /** + * Get the count of discarded instances. + * @return the number of discarded instances. + */ + @VisibleForTesting + long getDiscardedInstances() { + return discardedInstances.get(); } synchronized void remove(Key key, FileSystem fs) { @@ -3901,6 +4171,7 @@ public void run() { /** * Get or create the thread-local data associated with the current thread. + * @return statistics data. */ public StatisticsData getThreadStatistics() { StatisticsData data = threadData.get(); @@ -4259,6 +4530,7 @@ public static synchronized Map getStatistics() { /** * Return the FileSystem classes that have Statistics. * @deprecated use {@link #getGlobalStorageStatistics()} + * @return statistics lists. */ @Deprecated public static synchronized List getAllStatistics() { @@ -4267,6 +4539,7 @@ public static synchronized List getAllStatistics() { /** * Get the statistics for a particular file system. + * @param scheme scheme. * @param cls the class to lookup * @return a statistics object * @deprecated use {@link #getGlobalStorageStatistics()} @@ -4301,6 +4574,7 @@ public static synchronized void clearStatistics() { /** * Print all statistics for all file systems to {@code System.out} + * @throws IOException If an I/O error occurred. */ public static synchronized void printStatistics() throws IOException { @@ -4341,6 +4615,7 @@ public StorageStatistics getStorageStatistics() { /** * Get the global storage statistics. + * @return global storage statistics. 
*/ public static GlobalStorageStatistics getGlobalStorageStatistics() { return GlobalStorageStatistics.INSTANCE; @@ -4494,7 +4769,7 @@ protected CompletableFuture openFileWithOptions( final OpenFileParameters parameters) throws IOException { AbstractFSBuilderImpl.rejectUnknownMandatoryKeys( parameters.getMandatoryKeys(), - Collections.emptySet(), + Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS, "for " + path); return LambdaUtils.eval( new CompletableFuture<>(), () -> @@ -4522,7 +4797,7 @@ protected CompletableFuture openFileWithOptions( final OpenFileParameters parameters) throws IOException { AbstractFSBuilderImpl.rejectUnknownMandatoryKeys( parameters.getMandatoryKeys(), - Collections.emptySet(), ""); + Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS, ""); CompletableFuture result = new CompletableFuture<>(); try { result.complete(open(pathHandle, parameters.getBufferSize())); @@ -4629,9 +4904,11 @@ public CompletableFuture build() throws IOException { Optional optionalPath = getOptionalPath(); OpenFileParameters parameters = new OpenFileParameters() .withMandatoryKeys(getMandatoryKeys()) + .withOptionalKeys(getOptionalKeys()) .withOptions(getOptions()) - .withBufferSize(getBufferSize()) - .withStatus(super.getStatus()); // explicit to avoid IDE warnings + .withStatus(super.getStatus()) + .withBufferSize( + getOptions().getInt(FS_OPTION_OPENFILE_BUFFER_SIZE, getBufferSize())); if(optionalPath.isPresent()) { return getFS().openFileWithOptions(optionalPath.get(), parameters); @@ -4643,4 +4920,17 @@ public CompletableFuture build() throws IOException { } + /** + * Create a multipart uploader. + * @param basePath file path under which all files are uploaded + * @return a MultipartUploaderBuilder object to build the uploader + * @throws IOException if some early checks cause IO failures. + * @throws UnsupportedOperationException if support is checked early. 
+ */ + @InterfaceStability.Unstable + public MultipartUploaderBuilder createMultipartUploader(Path basePath) + throws IOException { + methodNotSupported(); + return null; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java index 7eec0eb7cec54..593495a1daa88 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java @@ -38,8 +38,8 @@ public abstract class FileSystemLinkResolver { * an UnresolvedLinkException if called on an unresolved {@link Path}. * @param p Path on which to perform an operation * @return Generic type returned by operation - * @throws IOException - * @throws UnresolvedLinkException + * @throws IOException raised on errors performing I/O. + * @throws UnresolvedLinkException unresolved link exception. */ abstract public T doCall(final Path p) throws IOException, UnresolvedLinkException; @@ -54,7 +54,7 @@ abstract public T doCall(final Path p) throws IOException, * @param p * Resolved Target of path * @return Generic type determined by implementation - * @throws IOException + * @throws IOException raised on errors performing I/O. */ abstract public T next(final FileSystem fs, final Path p) throws IOException; @@ -66,7 +66,7 @@ abstract public T doCall(final Path p) throws IOException, * @param filesys FileSystem with which to try call * @param path Path with which to try call * @return Generic type determined by implementation - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public T resolve(final FileSystem filesys, final Path path) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java deleted file mode 100644 index b77c244220a9e..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.stream.Collectors; - -import com.google.common.base.Charsets; - -import org.apache.commons.compress.utils.IOUtils; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.permission.FsPermission; - -import static org.apache.hadoop.fs.Path.mergePaths; -import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; - -/** - * A MultipartUploader that uses the basic FileSystem commands. - * This is done in three stages: - *

      - *
    • Init - create a temp {@code _multipart} directory.
    • - *
    • PutPart - copying the individual parts of the file to the temp - * directory.
    • - *
    • Complete - use {@link FileSystem#concat} to merge the files; - * and then delete the temp directory.
    • - *
    - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class FileSystemMultipartUploader extends MultipartUploader { - - private final FileSystem fs; - - public FileSystemMultipartUploader(FileSystem fs) { - this.fs = fs; - } - - @Override - public UploadHandle initialize(Path filePath) throws IOException { - Path collectorPath = createCollectorPath(filePath); - fs.mkdirs(collectorPath, FsPermission.getDirDefault()); - - ByteBuffer byteBuffer = ByteBuffer.wrap( - collectorPath.toString().getBytes(Charsets.UTF_8)); - return BBUploadHandle.from(byteBuffer); - } - - @Override - public PartHandle putPart(Path filePath, InputStream inputStream, - int partNumber, UploadHandle uploadId, long lengthInBytes) - throws IOException { - checkPutArguments(filePath, inputStream, partNumber, uploadId, - lengthInBytes); - byte[] uploadIdByteArray = uploadId.toByteArray(); - checkUploadId(uploadIdByteArray); - Path collectorPath = new Path(new String(uploadIdByteArray, 0, - uploadIdByteArray.length, Charsets.UTF_8)); - Path partPath = - mergePaths(collectorPath, mergePaths(new Path(Path.SEPARATOR), - new Path(Integer.toString(partNumber) + ".part"))); - try(FSDataOutputStream fsDataOutputStream = - fs.createFile(partPath).build()) { - IOUtils.copy(inputStream, fsDataOutputStream, 4096); - } finally { - cleanupWithLogger(LOG, inputStream); - } - return BBPartHandle.from(ByteBuffer.wrap( - partPath.toString().getBytes(Charsets.UTF_8))); - } - - private Path createCollectorPath(Path filePath) { - String uuid = UUID.randomUUID().toString(); - return mergePaths(filePath.getParent(), - mergePaths(new Path(filePath.getName().split("\\.")[0]), - mergePaths(new Path("_multipart_" + uuid), - new Path(Path.SEPARATOR)))); - } - - private PathHandle getPathHandle(Path filePath) throws IOException { - FileStatus status = fs.getFileStatus(filePath); - return fs.getPathHandle(status); - } - - private long totalPartsLen(List partHandles) throws IOException { - long totalLen = 0; 
- for (Path p: partHandles) { - totalLen += fs.getFileStatus(p).getLen(); - } - return totalLen; - } - - @Override - @SuppressWarnings("deprecation") // rename w/ OVERWRITE - public PathHandle complete(Path filePath, Map handleMap, - UploadHandle multipartUploadId) throws IOException { - - checkUploadId(multipartUploadId.toByteArray()); - - checkPartHandles(handleMap); - List> handles = - new ArrayList<>(handleMap.entrySet()); - handles.sort(Comparator.comparingInt(Map.Entry::getKey)); - - List partHandles = handles - .stream() - .map(pair -> { - byte[] byteArray = pair.getValue().toByteArray(); - return new Path(new String(byteArray, 0, byteArray.length, - Charsets.UTF_8)); - }) - .collect(Collectors.toList()); - - byte[] uploadIdByteArray = multipartUploadId.toByteArray(); - Path collectorPath = new Path(new String(uploadIdByteArray, 0, - uploadIdByteArray.length, Charsets.UTF_8)); - - boolean emptyFile = totalPartsLen(partHandles) == 0; - if (emptyFile) { - fs.create(filePath).close(); - } else { - Path filePathInsideCollector = mergePaths(collectorPath, - new Path(Path.SEPARATOR + filePath.getName())); - fs.create(filePathInsideCollector).close(); - fs.concat(filePathInsideCollector, - partHandles.toArray(new Path[handles.size()])); - fs.rename(filePathInsideCollector, filePath, Options.Rename.OVERWRITE); - } - fs.delete(collectorPath, true); - return getPathHandle(filePath); - } - - @Override - public void abort(Path filePath, UploadHandle uploadId) throws IOException { - byte[] uploadIdByteArray = uploadId.toByteArray(); - checkUploadId(uploadIdByteArray); - Path collectorPath = new Path(new String(uploadIdByteArray, 0, - uploadIdByteArray.length, Charsets.UTF_8)); - - // force a check for a file existing; raises FNFE if not found - fs.getFileStatus(collectorPath); - fs.delete(collectorPath, true); - } - - /** - * Factory for creating MultipartUploaderFactory objects for file:// - * filesystems. 
- */ - public static class Factory extends MultipartUploaderFactory { - protected MultipartUploader createMultipartUploader(FileSystem fs, - Configuration conf) { - if (fs.getScheme().equals("file")) { - return new FileSystemMultipartUploader(fs); - } - return null; - } - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java index 43c23abadea44..f717e03692378 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java @@ -20,7 +20,7 @@ import java.util.Iterator; import java.util.NoSuchElementException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileSystem.Statistics.StatisticsData; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java index 7bc93f9bf5db8..933f56927741d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java @@ -36,13 +36,17 @@ import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; +import java.nio.file.attribute.PosixFilePermission; import java.nio.file.FileSystems; import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Enumeration; +import java.util.EnumSet; import java.util.List; import java.util.Map; import 
java.util.Objects; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -51,13 +55,14 @@ import java.util.jar.JarOutputStream; import java.util.jar.Manifest; import java.util.zip.GZIPInputStream; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; -import java.util.zip.ZipInputStream; import org.apache.commons.collections.map.CaseInsensitiveMap; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -71,6 +76,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; + /** * A collection of file-processing util methods */ @@ -151,6 +161,8 @@ public static void fullyDeleteOnExit(final File file) { * (3) If dir is a normal file, it is deleted. * (4) If dir is a normal directory, then dir and all its contents recursively * are deleted. + * @param dir dir. + * @return fully delete status. */ public static boolean fullyDelete(final File dir) { return fullyDelete(dir, false); @@ -246,6 +258,9 @@ private static boolean deleteImpl(final File f, final boolean doLog) { * we return false, the directory may be partially-deleted. 
* If dir is a symlink to a directory, all the contents of the actual * directory pointed to by dir will be deleted. + * + * @param dir dir. + * @return fullyDeleteContents Status. */ public static boolean fullyDeleteContents(final File dir) { return fullyDeleteContents(dir, false); @@ -256,8 +271,11 @@ public static boolean fullyDeleteContents(final File dir) { * we return false, the directory may be partially-deleted. * If dir is a symlink to a directory, all the contents of the actual * directory pointed to by dir will be deleted. + * + * @param dir dir. * @param tryGrantPermissions if 'true', try grant +rwx permissions to this * and all the underlying directories before trying to delete their contents. + * @return fully delete contents status. */ public static boolean fullyDeleteContents(final File dir, final boolean tryGrantPermissions) { if (tryGrantPermissions) { @@ -300,7 +318,7 @@ public static boolean fullyDeleteContents(final File dir, final boolean tryGrant * * @param fs {@link FileSystem} on which the path is present * @param dir directory to recursively delete - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link FileSystem#delete(Path, boolean)} */ @Deprecated @@ -332,7 +350,17 @@ private static void checkDependencies(FileSystem srcFS, } } - /** Copy files between FileSystems. */ + /** + * Copy files between FileSystems. + * @param srcFS src fs. + * @param src src. + * @param dstFS dst fs. + * @param dst dst. + * @param deleteSource delete source. + * @param conf configuration. + * @return if copy success true, not false. + * @throws IOException raised on errors performing I/O. + */ public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, @@ -380,7 +408,19 @@ public static boolean copy(FileSystem srcFS, Path[] srcs, return returnVal; } - /** Copy files between FileSystems. */ + /** + * Copy files between FileSystems. + * + * @param srcFS srcFs. 
+ * @param src src. + * @param dstFS dstFs. + * @param dst dst. + * @param deleteSource delete source. + * @param overwrite overwrite. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. + * @return true if the operation succeeded. + */ public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, @@ -390,7 +430,33 @@ public static boolean copy(FileSystem srcFS, Path src, return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf); } - /** Copy files between FileSystems. */ + /** + * Copy a file/directory tree within/between filesystems. + *

    + * Returns true if the operation succeeded. When deleteSource is true, + * this means "after the copy, delete(source) returned true". + * If the destination is a directory, and mkdirs (dest) fails, + * the operation will return false rather than raise any exception. + *

    + * The overwrite flag is about overwriting files; it has no effect on handling + * an attempt to copy a file atop a directory (expect an IOException), + * or a directory over a path which contains a file (mkdir will fail, so + * "false"). + *

    + * The operation is recursive, and the deleteSource operation takes place + * as each subdirectory is copied. Therefore, if an operation fails partway + * through, the source tree may be partially deleted. + *

    + * @param srcFS source filesystem + * @param srcStatus status of source + * @param dstFS destination filesystem + * @param dst path of destination + * @param deleteSource delete the source? + * @param overwrite overwrite files at destination? + * @param conf configuration to use when opening files + * @return true if the operation succeeded. + * @throws IOException failure + */ public static boolean copy(FileSystem srcFS, FileStatus srcStatus, FileSystem dstFS, Path dst, boolean deleteSource, @@ -403,22 +469,27 @@ public static boolean copy(FileSystem srcFS, FileStatus srcStatus, if (!dstFS.mkdirs(dst)) { return false; } - FileStatus contents[] = srcFS.listStatus(src); - for (int i = 0; i < contents.length; i++) { - copy(srcFS, contents[i], dstFS, - new Path(dst, contents[i].getPath().getName()), - deleteSource, overwrite, conf); + RemoteIterator contents = srcFS.listStatusIterator(src); + while (contents.hasNext()) { + FileStatus next = contents.next(); + copy(srcFS, next, dstFS, + new Path(dst, next.getPath().getName()), + deleteSource, overwrite, conf); } } else { - InputStream in=null; + InputStream in = null; OutputStream out = null; try { - in = srcFS.open(src); + in = awaitFuture(srcFS.openFile(src) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .optLong(FS_OPTION_OPENFILE_LENGTH, + srcStatus.getLen()) // file length hint for object stores + .build()); out = dstFS.create(dst, overwrite); IOUtils.copyBytes(in, out, conf, true); } catch (IOException e) { - IOUtils.closeStream(out); - IOUtils.closeStream(in); + IOUtils.cleanupWithLogger(LOG, in, out); throw e; } } @@ -430,7 +501,17 @@ public static boolean copy(FileSystem srcFS, FileStatus srcStatus, } - /** Copy local files to a FileSystem. */ + /** + * Copy local files to a FileSystem. + * + * @param src src. + * @param dstFS dstFs. + * @param dst dst. + * @param deleteSource delete source. + * @param conf configuration. 
+ * @throws IOException raised on errors performing I/O. + * @return true if the operation succeeded. + */ public static boolean copy(File src, FileSystem dstFS, Path dst, boolean deleteSource, @@ -473,7 +554,17 @@ public static boolean copy(File src, } } - /** Copy FileSystem files to local files. */ + /** + * Copy FileSystem files to local files. + * + * @param srcFS srcFs. + * @param src src. + * @param dst dst. + * @param deleteSource delete source. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. + * @return true if the operation succeeded. + */ public static boolean copy(FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf) throws IOException { @@ -497,7 +588,11 @@ private static boolean copy(FileSystem srcFS, FileStatus srcStatus, deleteSource, conf); } } else { - InputStream in = srcFS.open(src); + InputStream in = awaitFuture(srcFS.openFile(src) + .withFileStatus(srcStatus) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .build()); IOUtils.copyBytes(in, Files.newOutputStream(dst.toPath()), conf); } if (deleteSource) { @@ -518,6 +613,9 @@ private static Path checkDest(String srcName, FileSystem dstFS, Path dst, if (null != sdst) { if (sdst.isDirectory()) { if (null == srcName) { + if (overwrite) { + return dst; + } throw new PathIsDirectoryException(dst.toString()); } return checkDest(null, dstFS, new Path(dst, srcName), overwrite); @@ -598,18 +696,12 @@ public static long getDU(File dir) { return dir.length(); } else { File[] allFiles = dir.listFiles(); - if(allFiles != null) { - for (int i = 0; i < allFiles.length; i++) { - boolean isSymLink; - try { - isSymLink = org.apache.commons.io.FileUtils.isSymlink(allFiles[i]); - } catch(IOException ioe) { - isSymLink = true; - } - if(!isSymLink) { - size += getDU(allFiles[i]); - } - } + if (allFiles != null) { + for (File f : allFiles) { + if (!org.apache.commons.io.FileUtils.isSymlink(f)) { + size += getDU(f); 
+ } + } } return size; } @@ -624,12 +716,12 @@ public static long getDU(File dir) { */ public static void unZip(InputStream inputStream, File toDir) throws IOException { - try (ZipInputStream zip = new ZipInputStream(inputStream)) { + try (ZipArchiveInputStream zip = new ZipArchiveInputStream(inputStream)) { int numOfFailedLastModifiedSet = 0; String targetDirPath = toDir.getCanonicalPath() + File.separator; - for(ZipEntry entry = zip.getNextEntry(); + for(ZipArchiveEntry entry = zip.getNextZipEntry(); entry != null; - entry = zip.getNextEntry()) { + entry = zip.getNextZipEntry()) { if (!entry.isDirectory()) { File file = new File(toDir, entry.getName()); if (!file.getCanonicalPath().startsWith(targetDirPath)) { @@ -648,6 +740,9 @@ public static void unZip(InputStream inputStream, File toDir) if (!file.setLastModified(entry.getTime())) { numOfFailedLastModifiedSet++; } + if (entry.getPlatform() == ZipArchiveEntry.PLATFORM_UNIX) { + Files.setPosixFilePermissions(file.toPath(), permissionsFromMode(entry.getUnixMode())); + } } } if (numOfFailedLastModifiedSet > 0) { @@ -657,6 +752,49 @@ public static void unZip(InputStream inputStream, File toDir) } } + /** + * The permission operation of this method only involves users, user groups, and others. + * If SUID is set, only executable permissions are reserved. 
+ * @param mode Permissions are represented by numerical values + * @return The original permissions for files are stored in collections + */ + private static Set permissionsFromMode(int mode) { + EnumSet permissions = + EnumSet.noneOf(PosixFilePermission.class); + addPermissions(permissions, mode, PosixFilePermission.OTHERS_READ, + PosixFilePermission.OTHERS_WRITE, PosixFilePermission.OTHERS_EXECUTE); + addPermissions(permissions, mode >> 3, PosixFilePermission.GROUP_READ, + PosixFilePermission.GROUP_WRITE, PosixFilePermission.GROUP_EXECUTE); + addPermissions(permissions, mode >> 6, PosixFilePermission.OWNER_READ, + PosixFilePermission.OWNER_WRITE, PosixFilePermission.OWNER_EXECUTE); + return permissions; + } + + /** + * Assign the original permissions to the file + * @param permissions The original permissions for files are stored in collections + * @param mode Use a value of type int to indicate permissions + * @param r Read permission + * @param w Write permission + * @param x Execute permission + */ + private static void addPermissions( + Set permissions, + int mode, + PosixFilePermission r, + PosixFilePermission w, + PosixFilePermission x) { + if ((mode & 1L) == 1L) { + permissions.add(x); + } + if ((mode & 2L) == 2L) { + permissions.add(w); + } + if ((mode & 4L) == 4L) { + permissions.add(r); + } + } + /** * Given a File input it will unzip it in the unzip directory. 
* passed as the second parameter @@ -665,14 +803,14 @@ public static void unZip(InputStream inputStream, File toDir) * @throws IOException An I/O exception has occurred */ public static void unZip(File inFile, File unzipDir) throws IOException { - Enumeration entries; + Enumeration entries; ZipFile zipFile = new ZipFile(inFile); try { - entries = zipFile.entries(); + entries = zipFile.getEntries(); String targetDirPath = unzipDir.getCanonicalPath() + File.separator; while (entries.hasMoreElements()) { - ZipEntry entry = entries.nextElement(); + ZipArchiveEntry entry = entries.nextElement(); if (!entry.isDirectory()) { InputStream in = zipFile.getInputStream(entry); try { @@ -697,6 +835,9 @@ public static void unZip(File inFile, File unzipDir) throws IOException { } finally { out.close(); } + if (entry.getPlatform() == ZipArchiveEntry.PLATFORM_UNIX) { + Files.setPosixFilePermissions(file.toPath(), permissionsFromMode(entry.getUnixMode())); + } } finally { in.close(); } @@ -847,7 +988,7 @@ public static void unTar(InputStream inputStream, File untarDir, * * @param inFile The tar file as input. * @param untarDir The untar directory where to untar the tar file. - * @throws IOException + * @throws IOException an exception occurred. */ public static void unTar(File inFile, File untarDir) throws IOException { if (!untarDir.mkdirs()) { @@ -890,10 +1031,13 @@ private static void unTarUsingTar(InputStream inputStream, File untarDir, private static void unTarUsingTar(File inFile, File untarDir, boolean gzipped) throws IOException { StringBuffer untarCommand = new StringBuffer(); + // not using canonical path here; this postpones relative path + // resolution until bash is executed. 
+ final String source = "'" + FileUtil.makeSecureShellPath(inFile) + "'"; if (gzipped) { - untarCommand.append(" gzip -dc '") - .append(FileUtil.makeSecureShellPath(inFile)) - .append("' | ("); + untarCommand.append(" gzip -dc ") + .append(source) + .append(" | ("); } untarCommand.append("cd '") .append(FileUtil.makeSecureShellPath(untarDir)) @@ -903,15 +1047,17 @@ private static void unTarUsingTar(File inFile, File untarDir, if (gzipped) { untarCommand.append(" -)"); } else { - untarCommand.append(FileUtil.makeSecureShellPath(inFile)); + untarCommand.append(source); } + LOG.debug("executing [{}]", untarCommand); String[] shellCmd = { "bash", "-c", untarCommand.toString() }; ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd); shexec.execute(); int exitcode = shexec.getExitCode(); if (exitcode != 0) { throw new IOException("Error untarring file " + inFile + - ". Tar process exited with exit code " + exitcode); + ". Tar process exited with exit code " + exitcode + + " from command " + untarCommand); } } @@ -968,6 +1114,14 @@ private static void unpackEntries(TarArchiveInputStream tis, + " would create entry outside of " + outputDir); } + if (entry.isSymbolicLink() || entry.isLink()) { + String canonicalTargetPath = getCanonicalPath(entry.getLinkName(), outputDir); + if (!canonicalTargetPath.startsWith(targetDirPath)) { + throw new IOException( + "expanding " + entry.getName() + " would create entry outside of " + outputDir); + } + } + if (entry.isDirectory()) { File subDir = new File(outputDir, entry.getName()); if (!subDir.mkdirs() && !subDir.isDirectory()) { @@ -983,10 +1137,12 @@ private static void unpackEntries(TarArchiveInputStream tis, } if (entry.isSymbolicLink()) { - // Create symbolic link relative to tar parent dir - Files.createSymbolicLink(FileSystems.getDefault() - .getPath(outputDir.getPath(), entry.getName()), - FileSystems.getDefault().getPath(entry.getLinkName())); + // Create symlink with canonical target path to ensure that we don't 
extract + // outside targetDirPath + String canonicalTargetPath = getCanonicalPath(entry.getLinkName(), outputDir); + Files.createSymbolicLink( + FileSystems.getDefault().getPath(outputDir.getPath(), entry.getName()), + FileSystems.getDefault().getPath(canonicalTargetPath)); return; } @@ -998,7 +1154,8 @@ private static void unpackEntries(TarArchiveInputStream tis, } if (entry.isLink()) { - File src = new File(outputDir, entry.getLinkName()); + String canonicalTargetPath = getCanonicalPath(entry.getLinkName(), outputDir); + File src = new File(canonicalTargetPath); HardLink.createHardLink(src, outputFile); return; } @@ -1006,6 +1163,20 @@ private static void unpackEntries(TarArchiveInputStream tis, org.apache.commons.io.FileUtils.copyToFile(tis, outputFile); } + /** + * Gets the canonical path for the given path. + * + * @param path The path for which the canonical path needs to be computed. + * @param parentDir The parent directory to use if the path is a relative path. + * @return The canonical path of the given path. + */ + private static String getCanonicalPath(String path, File parentDir) throws IOException { + java.nio.file.Path targetPath = Paths.get(path); + return (targetPath.isAbsolute() ? + new File(path) : + new File(parentDir, path)).getCanonicalPath(); + } + /** * Class for creating hardlinks. * Supports Unix, WindXP. @@ -1028,6 +1199,7 @@ public static class HardLink extends org.apache.hadoop.fs.HardLink { * @param target the target for symlink * @param linkname the symlink * @return 0 on success + * @throws IOException raised on errors performing I/O. 
*/ public static int symLink(String target, String linkname) throws IOException{ @@ -1089,8 +1261,8 @@ public static int symLink(String target, String linkname) throws IOException{ * @param filename the name of the file to change * @param perm the permission string * @return the exit code from the command - * @throws IOException - * @throws InterruptedException + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException command interrupted. */ public static int chmod(String filename, String perm ) throws IOException, InterruptedException { @@ -1104,7 +1276,7 @@ public static int chmod(String filename, String perm * @param perm permission string * @param recursive true, if permissions should be changed recursively * @return the exit code from the command. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static int chmod(String filename, String perm, boolean recursive) throws IOException { @@ -1130,7 +1302,7 @@ public static int chmod(String filename, String perm, boolean recursive) * @param file the file to change * @param username the new user owner name * @param groupname the new group owner name - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void setOwner(File file, String username, String groupname) throws IOException { @@ -1147,7 +1319,7 @@ public static void setOwner(File file, String username, * Platform independent implementation for {@link File#setReadable(boolean)} * File#setReadable does not work as expected on Windows. * @param f input file - * @param readable + * @param readable readable. * @return true on success, false otherwise */ public static boolean setReadable(File f, boolean readable) { @@ -1168,7 +1340,7 @@ public static boolean setReadable(File f, boolean readable) { * Platform independent implementation for {@link File#setWritable(boolean)} * File#setWritable does not work as expected on Windows. 
* @param f input file - * @param writable + * @param writable writable. * @return true on success, false otherwise */ public static boolean setWritable(File f, boolean writable) { @@ -1192,7 +1364,7 @@ public static boolean setWritable(File f, boolean writable) { * behavior on Windows as on Unix platforms. Creating, deleting or renaming * a file within that folder will still succeed on Windows. * @param f input file - * @param executable + * @param executable executable. * @return true on success, false otherwise */ public static boolean setExecutable(File f, boolean executable) { @@ -1271,7 +1443,7 @@ public static boolean canExecute(File f) { * of forking if group == other. * @param f the file to change * @param permission the new permissions - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void setPermission(File f, FsPermission permission ) throws IOException { @@ -1576,6 +1748,7 @@ public static List getJarsInDirectory(String path) { * wildcard path to return all jars from the directory to use in a classpath. * * @param path the path to the directory. The path may include the wildcard. + * @param useLocal use local. * @return the list of jars as URLs, or an empty list if there are no jars, or * the directory does not exist */ @@ -1806,7 +1979,7 @@ public static FileSystem write(final FileSystem fs, final Path path, * specified charset. This utility method opens the file for writing, creating * the file if it does not exist, or overwrites an existing file. 
* - * @param FileContext the file context with which to create the file + * @param fs the file context with which to create the file * @param path the path to the file * @param charseq the char sequence to write to the file * @param cs the charset to use for encoding @@ -1869,4 +2042,20 @@ public static FileContext write(final FileContext fileContext, final Path path, final CharSequence charseq) throws IOException { return write(fileContext, path, charseq, StandardCharsets.UTF_8); } + + @InterfaceAudience.LimitedPrivate({"ViewDistributedFileSystem"}) + @InterfaceStability.Unstable + /** + * Used in ViewDistributedFileSystem rename API to get access to the protected + * API of FileSystem interface. Even though Rename with options API + * deprecated, we are still using as part of trash. If any filesystem provided + * implementation to this protected FileSystem API, we can't invoke it with + * out casting to the specific filesystem. This util method is proposed to get + * the access to FileSystem#rename with options. + */ + @SuppressWarnings("deprecation") + public static void rename(FileSystem srcFs, Path src, Path dst, + final Options.Rename... 
options) throws IOException { + srcFs.rename(src, dst, options); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java index cf12ea3898a7f..cdbe51e330701 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -41,6 +41,8 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.Progressable; +import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; + /**************************************************************** * A FilterFileSystem contains * some other file system, which it uses as @@ -231,7 +233,7 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, * * @param src file name * @param replication new replication - * @throws IOException + * @throws IOException raised on errors performing I/O. * @return true if successful; * false if file does not exist or is a directory */ @@ -302,7 +304,7 @@ public Path getHomeDirectory() { * Set the current working directory for the given file system. All relative * paths will be resolved relative to it. * - * @param newDir + * @param newDir new dir. 
*/ @Override public void setWorkingDirectory(Path newDir) { @@ -460,6 +462,11 @@ public FileStatus getFileStatus(Path f) throws IOException { return fs.getFileStatus(f); } + @Override + public void msync() throws IOException, UnsupportedOperationException { + fs.msync(); + } + @Override public void access(Path path, FsAction mode) throws AccessControlException, FileNotFoundException, IOException { @@ -728,7 +735,16 @@ protected CompletableFuture openFileWithOptions( @Override public boolean hasPathCapability(final Path path, final String capability) throws IOException { - return fs.hasPathCapability(path, capability); + switch (validatePathCapabilityArgs(makeQualified(path), capability)) { + case CommonPathCapabilities.FS_MULTIPART_UPLOADER: + case CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING: + // operations known to be unsupported, irrespective of what + // the wrapped class implements. + return false; + default: + // the feature is not implemented. + return fs.hasPathCapability(path, capability); + } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java index e197506edc88b..7d979b37b4a50 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java @@ -124,6 +124,11 @@ public FileStatus getFileStatus(Path f) return myFs.getFileStatus(f); } + @Override + public void msync() throws IOException, UnsupportedOperationException { + myFs.msync(); + } + @Override public void access(Path path, FsAction mode) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { @@ -448,4 +453,10 @@ public boolean hasPathCapability(final Path path, throws IOException { return myFs.hasPathCapability(path, capability); } + + @Override + public MultipartUploaderBuilder 
createMultipartUploader(final Path basePath) + throws IOException { + return myFs.createMultipartUploader(basePath); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java index cfef1c3827917..603454210644d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java @@ -42,4 +42,7 @@ public interface FsConstants { */ public static final URI VIEWFS_URI = URI.create("viewfs:///"); public static final String VIEWFS_SCHEME = "viewfs"; + String FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN = + "fs.viewfs.overload.scheme.target.%s.impl"; + String VIEWFS_TYPE = "viewfs"; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java index 680e742a36059..73258661ec191 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java @@ -35,8 +35,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -130,7 +130,7 @@ public Path getCurrentTrashDir() throws IOException { * Returns the current trash location for the path specified * @param path to be deleted * @return path to the trash - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public Path getCurrentTrashDir(Path path) throws IOException { return getTrash().getCurrentTrashDir(path); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java index d392c7d765d72..c4bc341bf4f7c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java @@ -35,24 +35,39 @@ public class FsStatus implements Writable { private long used; private long remaining; - /** Construct a FsStatus object, using the specified statistics */ + /** + * Construct a FsStatus object, using the specified statistics. + * + * @param capacity capacity. + * @param used used. + * @param remaining remaining. + */ public FsStatus(long capacity, long used, long remaining) { this.capacity = capacity; this.used = used; this.remaining = remaining; } - /** Return the capacity in bytes of the file system */ + /** + * Return the capacity in bytes of the file system. + * @return capacity. + */ public long getCapacity() { return capacity; } - /** Return the number of bytes used on the file system */ + /** + * Return the number of bytes used on the file system. + * @return used. + */ public long getUsed() { return used; } - /** Return the number of remaining bytes on the file system */ + /** + * Return the number of remaining bytes on the file system. + * @return remaining. 
+ */ public long getRemaining() { return remaining; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java index e422336739a44..5b48b35566706 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java @@ -17,12 +17,11 @@ */ package org.apache.hadoop.fs; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.tracing.TraceUtils; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Tracer; /** * Holds the HTrace Tracer used for FileSystem operations. @@ -47,18 +46,6 @@ public static synchronized Tracer get(Configuration conf) { return instance; } - @VisibleForTesting - public static synchronized void clear() { - if (instance == null) { - return; - } - try { - instance.close(); - } finally { - instance = null; - } - } - private FsTracer() { } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java index c5429d2370250..11b3e91e86c3a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java @@ -24,7 +24,7 @@ import java.net.URL; import java.net.URLConnection; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java index 27a522e593001..e7f441a75d3c8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.fs; +import javax.annotation.Nullable; import java.io.IOException; import java.util.concurrent.CompletableFuture; @@ -34,7 +35,7 @@ * options accordingly, for example: * * If the option is not related to the file system, the option will be ignored. - * If the option is must, but not supported by the file system, a + * If the option is must, but not supported/known by the file system, an * {@link IllegalArgumentException} will be thrown. * */ @@ -51,10 +52,11 @@ CompletableFuture build() /** * A FileStatus may be provided to the open request. * It is up to the implementation whether to use this or not. - * @param status status. + * @param status status: may be null * @return the builder. */ - default FutureDataInputStreamBuilder withFileStatus(FileStatus status) { + default FutureDataInputStreamBuilder withFileStatus( + @Nullable FileStatus status) { return this; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java index cb430ed3f6251..c87444c6c87f7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java @@ -56,9 +56,9 @@ public StringWithOffset(String string, int offset) { * {a,b}/{c/\d} - {a,b}/c/d * * - * @param filePattern + * @param filePattern file pattern. 
* @return expanded file patterns - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static List expand(String filePattern) throws IOException { List fullyExpanded = new ArrayList(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java index 2dba525e5d9d1..5a8497773a69b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java @@ -23,7 +23,7 @@ import java.util.NoSuchElementException; import java.util.TreeMap; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; /** @@ -104,6 +104,8 @@ public synchronized void reset() { /** * Get an iterator that we can use to iterate throw all the global storage * statistics objects. + * + * @return StorageStatistics Iterator. 
*/ synchronized public Iterator iterator() { Entry first = map.firstEntry(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index f301f22057925..cfe0610edec8c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -27,12 +27,12 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.DurationInfo; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** * Implementation of {@link FileSystem#globStatus(Path, PathFilter)}. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java index 5f4c4a236e96c..1d64b0bcbe921 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java @@ -35,6 +35,7 @@ import java.net.URISyntaxException; import java.net.URLDecoder; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; @@ -462,7 +463,7 @@ static BlockLocation[] fixBlockLocations(BlockLocation[] locations, * @param start the start of the desired range in the contained file * @param len the length of the desired range * @return block locations for this segment of file - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, @@ -513,41 +514,22 @@ private void fileStatusesInIndex(HarStatus parent, List statuses) if (!parentString.endsWith(Path.SEPARATOR)){ parentString += Path.SEPARATOR; } - Path harPath = new Path(parentString); - int harlen = harPath.depth(); - final Map cache = new TreeMap(); - - for (HarStatus hstatus : metadata.archive.values()) { - String child = hstatus.getName(); - if ((child.startsWith(parentString))) { - Path thisPath = new Path(child); - if (thisPath.depth() == harlen + 1) { - statuses.add(toFileStatus(hstatus, cache)); - } - } + + for (String child: parent.children) { + Path p = new Path(parentString + child); + statuses.add(toFileStatus(metadata.archive.get(p))); } } /** * Combine the status stored in the index and the underlying status. 
* @param h status stored in the index - * @param cache caching the underlying file statuses * @return the combined file status - * @throws IOException + * @throws IOException raised on errors performing I/O. */ - private FileStatus toFileStatus(HarStatus h, - Map cache) throws IOException { - FileStatus underlying = null; - if (cache != null) { - underlying = cache.get(h.partName); - } - if (underlying == null) { - final Path p = h.isDir? archivePath: new Path(archivePath, h.partName); - underlying = fs.getFileStatus(p); - if (cache != null) { - cache.put(h.partName, underlying); - } - } + private FileStatus toFileStatus(HarStatus h) throws IOException { + final Path p = h.isDir ? archivePath : new Path(archivePath, h.partName); + FileStatus underlying = metadata.getPartFileStatus(p); long modTime = 0; int version = metadata.getVersion(); @@ -653,12 +635,12 @@ public long getModificationTime() { * while creating a hadoop archive. * @param f the path in har filesystem * @return filestatus. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public FileStatus getFileStatus(Path f) throws IOException { HarStatus hstatus = getFileHarStatus(f); - return toFileStatus(hstatus, null); + return toFileStatus(hstatus); } private HarStatus getFileHarStatus(Path f) throws IOException { @@ -676,6 +658,11 @@ private HarStatus getFileHarStatus(Path f) throws IOException { return hstatus; } + @Override + public void msync() throws IOException, UnsupportedOperationException { + fs.msync(); + } + /** * @return null since no checksum algorithm is implemented. 
*/ @@ -810,7 +797,7 @@ public FileStatus[] listStatus(Path f) throws IOException { if (hstatus.isDir()) { fileStatusesInIndex(hstatus, statuses); } else { - statuses.add(toFileStatus(hstatus, null)); + statuses.add(toFileStatus(hstatus)); } return statuses.toArray(new FileStatus[statuses.size()]); @@ -1117,7 +1104,7 @@ public void setDropBehind(Boolean dropBehind) throws IOException { * @param start the start position in the part file * @param length the length of valid data in the part file * @param bufsize the buffer size - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public HarFSDataInputStream(FileSystem fs, Path p, long start, long length, int bufsize) throws IOException { @@ -1138,7 +1125,8 @@ private class HarMetaData { List stores = new ArrayList(); Map archive = new HashMap(); - private Map partFileStatuses = new HashMap(); + // keys are always the internal har path. + private Map partFileStatuses = new ConcurrentHashMap<>(); public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) { this.fs = fs; @@ -1146,16 +1134,23 @@ public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) { this.archiveIndexPath = archiveIndexPath; } - public FileStatus getPartFileStatus(Path partPath) throws IOException { + public FileStatus getPartFileStatus(Path path) throws IOException { + Path partPath = getPathInHar(path); FileStatus status; status = partFileStatuses.get(partPath); if (status == null) { - status = fs.getFileStatus(partPath); + status = fs.getFileStatus(path); partFileStatuses.put(partPath, status); } return status; } + private void addPartFileStatuses(Path path) throws IOException { + for (FileStatus stat : fs.listStatus(path)) { + partFileStatuses.put(getPathInHar(stat.getPath()), stat); + } + } + public long getMasterIndexTimestamp() { return masterIndexTimestamp; } @@ -1212,16 +1207,22 @@ private void parseMetaData() throws IOException { try { FileStatus archiveStat = 
fs.getFileStatus(archiveIndexPath); archiveIndexTimestamp = archiveStat.getModificationTime(); - LineReader aLin; + + // pre-populate part cache. + addPartFileStatuses(archiveIndexPath.getParent()); + LineReader aLin = null; // now start reading the real index file + long pos = -1; for (Store s: stores) { - read = 0; - aIn.seek(s.begin); - aLin = new LineReader(aIn, getConf()); - while (read + s.begin < s.end) { - int tmp = aLin.readLine(line); - read += tmp; + if (pos != s.begin) { + pos = s.begin; + aIn.seek(s.begin); + aLin = new LineReader(aIn, getConf()); + } + + while (pos < s.end) { + pos += aLin.readLine(line); String lineFeed = line.toString(); String[] parsed = lineFeed.split(" "); parsed[0] = decodeFileName(parsed[0]); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java index 8b47dfeb9a7ce..16bc956f1f3d8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java @@ -29,7 +29,7 @@ import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import static java.nio.file.Files.createLink; @@ -156,6 +156,7 @@ String[] linkCount(File file) throws IOException { * Creates a hardlink * @param file - existing source file * @param linkName - desired target link file + * @throws IOException raised on errors performing I/O. 
*/ public static void createHardLink(File file, File linkName) throws IOException { @@ -177,6 +178,7 @@ public static void createHardLink(File file, File linkName) * @param fileBaseNames - list of path-less file names, as returned by * parentDir.list() * @param linkDir - where the hardlinks should be put. It must already exist. + * @throws IOException raised on errors performing I/O. */ public static void createHardLinkMult(File parentDir, String[] fileBaseNames, File linkDir) throws IOException { @@ -204,6 +206,10 @@ public static void createHardLinkMult(File parentDir, String[] fileBaseNames, /** * Retrieves the number of links to the specified file. + * + * @param fileName file name. + * @throws IOException raised on errors performing I/O. + * @return link count. */ public static int getLinkCount(File fileName) throws IOException { if (fileName == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java index bcf325ceca5df..a0e89d6aeac44 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java @@ -33,7 +33,7 @@ public interface HasFileDescriptor { /** * @return the FileDescriptor - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public FileDescriptor getFileDescriptor() throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/InternalOperations.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/InternalOperations.java new file mode 100644 index 0000000000000..2db33eead9288 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/InternalOperations.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * This method allows access to Package-scoped operations from classes + * in org.apache.hadoop.fs.impl and other file system implementations + * in the hadoop modules. + * This is absolutely not for used by any other application or library. 
+ */ +@InterfaceAudience.Private +public class InternalOperations { + + @SuppressWarnings("deprecation") // rename w/ OVERWRITE + public void rename(FileSystem fs, final Path src, final Path dst, + final Options.Rename...options) throws IOException { + fs.rename(src, dst, options); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LeaseRecoverable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LeaseRecoverable.java new file mode 100644 index 0000000000000..87b54663e6ca6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LeaseRecoverable.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +/** + * Whether the given Path of the FileSystem has the capability to perform lease recovery. + */ +public interface LeaseRecoverable { + + /** + * Start the lease recovery of a file. + * + * @param file path to a file. + * @return true if the file is already closed, and it does not require lease recovery. + * @throws IOException if an error occurs during lease recovery. 
+ * @throws UnsupportedOperationException if lease recovery is not supported by this filesystem. + */ + boolean recoverLease(Path file) throws IOException; + + /** + * Get the close status of a file. + * @param file The string representation of the path to the file + * @return return true if file is closed + * @throws IOException If an I/O error occurred + * @throws UnsupportedOperationException if isFileClosed is not supported by this filesystem. + */ + boolean isFileClosed(Path file) throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java index 5f266a7b82555..d8ab16f41d3ac 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java @@ -78,8 +78,9 @@ public class LocalDirAllocator { private final DiskValidator diskValidator; - /**Create an allocator object - * @param contextCfgItemName + /** + * Create an allocator object. + * @param contextCfgItemName contextCfgItemName. */ public LocalDirAllocator(String contextCfgItemName) { this.contextCfgItemName = contextCfgItemName; @@ -123,7 +124,7 @@ private AllocatorPerContext obtainContext(String contextCfgItemName) { * available disk) * @param conf the Configuration object * @return the complete path to the file on a local disk - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Path getLocalPathForWrite(String pathStr, Configuration conf) throws IOException { @@ -139,7 +140,7 @@ public Path getLocalPathForWrite(String pathStr, * @param size the size of the file that is going to be written * @param conf the Configuration object * @return the complete path to the file on a local disk - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public Path getLocalPathForWrite(String pathStr, long size, Configuration conf) throws IOException { @@ -156,7 +157,7 @@ public Path getLocalPathForWrite(String pathStr, long size, * @param conf the Configuration object * @param checkWrite ensure that the path is writable * @return the complete path to the file on a local disk - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Path getLocalPathForWrite(String pathStr, long size, Configuration conf, @@ -171,7 +172,7 @@ public Path getLocalPathForWrite(String pathStr, long size, * @param pathStr the requested file (this will be searched) * @param conf the Configuration object * @return the complete path to the file on a local disk - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Path getLocalPathToRead(String pathStr, Configuration conf) throws IOException { @@ -184,7 +185,7 @@ public Path getLocalPathToRead(String pathStr, * @param pathStr the path underneath the roots * @param conf the configuration to look up the roots in * @return all of the paths that exist under any of the roots - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Iterable getAllLocalPathsToRead(String pathStr, Configuration conf @@ -205,7 +206,7 @@ public Iterable getAllLocalPathsToRead(String pathStr, * @param size the size of the file that is going to be written * @param conf the Configuration object * @return a unique temporary file - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public File createTmpFileForWrite(String pathStr, long size, Configuration conf) throws IOException { @@ -213,8 +214,9 @@ public File createTmpFileForWrite(String pathStr, long size, return context.createTmpFileForWrite(pathStr, size, conf); } - /** Method to check whether a context is valid - * @param contextCfgItemName + /** + * Method to check whether a context is valid. 
+ * @param contextCfgItemName contextCfgItemName. * @return true/false */ public static boolean isContextValid(String contextCfgItemName) { @@ -224,9 +226,9 @@ public static boolean isContextValid(String contextCfgItemName) { } /** - * Removes the context from the context config items + * Removes the context from the context config items. * - * @param contextCfgItemName + * @param contextCfgItemName contextCfgItemName. */ @Deprecated @InterfaceAudience.LimitedPrivate({"MapReduce"}) @@ -236,8 +238,9 @@ public static void removeContext(String contextCfgItemName) { } } - /** We search through all the configured dirs for the file's existence - * and return true when we find + /** + * We search through all the configured dirs for the file's existence + * and return true when we find. * @param pathStr the requested file (this will be searched) * @param conf the Configuration object * @return true if files exist. false otherwise @@ -393,6 +396,10 @@ public Path getLocalPathForWrite(String pathStr, long size, Context ctx = confChanged(conf); int numDirs = ctx.localDirs.length; int numDirsSearched = 0; + // Max capacity in any directory + long maxCapacity = 0; + String errorText = null; + IOException diskException = null; //remove the leading slash from the path (to make sure that the uri //resolution results in a valid path on the dir being checked) if (pathStr.startsWith("/")) { @@ -407,7 +414,14 @@ public Path getLocalPathForWrite(String pathStr, long size, //build the "roulette wheel" for(int i =0; i < ctx.dirDF.length; ++i) { - availableOnDisk[i] = ctx.dirDF[i].getAvailable(); + final DF target = ctx.dirDF[i]; + // attempt to recreate the dir so that getAvailable() is valid + // if it fails, getAvailable() will return 0, so the dir will + // be declared unavailable. + // return value is logged at debug to keep spotbugs quiet. 
+ final boolean b = new File(target.getDirPath()).mkdirs(); + LOG.debug("mkdirs of {}={}", target, b); + availableOnDisk[i] = target.getAvailable(); totalAvailable += availableOnDisk[i]; } @@ -441,9 +455,18 @@ public Path getLocalPathForWrite(String pathStr, long size, int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc); while (numDirsSearched < numDirs) { long capacity = ctx.dirDF[dirNum].getAvailable(); + if (capacity > maxCapacity) { + maxCapacity = capacity; + } if (capacity > size) { - returnPath = - createPath(ctx.localDirs[dirNum], pathStr, checkWrite); + try { + returnPath = createPath(ctx.localDirs[dirNum], pathStr, + checkWrite); + } catch (IOException e) { + errorText = e.getMessage(); + diskException = e; + LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e); + } if (returnPath != null) { ctx.getAndIncrDirNumLastAccessed(numDirsSearched); break; @@ -459,8 +482,13 @@ public Path getLocalPathForWrite(String pathStr, long size, } //no path found - throw new DiskErrorException("Could not find any valid local " + - "directory for " + pathStr); + String newErrorText = "Could not find any valid local directory for " + + pathStr + " with requested size " + size + + " as the max capacity in any directory is " + maxCapacity; + if (errorText != null) { + newErrorText = newErrorText + " due to " + errorText; + } + throw new DiskErrorException(newErrorText, diskException); } /** Creates a file on the local FS. 
Pass size as diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java index c41190a7b360b..590cbd9a49ece 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java @@ -71,7 +71,11 @@ public LocalFileSystem(FileSystem rawLocalFileSystem) { super(rawLocalFileSystem); } - /** Convert a path to a File. */ + /** + * Convert a path to a File. + * @param path the path. + * @return file. + */ public File pathToFile(Path path) { return ((RawLocalFileSystem)fs).pathToFile(path); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java index 5a4a6a97cc4f7..354e4a6b4657d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java @@ -28,7 +28,13 @@ public MD5MD5CRC32CastagnoliFileChecksum() { this(0, 0, null); } - /** Create a MD5FileChecksum */ + /** + * Create a MD5FileChecksum. + * + * @param bytesPerCRC bytesPerCRC. + * @param crcPerBlock crcPerBlock. + * @param md5 md5. 
+ */ public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { super(bytesPerCRC, crcPerBlock, md5); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java index 3fdb7e982621c..c5ac381f78238 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java @@ -44,7 +44,13 @@ public MD5MD5CRC32FileChecksum() { this(0, 0, null); } - /** Create a MD5FileChecksum */ + /** + * Create a MD5FileChecksum. + * + * @param bytesPerCRC bytesPerCRC. + * @param crcPerBlock crcPerBlock. + * @param md5 md5. + */ public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { this.bytesPerCRC = bytesPerCRC; this.crcPerBlock = crcPerBlock; @@ -76,7 +82,10 @@ public byte[] getBytes() { return WritableUtils.toByteArray(this); } - /** returns the CRC type */ + /** + * returns the CRC type. + * @return data check sum type. + */ public DataChecksum.Type getCrcType() { // default to the one that is understood by all releases. return DataChecksum.Type.CRC32; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java index 5164d0200d28d..f7996c8623717 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java @@ -28,7 +28,13 @@ public MD5MD5CRC32GzipFileChecksum() { this(0, 0, null); } - /** Create a MD5FileChecksum */ + /** + * Create a MD5FileChecksum. + * + * @param bytesPerCRC bytesPerCRC. 
+ * @param crcPerBlock crcPerBlock. + * @param md5 md5. + */ public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { super(bytesPerCRC, crcPerBlock, md5); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java index 7ed987eed90dd..5e4eda26c7f1d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,45 +15,33 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.fs; import java.io.Closeable; import java.io.IOException; import java.io.InputStream; import java.util.Map; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.concurrent.CompletableFuture; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; - -import static com.google.common.base.Preconditions.checkArgument; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; /** * MultipartUploader is an interface for copying files multipart and across - * multiple nodes. Users should: - *
      - *
    1. Initialize an upload.
    2. - *
    3. Upload parts in any order.
    4. - *
    5. Complete the upload in order to have it materialize in the destination - * FS.
    6. - *
    + * multiple nodes. + *

    + * The interface extends {@link IOStatisticsSource} so that there is no + * need to cast an instance to see if it is a source of statistics. + * However, implementations MAY return null for their actual statistics. + *

    */ -@InterfaceAudience.Private +@InterfaceAudience.Public @InterfaceStability.Unstable -public abstract class MultipartUploader implements Closeable { - public static final Logger LOG = - LoggerFactory.getLogger(MultipartUploader.class); +public interface MultipartUploader extends Closeable, + IOStatisticsSource { - /** - * Perform any cleanup. - * The upload is not required to support any operations after this. - * @throws IOException problems on close. - */ - @Override - public void close() throws IOException { - } /** * Initialize a multipart upload. @@ -61,94 +49,64 @@ public void close() throws IOException { * @return unique identifier associating part uploads. * @throws IOException IO failure */ - public abstract UploadHandle initialize(Path filePath) throws IOException; + CompletableFuture startUpload(Path filePath) + throws IOException; /** * Put part as part of a multipart upload. * It is possible to have parts uploaded in any order (or in parallel). - * @param filePath Target path for upload (same as {@link #initialize(Path)}). + * @param uploadId Identifier from {@link #startUpload(Path)}. + * @param partNumber Index of the part relative to others. + * @param filePath Target path for upload (as {@link #startUpload(Path)}). * @param inputStream Data for this part. Implementations MUST close this * stream after reading in the data. - * @param partNumber Index of the part relative to others. - * @param uploadId Identifier from {@link #initialize(Path)}. * @param lengthInBytes Target length to read from the stream. * @return unique PartHandle identifier for the uploaded part. * @throws IOException IO failure */ - public abstract PartHandle putPart(Path filePath, InputStream inputStream, - int partNumber, UploadHandle uploadId, long lengthInBytes) + CompletableFuture putPart( + UploadHandle uploadId, + int partNumber, + Path filePath, + InputStream inputStream, + long lengthInBytes) throws IOException; /** * Complete a multipart upload. 
- * @param filePath Target path for upload (same as {@link #initialize(Path)}. + * @param uploadId Identifier from {@link #startUpload(Path)}. + * @param filePath Target path for upload (as {@link #startUpload(Path)}). * @param handles non-empty map of part number to part handle. - * from {@link #putPart(Path, InputStream, int, UploadHandle, long)}. - * @param multipartUploadId Identifier from {@link #initialize(Path)}. + * from {@link #putPart(UploadHandle, int, Path, InputStream, long)}. * @return unique PathHandle identifier for the uploaded file. * @throws IOException IO failure */ - public abstract PathHandle complete(Path filePath, - Map handles, - UploadHandle multipartUploadId) + CompletableFuture complete( + UploadHandle uploadId, + Path filePath, + Map handles) throws IOException; /** * Aborts a multipart upload. - * @param filePath Target path for upload (same as {@link #initialize(Path)}. - * @param multipartUploadId Identifier from {@link #initialize(Path)}. + * @param uploadId Identifier from {@link #startUpload(Path)}. + * @param filePath Target path for upload (same as {@link #startUpload(Path)}). * @throws IOException IO failure + * @return a future; the operation will have completed */ - public abstract void abort(Path filePath, UploadHandle multipartUploadId) + CompletableFuture abort(UploadHandle uploadId, Path filePath) throws IOException; /** - * Utility method to validate uploadIDs. - * @param uploadId Upload ID - * @throws IllegalArgumentException invalid ID - */ - protected void checkUploadId(byte[] uploadId) - throws IllegalArgumentException { - checkArgument(uploadId != null, "null uploadId"); - checkArgument(uploadId.length > 0, - "Empty UploadId is not valid"); - } - - /** - * Utility method to validate partHandles. - * @param partHandles handles - * @throws IllegalArgumentException if the parts are invalid + * Best effort attempt to abort multipart uploads under a path. 
+ * Not all implementations support this, and those which do may + * be vulnerable to eventually consistent listings of current uploads + * -some may be missed. + * @param path path to abort uploads under. + * @return a future to the number of entries aborted; + * -1 if aborting is unsupported + * @throws IOException IO failure */ - protected void checkPartHandles(Map partHandles) { - checkArgument(!partHandles.isEmpty(), - "Empty upload"); - partHandles.keySet() - .stream() - .forEach(key -> - checkArgument(key > 0, - "Invalid part handle index %s", key)); - } + CompletableFuture abortUploadsUnderPath(Path path) throws IOException; - /** - * Check all the arguments to the - * {@link #putPart(Path, InputStream, int, UploadHandle, long)} operation. - * @param filePath Target path for upload (same as {@link #initialize(Path)}). - * @param inputStream Data for this part. Implementations MUST close this - * stream after reading in the data. - * @param partNumber Index of the part relative to others. - * @param uploadId Identifier from {@link #initialize(Path)}. - * @param lengthInBytes Target length to read from the stream. 
- * @throws IllegalArgumentException invalid argument - */ - protected void checkPutArguments(Path filePath, - InputStream inputStream, - int partNumber, - UploadHandle uploadId, - long lengthInBytes) throws IllegalArgumentException { - checkArgument(filePath != null, "null filePath"); - checkArgument(inputStream != null, "null inputStream"); - checkArgument(partNumber > 0, "Invalid part number: %d", partNumber); - checkArgument(uploadId != null, "null uploadId"); - checkArgument(lengthInBytes >= 0, "Invalid part length: %d", lengthInBytes); - } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java new file mode 100644 index 0000000000000..e7b0865063ee5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import javax.annotation.Nonnull; +import java.io.IOException; + +import org.apache.hadoop.fs.permission.FsPermission; + +/** + * Builder interface for Multipart readers. 
+ * @param MultipartUploader Generic Type. + * @param MultipartUploaderBuilder Generic Type. + */ +public interface MultipartUploaderBuilder> + extends FSBuilder { + + /** + * Set permission for the file. + * @param perm permission. + * @return B Generics Type. + */ + B permission(@Nonnull FsPermission perm); + + /** + * Set the size of the buffer to be used. + * @param bufSize buffer size. + * @return B Generics Type. + */ + B bufferSize(int bufSize); + + /** + * Set replication factor. + * @param replica replica. + * @return B Generics Type. + */ + B replication(short replica); + + /** + * Set block size. + * @param blkSize blkSize. + * @return B Generics Type. + */ + B blockSize(long blkSize); + + /** + * Create an FSDataOutputStream at the specified path. + * @return B Generics Type. + */ + B create(); + + /** + * Set to true to overwrite the existing file. + * Set it to false, an exception will be thrown when calling {@link #build()} + * if the file exists. + * @param overwrite overwrite. + * @return B Generics Type. + */ + B overwrite(boolean overwrite); + + /** + * Append to an existing file (optional operation). + * @return B Generics Type. + */ + B append(); + + /** + * Set checksum opt. + * @param chksumOpt chk sum opt. + * @return B Generics Type. + */ + B checksumOpt(@Nonnull Options.ChecksumOpt chksumOpt); + + /** + * Create the FSDataOutputStream to write on the file system. + * + * @throws IllegalArgumentException if the parameters are not valid. + * @throws IOException on errors when file system creates or appends the file. + * @return S Generics Type. 
+ */ + S build() throws IllegalArgumentException, IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderFactory.java deleted file mode 100644 index e35b6bf18bbd6..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderFactory.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Iterator; -import java.util.ServiceLoader; - -/** - * {@link ServiceLoader}-driven uploader API for storage services supporting - * multipart uploads. 
- */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public abstract class MultipartUploaderFactory { - public static final Logger LOG = - LoggerFactory.getLogger(MultipartUploaderFactory.class); - - /** - * Multipart Uploaders listed as services. - */ - private static ServiceLoader serviceLoader = - ServiceLoader.load(MultipartUploaderFactory.class, - MultipartUploaderFactory.class.getClassLoader()); - - // Iterate through the serviceLoader to avoid lazy loading. - // Lazy loading would require synchronization in concurrent use cases. - static { - Iterator iterServices = serviceLoader.iterator(); - while (iterServices.hasNext()) { - iterServices.next(); - } - } - - /** - * Get the multipart loader for a specific filesystem. - * @param fs filesystem - * @param conf configuration - * @return an uploader, or null if one was found. - * @throws IOException failure during the creation process. - */ - public static MultipartUploader get(FileSystem fs, Configuration conf) - throws IOException { - MultipartUploader mpu = null; - for (MultipartUploaderFactory factory : serviceLoader) { - mpu = factory.createMultipartUploader(fs, conf); - if (mpu != null) { - break; - } - } - return mpu; - } - - protected abstract MultipartUploader createMultipartUploader(FileSystem fs, - Configuration conf) throws IOException; -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java index 75bc12df8fdcf..9ef7de657dc15 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java @@ -17,9 +17,13 @@ */ package org.apache.hadoop.fs; +import java.util.Collections; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.function.BiFunction; +import java.util.stream.Collectors; +import 
java.util.stream.Stream; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -276,7 +280,9 @@ public String toString() { } /** - * Create a ChecksumOpts that disables checksum + * Create a ChecksumOpts that disables checksum. + * + * @return ChecksumOpt. */ public static ChecksumOpt createDisabled() { return new ChecksumOpt(DataChecksum.Type.NULL, -1); @@ -291,6 +297,7 @@ public static ChecksumOpt createDisabled() { * @param userOpt User-specified checksum option. Ignored if null. * @param userBytesPerChecksum User-specified bytesPerChecksum * Ignored if {@literal <} 0. + * @return ChecksumOpt. */ public static ChecksumOpt processChecksumOpt(ChecksumOpt defaultOpt, ChecksumOpt userOpt, int userBytesPerChecksum) { @@ -326,6 +333,8 @@ public static ChecksumOpt processChecksumOpt(ChecksumOpt defaultOpt, * * @param defaultOpt Default checksum option * @param userOpt User-specified checksum option + * + * @return ChecksumOpt. */ public static ChecksumOpt processChecksumOpt(ChecksumOpt defaultOpt, ChecksumOpt userOpt) { @@ -518,4 +527,119 @@ public enum ChecksumCombineMode { MD5MD5CRC, // MD5 of block checksums, which are MD5 over chunk CRCs COMPOSITE_CRC // Block/chunk-independent composite CRC } + + /** + * The standard {@code openFile()} options. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static final class OpenFileOptions { + + private OpenFileOptions() { + } + + /** + * Prefix for all standard filesystem options: {@value}. + */ + private static final String FILESYSTEM_OPTION = "fs.option."; + + /** + * Prefix for all openFile options: {@value}. + */ + public static final String FS_OPTION_OPENFILE = + FILESYSTEM_OPTION + "openfile."; + + /** + * OpenFile option for file length: {@value}. + */ + public static final String FS_OPTION_OPENFILE_LENGTH = + FS_OPTION_OPENFILE + "length"; + + /** + * OpenFile option for split start: {@value}. 
+ */ + public static final String FS_OPTION_OPENFILE_SPLIT_START = + FS_OPTION_OPENFILE + "split.start"; + + /** + * OpenFile option for split end: {@value}. + */ + public static final String FS_OPTION_OPENFILE_SPLIT_END = + FS_OPTION_OPENFILE + "split.end"; + + /** + * OpenFile option for buffer size: {@value}. + */ + public static final String FS_OPTION_OPENFILE_BUFFER_SIZE = + FS_OPTION_OPENFILE + "buffer.size"; + + /** + * OpenFile option for read policies: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY = + FS_OPTION_OPENFILE + "read.policy"; + + /** + * Set of standard options which openFile implementations + * MUST recognize, even if they ignore the actual values. + */ + public static final Set FS_OPTION_OPENFILE_STANDARD_OPTIONS = + Collections.unmodifiableSet(Stream.of( + FS_OPTION_OPENFILE_BUFFER_SIZE, + FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_LENGTH, + FS_OPTION_OPENFILE_SPLIT_START, + FS_OPTION_OPENFILE_SPLIT_END) + .collect(Collectors.toSet())); + + /** + * Read policy for adaptive IO: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE = + "adaptive"; + + /** + * Read policy {@value} -whatever the implementation does by default. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_DEFAULT = + "default"; + + /** + * Read policy for random IO: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_RANDOM = + "random"; + + /** + * Read policy for sequential IO: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL = + "sequential"; + + /** + * Vectored IO API to be used: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_VECTOR = + "vector"; + + /** + * Whole file to be read, end-to-end: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE = + "whole-file"; + + /** + * All the current read policies as a set. 
+ */ + public static final Set FS_OPTION_OPENFILE_READ_POLICIES = + Collections.unmodifiableSet(Stream.of( + FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE, + FS_OPTION_OPENFILE_READ_POLICY_DEFAULT, + FS_OPTION_OPENFILE_READ_POLICY_RANDOM, + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL, + FS_OPTION_OPENFILE_READ_POLICY_VECTOR, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .collect(Collectors.toSet())); + + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java index 043f84612dc8b..01730889a2b41 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.fs; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -27,10 +27,11 @@ import java.util.List; /** + *

    * A partial listing of the children of a parent directory. Since it is a * partial listing, multiple PartialListing may need to be combined to obtain * the full listing of a parent directory. - *

    + *

    * ListingBatch behaves similar to a Future, in that getting the result via * {@link #get()} will throw an Exception if there was a failure. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java index 2649d279aa15f..9daa782e1c5f2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java @@ -447,7 +447,12 @@ public Path getParent() { * @return a new path with the suffix added */ public Path suffix(String suffix) { - return new Path(getParent(), getName()+suffix); + Path parent = getParent(); + if (parent == null) { + return new Path("/", getName() + suffix); + } + + return new Path(parent, getName() + suffix); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java index deb3880ee4195..f32f2a93544bf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java @@ -64,7 +64,13 @@ public PathIOException(String path, String error) { this.path = path; } - protected PathIOException(String path, String error, Throwable cause) { + /** + * Use a subclass of PathIOException if possible. + * @param path for the exception + * @param error custom string to use as the error text + * @param cause cause of exception. 
+ */ + public PathIOException(String path, String error, Throwable cause) { super(error, cause); this.path = path; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java index 6744d17a72666..7380402eb6156 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +17,11 @@ */ package org.apache.hadoop.fs; -import java.io.*; +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.IntFunction; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -85,4 +89,47 @@ void readFully(long position, byte[] buffer, int offset, int length) * the read operation completed */ void readFully(long position, byte[] buffer) throws IOException; + + /** + * What is the smallest reasonable seek? + * @return the minimum number of bytes + */ + default int minSeekForVectorReads() { + return 4 * 1024; + } + + /** + * What is the largest size that we should group ranges together as? + * @return the number of bytes to read at once + */ + default int maxReadSizeForVectorReads() { + return 1024 * 1024; + } + + /** + * Read fully a list of file ranges asynchronously from this file. + * The default iterates through the ranges to read each synchronously, but + * the intent is that FSDataInputStream subclasses can make more efficient + * readers. 
+ * As a result of the call, each range will have FileRange.setData(CompletableFuture) + * called with a future that when complete will have a ByteBuffer with the + * data from the file's range. + *

    + * The position returned by getPos() after readVectored() is undefined. + *

    + *

    + * If a file is changed while the readVectored() operation is in progress, the output is + * undefined. Some ranges may have old data, some may have new and some may have both. + *

    + *

    + * While a readVectored() operation is in progress, normal read api calls may block. + *

    + * @param ranges the byte ranges to read + * @param allocate the function to allocate ByteBuffer + * @throws IOException any IOE. + */ + default void readVectored(List ranges, + IntFunction allocate) throws IOException { + VectoredReadUtils.readVectored(this, ranges, allocate); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java index 11cc93401748e..cf0783cf8a945 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java @@ -105,7 +105,9 @@ public QuotaUsage build() { // Make it protected for the deprecated ContentSummary constructor. protected QuotaUsage() { } - /** Build the instance based on the builder. */ + /** Build the instance based on the builder. + * @param builder bulider. + */ protected QuotaUsage(Builder builder) { this.fileAndDirectoryCount = builder.fileAndDirectoryCount; this.quota = builder.quota; @@ -127,37 +129,67 @@ protected void setSpaceQuota(long spaceQuota) { this.spaceQuota = spaceQuota; } - /** Return the directory count. */ + /** + * Return the directory count. + * + * @return file and directory count. + */ public long getFileAndDirectoryCount() { return fileAndDirectoryCount; } - /** Return the directory quota. */ + /** + * Return the directory quota. + * + * @return quota. + */ public long getQuota() { return quota; } - /** Return (disk) space consumed. */ + /** + * Return (disk) space consumed. + * + * @return space consumed. + */ public long getSpaceConsumed() { return spaceConsumed; } - /** Return (disk) space quota. */ + /** + * Return (disk) space quota. + * + * @return space quota. + */ public long getSpaceQuota() { return spaceQuota; } - /** Return storage type quota. */ + /** + * Return storage type quota. + * + * @param type storage type. 
+ * @return type quota. + */ public long getTypeQuota(StorageType type) { return (typeQuota != null) ? typeQuota[type.ordinal()] : -1L; } - /** Return storage type consumed. */ + /** + * Return storage type consumed. + * + * @param type storage type. + * @return type consumed. + */ public long getTypeConsumed(StorageType type) { return (typeConsumed != null) ? typeConsumed[type.ordinal()] : 0L; } - /** Return true if any storage type quota has been set. */ + /** + * Return true if any storage type quota has been set. + * + * @return if any storage type quota has been set true, not false. + * */ public boolean isTypeQuotaSet() { if (typeQuota != null) { for (StorageType t : StorageType.getTypesSupportingQuota()) { @@ -169,7 +201,12 @@ public boolean isTypeQuotaSet() { return false; } - /** Return true if any storage type consumption information is available. */ + /** + * Return true if any storage type consumption information is available. + * + * @return if any storage type consumption information + * is available, not false. + */ public boolean isTypeConsumedAvailable() { if (typeConsumed != null) { for (StorageType t : StorageType.getTypesSupportingQuota()) { @@ -269,12 +306,15 @@ public String toString(boolean hOption) { return toString(hOption, false, null); } - /** Return the string representation of the object in the output format. + /** + * Return the string representation of the object in the output format. * if hOption is false file sizes are returned in bytes * if hOption is true file sizes are returned in human readable * * @param hOption a flag indicating if human readable output if to be used - * @return the string representation of the object + * @param tOption type option. + * @param types storage types. + * @return the string representation of the object. */ public String toString(boolean hOption, boolean tOption, List types) { @@ -326,7 +366,7 @@ protected String getTypesQuotaUsage(boolean hOption, /** * return the header of with the StorageTypes. 
* - * @param storageTypes + * @param storageTypes storage types. * @return storage header string */ public static String getStorageTypeHeader(List storageTypes) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java index cf2210575da15..c836d3f96b459 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,7 +19,7 @@ package org.apache.hadoop.fs; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.BufferedOutputStream; import java.io.DataOutput; @@ -33,20 +33,35 @@ import java.io.FileDescriptor; import java.net.URI; import java.nio.ByteBuffer; +import java.nio.channels.AsynchronousFileChannel; +import java.nio.channels.CompletionHandler; import java.nio.file.Files; import java.nio.file.NoSuchFileException; +import java.nio.file.StandardOpenOption; import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.BasicFileAttributeView; import java.nio.file.attribute.FileTime; import java.util.Arrays; import java.util.EnumSet; +import java.util.Locale; import java.util.Optional; import java.util.StringTokenizer; +import java.util.concurrent.atomic.AtomicLong; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.IntFunction; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.impl.StoreImplementationUtils; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.BufferedIOStatisticsOutputStream; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.util.Progressable; @@ -54,6 +69,15 @@ import org.apache.hadoop.util.StringUtils; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_EXCEPTIONS; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SEEK_OPERATIONS; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SKIP_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SKIP_OPERATIONS; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_WRITE_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_WRITE_EXCEPTIONS; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; /**************************************************************** * Implement the FileSystem API for the raw local filesystem. @@ -64,6 +88,7 @@ public class RawLocalFileSystem extends FileSystem { static final URI NAME = URI.create("file:///"); private Path workingDir; + private long defaultBlockSize; // Temporary workaround for HADOOP-9652. 
private static boolean useDeprecatedFileStatus = true; @@ -84,7 +109,12 @@ private Path makeAbsolute(Path f) { } } - /** Convert a path to a File. */ + /** + * Convert a path to a File. + * + * @param path the path. + * @return file. + */ public File pathToFile(Path path) { checkPath(path); if (!path.isAbsolute()) { @@ -100,17 +130,47 @@ public File pathToFile(Path path) { public void initialize(URI uri, Configuration conf) throws IOException { super.initialize(uri, conf); setConf(conf); + defaultBlockSize = getDefaultBlockSize(new Path(uri)); } /******************************************************* * For open()'s FSInputStream. *******************************************************/ - class LocalFSFileInputStream extends FSInputStream implements HasFileDescriptor { + class LocalFSFileInputStream extends FSInputStream implements + HasFileDescriptor, IOStatisticsSource, StreamCapabilities { private FileInputStream fis; + private final File name; private long position; + private AsynchronousFileChannel asyncChannel = null; + + /** + * Minimal set of counters. + */ + private final IOStatisticsStore ioStatistics = iostatisticsStore() + .withCounters( + STREAM_READ_BYTES, + STREAM_READ_EXCEPTIONS, + STREAM_READ_SEEK_OPERATIONS, + STREAM_READ_SKIP_OPERATIONS, + STREAM_READ_SKIP_BYTES) + .build(); + + /** Reference to the bytes read counter for slightly faster counting. */ + private final AtomicLong bytesRead; + + /** + * Thread level IOStatistics aggregator to update in close(). 
+ */ + private final IOStatisticsAggregator + ioStatisticsAggregator; public LocalFSFileInputStream(Path f) throws IOException { - fis = new FileInputStream(pathToFile(f)); + name = pathToFile(f); + fis = new FileInputStream(name); + bytesRead = ioStatistics.getCounterReference( + STREAM_READ_BYTES); + ioStatisticsAggregator = + IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator(); } @Override @@ -133,16 +193,26 @@ public boolean seekToNewSource(long targetPos) throws IOException { return false; } - /* - * Just forward to the fis + /** + * Just forward to the fis. */ @Override public int available() throws IOException { return fis.available(); } @Override - public void close() throws IOException { fis.close(); } - @Override public boolean markSupported() { return false; } - + + @Override + public void close() throws IOException { + try { + fis.close(); + if (asyncChannel != null) { + asyncChannel.close(); + } + } finally { + ioStatisticsAggregator.aggregate(ioStatistics); + } + } + @Override public int read() throws IOException { try { @@ -150,9 +220,11 @@ public int read() throws IOException { if (value >= 0) { this.position++; statistics.incrementBytesRead(1); + bytesRead.addAndGet(1); } return value; } catch (IOException e) { // unexpected exception + ioStatistics.incrementCounter(STREAM_READ_EXCEPTIONS); throw new FSError(e); // assume native fs error } } @@ -166,9 +238,11 @@ public int read(byte[] b, int off, int len) throws IOException { if (value > 0) { this.position += value; statistics.incrementBytesRead(value); + bytesRead.addAndGet(value); } return value; } catch (IOException e) { // unexpected exception + ioStatistics.incrementCounter(STREAM_READ_EXCEPTIONS); throw new FSError(e); // assume native fs error } } @@ -187,18 +261,22 @@ public int read(long position, byte[] b, int off, int len) int value = fis.getChannel().read(bb, position); if (value > 0) { statistics.incrementBytesRead(value); + 
ioStatistics.incrementCounter(STREAM_READ_BYTES, value); } return value; } catch (IOException e) { + ioStatistics.incrementCounter(STREAM_READ_EXCEPTIONS); throw new FSError(e); } } @Override public long skip(long n) throws IOException { + ioStatistics.incrementCounter(STREAM_READ_SKIP_OPERATIONS); long value = fis.skip(n); if (value > 0) { this.position += value; + ioStatistics.incrementCounter(STREAM_READ_SKIP_BYTES, value); } return value; } @@ -207,8 +285,108 @@ public long skip(long n) throws IOException { public FileDescriptor getFileDescriptor() throws IOException { return fis.getFD(); } + + @Override + public boolean hasCapability(String capability) { + // a bit inefficient, but intended to make it easier to add + // new capabilities. + switch (capability.toLowerCase(Locale.ENGLISH)) { + case StreamCapabilities.IOSTATISTICS: + case StreamCapabilities.IOSTATISTICS_CONTEXT: + case StreamCapabilities.VECTOREDIO: + return true; + default: + return false; + } + } + + @Override + public IOStatistics getIOStatistics() { + return ioStatistics; + } + + AsynchronousFileChannel getAsyncChannel() throws IOException { + if (asyncChannel == null) { + synchronized (this) { + asyncChannel = AsynchronousFileChannel.open(name.toPath(), + StandardOpenOption.READ); + } + } + return asyncChannel; + } + + @Override + public void readVectored(List ranges, + IntFunction allocate) throws IOException { + + List sortedRanges = Arrays.asList(sortRanges(ranges)); + // Set up all of the futures, so that we can use them if things fail + for(FileRange range: sortedRanges) { + VectoredReadUtils.validateRangeRequest(range); + range.setData(new CompletableFuture<>()); + } + try { + AsynchronousFileChannel channel = getAsyncChannel(); + ByteBuffer[] buffers = new ByteBuffer[sortedRanges.size()]; + AsyncHandler asyncHandler = new AsyncHandler(channel, sortedRanges, buffers); + for(int i = 0; i < sortedRanges.size(); ++i) { + FileRange range = sortedRanges.get(i); + buffers[i] = 
allocate.apply(range.getLength()); + channel.read(buffers[i], range.getOffset(), i, asyncHandler); + } + } catch (IOException ioe) { + LOG.debug("Exception occurred during vectored read ", ioe); + for(FileRange range: sortedRanges) { + range.getData().completeExceptionally(ioe); + } + } + } } - + + /** + * A CompletionHandler that implements readFully and translates back + * into the form of CompletionHandler that our users expect. + */ + static class AsyncHandler implements CompletionHandler { + private final AsynchronousFileChannel channel; + private final List ranges; + private final ByteBuffer[] buffers; + + AsyncHandler(AsynchronousFileChannel channel, + List ranges, + ByteBuffer[] buffers) { + this.channel = channel; + this.ranges = ranges; + this.buffers = buffers; + } + + @Override + public void completed(Integer result, Integer r) { + FileRange range = ranges.get(r); + ByteBuffer buffer = buffers[r]; + if (result == -1) { + failed(new EOFException("Read past End of File"), r); + } else { + if (buffer.remaining() > 0) { + // issue a read for the rest of the buffer + // QQ: What if this fails? It has the same handler. + channel.read(buffer, range.getOffset() + buffer.position(), r, this); + } else { + // QQ: Why is this required? I think because we don't want the + // user to read data beyond limit. + buffer.flip(); + range.getData().complete(buffer); + } + } + } + + @Override + public void failed(Throwable exc, Integer r) { + LOG.debug("Failed while reading range " + r + " {} ", exc); + ranges.get(r).getData().completeExceptionally(exc); + } + } + @Override public FSDataInputStream open(Path f, int bufferSize) throws IOException { getFileStatus(f); @@ -231,12 +409,32 @@ public FSDataInputStream open(PathHandle fd, int bufferSize) /********************************************************* * For create()'s FSOutputStream. 
*********************************************************/ - class LocalFSFileOutputStream extends OutputStream { + final class LocalFSFileOutputStream extends OutputStream implements + IOStatisticsSource, StreamCapabilities, Syncable { private FileOutputStream fos; - + + /** + * Minimal set of counters. + */ + private final IOStatisticsStore ioStatistics = iostatisticsStore() + .withCounters( + STREAM_WRITE_BYTES, + STREAM_WRITE_EXCEPTIONS) + .build(); + + /** + * Thread level IOStatistics aggregator to update in close(). + */ + private final IOStatisticsAggregator + ioStatisticsAggregator; + private LocalFSFileOutputStream(Path f, boolean append, FsPermission permission) throws IOException { File file = pathToFile(f); + // store the aggregator before attempting any IO. + ioStatisticsAggregator = + IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator(); + if (!append && permission == null) { permission = FsPermission.getFileDefault(); } @@ -263,17 +461,26 @@ private LocalFSFileOutputStream(Path f, boolean append, } /* - * Just forward to the fos + * Close the fos; update the IOStatisticsContext. 
*/ @Override - public void close() throws IOException { fos.close(); } + public void close() throws IOException { + try { + fos.close(); + } finally { + ioStatisticsAggregator.aggregate(ioStatistics); + } + } + @Override public void flush() throws IOException { fos.flush(); } @Override public void write(byte[] b, int off, int len) throws IOException { try { fos.write(b, off, len); + ioStatistics.incrementCounter(STREAM_WRITE_BYTES, len); } catch (IOException e) { // unexpected exception + ioStatistics.incrementCounter(STREAM_WRITE_EXCEPTIONS); throw new FSError(e); // assume native fs error } } @@ -282,10 +489,45 @@ public void write(byte[] b, int off, int len) throws IOException { public void write(int b) throws IOException { try { fos.write(b); + ioStatistics.incrementCounter(STREAM_WRITE_BYTES); } catch (IOException e) { // unexpected exception + ioStatistics.incrementCounter(STREAM_WRITE_EXCEPTIONS); throw new FSError(e); // assume native fs error } } + + @Override + public void hflush() throws IOException { + flush(); + } + + /** + * HSync calls sync on fhe file descriptor after a local flush() call. + * @throws IOException failure + */ + @Override + public void hsync() throws IOException { + flush(); + fos.getFD().sync(); + } + + @Override + public boolean hasCapability(String capability) { + // a bit inefficient, but intended to make it easier to add + // new capabilities. 
+ switch (capability.toLowerCase(Locale.ENGLISH)) { + case StreamCapabilities.IOSTATISTICS: + case StreamCapabilities.IOSTATISTICS_CONTEXT: + return true; + default: + return StoreImplementationUtils.isProbeForSyncable(capability); + } + } + + @Override + public IOStatistics getIOStatistics() { + return ioStatistics; + } } @Override @@ -318,8 +560,8 @@ private FSDataOutputStream create(Path f, boolean overwrite, if (parent != null && !mkdirs(parent)) { throw new IOException("Mkdirs failed to create " + parent.toString()); } - return new FSDataOutputStream(new BufferedOutputStream( - createOutputStreamWithMode(f, false, permission), bufferSize), + return new FSDataOutputStream(new BufferedIOStatisticsOutputStream( + createOutputStreamWithMode(f, false, permission), bufferSize, true), statistics); } @@ -340,8 +582,8 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, if (exists(f) && !flags.contains(CreateFlag.OVERWRITE)) { throw new FileAlreadyExistsException("File already exists: " + f); } - return new FSDataOutputStream(new BufferedOutputStream( - createOutputStreamWithMode(f, false, permission), bufferSize), + return new FSDataOutputStream(new BufferedIOStatisticsOutputStream( + createOutputStreamWithMode(f, false, permission), bufferSize, true), statistics); } @@ -518,7 +760,12 @@ public FileStatus[] listStatus(Path f) throws IOException { } return new FileStatus[] { new DeprecatedRawLocalFileStatus(localf, - getDefaultBlockSize(f), this) }; + defaultBlockSize, this) }; + } + + @Override + public boolean exists(Path f) throws IOException { + return pathToFile(f).exists(); } protected boolean mkOneDir(File p2f) throws IOException { @@ -663,7 +910,7 @@ private FileStatus deprecatedGetFileStatus(Path f) throws IOException { File path = pathToFile(f); if (path.exists()) { return new DeprecatedRawLocalFileStatus(pathToFile(f), - getDefaultBlockSize(f), this); + defaultBlockSize, this); } else { throw new FileNotFoundException("File " + f 
+ " does not exist"); } @@ -1051,7 +1298,7 @@ private FileStatus deprecatedGetFileLinkStatusInternal(final Path f) private FileStatus getNativeFileLinkStatus(final Path f, boolean dereference) throws IOException { checkPath(f); - Stat stat = new Stat(f, getDefaultBlockSize(f), dereference, this); + Stat stat = new Stat(f, defaultBlockSize, dereference, this); FileStatus status = stat.getFileStatus(); return status; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/SafeMode.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/SafeMode.java new file mode 100644 index 0000000000000..37713dfcf7835 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/SafeMode.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +/** + * Whether the given filesystem is in any status of safe mode. + */ +public interface SafeMode { + + /** + * Enter, leave, or get safe mode. + * + * @param action One of {@link SafeModeAction} LEAVE, ENTER, GET, FORCE_EXIT. + * @throws IOException if set safe mode fails to proceed. 
+ * @return true if the action is successfully accepted, otherwise false means rejected. + */ + default boolean setSafeMode(SafeModeAction action) throws IOException { + return setSafeMode(action, false); + } + + /** + * Enter, leave, or get safe mode. + * + * @param action One of {@link SafeModeAction} LEAVE, ENTER, GET, FORCE_EXIT. + * @param isChecked If true check only for Active metadata node / NameNode's status, + * else check first metadata node / NameNode's status. + * @throws IOException if set safe mode fails to proceed. + * @return true if the action is successfully accepted, otherwise false means rejected. + */ + boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/SafeModeAction.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/SafeModeAction.java new file mode 100644 index 0000000000000..775f41637e00f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/SafeModeAction.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs; + +/** + * An identical copy from org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction, that helps + * the other file system implementation to define {@link SafeMode}. + */ +public enum SafeModeAction { + /** + * Starting entering into safe mode. + */ + ENTER, + /** + * Gracefully exit from safe mode. + */ + LEAVE, + /** + * Force Exit from safe mode. + */ + FORCE_EXIT, + /** + * Get the status of the safe mode. + */ + GET; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java index 919c857ffa628..f7546d58e6084 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java @@ -32,17 +32,27 @@ public interface Seekable { * Seek to the given offset from the start of the file. * The next read() will be from that location. Can't * seek past the end of the file. + * + * @param pos offset from the start of the file. + * @throws IOException raised on errors performing I/O. */ void seek(long pos) throws IOException; - + /** * Return the current offset from the start of the file + * + * @return offset from the start of the file. + * @throws IOException raised on errors performing I/O. */ long getPos() throws IOException; /** - * Seeks a different copy of the data. Returns true if + * Seeks a different copy of the data. Returns true if * found a new source, false otherwise. + * + * @param targetPos target position. + * @return true if found a new source, false otherwise. + * @throws IOException raised on errors performing I/O. 
*/ @InterfaceAudience.Private boolean seekToNewSource(long targetPos) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java index 5e80a140175e6..e31e3b9c8ace1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java @@ -30,7 +30,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Shell; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Wrapper for the Unix stat(1) command. Used to workaround the lack of @@ -76,8 +76,8 @@ public FileStatus getFileStatus() throws IOException { } /** - * Whether Stat is supported on the current platform - * @return + * Whether Stat is supported on the current platform. + * @return if is available true, not false. */ public static boolean isAvailable() { if (Shell.LINUX || Shell.FREEBSD || Shell.MAC) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java index 74631b5695537..b4a86ab781280 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java @@ -19,6 +19,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; import java.util.Iterator; @@ -27,15 +28,16 @@ * instance. */ @InterfaceAudience.Public +@InterfaceStability.Stable public abstract class StorageStatistics { /** * These are common statistic names. - * + *

    * The following names are considered general and preserved across different * StorageStatistics classes. When implementing a new StorageStatistics, it is * highly recommended to use the common statistic names. - * + *

    * When adding new common statistic name constants, please make them unique. * By convention, they are implicitly unique: *

      @@ -43,39 +45,46 @@ public abstract class StorageStatistics { * underscores. *
    • the value of the constants are lowercase of the constant names.
    • *
    + * See {@link StoreStatisticNames} for the field names used here + * and elsewhere. */ @InterfaceStability.Evolving public interface CommonStatisticNames { // The following names are for file system operation invocations - String OP_APPEND = "op_append"; - String OP_COPY_FROM_LOCAL_FILE = "op_copy_from_local_file"; - String OP_CREATE = "op_create"; - String OP_CREATE_NON_RECURSIVE = "op_create_non_recursive"; - String OP_DELETE = "op_delete"; - String OP_EXISTS = "op_exists"; - String OP_GET_CONTENT_SUMMARY = "op_get_content_summary"; - String OP_GET_DELEGATION_TOKEN = "op_get_delegation_token"; - String OP_GET_FILE_CHECKSUM = "op_get_file_checksum"; - String OP_GET_FILE_STATUS = "op_get_file_status"; - String OP_GET_STATUS = "op_get_status"; - String OP_GLOB_STATUS = "op_glob_status"; - String OP_IS_FILE = "op_is_file"; - String OP_IS_DIRECTORY = "op_is_directory"; - String OP_LIST_FILES = "op_list_files"; - String OP_LIST_LOCATED_STATUS = "op_list_located_status"; - String OP_LIST_STATUS = "op_list_status"; - String OP_MKDIRS = "op_mkdirs"; - String OP_MODIFY_ACL_ENTRIES = "op_modify_acl_entries"; - String OP_OPEN = "op_open"; - String OP_REMOVE_ACL = "op_remove_acl"; - String OP_REMOVE_ACL_ENTRIES = "op_remove_acl_entries"; - String OP_REMOVE_DEFAULT_ACL = "op_remove_default_acl"; - String OP_RENAME = "op_rename"; - String OP_SET_ACL = "op_set_acl"; - String OP_SET_OWNER = "op_set_owner"; - String OP_SET_PERMISSION = "op_set_permission"; - String OP_SET_TIMES = "op_set_times"; - String OP_TRUNCATE = "op_truncate"; + String OP_APPEND = StoreStatisticNames.OP_APPEND; + String OP_COPY_FROM_LOCAL_FILE = + StoreStatisticNames.OP_COPY_FROM_LOCAL_FILE; + String OP_CREATE = StoreStatisticNames.OP_CREATE; + String OP_CREATE_NON_RECURSIVE = + StoreStatisticNames.OP_CREATE_NON_RECURSIVE; + String OP_DELETE = StoreStatisticNames.OP_DELETE; + String OP_EXISTS = StoreStatisticNames.OP_EXISTS; + String OP_GET_CONTENT_SUMMARY = + StoreStatisticNames.OP_GET_CONTENT_SUMMARY; 
+ String OP_GET_DELEGATION_TOKEN = + StoreStatisticNames.OP_GET_DELEGATION_TOKEN; + String OP_GET_FILE_CHECKSUM = StoreStatisticNames.OP_GET_FILE_CHECKSUM; + String OP_GET_FILE_STATUS = StoreStatisticNames.OP_GET_FILE_STATUS; + String OP_GET_STATUS = StoreStatisticNames.OP_GET_STATUS; + String OP_GLOB_STATUS = StoreStatisticNames.OP_GLOB_STATUS; + String OP_IS_FILE = StoreStatisticNames.OP_IS_FILE; + String OP_IS_DIRECTORY = StoreStatisticNames.OP_IS_DIRECTORY; + String OP_LIST_FILES = StoreStatisticNames.OP_LIST_FILES; + String OP_LIST_LOCATED_STATUS = + StoreStatisticNames.OP_LIST_LOCATED_STATUS; + String OP_LIST_STATUS = StoreStatisticNames.OP_LIST_STATUS; + String OP_MKDIRS = StoreStatisticNames.OP_MKDIRS; + String OP_MODIFY_ACL_ENTRIES = StoreStatisticNames.OP_MODIFY_ACL_ENTRIES; + String OP_OPEN = StoreStatisticNames.OP_OPEN; + String OP_REMOVE_ACL = StoreStatisticNames.OP_REMOVE_ACL; + String OP_REMOVE_ACL_ENTRIES = StoreStatisticNames.OP_REMOVE_ACL_ENTRIES; + String OP_REMOVE_DEFAULT_ACL = StoreStatisticNames.OP_REMOVE_DEFAULT_ACL; + String OP_RENAME = StoreStatisticNames.OP_RENAME; + String OP_SET_ACL = StoreStatisticNames.OP_SET_ACL; + String OP_SET_OWNER = StoreStatisticNames.OP_SET_OWNER; + String OP_SET_PERMISSION = StoreStatisticNames.OP_SET_PERMISSION; + String OP_SET_TIMES = StoreStatisticNames.OP_SET_TIMES; + String OP_TRUNCATE = StoreStatisticNames.OP_TRUNCATE; } /** @@ -118,6 +127,7 @@ public StorageStatistics(String name) { /** * Get the name of this StorageStatistics object. + * @return name of this StorageStatistics object */ public String getName() { return name; @@ -136,12 +146,15 @@ public String getScheme() { * * The values returned will depend on the type of FileSystem or FileContext * object. The values do not necessarily reflect a snapshot in time. + * + * @return LongStatistic Iterator. */ public abstract Iterator getLongStatistics(); /** * Get the value of a statistic. * + * @param key key. 
* @return null if the statistic is not being tracked or is not a * long statistic. The value of the statistic, otherwise. */ @@ -150,6 +163,7 @@ public String getScheme() { /** * Return true if a statistic is being tracked. * + * @param key key. * @return True only if the statistic is being tracked. */ public abstract boolean isTracked(String key); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java index e68e7b351ed78..93ed57ef83057 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java @@ -34,7 +34,11 @@ public interface StreamCapabilities { /** * Stream hflush capability implemented by {@link Syncable#hflush()}. + * + * Use the {@link #HSYNC} probe to check for the support of Syncable; + * it's that presence of {@code hsync()} which matters. */ + @Deprecated String HFLUSH = "hflush"; /** @@ -71,6 +75,30 @@ public interface StreamCapabilities { */ String PREADBYTEBUFFER = "in:preadbytebuffer"; + /** + * IOStatisticsSource API. + */ + String IOSTATISTICS = "iostatistics"; + + /** + * Support for vectored IO api. + * See {@code PositionedReadable#readVectored(List, IntFunction)}. + */ + String VECTOREDIO = "in:readvectored"; + + /** + * Stream abort() capability implemented by {@link Abortable#abort()}. + * This matches the Path Capability + * {@link CommonPathCapabilities#ABORTABLE_STREAM}. + */ + String ABORTABLE_STREAM = CommonPathCapabilities.ABORTABLE_STREAM; + + /** + * Streams that support IOStatistics context and capture thread-level + * IOStatistics. + */ + String IOSTATISTICS_CONTEXT = "fs.capability.iocontext.supported"; + /** * Capabilities that a stream can support and be queried for. 
*/ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Syncable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Syncable.java index 7ec3509ce1df6..9cd458592ca22 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Syncable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Syncable.java @@ -23,20 +23,24 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -/** This interface for flush/sync operation. */ +/** + * This is the interface for flush/sync operations. + * Consult the Hadoop filesystem specification for the definition of the + * semantics of these operations. + */ @InterfaceAudience.Public -@InterfaceStability.Evolving +@InterfaceStability.Stable public interface Syncable { - + /** Flush out the data in client's user buffer. After the return of * this call, new readers will see the data. * @throws IOException if any error occurs */ - public void hflush() throws IOException; - + void hflush() throws IOException; + /** Similar to posix fsync, flush out the data in client's user buffer * all the way to the disk device (but the disk may have it in its cache). 
* @throws IOException if error occurs */ - public void hsync() throws IOException; + void hsync() throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java index e29cb9a4e0e33..73749dd25497a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java @@ -23,8 +23,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.viewfs.ViewFileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.viewfs.Constants.*; /** * Provides a trash facility which supports pluggable Trash policies. @@ -43,6 +45,7 @@ public class Trash extends Configured { /** * Construct a trash can accessor. * @param conf a Configuration + * @throws IOException raised on errors performing I/O. */ public Trash(Configuration conf) throws IOException { this(FileSystem.get(conf), conf); @@ -52,6 +55,7 @@ public Trash(Configuration conf) throws IOException { * Construct a trash can accessor for the FileSystem provided. * @param fs the FileSystem * @param conf a Configuration + * @throws IOException raised on errors performing I/O. 
*/ public Trash(FileSystem fs, Configuration conf) throws IOException { super(conf); @@ -65,7 +69,7 @@ public Trash(FileSystem fs, Configuration conf) throws IOException { * Hence we get the file system of the fully-qualified resolved-path and * then move the path p to the trashbin in that volume, * @param fs - the filesystem of path p - * @param p - the path being deleted - to be moved to trasg + * @param p - the path being deleted - to be moved to trash * @param conf - configuration * @return false if the item is already in the trash or trash is disabled * @throws IOException on error @@ -92,52 +96,100 @@ public static boolean moveToAppropriateTrash(FileSystem fs, Path p, LOG.warn("Failed to get server trash configuration", e); throw new IOException("Failed to get server trash configuration", e); } + + /* + * In HADOOP-18144, we changed getTrashRoot() in ViewFileSystem to return a + * viewFS path, instead of a targetFS path. moveToTrash works for + * ViewFileSystem now. ViewFileSystem will do path resolution internally by + * itself. + * + * When localized trash flag is enabled: + * 1). if fs is a ViewFileSystem, we can initialize Trash() with a + * ViewFileSystem object; + * 2). When fs is not a ViewFileSystem, the only place we would need to + * resolve a path is for symbolic links. However, symlink is not + * enabled in Hadoop due to the complexity to support it + * (HADOOP-10019). + */ + if (conf.getBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, + CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT_DEFAULT)) { + Trash trash = new Trash(fs, conf); + return trash.moveToTrash(p); + } + Trash trash = new Trash(fullyResolvedFs, conf); return trash.moveToTrash(fullyResolvedPath); } /** - * Returns whether the trash is enabled for this filesystem + * Returns whether the trash is enabled for this filesystem. + * + * @return return if isEnabled true,not false. 
*/ public boolean isEnabled() { return trashPolicy.isEnabled(); } /** Move a file or directory to the current trash directory. + * + * @param path the path. * @return false if the item is already in the trash or trash is disabled + * @throws IOException raised on errors performing I/O. */ public boolean moveToTrash(Path path) throws IOException { return trashPolicy.moveToTrash(path); } - /** Create a trash checkpoint. */ + /** + * Create a trash checkpoint. + * @throws IOException raised on errors performing I/O. + */ public void checkpoint() throws IOException { trashPolicy.createCheckpoint(); } - /** Delete old checkpoint(s). */ + /** + * Delete old checkpoint(s). + * @throws IOException raised on errors performing I/O. + */ public void expunge() throws IOException { trashPolicy.deleteCheckpoint(); } - /** Delete all trash immediately. */ + /** + * Delete all trash immediately. + * @throws IOException raised on errors performing I/O. + */ public void expungeImmediately() throws IOException { trashPolicy.createCheckpoint(); trashPolicy.deleteCheckpointsImmediately(); } - /** get the current working directory */ + /** + * get the current working directory. + * + * @throws IOException on raised on errors performing I/O. + * @return Trash Dir. + */ Path getCurrentTrashDir() throws IOException { return trashPolicy.getCurrentTrashDir(); } - /** get the configured trash policy */ + /** + * get the configured trash policy. + * + * @return TrashPolicy. + */ TrashPolicy getTrashPolicy() { return trashPolicy; } - /** Return a {@link Runnable} that periodically empties the trash of all + /** + * Return a {@link Runnable} that periodically empties the trash of all * users, intended to be run by the superuser. + * + * @throws IOException on raised on errors performing I/O. + * @return Runnable. 
*/ public Runnable getEmptier() throws IOException { return trashPolicy.getEmptier(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java index 64fb81be99ee3..35e51f9e1cfb1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java @@ -60,27 +60,34 @@ public void initialize(Configuration conf, FileSystem fs) { /** * Returns whether the Trash Policy is enabled for this filesystem. + * + * @return if isEnabled true,not false. */ public abstract boolean isEnabled(); /** * Move a file or directory to the current trash directory. + * @param path the path. * @return false if the item is already in the trash or trash is disabled + * @throws IOException raised on errors performing I/O. */ public abstract boolean moveToTrash(Path path) throws IOException; /** - * Create a trash checkpoint. + * Create a trash checkpoint. + * @throws IOException raised on errors performing I/O. */ public abstract void createCheckpoint() throws IOException; /** * Delete old trash checkpoint(s). + * @throws IOException raised on errors performing I/O. */ public abstract void deleteCheckpoint() throws IOException; /** * Delete all checkpoints immediately, ie empty trash. + * @throws IOException raised on errors performing I/O. */ public abstract void deleteCheckpointsImmediately() throws IOException; @@ -94,6 +101,8 @@ public void initialize(Configuration conf, FileSystem fs) { * TrashPolicy#getCurrentTrashDir(Path path). * It returns the trash location correctly for the path specified no matter * the path is in encryption zone or not. + * + * @return the path. 
*/ public abstract Path getCurrentTrashDir(); @@ -102,7 +111,7 @@ public void initialize(Configuration conf, FileSystem fs) { * Policy * @param path path to be deleted * @return current trash directory for the path to be deleted - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Path getCurrentTrashDir(Path path) throws IOException { throw new UnsupportedOperationException(); @@ -111,6 +120,9 @@ public Path getCurrentTrashDir(Path path) throws IOException { /** * Return a {@link Runnable} that periodically empties the trash of all * users, intended to be run by the superuser. + * + * @throws IOException raised on errors performing I/O. + * @return Runnable. */ public abstract Runnable getEmptier() throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java index 18972ea3ecf79..bb38a934ea304 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java @@ -38,7 +38,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -191,8 +191,8 @@ public boolean moveToTrash(Path path) throws IOException { cause = e; } } - throw (IOException) - new IOException("Failed to move to trash: " + path).initCause(cause); + throw new IOException("Failed to move " + path + " to trash " + trashPath, + cause); } @SuppressWarnings("deprecation") diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/UnionStorageStatistics.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/UnionStorageStatistics.java index 3d5b6af794682..2497ded48e7e9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/UnionStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/UnionStorageStatistics.java @@ -20,7 +20,7 @@ import java.util.Iterator; import java.util.NoSuchElementException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java new file mode 100644 index 0000000000000..cf1b1ef969863 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.IntFunction; + +import org.apache.hadoop.fs.impl.CombinedFileRange; +import org.apache.hadoop.util.Preconditions; +import org.apache.hadoop.util.functional.Function4RaisingIOE; + +/** + * Utility class which implements helper methods used + * in vectored IO implementation. + */ +public final class VectoredReadUtils { + + private static final int TMP_BUFFER_MAX_SIZE = 64 * 1024; + + /** + * Validate a single range. + * @param range file range. + * @throws EOFException any EOF Exception. + */ + public static void validateRangeRequest(FileRange range) + throws EOFException { + + Preconditions.checkArgument(range.getLength() >= 0, "length is negative"); + if (range.getOffset() < 0) { + throw new EOFException("position is negative"); + } + } + + /** + * Validate a list of vectored read ranges. + * @param ranges list of ranges. + * @throws EOFException any EOF exception. + */ + public static void validateVectoredReadRanges(List ranges) + throws EOFException { + for (FileRange range : ranges) { + validateRangeRequest(range); + } + } + + + + /** + * This is the default implementation which iterates through the ranges + * to read each synchronously, but the intent is that subclasses + * can make more efficient readers. + * The data or exceptions are pushed into {@link FileRange#getData()}. 
+ * @param stream the stream to read the data from + * @param ranges the byte ranges to read + * @param allocate the byte buffer allocation + */ + public static void readVectored(PositionedReadable stream, + List ranges, + IntFunction allocate) { + for (FileRange range: ranges) { + range.setData(readRangeFrom(stream, range, allocate)); + } + } + + /** + * Synchronously reads a range from the stream dealing with the combinations + * of ByteBuffers buffers and PositionedReadable streams. + * @param stream the stream to read from + * @param range the range to read + * @param allocate the function to allocate ByteBuffers + * @return the CompletableFuture that contains the read data + */ + public static CompletableFuture readRangeFrom(PositionedReadable stream, + FileRange range, + IntFunction allocate) { + CompletableFuture result = new CompletableFuture<>(); + try { + ByteBuffer buffer = allocate.apply(range.getLength()); + if (stream instanceof ByteBufferPositionedReadable) { + ((ByteBufferPositionedReadable) stream).readFully(range.getOffset(), + buffer); + buffer.flip(); + } else { + readNonByteBufferPositionedReadable(stream, range, buffer); + } + result.complete(buffer); + } catch (IOException ioe) { + result.completeExceptionally(ioe); + } + return result; + } + + private static void readNonByteBufferPositionedReadable(PositionedReadable stream, + FileRange range, + ByteBuffer buffer) throws IOException { + if (buffer.isDirect()) { + readInDirectBuffer(range.getLength(), + buffer, + (position, buffer1, offset, length) -> { + stream.readFully(position, buffer1, offset, length); + return null; + }); + buffer.flip(); + } else { + stream.readFully(range.getOffset(), buffer.array(), + buffer.arrayOffset(), range.getLength()); + } + } + + /** + * Read bytes from stream into a byte buffer using an + * intermediate byte array. + * @param length number of bytes to read. + * @param buffer buffer to fill. + * @param operation operation to use for reading data. 
+ * @throws IOException any IOE. + */ + public static void readInDirectBuffer(int length, + ByteBuffer buffer, + Function4RaisingIOE operation) throws IOException { + if (length == 0) { + return; + } + int readBytes = 0; + int position = 0; + int tmpBufferMaxSize = Math.min(TMP_BUFFER_MAX_SIZE, length); + byte[] tmp = new byte[tmpBufferMaxSize]; + while (readBytes < length) { + int currentLength = (readBytes + tmpBufferMaxSize) < length ? + tmpBufferMaxSize + : (length - readBytes); + operation.apply(position, tmp, 0, currentLength); + buffer.put(tmp, 0, currentLength); + position = position + currentLength; + readBytes = readBytes + currentLength; + } + } + + /** + * Is the given input list. + *
<ul>
+ *   <li>already sorted by offset</li>
+ *   <li>each range is more than minimumSeek apart</li>
+ *   <li>the start and end of each range is a multiple of chunkSize</li>
+ * </ul>
    + * + * @param input the list of input ranges. + * @param chunkSize the size of the chunks that the offset and end must align to. + * @param minimumSeek the minimum distance between ranges. + * @return true if we can use the input list as is. + */ + public static boolean isOrderedDisjoint(List input, + int chunkSize, + int minimumSeek) { + long previous = -minimumSeek; + for (FileRange range: input) { + long offset = range.getOffset(); + long end = range.getOffset() + range.getLength(); + if (offset % chunkSize != 0 || + end % chunkSize != 0 || + (offset - previous < minimumSeek)) { + return false; + } + previous = end; + } + return true; + } + + /** + * Calculates floor value of offset based on chunk size. + * @param offset file offset. + * @param chunkSize file chunk size. + * @return floor value. + */ + public static long roundDown(long offset, int chunkSize) { + if (chunkSize > 1) { + return offset - (offset % chunkSize); + } else { + return offset; + } + } + + /** + * Calculates the ceil value of offset based on chunk size. + * @param offset file offset. + * @param chunkSize file chunk size. + * @return ceil value. + */ + public static long roundUp(long offset, int chunkSize) { + if (chunkSize > 1) { + long next = offset + chunkSize - 1; + return next - (next % chunkSize); + } else { + return offset; + } + } + + /** + * Check if the input ranges are overlapping in nature. + * We call two ranges to be overlapping when start offset + * of second is less than the end offset of first. + * End offset is calculated as start offset + length. + * @param input list if input ranges. + * @return true/false based on logic explained above. 
+ */ + public static List validateNonOverlappingAndReturnSortedRanges( + List input) { + + if (input.size() <= 1) { + return input; + } + FileRange[] sortedRanges = sortRanges(input); + FileRange prev = sortedRanges[0]; + for (int i=1; i input) { + FileRange[] sortedRanges = input.toArray(new FileRange[0]); + Arrays.sort(sortedRanges, Comparator.comparingLong(FileRange::getOffset)); + return sortedRanges; + } + + /** + * Merge sorted ranges to optimize the access from the underlying file + * system. + * The motivations are that: + *
<ul>
+ *   <li>Upper layers want to pass down logical file ranges.</li>
+ *   <li>Fewer reads have better performance.</li>
+ *   <li>Applications want callbacks as ranges are read.</li>
+ *   <li>Some file systems want to round ranges to be at checksum boundaries.</li>
+ * </ul>
    + * + * @param sortedRanges already sorted list of ranges based on offset. + * @param chunkSize round the start and end points to multiples of chunkSize + * @param minimumSeek the smallest gap that we should seek over in bytes + * @param maxSize the largest combined file range in bytes + * @return the list of sorted CombinedFileRanges that cover the input + */ + public static List mergeSortedRanges(List sortedRanges, + int chunkSize, + int minimumSeek, + int maxSize) { + + CombinedFileRange current = null; + List result = new ArrayList<>(sortedRanges.size()); + + // now merge together the ones that merge + for (FileRange range: sortedRanges) { + long start = roundDown(range.getOffset(), chunkSize); + long end = roundUp(range.getOffset() + range.getLength(), chunkSize); + if (current == null || !current.merge(start, end, range, minimumSeek, maxSize)) { + current = new CombinedFileRange(start, end, range); + result.add(current); + } + } + return result; + } + + /** + * Slice the data that was read to the user's request. + * This function assumes that the user's request is completely subsumed by the + * read data. This always creates a new buffer pointing to the same underlying + * data but with its own mark and position fields such that reading one buffer + * can't effect other's mark and position. 
+ * @param readData the buffer with the readData + * @param readOffset the offset in the file for the readData + * @param request the user's request + * @return the readData buffer that is sliced to the user's request + */ + public static ByteBuffer sliceTo(ByteBuffer readData, long readOffset, + FileRange request) { + int offsetChange = (int) (request.getOffset() - readOffset); + int requestLength = request.getLength(); + // Create a new buffer that is backed by the original contents + // The buffer will have position 0 and the same limit as the original one + readData = readData.slice(); + // Change the offset and the limit of the buffer as the reader wants to see + // only relevant data + readData.position(offsetChange); + readData.limit(offsetChange + requestLength); + // Create a new buffer after the limit change so that only that portion of the data is + // returned to the reader. + readData = readData.slice(); + return readData; + } + + /** + * private constructor. + */ + private VectoredReadUtils() { + throw new UnsupportedOperationException(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java index e15968dd6d273..3f1c9d7d92940 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java @@ -25,7 +25,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The value of XAttr is byte[], this class is to @@ -68,7 +68,7 @@ public enum XAttrCodec { * the given string is treated as text. * @param value string representation of the value. 
* @return byte[] the value - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static byte[] decodeValue(String value) throws IOException { byte[] result = null; @@ -103,9 +103,9 @@ public static byte[] decodeValue(String value) throws IOException { * while strings encoded as hexadecimal and base64 are prefixed with * 0x and 0s, respectively. * @param value byte[] value - * @param encoding + * @param encoding encoding. * @return String string representation of value - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static String encodeValue(byte[] value, XAttrCodec encoding) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java new file mode 100644 index 0000000000000..9edf92261544c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.audit; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Constants related to auditing. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class AuditConstants { + + private AuditConstants() { + } + + /** + * The host from where requests originate: {@value}. + * example.org is used as the IETF require that it never resolves. + * This isn't always met by some mobile/consumer DNS services, but + * we don't worry about that. What is important is that + * a scan for "example.org" in the logs will exclusively find + * entries from this referrer. + */ + public static final String REFERRER_ORIGIN_HOST = "audit.example.org"; + + /** + * Header: Command: {@value}. + * Set by tool runner. + */ + public static final String PARAM_COMMAND = "cm"; + + /** + * Header: FileSystem ID: {@value}. + */ + public static final String PARAM_FILESYSTEM_ID = "fs"; + + /** + * Header: operation ID: {@value}. + */ + public static final String PARAM_ID = "id"; + + /** + * JobID query header: {@value}. + */ + public static final String PARAM_JOB_ID = "ji"; + + /** + * Header: operation: {@value}. + * These should be from StoreStatisticNames or similar, + * and are expected to be at the granularity of FS + * API operations. + */ + public static final String PARAM_OP = "op"; + + /** + * Header: first path of operation: {@value}. + */ + public static final String PARAM_PATH = "p1"; + + /** + * Header: second path of operation: {@value}. + */ + public static final String PARAM_PATH2 = "p2"; + + /** + * Header: Principal: {@value}. + */ + public static final String PARAM_PRINCIPAL = "pr"; + + /** + * Header: Process ID: {@value}. + */ + public static final String PARAM_PROCESS = "ps"; + + /** + * Header: Range for GET request data: {@value}. + */ + public static final String PARAM_RANGE = "rg"; + + /** + * Task Attempt ID query header: {@value}. 
+ */ + public static final String PARAM_TASK_ATTEMPT_ID = "ta"; + + /** + * Thread 0: the thread which created a span {@value}. + */ + public static final String PARAM_THREAD0 = "t0"; + + /** + * Thread 1: the thread making the S3 request: {@value}. + */ + public static final String PARAM_THREAD1 = "t1"; + + /** + * Timestamp of span creation: {@value}. + */ + public static final String PARAM_TIMESTAMP = "ts"; + + /** + * Num of files to be deleted as part of the bulk delete request. + */ + public static final String DELETE_KEYS_SIZE = "ks"; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditStatisticNames.java new file mode 100644 index 0000000000000..0ee9d626bd9c7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditStatisticNames.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.audit; + +/** + * Statistic Names for Auditing. + */ +public final class AuditStatisticNames { + + private AuditStatisticNames() { + } + + /** + * Audit failure: {@value}. 
+ */ + public static final String AUDIT_FAILURE = "audit_failure"; + + /** + * A request was executed and the auditor invoked: {@value}. + */ + public static final String AUDIT_REQUEST_EXECUTION + = "audit_request_execution"; + + /** + * Audit span created: {@value}. + */ + public static final String AUDIT_SPAN_CREATION = "audit_span_creation"; + + /** + * Access check during audit rejected: {@value}. + */ + public static final String AUDIT_ACCESS_CHECK_FAILURE + = "audit_access_check_failure"; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java new file mode 100644 index 0000000000000..2dcd4f8b3f570 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.audit; + +import java.util.Iterator; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Supplier; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_COMMAND; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PROCESS; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD1; + +/** + * The common audit context is a map of common context information + * which can be used with any audit span. + * This context is shared across all Filesystems within the + * thread. + * Audit spans will be created with a reference to the current + * context of their thread; + * That reference is retained even as they are moved across threads, so + * context information (including thread ID Java runtime). + * + * The Global context entries are a set of key-value pairs which span + * all threads; the {@code HttpReferrerAuditHeader} picks these + * up automatically. It is intended for minimal use of + * shared constant values (process ID, entry point). + * + * An attribute set in {@link #setGlobalContextEntry(String, String)} + * will be set across all audit spans in all threads. + * + * The {@link #noteEntryPoint(Object)} method should be + * used in entry points (ToolRunner.run, etc). It extracts + * the final element of the classname and attaches that + * to the global context with the attribute key + * {@link AuditConstants#PARAM_COMMAND}, if not already + * set. + * This helps identify the application being executued. + * + * All other values set are specific to this context, which + * is thread local. 
+ * The attributes which can be added to ths common context include + * evaluator methods which will be evaluated in whichever thread + * invokes {@link #getEvaluatedEntries()} and then evaluates them. + * That map of evaluated options may evaluated later, in a different + * thread. + * + * For setting and clearing thread-level options, use + * {@link #currentAuditContext()} to get the thread-local + * context for the caller, which can then be manipulated. + * + * For further information, especially related to memory consumption, + * read the document `auditing_architecture` in the `hadoop-aws` module. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class CommonAuditContext { + + private static final Logger LOG = LoggerFactory.getLogger( + CommonAuditContext.class); + + /** + * Process ID; currently built from UUID and timestamp. + */ + public static final String PROCESS_ID = UUID.randomUUID().toString(); + + /** + * Context values which are global. + * To be used very sparingly. + */ + private static final Map GLOBAL_CONTEXT_MAP = + new ConcurrentHashMap<>(); + + /** + * Map of data. Concurrent so when shared across threads + * there are no problems. + * Supplier operations must themselves be thread safe. + */ + private final Map> evaluatedEntries = + new ConcurrentHashMap<>(1); + + static { + // process ID is fixed. + setGlobalContextEntry(PARAM_PROCESS, PROCESS_ID); + } + + /** + * Thread local context. + * Use a weak reference just to keep memory costs down. + * The S3A committers all have a strong reference, so if they are + * retained, context is retained. + * If a span retains the context, then it will also stay valid until + * the span is finalized. + */ + private static final ThreadLocal ACTIVE_CONTEXT = + ThreadLocal.withInitial(CommonAuditContext::createInstance); + + private CommonAuditContext() { + } + + /** + * Put a context entry. + * @param key key + * @param value new value., If null, triggers removal. 
+ * @return old value or null + */ + public Supplier put(String key, String value) { + if (value != null) { + return evaluatedEntries.put(key, () -> value); + } else { + return evaluatedEntries.remove(key); + } + } + + /** + * Put a context entry dynamically evaluated on demand. + * Important: as these supplier methods are long-lived, + * the supplier function MUST NOT be part of/refer to + * any object instance of significant memory size. + * Applications SHOULD remove references when they are + * no longer needed. + * When logged at TRACE, prints the key and stack trace of the caller, + * to allow for debugging of any problems. + * @param key key + * @param value new value + * @return old value or null + */ + public Supplier put(String key, Supplier value) { + if (LOG.isTraceEnabled()) { + LOG.trace("Adding context entry {}", key, new Exception(key)); + } + return evaluatedEntries.put(key, value); + } + + /** + * Remove a context entry. + * @param key key + */ + public void remove(String key) { + if (LOG.isTraceEnabled()) { + LOG.trace("Remove context entry {}", key); + } + evaluatedEntries.remove(key); + } + + /** + * Get a context entry. + * @param key key + * @return value or null + */ + public String get(String key) { + Supplier supplier = evaluatedEntries.get(key); + return supplier != null + ? supplier.get() + : null; + } + + /** + * Rest the context; will set the standard options again. + * Primarily for testing. + */ + public void reset() { + evaluatedEntries.clear(); + init(); + } + + /** + * Initialize. + */ + private void init() { + + // thread 1 is dynamic + put(PARAM_THREAD1, CommonAuditContext::currentThreadID); + } + + /** + * Does the context contain a specific key? + * @param key key + * @return true if it is in the context. + */ + public boolean containsKey(String key) { + return evaluatedEntries.containsKey(key); + } + + /** + * Demand invoked to create the instance for this thread. + * @return an instance. 
+ */ + private static CommonAuditContext createInstance() { + CommonAuditContext context = new CommonAuditContext(); + context.init(); + return context; + } + + /** + * Get the current common audit context. Thread local. + * @return the audit context of this thread. + */ + public static CommonAuditContext currentAuditContext() { + return ACTIVE_CONTEXT.get(); + } + + /** + * A thread ID which is unique for this process and shared across all + * S3A clients on the same thread, even those using different FS instances. + * @return a thread ID for reporting. + */ + public static String currentThreadID() { + return Long.toString(Thread.currentThread().getId()); + } + + /** + * Get the evaluated operations. + * This is the map unique to this context. + * @return the operations map. + */ + public Map> getEvaluatedEntries() { + return evaluatedEntries; + } + + /** + * Set a global entry. + * @param key key + * @param value value + */ + public static void setGlobalContextEntry(String key, String value) { + GLOBAL_CONTEXT_MAP.put(key, value); + } + + /** + * Get a global entry. + * @param key key + * @return value or null + */ + public static String getGlobalContextEntry(String key) { + return GLOBAL_CONTEXT_MAP.get(key); + } + + /** + * Remove a global entry. + * @param key key to clear. + */ + public static void removeGlobalContextEntry(String key) { + GLOBAL_CONTEXT_MAP.remove(key); + } + + /** + * Add the entry point as a context entry with the key + * {@link AuditConstants#PARAM_COMMAND} + * if it has not already been recorded. + * This is called via ToolRunner but may be used at any + * other entry point. + * @param tool object loaded/being launched. 
+ */ + public static void noteEntryPoint(Object tool) { + if (tool != null && !GLOBAL_CONTEXT_MAP.containsKey(PARAM_COMMAND)) { + String classname = tool.getClass().toString(); + int lastDot = classname.lastIndexOf('.'); + int l = classname.length(); + if (lastDot > 0 && lastDot < (l - 1)) { + String name = classname.substring(lastDot + 1, l); + setGlobalContextEntry(PARAM_COMMAND, name); + } + } + } + + /** + * Get an iterator over the global entries. + * Thread safe. + * @return an iterable to enumerate the values. + */ + public static Iterable> + getGlobalContextEntries() { + return new GlobalIterable(); + } + + /** + * Iterable to the global iterator. Avoids serving + * up full access to the map. + */ + private static final class GlobalIterable + implements Iterable> { + + @Override + public Iterator> iterator() { + return GLOBAL_CONTEXT_MAP.entrySet().iterator(); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/package-info.java new file mode 100644 index 0000000000000..16c224940dd03 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Public classes for adding information to any auditing information + * picked up by filesystem clients. + * + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +package org.apache.hadoop.fs.audit; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java index 4b144bfddf6c6..6899bb8d87426 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java @@ -20,11 +20,12 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.net.ConnectException; import java.net.URI; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.net.ftp.FTP; import org.apache.commons.net.ftp.FTPClient; import org.apache.commons.net.ftp.FTPFile; @@ -41,6 +42,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; @@ -110,7 +112,9 @@ public void initialize(URI uri, Configuration conf) throws IOException { // get // get port information from uri, (overrides info in conf) int port = uri.getPort(); 
- port = (port == -1) ? FTP.DEFAULT_PORT : port; + if(port == -1){ + port = conf.getInt(FS_FTP_HOST_PORT, FTP.DEFAULT_PORT); + } conf.setInt(FS_FTP_HOST_PORT, port); // get user/password information from URI (overrides info in conf) @@ -340,8 +344,19 @@ public FSDataOutputStream create(Path file, FsPermission permission, // file. The FTP client connection is closed when close() is called on the // FSDataOutputStream. client.changeWorkingDirectory(parent.toUri().getPath()); - FSDataOutputStream fos = new FSDataOutputStream(client.storeFileStream(file - .getName()), statistics) { + OutputStream outputStream = client.storeFileStream(file.getName()); + + if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { + // The ftpClient is an inconsistent state. Must close the stream + // which in turn will logout and disconnect from FTP server + if (outputStream != null) { + IOUtils.closeStream(outputStream); + } + disconnect(client); + throw new IOException("Unable to create file: " + file + ", Aborting"); + } + + FSDataOutputStream fos = new FSDataOutputStream(outputStream, statistics) { @Override public void close() throws IOException { super.close(); @@ -356,12 +371,6 @@ public void close() throws IOException { } } }; - if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { - // The ftpClient is an inconsistent state. 
Must close the stream - // which in turn will logout and disconnect from FTP server - fos.close(); - throw new IOException("Unable to create file: " + file + ", Aborting"); - } return fos; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java index baf0a8187efd0..44577baf85c07 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java @@ -60,7 +60,8 @@ public URI getUri() { @Override public FSDataInputStream open(Path path, int bufferSize) throws IOException { - URLConnection conn = path.toUri().toURL().openConnection(); + URI pathUri = makeQualified(path).toUri(); + URLConnection conn = pathUri.toURL().openConnection(); InputStream in = conn.getInputStream(); return new FSDataInputStream(new HttpDataInputStream(in)); } @@ -111,7 +112,7 @@ public boolean mkdirs(Path path, FsPermission fsPermission) @Override public FileStatus getFileStatus(Path path) throws IOException { - return new FileStatus(-1, false, 1, DEFAULT_BLOCK_SIZE, 0, path); + return new FileStatus(-1, false, 1, DEFAULT_BLOCK_SIZE, 0, makeQualified(path)); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java index 5fc92e97be76c..108b60256efa5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java @@ -26,7 +26,7 @@ import java.util.Optional; import java.util.Set; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -36,19 +36,21 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathHandle; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** * Builder for filesystem/filecontext operations of various kinds, * with option support. * * - * .opt("foofs:option.a", true) - * .opt("foofs:option.b", "value") - * .opt("barfs:cache", true) - * .must("foofs:cache", true) - * .must("barfs:cache-size", 256 * 1024 * 1024) + * .opt("fs.s3a.open.option.caching", true) + * .opt("fs.option.openfile.read.policy", "random, adaptive") + * .opt("fs.s3a.open.option.etag", "9fe4c37c25b") + * .optLong("fs.option.openfile.length", 1_500_000_000_000) + * .must("fs.option.openfile.buffer.size", 256_000) + * .mustLong("fs.option.openfile.split.start", 256_000_000) + * .mustLong("fs.option.openfile.split.end", 512_000_000) * .build(); * * @@ -64,6 +66,7 @@ */ @InterfaceAudience.Public @InterfaceStability.Unstable +@SuppressWarnings({"deprecation", "unused"}) public abstract class AbstractFSBuilderImpl> implements FSBuilder { @@ -88,6 +91,9 @@ /** Keep track of the keys for mandatory options. */ private final Set mandatoryKeys = new HashSet<>(); + /** Keep track of the optional keys. */ + private final Set optionalKeys = new HashSet<>(); + /** * Constructor with both optional path and path handle. 
* Either or both argument may be empty, but it is an error for @@ -163,6 +169,7 @@ public PathHandle getPathHandle() { @Override public B opt(@Nonnull final String key, @Nonnull final String value) { mandatoryKeys.remove(key); + optionalKeys.add(key); options.set(key, value); return getThisBuilder(); } @@ -174,9 +181,7 @@ public B opt(@Nonnull final String key, @Nonnull final String value) { */ @Override public B opt(@Nonnull final String key, boolean value) { - mandatoryKeys.remove(key); - options.setBoolean(key, value); - return getThisBuilder(); + return opt(key, Boolean.toString(value)); } /** @@ -186,9 +191,17 @@ public B opt(@Nonnull final String key, boolean value) { */ @Override public B opt(@Nonnull final String key, int value) { - mandatoryKeys.remove(key); - options.setInt(key, value); - return getThisBuilder(); + return optLong(key, value); + } + + @Override + public B opt(@Nonnull final String key, final long value) { + return optLong(key, value); + } + + @Override + public B optLong(@Nonnull final String key, final long value) { + return opt(key, Long.toString(value)); } /** @@ -198,9 +211,7 @@ public B opt(@Nonnull final String key, int value) { */ @Override public B opt(@Nonnull final String key, float value) { - mandatoryKeys.remove(key); - options.setFloat(key, value); - return getThisBuilder(); + return optLong(key, (long) value); } /** @@ -210,9 +221,17 @@ public B opt(@Nonnull final String key, float value) { */ @Override public B opt(@Nonnull final String key, double value) { - mandatoryKeys.remove(key); - options.setDouble(key, value); - return getThisBuilder(); + return optLong(key, (long) value); + } + + /** + * Set optional double parameter for the Builder. 
+ * + * @see #opt(String, String) + */ + @Override + public B optDouble(@Nonnull final String key, double value) { + return opt(key, Double.toString(value)); } /** @@ -223,6 +242,7 @@ public B opt(@Nonnull final String key, double value) { @Override public B opt(@Nonnull final String key, @Nonnull final String... values) { mandatoryKeys.remove(key); + optionalKeys.add(key); options.setStrings(key, values); return getThisBuilder(); } @@ -247,45 +267,47 @@ public B must(@Nonnull final String key, @Nonnull final String value) { */ @Override public B must(@Nonnull final String key, boolean value) { - mandatoryKeys.add(key); - options.setBoolean(key, value); - return getThisBuilder(); + return must(key, Boolean.toString(value)); } - /** - * Set mandatory int option. - * - * @see #must(String, String) - */ @Override - public B must(@Nonnull final String key, int value) { - mandatoryKeys.add(key); - options.setInt(key, value); - return getThisBuilder(); + public B mustLong(@Nonnull final String key, final long value) { + return must(key, Long.toString(value)); } /** - * Set mandatory float option. + * Set mandatory double parameter for the Builder. * - * @see #must(String, String) + * @see #must(String, String) */ @Override - public B must(@Nonnull final String key, float value) { - mandatoryKeys.add(key); - options.setFloat(key, value); - return getThisBuilder(); + public B mustDouble(@Nonnull final String key, double value) { + return must(key, Double.toString(value)); } /** - * Set mandatory double option. + * Set mandatory int option.
* * @see #must(String, String) */ + @Override + public B must(@Nonnull final String key, int value) { + return mustLong(key, value); + } + + @Override + public B must(@Nonnull final String key, final long value) { + return mustLong(key, value); + } + + @Override + public B must(@Nonnull final String key, final float value) { + return mustLong(key, (long) value); + } + @Override public B must(@Nonnull final String key, double value) { - mandatoryKeys.add(key); - options.setDouble(key, value); - return getThisBuilder(); + return mustLong(key, (long) value); } /** @@ -296,6 +318,7 @@ public B must(@Nonnull final String key, double value) { @Override public B must(@Nonnull final String key, @Nonnull final String... values) { mandatoryKeys.add(key); + optionalKeys.remove(key); options.setStrings(key, values); return getThisBuilder(); } @@ -310,10 +333,18 @@ public Configuration getOptions() { /** * Get all the keys that are set as mandatory keys. + * @return mandatory keys. */ public Set getMandatoryKeys() { return Collections.unmodifiableSet(mandatoryKeys); } + /** + * Get all the keys that are set as optional keys. + * @return optional keys. + */ + public Set getOptionalKeys() { + return Collections.unmodifiableSet(optionalKeys); + } /** * Reject a configuration if one or more mandatory keys are diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java new file mode 100644 index 0000000000000..5328e3c712414 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; + +import org.apache.hadoop.fs.MultipartUploader; +import org.apache.hadoop.fs.PartHandle; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.UploadHandle; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; + +/** + * Standard base class for Multipart Uploaders. + */ +public abstract class AbstractMultipartUploader implements MultipartUploader { + + /** + * Base path of upload. + */ + private final Path basePath; + + /** + * Instantiate. + * @param basePath base path + */ + protected AbstractMultipartUploader(final Path basePath) { + this.basePath = Objects.requireNonNull(basePath, "null path"); + } + + /** + * Perform any cleanup. + * The upload is not required to support any operations after this. + * @throws IOException problems on close. + */ + @Override + public void close() throws IOException { + } + + protected Path getBasePath() { + return basePath; + } + + /** + * Validate a path. + * @param path path to check. 
+ */ + protected void checkPath(Path path) { + Objects.requireNonNull(path, "null path"); + Preconditions.checkArgument(path.toString().startsWith(basePath.toString()), + "Path %s is not under %s", path, basePath); + } + + /** + * Utility method to validate uploadIDs. + * @param uploadId Upload ID + * @throws IllegalArgumentException invalid ID + */ + protected void checkUploadId(byte[] uploadId) + throws IllegalArgumentException { + checkArgument(uploadId != null, "null uploadId"); + checkArgument(uploadId.length > 0, + "Empty UploadId is not valid"); + } + + /** + * Utility method to validate partHandles. + * @param partHandles handles + * @throws IllegalArgumentException if the parts are invalid + */ + protected void checkPartHandles(Map<Integer, PartHandle> partHandles) { + checkArgument(!partHandles.isEmpty(), + "Empty upload"); + partHandles.keySet() + .stream() + .forEach(key -> + checkArgument(key > 0, + "Invalid part handle index %s", key)); + } + + /** + * Check all the arguments to the + * {@link MultipartUploader#putPart(UploadHandle, int, Path, InputStream, long)} + * operation. + * @param filePath Target path for upload (as {@link #startUpload(Path)}). + * @param inputStream Data for this part. Implementations MUST close this + * stream after reading in the data. + * @param partNumber Index of the part relative to others. + * @param uploadId Identifier from {@link #startUpload(Path)}. + * @param lengthInBytes Target length to read from the stream.
+ * @throws IllegalArgumentException invalid argument + */ + protected void checkPutArguments(Path filePath, + InputStream inputStream, + int partNumber, + UploadHandle uploadId, + long lengthInBytes) throws IllegalArgumentException { + checkPath(filePath); + checkArgument(inputStream != null, "null inputStream"); + checkArgument(partNumber > 0, "Invalid part number: %s", partNumber); + checkArgument(uploadId != null, "null uploadId"); + checkArgument(lengthInBytes >= 0, "Invalid part length: %s", lengthInBytes); + } + + /** + * {@inheritDoc}. + * @param path path to abort uploads under. + * @return a future to -1. + * @throws IOException raised on errors performing I/O. + */ + public CompletableFuture<Integer> abortUploadsUnderPath(Path path) + throws IOException { + checkPath(path); + CompletableFuture<Integer> f = new CompletableFuture<>(); + f.complete(-1); + return f; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java new file mode 100644 index 0000000000000..c9555a1e5414e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.impl; + +import org.apache.hadoop.fs.FileRange; + +import java.util.ArrayList; +import java.util.List; + +/** + * A file range that represents a set of underlying file ranges. + * This is used when we combine the user's FileRange objects + * together into a single read for efficiency. + */ +public class CombinedFileRange extends FileRangeImpl { + private List<FileRange> underlying = new ArrayList<>(); + + public CombinedFileRange(long offset, long end, FileRange original) { + super(offset, (int) (end - offset), null); + this.underlying.add(original); + } + + /** + * Get the list of ranges that were merged together to form this one. + * @return the list of input ranges + */ + public List<FileRange> getUnderlying() { + return underlying; + } + + /** + * Merge this input range into the current one, if it is compatible. + * It is assumed that otherOffset is greater or equal the current offset, + * which typically happens by sorting the input ranges on offset.
+ * @param otherOffset the offset to consider merging + * @param otherEnd the end to consider merging + * @param other the underlying FileRange to add if we merge + * @param minSeek the minimum distance that we'll seek without merging the + * ranges together + * @param maxSize the maximum size that we'll merge into a single range + * @return true if we have merged the range into this one + */ + public boolean merge(long otherOffset, long otherEnd, FileRange other, + int minSeek, int maxSize) { + long end = this.getOffset() + this.getLength(); + long newEnd = Math.max(end, otherEnd); + if (otherOffset - end >= minSeek || newEnd - this.getOffset() > maxSize) { + return false; + } + this.setLength((int) (newEnd - this.getOffset())); + underlying.add(other); + return true; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FSBuilderSupport.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FSBuilderSupport.java new file mode 100644 index 0000000000000..dc4a18eb2b549 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FSBuilderSupport.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.impl; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.store.LogExactlyOnce; + +/** + * Class to help with use of FSBuilder. + */ +public class FSBuilderSupport { + + private static final Logger LOG = + LoggerFactory.getLogger(FSBuilderSupport.class); + + public static final LogExactlyOnce LOG_PARSE_ERROR = new LogExactlyOnce(LOG); + + /** + * Options which are parsed. + */ + private final Configuration options; + + /** + * Constructor. + * @param options the configuration options from the builder. + */ + public FSBuilderSupport(final Configuration options) { + this.options = options; + } + + public Configuration getOptions() { + return options; + } + + /** + * Get a long value with resilience to unparseable values. + * Negative values are replaced with the default. + * @param key key to log + * @param defVal default value + * @return long value + */ + public long getPositiveLong(String key, long defVal) { + long l = getLong(key, defVal); + if (l < 0) { + LOG.debug("The option {} has a negative value {}, replacing with the default {}", + key, l, defVal); + l = defVal; + } + return l; + } + + /** + * Get a long value with resilience to unparseable values. 
+ * @param key key to log + * @param defVal default value + * @return long value + */ + public long getLong(String key, long defVal) { + final String v = options.getTrimmed(key, ""); + if (v.isEmpty()) { + return defVal; + } + try { + return options.getLong(key, defVal); + } catch (NumberFormatException e) { + final String msg = String.format( + "The option %s value \"%s\" is not a long integer; using the default value %s", + key, v, defVal); + // not a long, + LOG_PARSE_ERROR.warn(msg); + LOG.debug("{}", msg, e); + return defVal; + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java new file mode 100644 index 0000000000000..1239be764ba5c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.impl; + +import java.nio.ByteBuffer; +import java.util.concurrent.CompletableFuture; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileRange; + +/** + * A range of bytes from a file with an optional buffer to read those bytes + * for zero copy. This shouldn't be created directly via constructor rather + * factory defined in {@code FileRange#createFileRange} should be used. + */ +@InterfaceAudience.Private +public class FileRangeImpl implements FileRange { + private long offset; + private int length; + private CompletableFuture<ByteBuffer> reader; + + /** + * nullable reference to store in the range. + */ + private final Object reference; + + /** + * Create. + * @param offset offset in file + * @param length length of data to read. + * @param reference nullable reference to store in the range. + */ + public FileRangeImpl(long offset, int length, Object reference) { + this.offset = offset; + this.length = length; + this.reference = reference; + } + + @Override + public String toString() { + return "range[" + offset + "," + (offset + length) + ")"; + } + + @Override + public long getOffset() { + return offset; + } + + @Override + public int getLength() { + return length; + } + + public void setOffset(long offset) { + this.offset = offset; + } + + public void setLength(int length) { + this.length = length; + } + + @Override + public void setData(CompletableFuture<ByteBuffer> pReader) { + this.reader = pReader; + } + + @Override + public CompletableFuture<ByteBuffer> getData() { + return reader; + } + + @Override + public Object getReference() { + return reference; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java new file mode 100644 index 0000000000000..1fafd41b054b9 --- /dev/null +++
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; + +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.compress.utils.IOUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.BBPartHandle; +import org.apache.hadoop.fs.BBUploadHandle; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FSDataOutputStreamBuilder; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import 
org.apache.hadoop.fs.InternalOperations; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.PartHandle; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathHandle; +import org.apache.hadoop.fs.UploadHandle; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.functional.FutureIO; + +import static org.apache.hadoop.fs.Path.mergePaths; +import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; + +/** + * A MultipartUploader that uses the basic FileSystem commands. + * This is done in three stages: + *
<ul> + *
<li>Init - create a temp {@code _multipart} directory.</li> + *
<li>PutPart - copying the individual parts of the file to the temp + * directory.</li> + *
<li>Complete - use {@link FileSystem#concat} to merge the files; + * and then delete the temp directory.</li> + *
</ul>
    + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class FileSystemMultipartUploader extends AbstractMultipartUploader { + + private static final Logger LOG = LoggerFactory.getLogger( + FileSystemMultipartUploader.class); + + private final FileSystem fs; + + private final FileSystemMultipartUploaderBuilder builder; + + private final FsPermission permission; + + private final long blockSize; + + private final Options.ChecksumOpt checksumOpt; + + public FileSystemMultipartUploader( + final FileSystemMultipartUploaderBuilder builder, + FileSystem fs) { + super(builder.getPath()); + this.builder = builder; + this.fs = fs; + blockSize = builder.getBlockSize(); + checksumOpt = builder.getChecksumOpt(); + permission = builder.getPermission(); + } + + @Override + public CompletableFuture<UploadHandle> startUpload(Path filePath) + throws IOException { + checkPath(filePath); + return FutureIO.eval(() -> { + Path collectorPath = createCollectorPath(filePath); + fs.mkdirs(collectorPath, FsPermission.getDirDefault()); + + ByteBuffer byteBuffer = ByteBuffer.wrap( + collectorPath.toString().getBytes(Charsets.UTF_8)); + return BBUploadHandle.from(byteBuffer); + }); + } + + @Override + public CompletableFuture<PartHandle> putPart(UploadHandle uploadId, + int partNumber, Path filePath, + InputStream inputStream, + long lengthInBytes) + throws IOException { + checkPutArguments(filePath, inputStream, partNumber, uploadId, + lengthInBytes); + return FutureIO.eval(() -> innerPutPart(filePath, + inputStream, partNumber, uploadId, lengthInBytes)); + } + + private PartHandle innerPutPart(Path filePath, + InputStream inputStream, + int partNumber, + UploadHandle uploadId, + long lengthInBytes) + throws IOException { + byte[] uploadIdByteArray = uploadId.toByteArray(); + checkUploadId(uploadIdByteArray); + Path collectorPath = new Path(new String(uploadIdByteArray, 0, + uploadIdByteArray.length, Charsets.UTF_8)); + Path partPath = + mergePaths(collectorPath, mergePaths(new
Path(Path.SEPARATOR), + new Path(partNumber + ".part"))); + final FSDataOutputStreamBuilder fileBuilder = fs.createFile(partPath); + if (checksumOpt != null) { + fileBuilder.checksumOpt(checksumOpt); + } + if (permission != null) { + fileBuilder.permission(permission); + } + try (FSDataOutputStream fsDataOutputStream = + fileBuilder.blockSize(blockSize).build()) { + IOUtils.copy(inputStream, fsDataOutputStream, + this.builder.getBufferSize()); + } finally { + cleanupWithLogger(LOG, inputStream); + } + return BBPartHandle.from(ByteBuffer.wrap( + partPath.toString().getBytes(Charsets.UTF_8))); + } + + private Path createCollectorPath(Path filePath) { + String uuid = UUID.randomUUID().toString(); + return mergePaths(filePath.getParent(), + mergePaths(new Path(filePath.getName().split("\\.")[0]), + mergePaths(new Path("_multipart_" + uuid), + new Path(Path.SEPARATOR)))); + } + + private PathHandle getPathHandle(Path filePath) throws IOException { + FileStatus status = fs.getFileStatus(filePath); + return fs.getPathHandle(status); + } + + private long totalPartsLen(List<Path> partHandles) throws IOException { + long totalLen = 0; + for (Path p : partHandles) { + totalLen += fs.getFileStatus(p).getLen(); + } + return totalLen; + } + + @Override + public CompletableFuture<PathHandle> complete( + UploadHandle uploadId, + Path filePath, + Map<Integer, PartHandle> handleMap) throws IOException { + + checkPath(filePath); + return FutureIO.eval(() -> + innerComplete(uploadId, filePath, handleMap)); + } + + /** + * The upload complete operation.
+ * @param multipartUploadId the ID of the upload + * @param filePath path + * @param handleMap map of handles + * @return the path handle + * @throws IOException failure + */ + private PathHandle innerComplete( + UploadHandle multipartUploadId, Path filePath, + Map<Integer, PartHandle> handleMap) throws IOException { + + checkPath(filePath); + + checkUploadId(multipartUploadId.toByteArray()); + + checkPartHandles(handleMap); + List<Map.Entry<Integer, PartHandle>> handles = + new ArrayList<>(handleMap.entrySet()); + handles.sort(Comparator.comparingInt(Map.Entry::getKey)); + + List<Path> partHandles = handles + .stream() + .map(pair -> { + byte[] byteArray = pair.getValue().toByteArray(); + return new Path(new String(byteArray, 0, byteArray.length, + Charsets.UTF_8)); + }) + .collect(Collectors.toList()); + + int count = partHandles.size(); + // built up to identify duplicates -if the size of this set is + // below that of the number of parts, then there's a duplicate entry. + Set<Path> values = new HashSet<>(count); + values.addAll(partHandles); + Preconditions.checkArgument(values.size() == count, + "Duplicate PartHandles"); + byte[] uploadIdByteArray = multipartUploadId.toByteArray(); + Path collectorPath = new Path(new String(uploadIdByteArray, 0, + uploadIdByteArray.length, Charsets.UTF_8)); + + boolean emptyFile = totalPartsLen(partHandles) == 0; + if (emptyFile) { + fs.create(filePath).close(); + } else { + Path filePathInsideCollector = mergePaths(collectorPath, + new Path(Path.SEPARATOR + filePath.getName())); + fs.create(filePathInsideCollector).close(); + fs.concat(filePathInsideCollector, + partHandles.toArray(new Path[handles.size()])); + new InternalOperations() + .rename(fs, filePathInsideCollector, filePath, + Options.Rename.OVERWRITE); + } + fs.delete(collectorPath, true); + return getPathHandle(filePath); + } + + @Override + public CompletableFuture<Void> abort(UploadHandle uploadId, + Path filePath) + throws IOException { + checkPath(filePath); + byte[] uploadIdByteArray = uploadId.toByteArray(); +
checkUploadId(uploadIdByteArray); + Path collectorPath = new Path(new String(uploadIdByteArray, 0, + uploadIdByteArray.length, Charsets.UTF_8)); + + return FutureIO.eval(() -> { + // force a check for a file existing; raises FNFE if not found + fs.getFileStatus(collectorPath); + fs.delete(collectorPath, true); + return null; + }); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploaderBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploaderBuilder.java new file mode 100644 index 0000000000000..7c4d995c69d1b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploaderBuilder.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.impl; + +import javax.annotation.Nonnull; +import java.io.IOException; +import java.util.EnumSet; + +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; + +/** + * Builder for {@link FileSystemMultipartUploader}. + */ +public class FileSystemMultipartUploaderBuilder extends + MultipartUploaderBuilderImpl { + + public FileSystemMultipartUploaderBuilder( + @Nonnull final FileSystem fileSystem, + @Nonnull final Path path) { + super(fileSystem, path); + } + + @Override + public FileSystemMultipartUploaderBuilder getThisBuilder() { + return this; + } + + @Override + public FileSystemMultipartUploader build() + throws IllegalArgumentException, IOException { + return new FileSystemMultipartUploader(this, getFS()); + } + + @Override + public FileSystem getFS() { + return super.getFS(); + } + + @Override + public FsPermission getPermission() { + return super.getPermission(); + } + + @Override + public int getBufferSize() { + return super.getBufferSize(); + } + + @Override + public short getReplication() { + return super.getReplication(); + } + + @Override + public EnumSet getFlags() { + return super.getFlags(); + } + + @Override + public Options.ChecksumOpt getChecksumOpt() { + return super.getChecksumOpt(); + } + + @Override + protected long getBlockSize() { + return super.getBlockSize(); + } + + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FsLinkResolution.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FsLinkResolution.java index f5ef8c4923328..8d4bebda15096 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FsLinkResolution.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FsLinkResolution.java @@ -20,7 +20,7 @@ import 
java.io.IOException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FunctionsRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FunctionsRaisingIOE.java index 7bbb34622647d..551cf9cff3d6f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FunctionsRaisingIOE.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FunctionsRaisingIOE.java @@ -24,7 +24,8 @@ import org.apache.hadoop.classification.InterfaceStability; /** - * Evolving support for functional programming/lambda-expressions. + * Support for functional programming/lambda-expressions. + * @deprecated use {@code org.apache.hadoop.util.functional} */ @InterfaceAudience.Private @InterfaceStability.Unstable @@ -37,6 +38,7 @@ private FunctionsRaisingIOE() { * Function of arity 1 which may raise an IOException. * @param type of arg1 * @param type of return value. + * @deprecated use {@link org.apache.hadoop.util.functional.FunctionRaisingIOE} */ @FunctionalInterface public interface FunctionRaisingIOE { @@ -49,6 +51,7 @@ public interface FunctionRaisingIOE { * @param type of arg1 * @param type of arg2 * @param type of return value. + * @deprecated use {@link org.apache.hadoop.util.functional.BiFunctionRaisingIOE} */ @FunctionalInterface public interface BiFunctionRaisingIOE { @@ -59,6 +62,7 @@ public interface BiFunctionRaisingIOE { /** * This is a callable which only raises an IOException. 
* @param return type + * @deprecated use {@link org.apache.hadoop.util.functional.CallableRaisingIOE} */ @FunctionalInterface public interface CallableRaisingIOE { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java index 24a8d49747fe6..833c21ec1a67f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.impl; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.util.concurrent.CompletableFuture; @@ -47,7 +48,7 @@ * options accordingly, for example: * * If the option is not related to the file system, the option will be ignored. - * If the option is must, but not supported by the file system, a + * If the option is must, but not supported/known by the file system, an * {@link IllegalArgumentException} will be thrown. * */ @@ -125,6 +126,9 @@ protected int getBufferSize() { /** * Set the size of the buffer to be used. + * + * @param bufSize buffer size. + * @return FutureDataInputStreamBuilder. */ public FutureDataInputStreamBuilder bufferSize(int bufSize) { bufferSize = bufSize; @@ -136,6 +140,8 @@ public FutureDataInputStreamBuilder bufferSize(int bufSize) { * This must be used after the constructor has been invoked to create * the actual builder: it allows for subclasses to do things after * construction. + * + * @return FutureDataInputStreamBuilder. 
*/ public FutureDataInputStreamBuilder builder() { return getThisBuilder(); @@ -147,8 +153,9 @@ public FutureDataInputStreamBuilder getThisBuilder() { } @Override - public FutureDataInputStreamBuilder withFileStatus(FileStatus st) { - this.status = requireNonNull(st, "status"); + public FutureDataInputStreamBuilder withFileStatus( + @Nullable FileStatus st) { + this.status = st; return this; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java index 26856e5b935e0..0a080426c2b24 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.io.InterruptedIOException; -import java.util.Map; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -31,12 +31,21 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSBuilder; +import org.apache.hadoop.util.functional.CallableRaisingIOE; +import org.apache.hadoop.util.functional.FutureIO; /** * Support for future IO and the FS Builder subclasses. + * All methods in this class have been superseded by those in + * {@link FutureIO}. + * The methods here are retained but all marked as deprecated. + * This is to ensure that any external + * filesystem implementations can still use these methods + * without linkage problems surfacing. */ @InterfaceAudience.Private @InterfaceStability.Unstable +@Deprecated public final class FutureIOSupport { private FutureIOSupport() { @@ -45,6 +54,7 @@ private FutureIOSupport() { /** * Given a future, evaluate it. 
Raised exceptions are * extracted and handled. + * See {@link FutureIO#awaitFuture(Future, long, TimeUnit)}. * @param future future to evaluate * @param type of the result. * @return the result, if all went well. @@ -52,54 +62,40 @@ private FutureIOSupport() { * @throws IOException if something went wrong * @throws RuntimeException any nested RTE thrown */ + @Deprecated public static T awaitFuture(final Future future) throws InterruptedIOException, IOException, RuntimeException { - try { - return future.get(); - } catch (InterruptedException e) { - throw (InterruptedIOException)new InterruptedIOException(e.toString()) - .initCause(e); - } catch (ExecutionException e) { - return raiseInnerCause(e); - } + return FutureIO.awaitFuture(future); } /** * Given a future, evaluate it. Raised exceptions are * extracted and handled. + * See {@link FutureIO#awaitFuture(Future, long, TimeUnit)}. * @param future future to evaluate * @param type of the result. + * @param timeout timeout. + * @param unit unit. * @return the result, if all went well. * @throws InterruptedIOException future was interrupted * @throws IOException if something went wrong * @throws RuntimeException any nested RTE thrown * @throws TimeoutException the future timed out. */ + @Deprecated public static T awaitFuture(final Future future, final long timeout, final TimeUnit unit) throws InterruptedIOException, IOException, RuntimeException, TimeoutException { - - try { - return future.get(timeout, unit); - } catch (InterruptedException e) { - throw (InterruptedIOException)new InterruptedIOException(e.toString()) - .initCause(e); - } catch (ExecutionException e) { - return raiseInnerCause(e); - } + return FutureIO.awaitFuture(future, timeout, unit); } - /** * From the inner cause of an execution exception, extract the inner cause * if it is an IOE or RTE. - * This will always raise an exception, either the inner IOException, - * an inner RuntimeException, or a new IOException wrapping the raised - * exception. 
- * + * See {@link FutureIO#raiseInnerCause(ExecutionException)}. * @param e exception. * @param type of return value. * @return nothing, ever. @@ -107,14 +103,16 @@ public static T awaitFuture(final Future future, * any non-Runtime-Exception * @throws RuntimeException if that is the inner cause. */ + @Deprecated public static T raiseInnerCause(final ExecutionException e) throws IOException { - throw unwrapInnerException(e); + return FutureIO.raiseInnerCause(e); } /** * Extract the cause of a completion failure and rethrow it if an IOE * or RTE. + * See {@link FutureIO#raiseInnerCause(CompletionException)}. * @param e exception. * @param type of return value. * @return nothing, ever. @@ -122,54 +120,15 @@ public static T raiseInnerCause(final ExecutionException e) * any non-Runtime-Exception * @throws RuntimeException if that is the inner cause. */ + @Deprecated public static T raiseInnerCause(final CompletionException e) throws IOException { - throw unwrapInnerException(e); - } - - /** - * From the inner cause of an execution exception, extract the inner cause. - * If it is an RTE: throw immediately. - * If it is an IOE: Return. - * If it is a WrappedIOException: Unwrap and return - * Else: create a new IOException. - * - * Recursively handles wrapped Execution and Completion Exceptions in - * case something very complicated has happened. - * @param e exception. - * @return an IOException extracted or built from the cause. - * @throws RuntimeException if that is the inner cause. 
- */ - private static IOException unwrapInnerException(final Throwable e) { - Throwable cause = e.getCause(); - if (cause instanceof IOException) { - return (IOException) cause; - } else if (cause instanceof WrappedIOException) { - return ((WrappedIOException) cause).getCause(); - } else if (cause instanceof CompletionException) { - return unwrapInnerException(cause); - } else if (cause instanceof ExecutionException) { - return unwrapInnerException(cause); - } else if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else if (cause != null) { - // other type: wrap with a new IOE - return new IOException(cause); - } else { - // this only happens if there was no cause. - return new IOException(e); - } + return FutureIO.raiseInnerCause(e); } /** - * Propagate options to any builder, converting everything with the - * prefix to an option where, if there were 2+ dot-separated elements, - * it is converted to a schema. - *
    -   *   fs.example.s3a.option => s3a:option
    -   *   fs.example.fs.io.policy => s3a.io.policy
    -   *   fs.example.something => something
    -   * 
    + * Propagate options to any builder. + * {@link FutureIO#propagateOptions(FSBuilder, Configuration, String, String)} * @param builder builder to modify * @param conf configuration to read * @param optionalPrefix prefix for optional settings @@ -178,50 +137,47 @@ private static IOException unwrapInnerException(final Throwable e) { * @param type of builder * @return the builder passed in. */ + @Deprecated public static > FSBuilder propagateOptions( final FSBuilder builder, final Configuration conf, final String optionalPrefix, final String mandatoryPrefix) { - propagateOptions(builder, conf, - optionalPrefix, false); - propagateOptions(builder, conf, - mandatoryPrefix, true); - return builder; + return FutureIO.propagateOptions(builder, + conf, optionalPrefix, mandatoryPrefix); } /** - * Propagate options to any builder, converting everything with the - * prefix to an option where, if there were 2+ dot-separated elements, - * it is converted to a schema. - *
    -   *   fs.example.s3a.option => s3a:option
    -   *   fs.example.fs.io.policy => s3a.io.policy
    -   *   fs.example.something => something
    -   * 
    + * Propagate options to any builder. + * {@link FutureIO#propagateOptions(FSBuilder, Configuration, String, boolean)} * @param builder builder to modify * @param conf configuration to read * @param prefix prefix to scan/strip * @param mandatory are the options to be mandatory or optional? */ + @Deprecated public static void propagateOptions( final FSBuilder builder, final Configuration conf, final String prefix, final boolean mandatory) { + FutureIO.propagateOptions(builder, conf, prefix, mandatory); + } - final String p = prefix.endsWith(".") ? prefix : (prefix + "."); - final Map propsWithPrefix = conf.getPropsWithPrefix(p); - for (Map.Entry entry : propsWithPrefix.entrySet()) { - // change the schema off each entry - String key = entry.getKey(); - String val = entry.getValue(); - if (mandatory) { - builder.must(key, val); - } else { - builder.opt(key, val); - } - } + /** + * Evaluate a CallableRaisingIOE in the current thread, + * converting IOEs to RTEs and propagating. + * See {@link FutureIO#eval(CallableRaisingIOE)}. + * + * @param callable callable to invoke + * @param Return type. + * @return the evaluated result. + * @throws UnsupportedOperationException fail fast if unsupported + * @throws IllegalArgumentException invalid argument + */ + public static CompletableFuture eval( + CallableRaisingIOE callable) { + return FutureIO.eval(callable); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java new file mode 100644 index 0000000000000..1d8c4e5e0beb9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java @@ -0,0 +1,218 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.impl; + +import javax.annotation.Nonnull; +import java.io.IOException; +import java.util.EnumSet; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsServerDefaults; +import org.apache.hadoop.fs.MultipartUploader; +import org.apache.hadoop.fs.MultipartUploaderBuilder; +import org.apache.hadoop.fs.Options.ChecksumOpt; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; + +/** + * Builder for {@link MultipartUploader} implementations. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class MultipartUploaderBuilderImpl + > + extends AbstractFSBuilderImpl + implements MultipartUploaderBuilder { + + private final FileSystem fs; + + private FsPermission permission; + + private int bufferSize; + + private short replication; + + private long blockSize; + + private final EnumSet flags = EnumSet.noneOf(CreateFlag.class); + + private ChecksumOpt checksumOpt; + + /** + * Return the concrete implementation of the builder instance. + */ + public abstract B getThisBuilder(); + + /** + * Construct from a {@link FileContext}. + * + * @param fc FileContext + * @param p path. + * @throws IOException failure + */ + protected MultipartUploaderBuilderImpl(@Nonnull FileContext fc, + @Nonnull Path p) throws IOException { + super(checkNotNull(p)); + checkNotNull(fc); + this.fs = null; + + FsServerDefaults defaults = fc.getServerDefaults(p); + bufferSize = defaults.getFileBufferSize(); + replication = defaults.getReplication(); + blockSize = defaults.getBlockSize(); + } + + /** + * Constructor. + * + * @param fileSystem fileSystem. + * @param p path. + */ + protected MultipartUploaderBuilderImpl(@Nonnull FileSystem fileSystem, + @Nonnull Path p) { + super(fileSystem.makeQualified(checkNotNull(p))); + checkNotNull(fileSystem); + fs = fileSystem; + bufferSize = fs.getConf().getInt(IO_FILE_BUFFER_SIZE_KEY, + IO_FILE_BUFFER_SIZE_DEFAULT); + replication = fs.getDefaultReplication(p); + blockSize = fs.getDefaultBlockSize(p); + } + + protected FileSystem getFS() { + checkNotNull(fs); + return fs; + } + + protected FsPermission getPermission() { + if (permission == null) { + permission = FsPermission.getFileDefault(); + } + return permission; + } + + /** + * Set permission for the file. 
+ */ + @Override + public B permission(@Nonnull final FsPermission perm) { + checkNotNull(perm); + permission = perm; + return getThisBuilder(); + } + + protected int getBufferSize() { + return bufferSize; + } + + /** + * Set the size of the buffer to be used. + */ + @Override + public B bufferSize(int bufSize) { + bufferSize = bufSize; + return getThisBuilder(); + } + + protected short getReplication() { + return replication; + } + + /** + * Set replication factor. + */ + @Override + public B replication(short replica) { + replication = replica; + return getThisBuilder(); + } + + protected long getBlockSize() { + return blockSize; + } + + /** + * Set block size. + */ + @Override + public B blockSize(long blkSize) { + blockSize = blkSize; + return getThisBuilder(); + } + + protected EnumSet getFlags() { + return flags; + } + + /** + * Create an FSDataOutputStream at the specified path. + */ + @Override + public B create() { + flags.add(CreateFlag.CREATE); + return getThisBuilder(); + } + + /** + * Set to true to overwrite the existing file. + * Set it to false, an exception will be thrown when calling {@link #build()} + * if the file exists. + */ + @Override + public B overwrite(boolean overwrite) { + if (overwrite) { + flags.add(CreateFlag.OVERWRITE); + } else { + flags.remove(CreateFlag.OVERWRITE); + } + return getThisBuilder(); + } + + /** + * Append to an existing file (optional operation). + */ + @Override + public B append() { + flags.add(CreateFlag.APPEND); + return getThisBuilder(); + } + + protected ChecksumOpt getChecksumOpt() { + return checksumOpt; + } + + /** + * Set checksum opt. 
+ */ + @Override + public B checksumOpt(@Nonnull final ChecksumOpt chksumOpt) { + checkNotNull(chksumOpt); + checksumOpt = chksumOpt; + return getThisBuilder(); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java index 77b4ff52696a3..a19c5faff4d90 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java @@ -38,6 +38,9 @@ public class OpenFileParameters { */ private Set mandatoryKeys; + /** The optional keys. */ + private Set optionalKeys; + /** * Options set during the build sequence. */ @@ -61,6 +64,11 @@ public OpenFileParameters withMandatoryKeys(final Set keys) { return this; } + public OpenFileParameters withOptionalKeys(final Set keys) { + this.optionalKeys = requireNonNull(keys); + return this; + } + public OpenFileParameters withOptions(final Configuration opts) { this.options = requireNonNull(opts); return this; @@ -80,6 +88,10 @@ public Set getMandatoryKeys() { return mandatoryKeys; } + public Set getOptionalKeys() { + return optionalKeys; + } + public Configuration getOptions() { return options; } @@ -91,4 +103,5 @@ public int getBufferSize() { public FileStatus getStatus() { return status; } + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/PathCapabilitiesSupport.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/PathCapabilitiesSupport.java index 9332ac6e7eedb..1e3e43581dccc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/PathCapabilitiesSupport.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/PathCapabilitiesSupport.java @@ -25,7 +25,7 @@ import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.fs.PathCapabilities; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; @InterfaceAudience.Private @InterfaceStability.Evolving diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/StoreImplementationUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/StoreImplementationUtils.java new file mode 100644 index 0000000000000..605a3538d8b6b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/StoreImplementationUtils.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +package org.apache.hadoop.fs.impl; + +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.StreamCapabilities; + +import static org.apache.hadoop.fs.StreamCapabilities.HFLUSH; +import static org.apache.hadoop.fs.StreamCapabilities.HSYNC; + +/** + * Utility classes to help implementing filesystems and streams. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class StoreImplementationUtils { + + private StoreImplementationUtils() { + } + + /** + * Check whether the probed capability is {@link StreamCapabilities#HSYNC} + * or {@link StreamCapabilities#HFLUSH}, i.e. the + * {@code Syncable.hsync()} and {@code Syncable.hflush()} functionality. + * @param capability capability string. + * @return true if the capability refers to one of the Syncable operations. + */ + public static boolean isProbeForSyncable(String capability) { + return capability.equalsIgnoreCase(HSYNC) || + capability.equalsIgnoreCase(HFLUSH); + } + + /** + * Probe for an object having a capability; returns true + * if the stream implements {@link StreamCapabilities} and its + * {@code hasCapability()} method returns true for the capability. + * This is a package private method intended to provide a common + * implementation for input and output streams. + * {@link StreamCapabilities#hasCapability(String)} call is for public use. + * @param object object to probe. + * @param capability capability to probe for + * @return true if the object implements stream capabilities and + * declares that it supports the capability. + */ + static boolean objectHasCapability(Object object, String capability) { + if (object instanceof StreamCapabilities) { + return ((StreamCapabilities) object).hasCapability(capability); + } + return false; + } + + /** + * Probe for an output stream having a capability; returns true + * if the stream implements {@link StreamCapabilities} and its + * {@code hasCapability()} method returns true for the capability. + * @param out output stream + * @param capability capability to probe for + * @return true if the stream declares that it supports the capability.
+ */ + public static boolean hasCapability(OutputStream out, String capability) { + return objectHasCapability(out, capability); + } + + /** + * Probe for an input stream having a capability; returns true + * if the stream implements {@link StreamCapabilities} and its + * {@code hasCapability()} method returns true for the capability. + * @param in input stream + * @param capability capability to probe for + * @return true if the stream declares that it supports the capability. + */ + public static boolean hasCapability(InputStream in, String capability) { + return objectHasCapability(in, capability); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakRefMetricsSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakRefMetricsSource.java new file mode 100644 index 0000000000000..146773857934c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakRefMetricsSource.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.impl; + +import java.lang.ref.WeakReference; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsSource; + +import static java.util.Objects.requireNonNull; + +/** + * A weak referenced metrics source which avoids hanging on to large objects + * if somehow they don't get fully closed/cleaned up. + * The JVM may clean up all objects which are only weakly referenced whenever + * it does a GC, even if there is no memory pressure. + * To avoid these refs being removed, always keep a strong reference around + * somewhere. + */ +@InterfaceAudience.Private +public class WeakRefMetricsSource implements MetricsSource { + + /** + * Name to know when unregistering. + */ + private final String name; + + /** + * Underlying metrics source. + */ + private final WeakReference sourceWeakReference; + + /** + * Constructor. + * @param name Name to know when unregistering. + * @param source metrics source + */ + public WeakRefMetricsSource(final String name, final MetricsSource source) { + this.name = name; + this.sourceWeakReference = new WeakReference<>(requireNonNull(source)); + } + + /** + * If the weak reference is non null, update the metrics. + * @param collector to contain the resulting metrics snapshot + * @param all if true, return all metrics even if unchanged. + */ + @Override + public void getMetrics(final MetricsCollector collector, final boolean all) { + MetricsSource metricsSource = sourceWeakReference.get(); + if (metricsSource != null) { + metricsSource.getMetrics(collector, all); + } + } + + /** + * Name to know when unregistering. + * @return the name passed in during construction. 
+ */ + public String getName() { + return name; + } + + /** + * Get the source, will be null if the reference has been GC'd + * @return the source reference + */ + public MetricsSource getSource() { + return sourceWeakReference.get(); + } + + @Override + public String toString() { + return "WeakRefMetricsSource{" + + "name='" + name + '\'' + + ", sourceWeakReference is " + + (sourceWeakReference.get() == null ? "unset" : "set") + + '}'; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java new file mode 100644 index 0000000000000..06be20310e43e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.impl; + +import java.lang.ref.WeakReference; +import java.util.function.Consumer; +import java.util.function.Function; +import javax.annotation.Nullable; + +import org.apache.hadoop.util.WeakReferenceMap; + +import static java.util.Objects.requireNonNull; + +/** + * A WeakReferenceMap for threads. + * @param value type of the map + */ +public class WeakReferenceThreadMap extends WeakReferenceMap { + + public WeakReferenceThreadMap(final Function factory, + @Nullable final Consumer referenceLost) { + super(factory, referenceLost); + } + + /** + * Get the value for the current thread, creating if needed. + * @return an instance. + */ + public V getForCurrentThread() { + return get(currentThreadId()); + } + + /** + * Remove the reference for the current thread. + * @return any reference value which existed. + */ + public V removeForCurrentThread() { + return remove(currentThreadId()); + } + + /** + * Get the current thread ID. + * @return thread ID. + */ + public long currentThreadId() { + return Thread.currentThread().getId(); + } + + /** + * Set the new value for the current thread. + * @param newVal new reference to set for the active thread. + * @return the previously set value, possibly null + */ + public V setForCurrentThread(V newVal) { + requireNonNull(newVal); + long id = currentThreadId(); + + // if the same object is already in the map, just return it. + WeakReference existingWeakRef = lookup(id); + + // The looked up reference could be one of + // 1. null: nothing there + // 2. valid but get() == null : reference lost by GC. + // 3. different from the new value + // 4. 
the same as the new value + if (resolve(existingWeakRef) == newVal) { + // case 4: do nothing, return the new value + return newVal; + } else { + // cases 1, 2, 3: update the map and return the old value + return put(id, newVal); + } + + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java index 1de1ecb785368..3f828897b1d6c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java @@ -19,24 +19,24 @@ package org.apache.hadoop.fs.impl; import java.io.IOException; -import java.util.concurrent.ExecutionException; +import java.io.UncheckedIOException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; /** - * A wrapper for an IOException which - * {@link FutureIOSupport#raiseInnerCause(ExecutionException)} knows to - * always extract the exception. + * A wrapper for an IOException. * * The constructor signature guarantees the cause will be an IOException, * and as it checks for a null-argument, non-null. + * @deprecated use the {@code UncheckedIOException} directly.
*/ +@Deprecated @InterfaceAudience.Private @InterfaceStability.Unstable -public class WrappedIOException extends RuntimeException { +public class WrappedIOException extends UncheckedIOException { private static final long serialVersionUID = 2510210974235779294L; @@ -49,8 +49,4 @@ public WrappedIOException(final IOException cause) { super(Preconditions.checkNotNull(cause)); } - @Override - public synchronized IOException getCause() { - return (IOException) super.getCause(); - } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java new file mode 100644 index 0000000000000..2990696ee1b08 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; + +/** + * Provides functionality necessary for caching blocks of data read from FileSystem. + */ +public interface BlockCache extends Closeable { + + /** + * Indicates whether the given block is in this cache. + * + * @param blockNumber the id of the given block. + * @return true if the given block is in this cache, false otherwise. + */ + boolean containsBlock(int blockNumber); + + /** + * Gets the blocks in this cache. + * + * @return the blocks in this cache. + */ + Iterable blocks(); + + /** + * Gets the number of blocks in this cache. + * + * @return the number of blocks in this cache. + */ + int size(); + + /** + * Gets the block having the given {@code blockNumber}. + * + * @param blockNumber the id of the desired block. + * @param buffer contents of the desired block are copied to this buffer. + * @throws IOException if there is an error reading the given block. + */ + void get(int blockNumber, ByteBuffer buffer) throws IOException; + + /** + * Puts the given block in this cache. + * + * @param blockNumber the id of the given block. + * @param buffer contents of the given block to be added to this cache. + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. + * @throws IOException if there is an error writing the given block. 
+ */ + void put(int blockNumber, ByteBuffer buffer, Configuration conf, + LocalDirAllocator localDirAllocator) throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java new file mode 100644 index 0000000000000..ecb8bc7243be0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkWithinRange; + +/** + * Holds information about blocks of data in a file. + */ +public final class BlockData { + + // State of each block of data. + enum State { + + /** Data is not yet ready to be read from this block (still being prefetched). */ + NOT_READY, + + /** A read of this block has been enqueued in the prefetch queue. 
*/ + QUEUED, + + /** Data of this block is ready to be read. */ + READY, + + /** This block has been cached in the local disk cache. */ + CACHED + } + + /** + * State of all blocks in a file. + */ + private State[] state; + + /** + * The size of a file. + */ + private final long fileSize; + + /** + * The file is divided into blocks of this size. + */ + private final int blockSize; + + /** + * The file has this many blocks. + */ + private final int numBlocks; + + /** + * Constructs an instance of {@link BlockData}. + * @param fileSize the size of a file. + * @param blockSize the file is divided into blocks of this size. + * @throws IllegalArgumentException if fileSize is negative. + * @throws IllegalArgumentException if blockSize is negative. + * @throws IllegalArgumentException if blockSize is zero while fileSize is positive. + */ + public BlockData(long fileSize, int blockSize) { + checkNotNegative(fileSize, "fileSize"); + if (fileSize == 0) { + checkNotNegative(blockSize, "blockSize"); + } else { + checkPositiveInteger(blockSize, "blockSize"); + } + + this.fileSize = fileSize; + this.blockSize = blockSize; + this.numBlocks = + (fileSize == 0) + ? 0 + : ((int) (fileSize / blockSize)) + (fileSize % blockSize > 0 + ? 1 + : 0); + this.state = new State[this.numBlocks]; + for (int b = 0; b < this.numBlocks; b++) { + setState(b, State.NOT_READY); + } + } + + /** + * Gets the size of each block. + * @return the size of each block. + */ + public int getBlockSize() { + return blockSize; + } + + /** + * Gets the size of the associated file. + * @return the size of the associated file. + */ + public long getFileSize() { + return fileSize; + } + + /** + * Gets the number of blocks in the associated file. + * @return the number of blocks in the associated file. + */ + public int getNumBlocks() { + return numBlocks; + } + + /** + * Indicates whether the given block is the last block in the associated file. + * @param blockNumber the id of the desired block.
+ * @return true if the given block is the last block in the associated file, false otherwise. + * @throws IllegalArgumentException if blockNumber is invalid. + */ + public boolean isLastBlock(int blockNumber) { + if (fileSize == 0) { + return false; + } + + throwIfInvalidBlockNumber(blockNumber); + + return blockNumber == (numBlocks - 1); + } + + /** + * Gets the id of the block that contains the given absolute offset. + * @param offset the absolute offset to check. + * @return the id of the block that contains the given absolute offset. + * @throws IllegalArgumentException if offset is invalid. + */ + public int getBlockNumber(long offset) { + throwIfInvalidOffset(offset); + + return (int) (offset / blockSize); + } + + /** + * Gets the size of the given block. + * @param blockNumber the id of the desired block. + * @return the size of the given block. + */ + public int getSize(int blockNumber) { + if (fileSize == 0) { + return 0; + } + + if (isLastBlock(blockNumber)) { + return (int) (fileSize - (((long) blockSize) * (numBlocks - 1))); + } else { + return blockSize; + } + } + + /** + * Indicates whether the given absolute offset is valid. + * @param offset absolute offset in the file.. + * @return true if the given absolute offset is valid, false otherwise. + */ + public boolean isValidOffset(long offset) { + return (offset >= 0) && (offset < fileSize); + } + + /** + * Gets the start offset of the given block. + * @param blockNumber the id of the given block. + * @return the start offset of the given block. + * @throws IllegalArgumentException if blockNumber is invalid. + */ + public long getStartOffset(int blockNumber) { + throwIfInvalidBlockNumber(blockNumber); + + return blockNumber * (long) blockSize; + } + + /** + * Gets the relative offset corresponding to the given block and the absolute offset. + * @param blockNumber the id of the given block. + * @param offset absolute offset in the file. 
+ * @return the relative offset corresponding to the given block and the absolute offset. + * @throws IllegalArgumentException if either blockNumber or offset is invalid. + */ + public int getRelativeOffset(int blockNumber, long offset) { + throwIfInvalidOffset(offset); + + return (int) (offset - getStartOffset(blockNumber)); + } + + /** + * Gets the state of the given block. + * @param blockNumber the id of the given block. + * @return the state of the given block. + * @throws IllegalArgumentException if blockNumber is invalid. + */ + public State getState(int blockNumber) { + throwIfInvalidBlockNumber(blockNumber); + + return state[blockNumber]; + } + + /** + * Sets the state of the given block to the given value. + * @param blockNumber the id of the given block. + * @param blockState the target state. + * @throws IllegalArgumentException if blockNumber is invalid. + */ + public void setState(int blockNumber, State blockState) { + throwIfInvalidBlockNumber(blockNumber); + + state[blockNumber] = blockState; + } + + // Debug helper. 
+ public String getStateString() { + StringBuilder sb = new StringBuilder(); + int blockNumber = 0; + while (blockNumber < numBlocks) { + State tstate = getState(blockNumber); + int endBlockNumber = blockNumber; + while ((endBlockNumber < numBlocks) && (getState(endBlockNumber) + == tstate)) { + endBlockNumber++; + } + sb.append( + String.format("[%03d ~ %03d] %s%n", blockNumber, endBlockNumber - 1, + tstate)); + blockNumber = endBlockNumber; + } + return sb.toString(); + } + + private void throwIfInvalidBlockNumber(int blockNumber) { + checkWithinRange(blockNumber, "blockNumber", 0, numBlocks - 1); + } + + private void throwIfInvalidOffset(long offset) { + checkWithinRange(offset, "offset", 0, fileSize - 1); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java new file mode 100644 index 0000000000000..45f0aabe7dcd9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull; + +/** + * Provides read access to the underlying file one block at a time. + * + * This class is the simplest form of a {@code BlockManager} that does + * not perform prefetching or caching. + */ +public abstract class BlockManager implements Closeable { + + /** + * Information about each block of the underlying file. + */ + private final BlockData blockData; + + /** + * Constructs an instance of {@code BlockManager}. + * + * @param blockData information about each block of the underlying file. + * + * @throws IllegalArgumentException if blockData is null. + */ + public BlockManager(BlockData blockData) { + checkNotNull(blockData, "blockData"); + + this.blockData = blockData; + } + + /** + * Gets block data information. + * + * @return instance of {@code BlockData}. + */ + public BlockData getBlockData() { + return blockData; + } + + /** + * Gets the block having the given {@code blockNumber}. + * + * The entire block is read into memory and returned as a {@code BufferData}. + * The blocks are treated as a limited resource and must be released when + * one is done reading them. + * + * @param blockNumber the number of the block to be read and returned. + * @return {@code BufferData} having data from the given block. + * + * @throws IOException if there is an error reading the given block. + * @throws IllegalArgumentException if blockNumber is negative.
+ */ + public BufferData get(int blockNumber) throws IOException { + checkNotNegative(blockNumber, "blockNumber"); + + int size = blockData.getSize(blockNumber); + ByteBuffer buffer = ByteBuffer.allocate(size); + long startOffset = blockData.getStartOffset(blockNumber); + read(buffer, startOffset, size); + buffer.flip(); + return new BufferData(blockNumber, buffer); + } + + /** + * Reads into the given {@code buffer} {@code size} bytes from the underlying file + * starting at {@code startOffset}. + * + * @param buffer the buffer to read data into. + * @param startOffset the offset at which reading starts. + * @param size the number of bytes to read. + * @return number of bytes read. + * @throws IOException if there is an error reading the given block. + */ + public abstract int read(ByteBuffer buffer, long startOffset, int size) throws IOException; + + /** + * Releases resources allocated to the given block. + * + * @param data the {@code BufferData} to release. + * + * @throws IllegalArgumentException if data is null. + */ + public void release(BufferData data) { + checkNotNull(data, "data"); + + // Do nothing because we allocate a new buffer each time. + } + + /** + * Requests optional prefetching of the given block. + * + * @param blockNumber the id of the block to prefetch. + * + * @throws IllegalArgumentException if blockNumber is negative. + */ + public void requestPrefetch(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + // Do nothing because we do not support prefetches. + } + + /** + * Requests cancellation of any previously issued prefetch requests. + */ + public void cancelPrefetches() { + // Do nothing because we do not support prefetches. + } + + /** + * Requests that the given block should be copied to the cache. Optional operation. + * + * @param data the {@code BufferData} instance to optionally cache. + */ + public void requestCaching(BufferData data) { + // Do nothing because we do not support caching.
+ } + + @Override + public void close() { + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java new file mode 100644 index 0000000000000..2744334a3bd7a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.DoubleSummaryStatistics; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative; + +/** + * Block level operations performed on a file. + * This class is meant to be used by {@code BlockManager}. + * It is separated out in its own file due to its size. + * + * This class is used for debugging/logging. 
Calls to this class + * can be safely removed without affecting the overall operation. + */ +public final class BlockOperations { + private static final Logger LOG = LoggerFactory.getLogger(BlockOperations.class); + + /** + * Operation kind. + */ + public enum Kind { + UNKNOWN("??", "unknown", false), + CANCEL_PREFETCHES("CP", "cancelPrefetches", false), + CLOSE("CX", "close", false), + CACHE_PUT("C+", "putC", true), + GET_CACHED("GC", "getCached", true), + GET_PREFETCHED("GP", "getPrefetched", true), + GET_READ("GR", "getRead", true), + PREFETCH("PF", "prefetch", true), + RELEASE("RL", "release", true), + REQUEST_CACHING("RC", "requestCaching", true), + REQUEST_PREFETCH("RP", "requestPrefetch", true); + + private String shortName; + private String name; + private boolean hasBlock; + + Kind(String shortName, String name, boolean hasBlock) { + this.shortName = shortName; + this.name = name; + this.hasBlock = hasBlock; + } + + private static Map shortNameToKind = new HashMap<>(); + + public static Kind fromShortName(String shortName) { + if (shortNameToKind.isEmpty()) { + for (Kind kind : Kind.values()) { + shortNameToKind.put(kind.shortName, kind); + } + } + return shortNameToKind.get(shortName); + } + } + + public static class Operation { + private final Kind kind; + private final int blockNumber; + private final long timestamp; + + public Operation(Kind kind, int blockNumber) { + this.kind = kind; + this.blockNumber = blockNumber; + this.timestamp = System.nanoTime(); + } + + public Kind getKind() { + return kind; + } + + public int getBlockNumber() { + return blockNumber; + } + + public long getTimestamp() { + return timestamp; + } + + public void getSummary(StringBuilder sb) { + if (kind.hasBlock) { + sb.append(String.format("%s(%d)", kind.shortName, blockNumber)); + } else { + sb.append(String.format("%s", kind.shortName)); + } + } + + public String getDebugInfo() { + if (kind.hasBlock) { + return String.format("--- %s(%d)", kind.name, blockNumber); + } else { 
+ return String.format("... %s()", kind.name); + } + } + } + + public static class End extends Operation { + private Operation op; + + public End(Operation op) { + super(op.kind, op.blockNumber); + this.op = op; + } + + @Override + public void getSummary(StringBuilder sb) { + sb.append("E"); + super.getSummary(sb); + } + + @Override + public String getDebugInfo() { + return "***" + super.getDebugInfo().substring(3); + } + + public double duration() { + return (getTimestamp() - op.getTimestamp()) / 1e9; + } + } + + private ArrayList ops; + private boolean debugMode; + + public BlockOperations() { + this.ops = new ArrayList<>(); + } + + public synchronized void setDebug(boolean state) { + debugMode = state; + } + + private synchronized Operation add(Operation op) { + if (debugMode) { + LOG.info(op.getDebugInfo()); + } + ops.add(op); + return op; + } + + public Operation getPrefetched(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.GET_PREFETCHED, blockNumber)); + } + + public Operation getCached(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.GET_CACHED, blockNumber)); + } + + public Operation getRead(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.GET_READ, blockNumber)); + } + + public Operation release(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.RELEASE, blockNumber)); + } + + public Operation requestPrefetch(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.REQUEST_PREFETCH, blockNumber)); + } + + public Operation prefetch(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.PREFETCH, blockNumber)); + } + + public Operation cancelPrefetches() { + return add(new Operation(Kind.CANCEL_PREFETCHES, -1)); + } + + public Operation close() { + return add(new 
Operation(Kind.CLOSE, -1)); + } + + public Operation requestCaching(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.REQUEST_CACHING, blockNumber)); + } + + public Operation addToCache(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + return add(new Operation(Kind.CACHE_PUT, blockNumber)); + } + + public Operation end(Operation op) { + return add(new End(op)); + } + + private static void append(StringBuilder sb, String format, Object... args) { + sb.append(String.format(format, args)); + } + + public synchronized String getSummary(boolean showDebugInfo) { + StringBuilder sb = new StringBuilder(); + for (Operation op : ops) { + if (op != null) { + if (showDebugInfo) { + sb.append(op.getDebugInfo()); + sb.append("\n"); + } else { + op.getSummary(sb); + sb.append(";"); + } + } + } + + sb.append("\n"); + getDurationInfo(sb); + + return sb.toString(); + } + + public synchronized void getDurationInfo(StringBuilder sb) { + Map durations = new HashMap<>(); + for (Operation op : ops) { + if (op instanceof End) { + End endOp = (End) op; + DoubleSummaryStatistics stats = durations.get(endOp.getKind()); + if (stats == null) { + stats = new DoubleSummaryStatistics(); + durations.put(endOp.getKind(), stats); + } + stats.accept(endOp.duration()); + } + } + + List kinds = Arrays.asList( + Kind.GET_CACHED, + Kind.GET_PREFETCHED, + Kind.GET_READ, + Kind.CACHE_PUT, + Kind.PREFETCH, + Kind.REQUEST_CACHING, + Kind.REQUEST_PREFETCH, + Kind.CANCEL_PREFETCHES, + Kind.RELEASE, + Kind.CLOSE + ); + + for (Kind kind : kinds) { + append(sb, "%-18s : ", kind); + DoubleSummaryStatistics stats = durations.get(kind); + if (stats == null) { + append(sb, "--\n"); + } else { + append( + sb, + "#ops = %3d, total = %5.1f, min: %3.1f, avg: %3.1f, max: %3.1f\n", + stats.getCount(), + stats.getSum(), + stats.getMin(), + stats.getAverage(), + stats.getMax()); + } + } + } + + public synchronized void analyze(StringBuilder sb) { + Map> 
blockOps = new HashMap<>(); + + // Group-by block number. + for (Operation op : ops) { + if (op.blockNumber < 0) { + continue; + } + + List perBlockOps; + if (!blockOps.containsKey(op.blockNumber)) { + perBlockOps = new ArrayList<>(); + blockOps.put(op.blockNumber, perBlockOps); + } + + perBlockOps = blockOps.get(op.blockNumber); + perBlockOps.add(op); + } + + List prefetchedNotUsed = new ArrayList<>(); + List cachedNotUsed = new ArrayList<>(); + + for (Map.Entry> entry : blockOps.entrySet()) { + Integer blockNumber = entry.getKey(); + List perBlockOps = entry.getValue(); + Map kindCounts = new HashMap<>(); + Map endKindCounts = new HashMap<>(); + + for (Operation op : perBlockOps) { + if (op instanceof End) { + int endCount = endKindCounts.getOrDefault(op.kind, 0) + 1; + endKindCounts.put(op.kind, endCount); + } else { + int count = kindCounts.getOrDefault(op.kind, 0) + 1; + kindCounts.put(op.kind, count); + } + } + + for (Kind kind : kindCounts.keySet()) { + int count = kindCounts.getOrDefault(kind, 0); + int endCount = endKindCounts.getOrDefault(kind, 0); + if (count != endCount) { + append(sb, "[%d] %s : #ops(%d) != #end-ops(%d)\n", blockNumber, kind, count, endCount); + } + + if (count > 1) { + append(sb, "[%d] %s = %d\n", blockNumber, kind, count); + } + } + + int prefetchCount = kindCounts.getOrDefault(Kind.PREFETCH, 0); + int getPrefetchedCount = kindCounts.getOrDefault(Kind.GET_PREFETCHED, 0); + if ((prefetchCount > 0) && (getPrefetchedCount < prefetchCount)) { + prefetchedNotUsed.add(blockNumber); + } + + int cacheCount = kindCounts.getOrDefault(Kind.CACHE_PUT, 0); + int getCachedCount = kindCounts.getOrDefault(Kind.GET_CACHED, 0); + if ((cacheCount > 0) && (getCachedCount < cacheCount)) { + cachedNotUsed.add(blockNumber); + } + } + + if (!prefetchedNotUsed.isEmpty()) { + append(sb, "Prefetched but not used: %s\n", getIntList(prefetchedNotUsed)); + } + + if (!cachedNotUsed.isEmpty()) { + append(sb, "Cached but not used: %s\n", getIntList(cachedNotUsed)); 
+ } + } + + private static String getIntList(Iterable nums) { + List numList = new ArrayList<>(); + for (Integer n : nums) { + numList.add(n.toString()); + } + return String.join(", ", numList); + } + + public static BlockOperations fromSummary(String summary) { + BlockOperations ops = new BlockOperations(); + ops.setDebug(true); + Pattern blockOpPattern = Pattern.compile("([A-Z+]+)(\\(([0-9]+)?\\))?"); + String[] tokens = summary.split(";"); + for (String token : tokens) { + Matcher matcher = blockOpPattern.matcher(token); + if (!matcher.matches()) { + String message = String.format("Unknown summary format: %s", token); + throw new IllegalArgumentException(message); + } + + String shortName = matcher.group(1); + String blockNumberStr = matcher.group(3); + int blockNumber = (blockNumberStr == null) ? -1 : Integer.parseInt(blockNumberStr); + Kind kind = Kind.fromShortName(shortName); + Kind endKind = null; + if (kind == null) { + if (shortName.charAt(0) == 'E') { + endKind = Kind.fromShortName(shortName.substring(1)); + } + } + + if (kind == null && endKind == null) { + String message = String.format("Unknown short name: %s (token = %s)", shortName, token); + throw new IllegalArgumentException(message); + } + + if (kind != null) { + ops.add(new Operation(kind, blockNumber)); + } else { + Operation op = null; + for (int i = ops.ops.size() - 1; i >= 0; i--) { + op = ops.ops.get(i); + if ((op.blockNumber == blockNumber) && (op.kind == endKind) && !(op instanceof End)) { + ops.add(new End(op)); + break; + } + } + + if (op == null) { + LOG.warn("Start op not found: {}({})", endKind, blockNumber); + } + } + } + + return ops; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java new file mode 100644 index 0000000000000..a871f8237729f --- /dev/null +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.Set; +import java.util.concurrent.ArrayBlockingQueue; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull; + +/** + * Manages a fixed pool of resources. + * + * Avoids creating a new resource if a previously created instance is already available. + */ +public abstract class BoundedResourcePool extends ResourcePool { + /** + * The size of this pool. Fixed at creation time. + */ + private final int size; + + /** + * Items currently available in the pool. + */ + private ArrayBlockingQueue items; + + /** + * Items that have been created so far (regardless of whether they are currently available). + */ + private Set createdItems; + + /** + * Constructs a resource pool of the given size. + * + * @param size the size of this pool. Cannot be changed post creation. + * + * @throws IllegalArgumentException if size is zero or negative. 
+ */ + public BoundedResourcePool(int size) { + Validate.checkPositiveInteger(size, "size"); + + this.size = size; + this.items = new ArrayBlockingQueue<>(size); + + // The created items are identified based on their object reference. + this.createdItems = Collections.newSetFromMap(new IdentityHashMap()); + } + + /** + * Acquires a resource blocking if necessary until one becomes available. + */ + @Override + public T acquire() { + return this.acquireHelper(true); + } + + /** + * Acquires a resource blocking if one is immediately available. Otherwise returns null. + */ + @Override + public T tryAcquire() { + return this.acquireHelper(false); + } + + /** + * Releases a previously acquired resource. + * + * @throws IllegalArgumentException if item is null. + */ + @Override + public void release(T item) { + checkNotNull(item, "item"); + + synchronized (createdItems) { + if (!createdItems.contains(item)) { + throw new IllegalArgumentException("This item is not a part of this pool"); + } + } + + // Return if this item was released earlier. + // We cannot use items.contains() because that check is not based on reference equality. + for (T entry : items) { + if (entry == item) { + return; + } + } + + try { + items.put(item); + } catch (InterruptedException e) { + throw new IllegalStateException("release() should never block", e); + } + } + + @Override + public synchronized void close() { + for (T item : createdItems) { + close(item); + } + + items.clear(); + items = null; + + createdItems.clear(); + createdItems = null; + } + + /** + * Derived classes may implement a way to cleanup each item. + */ + @Override + protected synchronized void close(T item) { + // Do nothing in this class. Allow overriding classes to take any cleanup action. + } + + /** + * Number of items created so far. Mostly for testing purposes. + * @return the count. 
+ */ + public int numCreated() { + synchronized (createdItems) { + return createdItems.size(); + } + } + + /** + * Number of items available to be acquired. Mostly for testing purposes. + * @return the number available. + */ + public synchronized int numAvailable() { + return (size - numCreated()) + items.size(); + } + + // For debugging purposes. + @Override + public synchronized String toString() { + return String.format( + "size = %d, #created = %d, #in-queue = %d, #available = %d", + size, numCreated(), items.size(), numAvailable()); + } + + /** + * Derived classes must implement a way to create an instance of a resource. + */ + protected abstract T createNew(); + + private T acquireHelper(boolean canBlock) { + + // Prefer reusing an item if one is available. + // That avoids unnecessarily creating new instances. + T result = items.poll(); + if (result != null) { + return result; + } + + synchronized (createdItems) { + // Create a new instance if allowed by the capacity of this pool. + if (createdItems.size() < size) { + T item = createNew(); + createdItems.add(item); + return item; + } + } + + if (canBlock) { + try { + // Block for an instance to be available. + return items.take(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return null; + } + } else { + return null; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java new file mode 100644 index 0000000000000..de68269ab700c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Future; +import java.util.zip.CRC32; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Holds the state of a ByteBuffer that is in use by {@code CachingBlockManager}. + * + * This class is not meant to be of general use. It exists into its own file due to its size. + * We use the term block and buffer interchangeably in this file because one buffer + * holds exactly one block of data. + * + * Holding all of the state associated with a block allows us to validate and control + * state transitions in a synchronized fashion. + */ +public final class BufferData { + + private static final Logger LOG = LoggerFactory.getLogger(BufferData.class); + + public enum State { + /** + * Unknown / invalid state. + */ + UNKNOWN, + + /** + * Buffer has been acquired but has no data. + */ + BLANK, + + /** + * This block is being prefetched. + */ + PREFETCHING, + + /** + * This block is being added to the local cache. + */ + CACHING, + + /** + * This block has data and is ready to be read. + */ + READY, + + /** + * This block is no longer in-use and should not be used once in this state. + */ + DONE + } + + /** + * Number of the block associated with this buffer. 
+ */ + private final int blockNumber; + + /** + * The buffer associated with this block. + */ + private ByteBuffer buffer; + + /** + * Current state of this block. + */ + private volatile State state; + + /** + * Future of the action being performed on this block (eg, prefetching or caching). + */ + private Future action; + + /** + * Checksum of the buffer contents once in READY state. + */ + private long checksum = 0; + + /** + * Constructs an instances of this class. + * + * @param blockNumber Number of the block associated with this buffer. + * @param buffer The buffer associated with this block. + * + * @throws IllegalArgumentException if blockNumber is negative. + * @throws IllegalArgumentException if buffer is null. + */ + public BufferData(int blockNumber, ByteBuffer buffer) { + Validate.checkNotNegative(blockNumber, "blockNumber"); + Validate.checkNotNull(buffer, "buffer"); + + this.blockNumber = blockNumber; + this.buffer = buffer; + this.state = State.BLANK; + } + + /** + * Gets the id of this block. + * + * @return the id of this block. + */ + public int getBlockNumber() { + return this.blockNumber; + } + + /** + * Gets the buffer associated with this block. + * + * @return the buffer associated with this block. + */ + public ByteBuffer getBuffer() { + return this.buffer; + } + + /** + * Gets the state of this block. + * + * @return the state of this block. + */ + public State getState() { + return this.state; + } + + /** + * Gets the checksum of data in this block. + * + * @return the checksum of data in this block. + */ + public long getChecksum() { + return this.checksum; + } + + /** + * Computes CRC32 checksum of the given buffer's contents. + * + * @param buffer the buffer whose content's checksum is to be computed. + * @return the computed checksum. 
+ */ + public static long getChecksum(ByteBuffer buffer) { + ByteBuffer tempBuffer = buffer.duplicate(); + tempBuffer.rewind(); + CRC32 crc32 = new CRC32(); + crc32.update(tempBuffer); + return crc32.getValue(); + } + + public synchronized Future getActionFuture() { + return this.action; + } + + /** + * Indicates that a prefetch operation is in progress. + * + * @param actionFuture the {@code Future} of a prefetch action. + * + * @throws IllegalArgumentException if actionFuture is null. + */ + public synchronized void setPrefetch(Future actionFuture) { + Validate.checkNotNull(actionFuture, "actionFuture"); + + this.updateState(State.PREFETCHING, State.BLANK); + this.action = actionFuture; + } + + /** + * Indicates that a caching operation is in progress. + * + * @param actionFuture the {@code Future} of a caching action. + * + * @throws IllegalArgumentException if actionFuture is null. + */ + public synchronized void setCaching(Future actionFuture) { + Validate.checkNotNull(actionFuture, "actionFuture"); + + this.throwIfStateIncorrect(State.PREFETCHING, State.READY); + this.state = State.CACHING; + this.action = actionFuture; + } + + /** + * Marks the completion of reading data into the buffer. + * The buffer cannot be modified once in this state. + * + * @param expectedCurrentState the collection of states from which transition to READY is allowed. + */ + public synchronized void setReady(State... expectedCurrentState) { + if (this.checksum != 0) { + throw new IllegalStateException("Checksum cannot be changed once set"); + } + + this.buffer = this.buffer.asReadOnlyBuffer(); + this.checksum = getChecksum(this.buffer); + this.buffer.rewind(); + this.updateState(State.READY, expectedCurrentState); + } + + /** + * Indicates that this block is no longer of use and can be reclaimed. 
+ */ + public synchronized void setDone() { + if (this.checksum != 0) { + if (getChecksum(this.buffer) != this.checksum) { + throw new IllegalStateException("checksum changed after setReady()"); + } + } + this.state = State.DONE; + this.action = null; + } + + /** + * Updates the current state to the specified value. + * Asserts that the current state is as expected. + * @param newState the state to transition to. + * @param expectedCurrentState the collection of states from which + * transition to {@code newState} is allowed. + * + * @throws IllegalArgumentException if newState is null. + * @throws IllegalArgumentException if expectedCurrentState is null. + */ + public synchronized void updateState(State newState, + State... expectedCurrentState) { + Validate.checkNotNull(newState, "newState"); + Validate.checkNotNull(expectedCurrentState, "expectedCurrentState"); + + this.throwIfStateIncorrect(expectedCurrentState); + this.state = newState; + } + + /** + * Helper that asserts the current state is one of the expected values. + * + * @param states the collection of allowed states. + * + * @throws IllegalArgumentException if states is null. + */ + public void throwIfStateIncorrect(State... states) { + Validate.checkNotNull(states, "states"); + + if (this.stateEqualsOneOf(states)) { + return; + } + + List statesStr = new ArrayList(); + for (State s : states) { + statesStr.add(s.toString()); + } + + String message = String.format( + "Expected buffer state to be '%s' but found: %s", + String.join(" or ", statesStr), this); + throw new IllegalStateException(message); + } + + public boolean stateEqualsOneOf(State... 
states) { + State currentState = this.state; + + for (State s : states) { + if (currentState == s) { + return true; + } + } + + return false; + } + + public String toString() { + + return String.format( + "[%03d] id: %03d, %s: buf: %s, checksum: %d, future: %s", + this.blockNumber, + System.identityHashCode(this), + this.state, + this.getBufferStr(this.buffer), + this.checksum, + this.getFutureStr(this.action)); + } + + private String getFutureStr(Future f) { + if (f == null) { + return "--"; + } else { + return this.action.isDone() ? "done" : "not done"; + } + } + + private String getBufferStr(ByteBuffer buf) { + if (buf == null) { + return "--"; + } else { + return String.format( + "(id = %d, pos = %d, lim = %d)", + System.identityHashCode(buf), + buf.position(), buf.limit()); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java new file mode 100644 index 0000000000000..189357f6bd04f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.io.Closeable; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Future; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkState; +import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.apache.hadoop.util.Preconditions.checkNotNull; + +/** + * Manages a fixed pool of {@code ByteBuffer} instances. + *

    + * Avoids creating a new buffer if a previously created buffer is already available. + */ +public class BufferPool implements Closeable { + + private static final Logger LOG = LoggerFactory.getLogger(BufferPool.class); + + /** + * Max number of buffers in this pool. + */ + private final int size; + + /** + * Size in bytes of each buffer. + */ + private final int bufferSize; + + /* + Invariants for internal state. + -- a buffer is either in this.pool or in this.allocated + -- transition between this.pool <==> this.allocated must be atomic + -- only one buffer allocated for a given blockNumber + */ + + + /** + * Underlying bounded resource pool. + */ + private BoundedResourcePool pool; + + /** + * Allows associating metadata to each buffer in the pool. + */ + private Map allocated; + + /** + * Prefetching stats. + */ + private PrefetchingStatistics prefetchingStatistics; + + /** + * Initializes a new instance of the {@code BufferPool} class. + * @param size number of buffer in this pool. + * @param bufferSize size in bytes of each buffer. + * @param prefetchingStatistics statistics for this stream. + * @throws IllegalArgumentException if size is zero or negative. + * @throws IllegalArgumentException if bufferSize is zero or negative. + */ + public BufferPool(int size, + int bufferSize, + PrefetchingStatistics prefetchingStatistics) { + Validate.checkPositiveInteger(size, "size"); + Validate.checkPositiveInteger(bufferSize, "bufferSize"); + + this.size = size; + this.bufferSize = bufferSize; + this.allocated = new IdentityHashMap(); + this.prefetchingStatistics = requireNonNull(prefetchingStatistics); + this.pool = new BoundedResourcePool(size) { + @Override + public ByteBuffer createNew() { + ByteBuffer buffer = ByteBuffer.allocate(bufferSize); + prefetchingStatistics.memoryAllocated(bufferSize); + return buffer; + } + }; + } + + /** + * Gets a list of all blocks in this pool. + * @return a list of all blocks in this pool. 
+ */ + public List getAll() { + synchronized (allocated) { + return Collections.unmodifiableList(new ArrayList<>(allocated.keySet())); + } + } + + /** + * Acquires a {@code ByteBuffer}; blocking if necessary until one becomes available. + * @param blockNumber the id of the block to acquire. + * @return the acquired block's {@code BufferData}. + */ + public synchronized BufferData acquire(int blockNumber) { + BufferData data; + final int maxRetryDelayMs = 600 * 1000; + final int statusUpdateDelayMs = 120 * 1000; + Retryer retryer = new Retryer(10, maxRetryDelayMs, statusUpdateDelayMs); + + do { + if (retryer.updateStatus()) { + if (LOG.isDebugEnabled()) { + LOG.debug("waiting to acquire block: {}", blockNumber); + LOG.debug("state = {}", this); + } + releaseReadyBlock(blockNumber); + } + data = tryAcquire(blockNumber); + } + while ((data == null) && retryer.continueRetry()); + + if (data != null) { + return data; + } else { + String message = + String.format("Wait failed for acquire(%d)", blockNumber); + throw new IllegalStateException(message); + } + } + + /** + * Acquires a buffer if one is immediately available. Otherwise returns null. + * @param blockNumber the id of the block to try acquire. + * @return the acquired block's {@code BufferData} or null. + */ + public synchronized BufferData tryAcquire(int blockNumber) { + return acquireHelper(blockNumber, false); + } + + private synchronized BufferData acquireHelper(int blockNumber, + boolean canBlock) { + checkNotNegative(blockNumber, "blockNumber"); + + releaseDoneBlocks(); + + BufferData data = find(blockNumber); + if (data != null) { + return data; + } + + ByteBuffer buffer = canBlock ? 
pool.acquire() : pool.tryAcquire(); + if (buffer == null) { + return null; + } + + buffer.clear(); + data = new BufferData(blockNumber, buffer.duplicate()); + + synchronized (allocated) { + checkState(find(blockNumber) == null, "buffer data already exists"); + + allocated.put(data, buffer); + } + + return data; + } + + /** + * Releases resources for any blocks marked as 'done'. + */ + private synchronized void releaseDoneBlocks() { + for (BufferData data : getAll()) { + if (data.stateEqualsOneOf(BufferData.State.DONE)) { + release(data); + } + } + } + + /** + * If no blocks were released after calling releaseDoneBlocks() a few times, + * we may end up waiting forever. To avoid that situation, we try releasing + * a 'ready' block farthest away from the given block. + */ + private synchronized void releaseReadyBlock(int blockNumber) { + BufferData releaseTarget = null; + for (BufferData data : getAll()) { + if (data.stateEqualsOneOf(BufferData.State.READY)) { + if (releaseTarget == null) { + releaseTarget = data; + } else { + if (distance(data, blockNumber) > distance(releaseTarget, + blockNumber)) { + releaseTarget = data; + } + } + } + } + + if (releaseTarget != null) { + LOG.warn("releasing 'ready' block: {}", releaseTarget); + releaseTarget.setDone(); + } + } + + private int distance(BufferData data, int blockNumber) { + return Math.abs(data.getBlockNumber() - blockNumber); + } + + /** + * Releases a previously acquired resource. + * @param data the {@code BufferData} instance to release. + * @throws IllegalArgumentException if data is null. + * @throws IllegalArgumentException if data cannot be released due to its state. + */ + public synchronized void release(BufferData data) { + checkNotNull(data, "data"); + + synchronized (data) { + checkArgument( + canRelease(data), + String.format("Unable to release buffer: %s", data)); + + ByteBuffer buffer = allocated.get(data); + if (buffer == null) { + // Likely released earlier. 
+ return; + } + buffer.clear(); + pool.release(buffer); + allocated.remove(data); + } + + releaseDoneBlocks(); + } + + @Override + public synchronized void close() { + for (BufferData data : getAll()) { + Future actionFuture = data.getActionFuture(); + if (actionFuture != null) { + actionFuture.cancel(true); + } + } + + int currentPoolSize = pool.numCreated(); + + pool.close(); + pool = null; + + allocated.clear(); + allocated = null; + + prefetchingStatistics.memoryFreed(currentPoolSize * bufferSize); + } + + // For debugging purposes. + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(pool.toString()); + sb.append("\n"); + List allData = new ArrayList<>(getAll()); + Collections.sort(allData, + (d1, d2) -> d1.getBlockNumber() - d2.getBlockNumber()); + for (BufferData data : allData) { + sb.append(data.toString()); + sb.append("\n"); + } + + return sb.toString(); + } + + // Number of ByteBuffers created so far. + public synchronized int numCreated() { + return pool.numCreated(); + } + + // Number of ByteBuffers available to be acquired. 
+ public synchronized int numAvailable() { + releaseDoneBlocks(); + return pool.numAvailable(); + } + + private BufferData find(int blockNumber) { + synchronized (allocated) { + for (BufferData data : allocated.keySet()) { + if ((data.getBlockNumber() == blockNumber) + && !data.stateEqualsOneOf(BufferData.State.DONE)) { + return data; + } + } + } + + return null; + } + + private boolean canRelease(BufferData data) { + return data.stateEqualsOneOf( + BufferData.State.DONE, + BufferData.State.READY); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java new file mode 100644 index 0000000000000..e43b176d0bfe9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java @@ -0,0 +1,654 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.fs.statistics.DurationTracker; + +import static java.util.Objects.requireNonNull; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative; +import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; + +/** + * Provides read access to the underlying file one block at a time. + * Improve read performance by prefetching and locall caching blocks. + */ +public abstract class CachingBlockManager extends BlockManager { + private static final Logger LOG = LoggerFactory.getLogger(CachingBlockManager.class); + private static final int TIMEOUT_MINUTES = 60; + + /** + * Asynchronous tasks are performed in this pool. + */ + private final ExecutorServiceFuturePool futurePool; + + /** + * Pool of shared ByteBuffer instances. + */ + private BufferPool bufferPool; + + /** + * Size of the in-memory cache in terms of number of blocks. + * Total memory consumption is up to bufferPoolSize * blockSize. + */ + private final int bufferPoolSize; + + /** + * Local block cache. + */ + private BlockCache cache; + + /** + * Error counts. For testing purposes. + */ + private final AtomicInteger numCachingErrors; + private final AtomicInteger numReadErrors; + + /** + * Operations performed by this block manager. 
+ */ + private final BlockOperations ops; + + private boolean closed; + + /** + * If a single caching operation takes more than this time (in seconds), + * we disable caching to prevent further perf degradation due to caching. + */ + private static final int SLOW_CACHING_THRESHOLD = 5; + + /** + * Once set to true, any further caching requests will be ignored. + */ + private final AtomicBoolean cachingDisabled; + + private final PrefetchingStatistics prefetchingStatistics; + + private final Configuration conf; + + private final LocalDirAllocator localDirAllocator; + + /** + * Constructs an instance of a {@code CachingBlockManager}. + * + * @param futurePool asynchronous tasks are performed in this pool. + * @param blockData information about each block of the underlying file. + * @param bufferPoolSize size of the in-memory cache in terms of number of blocks. + * @param prefetchingStatistics statistics for this stream. + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. + * @throws IllegalArgumentException if bufferPoolSize is zero or negative. 
+ */ + public CachingBlockManager( + ExecutorServiceFuturePool futurePool, + BlockData blockData, + int bufferPoolSize, + PrefetchingStatistics prefetchingStatistics, + Configuration conf, + LocalDirAllocator localDirAllocator) { + super(blockData); + + Validate.checkPositiveInteger(bufferPoolSize, "bufferPoolSize"); + + this.futurePool = requireNonNull(futurePool); + this.bufferPoolSize = bufferPoolSize; + this.numCachingErrors = new AtomicInteger(); + this.numReadErrors = new AtomicInteger(); + this.cachingDisabled = new AtomicBoolean(); + this.prefetchingStatistics = requireNonNull(prefetchingStatistics); + + if (this.getBlockData().getFileSize() > 0) { + this.bufferPool = new BufferPool(bufferPoolSize, this.getBlockData().getBlockSize(), + this.prefetchingStatistics); + this.cache = this.createCache(); + } + + this.ops = new BlockOperations(); + this.ops.setDebug(false); + this.conf = requireNonNull(conf); + this.localDirAllocator = localDirAllocator; + } + + /** + * Gets the block having the given {@code blockNumber}. + * + * @throws IllegalArgumentException if blockNumber is negative. 
+ */ + @Override + public BufferData get(int blockNumber) throws IOException { + checkNotNegative(blockNumber, "blockNumber"); + + BufferData data; + final int maxRetryDelayMs = bufferPoolSize * 120 * 1000; + final int statusUpdateDelayMs = 120 * 1000; + Retryer retryer = new Retryer(10, maxRetryDelayMs, statusUpdateDelayMs); + boolean done; + + do { + if (closed) { + throw new IOException("this stream is already closed"); + } + + data = bufferPool.acquire(blockNumber); + done = getInternal(data); + + if (retryer.updateStatus()) { + LOG.warn("waiting to get block: {}", blockNumber); + LOG.info("state = {}", this.toString()); + } + } + while (!done && retryer.continueRetry()); + + if (done) { + return data; + } else { + String message = String.format("Wait failed for get(%d)", blockNumber); + throw new IllegalStateException(message); + } + } + + private boolean getInternal(BufferData data) throws IOException { + Validate.checkNotNull(data, "data"); + + // Opportunistic check without locking. + if (data.stateEqualsOneOf( + BufferData.State.PREFETCHING, + BufferData.State.CACHING, + BufferData.State.DONE)) { + return false; + } + + synchronized (data) { + // Reconfirm state after locking. + if (data.stateEqualsOneOf( + BufferData.State.PREFETCHING, + BufferData.State.CACHING, + BufferData.State.DONE)) { + return false; + } + + int blockNumber = data.getBlockNumber(); + if (data.getState() == BufferData.State.READY) { + BlockOperations.Operation op = ops.getPrefetched(blockNumber); + ops.end(op); + return true; + } + + data.throwIfStateIncorrect(BufferData.State.BLANK); + read(data); + return true; + } + } + + /** + * Releases resources allocated to the given block. + * + * @throws IllegalArgumentException if data is null. 
+ */ + @Override + public void release(BufferData data) { + if (closed) { + return; + } + + Validate.checkNotNull(data, "data"); + + BlockOperations.Operation op = ops.release(data.getBlockNumber()); + bufferPool.release(data); + ops.end(op); + } + + @Override + public synchronized void close() { + if (closed) { + return; + } + + closed = true; + + final BlockOperations.Operation op = ops.close(); + + // Cancel any prefetches in progress. + cancelPrefetches(); + + cleanupWithLogger(LOG, cache); + + ops.end(op); + LOG.info(ops.getSummary(false)); + + bufferPool.close(); + bufferPool = null; + } + + /** + * Requests optional prefetching of the given block. + * The block is prefetched only if we can acquire a free buffer. + * + * @throws IllegalArgumentException if blockNumber is negative. + */ + @Override + public void requestPrefetch(int blockNumber) { + checkNotNegative(blockNumber, "blockNumber"); + + if (closed) { + return; + } + + // We initiate a prefetch only if we can acquire a buffer from the shared pool. + BufferData data = bufferPool.tryAcquire(blockNumber); + if (data == null) { + return; + } + + // Opportunistic check without locking. + if (!data.stateEqualsOneOf(BufferData.State.BLANK)) { + // The block is ready or being prefetched/cached. + return; + } + + synchronized (data) { + // Reconfirm state after locking. + if (!data.stateEqualsOneOf(BufferData.State.BLANK)) { + // The block is ready or being prefetched/cached. + return; + } + + BlockOperations.Operation op = ops.requestPrefetch(blockNumber); + PrefetchTask prefetchTask = new PrefetchTask(data, this, Instant.now()); + Future prefetchFuture = futurePool.executeFunction(prefetchTask); + data.setPrefetch(prefetchFuture); + ops.end(op); + } + } + + /** + * Requests cancellation of any previously issued prefetch requests. 
+ */ + @Override + public void cancelPrefetches() { + BlockOperations.Operation op = ops.cancelPrefetches(); + + for (BufferData data : bufferPool.getAll()) { + // We add blocks being prefetched to the local cache so that the prefetch is not wasted. + if (data.stateEqualsOneOf(BufferData.State.PREFETCHING, BufferData.State.READY)) { + requestCaching(data); + } + } + + ops.end(op); + } + + private void read(BufferData data) throws IOException { + synchronized (data) { + try { + readBlock(data, false, BufferData.State.BLANK); + } catch (IOException e) { + LOG.error("error reading block {}", data.getBlockNumber(), e); + throw e; + } + } + } + + private void prefetch(BufferData data, Instant taskQueuedStartTime) throws IOException { + synchronized (data) { + prefetchingStatistics.executorAcquired( + Duration.between(taskQueuedStartTime, Instant.now())); + readBlock( + data, + true, + BufferData.State.PREFETCHING, + BufferData.State.CACHING); + } + } + + private void readBlock(BufferData data, boolean isPrefetch, BufferData.State... expectedState) + throws IOException { + + if (closed) { + return; + } + + BlockOperations.Operation op = null; + DurationTracker tracker = null; + + synchronized (data) { + try { + if (data.stateEqualsOneOf(BufferData.State.DONE, BufferData.State.READY)) { + // DONE : Block was released, likely due to caching being disabled on slow perf. + // READY : Block was already fetched by another thread. No need to re-read. + return; + } + + data.throwIfStateIncorrect(expectedState); + int blockNumber = data.getBlockNumber(); + + // Prefer reading from cache over reading from network. 
+ if (cache.containsBlock(blockNumber)) { + op = ops.getCached(blockNumber); + cache.get(blockNumber, data.getBuffer()); + data.setReady(expectedState); + return; + } + + if (isPrefetch) { + tracker = prefetchingStatistics.prefetchOperationStarted(); + op = ops.prefetch(data.getBlockNumber()); + } else { + op = ops.getRead(data.getBlockNumber()); + } + + long offset = getBlockData().getStartOffset(data.getBlockNumber()); + int size = getBlockData().getSize(data.getBlockNumber()); + ByteBuffer buffer = data.getBuffer(); + buffer.clear(); + read(buffer, offset, size); + buffer.flip(); + data.setReady(expectedState); + } catch (Exception e) { + if (isPrefetch && tracker != null) { + tracker.failed(); + } + + numReadErrors.incrementAndGet(); + data.setDone(); + throw e; + } finally { + if (op != null) { + ops.end(op); + } + + if (isPrefetch) { + prefetchingStatistics.prefetchOperationCompleted(); + if (tracker != null) { + tracker.close(); + } + } + } + } + } + + /** + * Read task that is submitted to the future pool. + */ + private static class PrefetchTask implements Supplier { + private final BufferData data; + private final CachingBlockManager blockManager; + private final Instant taskQueuedStartTime; + + PrefetchTask(BufferData data, CachingBlockManager blockManager, Instant taskQueuedStartTime) { + this.data = data; + this.blockManager = blockManager; + this.taskQueuedStartTime = taskQueuedStartTime; + } + + @Override + public Void get() { + try { + blockManager.prefetch(data, taskQueuedStartTime); + } catch (Exception e) { + LOG.info("error prefetching block {}. {}", data.getBlockNumber(), e.getMessage()); + LOG.debug("error prefetching block {}", data.getBlockNumber(), e); + } + return null; + } + } + + private static final BufferData.State[] EXPECTED_STATE_AT_CACHING = + new BufferData.State[] { + BufferData.State.PREFETCHING, BufferData.State.READY + }; + + /** + * Requests that the given block should be copied to the local cache. 
+ * The block must not be accessed by the caller after calling this method + * because it will released asynchronously relative to the caller. + * + * @throws IllegalArgumentException if data is null. + */ + @Override + public void requestCaching(BufferData data) { + if (closed) { + return; + } + + if (cachingDisabled.get()) { + data.setDone(); + return; + } + + Validate.checkNotNull(data, "data"); + + // Opportunistic check without locking. + if (!data.stateEqualsOneOf(EXPECTED_STATE_AT_CACHING)) { + return; + } + + synchronized (data) { + // Reconfirm state after locking. + if (!data.stateEqualsOneOf(EXPECTED_STATE_AT_CACHING)) { + return; + } + + if (cache.containsBlock(data.getBlockNumber())) { + data.setDone(); + return; + } + + BufferData.State state = data.getState(); + + BlockOperations.Operation op = ops.requestCaching(data.getBlockNumber()); + Future blockFuture; + if (state == BufferData.State.PREFETCHING) { + blockFuture = data.getActionFuture(); + } else { + CompletableFuture cf = new CompletableFuture<>(); + cf.complete(null); + blockFuture = cf; + } + + CachePutTask task = + new CachePutTask(data, blockFuture, this, Instant.now()); + Future actionFuture = futurePool.executeFunction(task); + data.setCaching(actionFuture); + ops.end(op); + } + } + + private void addToCacheAndRelease(BufferData data, Future blockFuture, + Instant taskQueuedStartTime) { + prefetchingStatistics.executorAcquired( + Duration.between(taskQueuedStartTime, Instant.now())); + + if (closed) { + return; + } + + if (cachingDisabled.get()) { + data.setDone(); + return; + } + + try { + blockFuture.get(TIMEOUT_MINUTES, TimeUnit.MINUTES); + if (data.stateEqualsOneOf(BufferData.State.DONE)) { + // There was an error during prefetch. + return; + } + } catch (Exception e) { + LOG.info("error waiting on blockFuture: {}. 
{}", data, e.getMessage()); + LOG.debug("error waiting on blockFuture: {}", data, e); + data.setDone(); + return; + } + + if (cachingDisabled.get()) { + data.setDone(); + return; + } + + BlockOperations.Operation op = null; + + synchronized (data) { + try { + if (data.stateEqualsOneOf(BufferData.State.DONE)) { + return; + } + + if (cache.containsBlock(data.getBlockNumber())) { + data.setDone(); + return; + } + + op = ops.addToCache(data.getBlockNumber()); + ByteBuffer buffer = data.getBuffer().duplicate(); + buffer.rewind(); + cachePut(data.getBlockNumber(), buffer); + data.setDone(); + } catch (Exception e) { + numCachingErrors.incrementAndGet(); + LOG.info("error adding block to cache after wait: {}. {}", data, e.getMessage()); + LOG.debug("error adding block to cache after wait: {}", data, e); + data.setDone(); + } + + if (op != null) { + BlockOperations.End endOp = (BlockOperations.End) ops.end(op); + if (endOp.duration() > SLOW_CACHING_THRESHOLD) { + if (!cachingDisabled.getAndSet(true)) { + String message = String.format( + "Caching disabled because of slow operation (%.1f sec)", endOp.duration()); + LOG.warn(message); + } + } + } + } + } + + protected BlockCache createCache() { + return new SingleFilePerBlockCache(prefetchingStatistics); + } + + protected void cachePut(int blockNumber, ByteBuffer buffer) throws IOException { + if (closed) { + return; + } + + cache.put(blockNumber, buffer, conf, localDirAllocator); + } + + private static class CachePutTask implements Supplier { + private final BufferData data; + + // Block being asynchronously fetched. + private final Future blockFuture; + + // Block manager that manages this block. 
+ private final CachingBlockManager blockManager; + + private final Instant taskQueuedStartTime; + + CachePutTask( + BufferData data, + Future blockFuture, + CachingBlockManager blockManager, + Instant taskQueuedStartTime) { + this.data = data; + this.blockFuture = blockFuture; + this.blockManager = blockManager; + this.taskQueuedStartTime = taskQueuedStartTime; + } + + @Override + public Void get() { + blockManager.addToCacheAndRelease(data, blockFuture, taskQueuedStartTime); + return null; + } + } + + /** + * Number of ByteBuffers available to be acquired. + * + * @return the number of available buffers. + */ + public int numAvailable() { + return bufferPool.numAvailable(); + } + + /** + * Number of caching operations completed. + * + * @return the number of cached buffers. + */ + public int numCached() { + return cache.size(); + } + + /** + * Number of errors encountered when caching. + * + * @return the number of errors encountered when caching. + */ + public int numCachingErrors() { + return numCachingErrors.get(); + } + + /** + * Number of errors encountered when reading. + * + * @return the number of errors encountered when reading. 
+ */ + public int numReadErrors() { + return numReadErrors.get(); + } + + BufferData getData(int blockNumber) { + return bufferPool.tryAcquire(blockNumber); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("cache("); + sb.append(cache.toString()); + sb.append("); "); + + sb.append("pool: "); + sb.append(bufferPool.toString()); + + return sb.toString(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/EmptyPrefetchingStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/EmptyPrefetchingStatistics.java new file mode 100644 index 0000000000000..177ff7abab8b7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/EmptyPrefetchingStatistics.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.time.Duration; + +import org.apache.hadoop.fs.statistics.DurationTracker; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTracker; + +/** + * Empty implementation of the prefetching statistics interface. 
+ */ +public final class EmptyPrefetchingStatistics + implements PrefetchingStatistics { + + private static final EmptyPrefetchingStatistics + EMPTY_PREFETCHING_STATISTICS = + new EmptyPrefetchingStatistics(); + + private EmptyPrefetchingStatistics() { + } + + public static EmptyPrefetchingStatistics getInstance() { + return EMPTY_PREFETCHING_STATISTICS; + } + + @Override + public DurationTracker prefetchOperationStarted() { + return stubDurationTracker(); + } + + @Override + public void blockAddedToFileCache() { + + } + + @Override + public void blockRemovedFromFileCache() { + + } + + @Override + public void prefetchOperationCompleted() { + + } + + @Override + public void executorAcquired(Duration timeInQueue) { + + } + + @Override + public void memoryAllocated(int size) { + + } + + @Override + public void memoryFreed(int size) { + + } +} + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ExecutorServiceFuturePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ExecutorServiceFuturePool.java new file mode 100644 index 0000000000000..645de280394c6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ExecutorServiceFuturePool.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.util.Locale; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import org.slf4j.Logger; + +import org.apache.hadoop.util.concurrent.HadoopExecutors; + +/** + * A FuturePool implementation backed by a java.util.concurrent.ExecutorService. + * + * If a piece of work has started, it cannot (currently) be cancelled. + * + * This class is a simplified version of com.twitter:util-core_2.11 + * ExecutorServiceFuturePool designed to avoid depending on that Scala library. + * One problem with using a Scala library is that many downstream projects + * (e.g. Apache Spark) use Scala, and they might want to use a different version of Scala + * from the version that Hadoop chooses to use.
+ * + */ +public class ExecutorServiceFuturePool { + + private final ExecutorService executor; + + public ExecutorServiceFuturePool(ExecutorService executor) { + this.executor = executor; + } + + /** + * @param f function to run in future on executor pool + * @return future + * @throws java.util.concurrent.RejectedExecutionException can be thrown + * @throws NullPointerException if f param is null + */ + public Future executeFunction(final Supplier f) { + return executor.submit(f::get); + } + + /** + * @param r runnable to run in future on executor pool + * @return future + * @throws java.util.concurrent.RejectedExecutionException can be thrown + * @throws NullPointerException if r param is null + */ + @SuppressWarnings("unchecked") + public Future executeRunnable(final Runnable r) { + return (Future) executor.submit(r::run); + } + + /** + * Utility to shutdown the {@link ExecutorService} used by this class. Will wait up to a + * certain timeout for the ExecutorService to gracefully shutdown. + * + * @param logger Logger + * @param timeout the maximum time to wait + * @param unit the time unit of the timeout argument + */ + public void shutdown(Logger logger, long timeout, TimeUnit unit) { + HadoopExecutors.shutdown(executor, logger, timeout, unit); + } + + public String toString() { + return String.format(Locale.ROOT, "ExecutorServiceFuturePool(executor=%s)", executor); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/FilePosition.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/FilePosition.java new file mode 100644 index 0000000000000..286bdd7ae8996 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/FilePosition.java @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.ByteBuffer; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkState; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkWithinRange; + +/** + * Provides functionality related to tracking the position within a file. + * + * The file is accessed through an in memory buffer. The absolute position within + * the file is the sum of start offset of the buffer within the file and the relative + * offset of the current access location within the buffer. + * + * A file is made up of equal sized blocks. The last block may be of a smaller size. + * The size of a buffer associated with this file is typically the same as block size. + */ +public final class FilePosition { + + /** + * Holds block based information about a file. + */ + private BlockData blockData; + + /** + * Information about the buffer in use. + */ + private BufferData data; + + /** + * Provides access to the underlying file. 
+ */ + private ByteBuffer buffer; + + /** + * Start offset of the buffer relative to the start of a file. + */ + private long bufferStartOffset; + + /** + * Offset where reading starts relative to the start of a file. + */ + private long readStartOffset; + + // Read stats after a seek (mostly for debugging use). + private int numSingleByteReads; + + private int numBytesRead; + + private int numBufferReads; + + /** + * Constructs an instance of {@link FilePosition}. + * + * @param fileSize size of the associated file. + * @param blockSize size of each block within the file. + * + * @throws IllegalArgumentException if fileSize is negative. + * @throws IllegalArgumentException if blockSize is zero or negative. + */ + public FilePosition(long fileSize, int blockSize) { + checkNotNegative(fileSize, "fileSize"); + if (fileSize == 0) { + checkNotNegative(blockSize, "blockSize"); + } else { + checkPositiveInteger(blockSize, "blockSize"); + } + + this.blockData = new BlockData(fileSize, blockSize); + + // The position is valid only when a valid buffer is associated with this file. + this.invalidate(); + } + + /** + * Associates a buffer with this file. + * + * @param bufferData the buffer associated with this file. + * @param startOffset Start offset of the buffer relative to the start of a file. + * @param readOffset Offset where reading starts relative to the start of a file. + * + * @throws IllegalArgumentException if bufferData is null. + * @throws IllegalArgumentException if startOffset is negative. + * @throws IllegalArgumentException if readOffset is negative. + * @throws IllegalArgumentException if readOffset is outside the range [startOffset, buffer end]. 
+ */ + public void setData(BufferData bufferData, + long startOffset, + long readOffset) { + checkNotNull(bufferData, "bufferData"); + checkNotNegative(startOffset, "startOffset"); + checkNotNegative(readOffset, "readOffset"); + checkWithinRange( + readOffset, + "readOffset", + startOffset, + startOffset + bufferData.getBuffer().limit()); + + data = bufferData; + buffer = bufferData.getBuffer().duplicate(); + bufferStartOffset = startOffset; + readStartOffset = readOffset; + setAbsolute(readOffset); + + resetReadStats(); + } + + public ByteBuffer buffer() { + throwIfInvalidBuffer(); + return buffer; + } + + public BufferData data() { + throwIfInvalidBuffer(); + return data; + } + + /** + * Gets the current absolute position within this file. + * + * @return the current absolute position within this file. + */ + public long absolute() { + throwIfInvalidBuffer(); + return bufferStartOffset + relative(); + } + + /** + * If the given {@code pos} lies within the current buffer, updates the current position to + * the specified value and returns true; otherwise returns false without changing the position. + * + * @param pos the absolute position to change the current position to if possible. + * @return true if the given current position was updated, false otherwise. + */ + public boolean setAbsolute(long pos) { + if (isValid() && isWithinCurrentBuffer(pos)) { + int relativePos = (int) (pos - bufferStartOffset); + buffer.position(relativePos); + return true; + } else { + return false; + } + } + + /** + * Gets the current position within this file relative to the start of the associated buffer. + * + * @return the current position within this file relative to the start of the associated buffer. + */ + public int relative() { + throwIfInvalidBuffer(); + return buffer.position(); + } + + /** + * Determines whether the given absolute position lies within the current buffer. + * + * @param pos the position to check. 
+ * @return true if the given absolute position lies within the current buffer, false otherwise. + */ + public boolean isWithinCurrentBuffer(long pos) { + throwIfInvalidBuffer(); + long bufferEndOffset = bufferStartOffset + buffer.limit(); + return (pos >= bufferStartOffset) && (pos <= bufferEndOffset); + } + + /** + * Gets the id of the current block. + * + * @return the id of the current block. + */ + public int blockNumber() { + throwIfInvalidBuffer(); + return blockData.getBlockNumber(bufferStartOffset); + } + + /** + * Determines whether the current block is the last block in this file. + * + * @return true if the current block is the last block in this file, false otherwise. + */ + public boolean isLastBlock() { + return blockData.isLastBlock(blockNumber()); + } + + /** + * Determines if the current position is valid. + * + * @return true if the current position is valid, false otherwise. + */ + public boolean isValid() { + return buffer != null; + } + + /** + * Marks the current position as invalid. + */ + public void invalidate() { + buffer = null; + bufferStartOffset = -1; + data = null; + } + + /** + * Gets the start of the current block's absolute offset. + * + * @return the start of the current block's absolute offset. + */ + public long bufferStartOffset() { + throwIfInvalidBuffer(); + return bufferStartOffset; + } + + /** + * Determines whether the current buffer has been fully read. + * + * @return true if the current buffer has been fully read, false otherwise. 
+ */ + public boolean bufferFullyRead() { + throwIfInvalidBuffer(); + return (bufferStartOffset == readStartOffset) + && (relative() == buffer.limit()) + && (numBytesRead == buffer.limit()); + } + + public void incrementBytesRead(int n) { + numBytesRead += n; + if (n == 1) { + numSingleByteReads++; + } else { + numBufferReads++; + } + } + + public int numBytesRead() { + return numBytesRead; + } + + public int numSingleByteReads() { + return numSingleByteReads; + } + + public int numBufferReads() { + return numBufferReads; + } + + private void resetReadStats() { + numBytesRead = 0; + numSingleByteReads = 0; + numBufferReads = 0; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + if (buffer == null) { + sb.append("currentBuffer = null"); + } else { + int pos = buffer.position(); + int val; + if (pos >= buffer.limit()) { + val = -1; + } else { + val = buffer.get(pos); + } + String currentBufferState = + String.format("%d at pos: %d, lim: %d", val, pos, buffer.limit()); + sb.append(String.format( + "block: %d, pos: %d (CBuf: %s)%n", + blockNumber(), absolute(), + currentBufferState)); + sb.append("\n"); + } + return sb.toString(); + } + + private void throwIfInvalidBuffer() { + checkState(buffer != null, "'buffer' must not be null"); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/PrefetchingStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/PrefetchingStatistics.java new file mode 100644 index 0000000000000..9ce2dec5889f1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/PrefetchingStatistics.java @@ -0,0 +1,67 @@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.time.Duration; + +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +public interface PrefetchingStatistics extends IOStatisticsSource { + + /** + * A prefetch operation has started. + * @return duration tracker + */ + DurationTracker prefetchOperationStarted(); + + /** + * A block has been saved to the file cache. + */ + void blockAddedToFileCache(); + + /** + * A block has been removed from the file cache. + */ + void blockRemovedFromFileCache(); + + /** + * A prefetch operation has completed. + */ + void prefetchOperationCompleted(); + + /** + * An executor has been acquired, either for prefetching or caching. + * @param timeInQueue time taken to acquire an executor. + */ + void executorAcquired(Duration timeInQueue); + + /** + * A new buffer has been added to the buffer pool. + * @param size size of the new buffer + */ + void memoryAllocated(int size); + + /** + * Previously allocated memory has been freed. + * @param size size of memory freed. 
+ */ + void memoryFreed(int size); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ResourcePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ResourcePool.java new file mode 100644 index 0000000000000..77e00972d08c3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ResourcePool.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.io.Closeable; + +/** + * Manages a fixed pool of resources. + * + * Avoids creating a new resource if a previously created instance is already available. + */ +public abstract class ResourcePool implements Closeable { + + /** + * Acquires a resource blocking if necessary until one becomes available. + * + * @return the acquired resource instance. + */ + public abstract T acquire(); + + /** + * Acquires a resource without blocking if one is immediately available. Otherwise returns null. + * + * @return the acquired resource instance (if immediately available) or null. + */ + public abstract T tryAcquire(); + + /** + * Releases a previously acquired resource.
+ * + * @param item the resource to release. + */ + public abstract void release(T item); + + @Override + public void close() { + } + + /** + * Derived classes may implement a way to cleanup each item. + * + * @param item the resource to close. + */ + protected void close(T item) { + // Do nothing in this class. Allow overriding classes to take any cleanup action. + } + + /** + * Derived classes must implement a way to create an instance of a resource. + * + * @return the created instance. + */ + protected abstract T createNew(); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Retryer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Retryer.java new file mode 100644 index 0000000000000..84c17ef9dde8a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Retryer.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkGreater; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger; + +/** + * Provides retry related functionality. + */ +public class Retryer { + + /* Maximum amount of delay (in ms) before retry fails. */ + private int maxDelay; + + /* Per retry delay (in ms). */ + private int perRetryDelay; + + /** + * The time interval (in ms) at which status update would be made. + */ + private int statusUpdateInterval; + + /* Current delay. */ + private int delay; + + /** + * Initializes a new instance of the {@code Retryer} class. + * + * @param perRetryDelay per retry delay (in ms). + * @param maxDelay maximum amount of delay (in ms) before retry fails. + * @param statusUpdateInterval time interval (in ms) at which status update would be made. + * + * @throws IllegalArgumentException if perRetryDelay is zero or negative. + * @throws IllegalArgumentException if maxDelay is less than or equal to perRetryDelay. + * @throws IllegalArgumentException if statusUpdateInterval is zero or negative. + */ + public Retryer(int perRetryDelay, int maxDelay, int statusUpdateInterval) { + checkPositiveInteger(perRetryDelay, "perRetryDelay"); + checkGreater(maxDelay, "maxDelay", perRetryDelay, "perRetryDelay"); + checkPositiveInteger(statusUpdateInterval, "statusUpdateInterval"); + + this.perRetryDelay = perRetryDelay; + this.maxDelay = maxDelay; + this.statusUpdateInterval = statusUpdateInterval; + } + + /** + * Returns true if retrying should continue, false otherwise. + * + * @return true if the caller should retry, false otherwise. 
+ */ + public boolean continueRetry() { + if (this.delay >= this.maxDelay) { + return false; + } + + try { + Thread.sleep(this.perRetryDelay); + } catch (InterruptedException e) { + // Ignore the exception as required by the semantic of this class; + } + + this.delay += this.perRetryDelay; + return true; + } + + /** + * Returns true if status update interval has been reached. + * + * @return true if status update interval has been reached. + */ + public boolean updateStatus() { + return (this.delay > 0) && this.delay % this.statusUpdateInterval == 0; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java new file mode 100644 index 0000000000000..7a817955452c9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java @@ -0,0 +1,489 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.PosixFilePermission; +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull; + +/** + * Provides functionality necessary for caching blocks of data read from FileSystem. + * Each cache block is stored on the local disk as a separate file. + */ +public class SingleFilePerBlockCache implements BlockCache { + private static final Logger LOG = LoggerFactory.getLogger(SingleFilePerBlockCache.class); + + /** + * Blocks stored in this cache. + */ + private final Map blocks = new ConcurrentHashMap<>(); + + /** + * Number of times a block was read from this cache. + * Used for determining cache utilization factor. + */ + private int numGets = 0; + + private boolean closed; + + private final PrefetchingStatistics prefetchingStatistics; + + /** + * Timeout to be used by close, while acquiring prefetch block write lock. 
+ */ + private static final int PREFETCH_WRITE_LOCK_TIMEOUT = 5; + + /** + * Lock timeout unit to be used by the thread while acquiring prefetch block write lock. + */ + private static final TimeUnit PREFETCH_WRITE_LOCK_TIMEOUT_UNIT = TimeUnit.SECONDS; + + /** + * File attributes attached to any intermediate temporary file created during index creation. + */ + private static final Set TEMP_FILE_ATTRS = + ImmutableSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE); + + /** + * Cache entry. + * Each block is stored as a separate file. + */ + private static final class Entry { + private final int blockNumber; + private final Path path; + private final int size; + private final long checksum; + private final ReentrantReadWriteLock lock; + private enum LockType { + READ, + WRITE + } + + Entry(int blockNumber, Path path, int size, long checksum) { + this.blockNumber = blockNumber; + this.path = path; + this.size = size; + this.checksum = checksum; + this.lock = new ReentrantReadWriteLock(); + } + + @Override + public String toString() { + return String.format( + "([%03d] %s: size = %d, checksum = %d)", + blockNumber, path, size, checksum); + } + + /** + * Take the read or write lock. + * + * @param lockType type of the lock. + */ + private void takeLock(LockType lockType) { + if (LockType.READ == lockType) { + lock.readLock().lock(); + } else if (LockType.WRITE == lockType) { + lock.writeLock().lock(); + } + } + + /** + * Release the read or write lock. + * + * @param lockType type of the lock. + */ + private void releaseLock(LockType lockType) { + if (LockType.READ == lockType) { + lock.readLock().unlock(); + } else if (LockType.WRITE == lockType) { + lock.writeLock().unlock(); + } + } + + /** + * Try to take the read or write lock within the given timeout. + * + * @param lockType type of the lock. + * @param timeout the time to wait for the given lock. + * @param unit the time unit of the timeout argument. 
+ * @return true if the lock of the given lock type was acquired. + */ + private boolean takeLock(LockType lockType, long timeout, TimeUnit unit) { + try { + if (LockType.READ == lockType) { + return lock.readLock().tryLock(timeout, unit); + } else if (LockType.WRITE == lockType) { + return lock.writeLock().tryLock(timeout, unit); + } + } catch (InterruptedException e) { + LOG.warn("Thread interrupted while trying to acquire {} lock", lockType, e); + Thread.currentThread().interrupt(); + } + return false; + } + } + + /** + * Constructs an instance of a {@code SingleFilePerBlockCache}. + * + * @param prefetchingStatistics statistics for this stream. + */ + public SingleFilePerBlockCache(PrefetchingStatistics prefetchingStatistics) { + this.prefetchingStatistics = requireNonNull(prefetchingStatistics); + } + + /** + * Indicates whether the given block is in this cache. + */ + @Override + public boolean containsBlock(int blockNumber) { + return blocks.containsKey(blockNumber); + } + + /** + * Gets the blocks in this cache. + */ + @Override + public Iterable blocks() { + return Collections.unmodifiableList(new ArrayList<>(blocks.keySet())); + } + + /** + * Gets the number of blocks in this cache. + */ + @Override + public int size() { + return blocks.size(); + } + + /** + * Gets the block having the given {@code blockNumber}. + * + * @throws IllegalArgumentException if buffer is null. 
+ */ + @Override + public void get(int blockNumber, ByteBuffer buffer) throws IOException { + if (closed) { + return; + } + + checkNotNull(buffer, "buffer"); + + Entry entry = getEntry(blockNumber); + entry.takeLock(Entry.LockType.READ); + try { + buffer.clear(); + readFile(entry.path, buffer); + buffer.rewind(); + validateEntry(entry, buffer); + } finally { + entry.releaseLock(Entry.LockType.READ); + } + } + + protected int readFile(Path path, ByteBuffer buffer) throws IOException { + int numBytesRead = 0; + int numBytes; + FileChannel channel = FileChannel.open(path, StandardOpenOption.READ); + while ((numBytes = channel.read(buffer)) > 0) { + numBytesRead += numBytes; + } + buffer.limit(buffer.position()); + channel.close(); + return numBytesRead; + } + + private Entry getEntry(int blockNumber) { + Validate.checkNotNegative(blockNumber, "blockNumber"); + + Entry entry = blocks.get(blockNumber); + if (entry == null) { + throw new IllegalStateException(String.format("block %d not found in cache", blockNumber)); + } + numGets++; + return entry; + } + + /** + * Puts the given block in this cache. + * + * @param blockNumber the block number, used as a key for blocks map. + * @param buffer buffer contents of the given block to be added to this cache. + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. + * @throws IOException if either local dir allocator fails to allocate file or if IO error + * occurs while writing the buffer content to the file. + * @throws IllegalArgumentException if buffer is null, or if buffer.limit() is zero or negative. 
+ */ + @Override + public void put(int blockNumber, ByteBuffer buffer, Configuration conf, + LocalDirAllocator localDirAllocator) throws IOException { + if (closed) { + return; + } + + checkNotNull(buffer, "buffer"); + + if (blocks.containsKey(blockNumber)) { + Entry entry = blocks.get(blockNumber); + entry.takeLock(Entry.LockType.READ); + try { + validateEntry(entry, buffer); + } finally { + entry.releaseLock(Entry.LockType.READ); + } + return; + } + + Validate.checkPositiveInteger(buffer.limit(), "buffer.limit()"); + + Path blockFilePath = getCacheFilePath(conf, localDirAllocator); + long size = Files.size(blockFilePath); + if (size != 0) { + String message = + String.format("[%d] temp file already has data. %s (%d)", + blockNumber, blockFilePath, size); + throw new IllegalStateException(message); + } + + writeFile(blockFilePath, buffer); + long checksum = BufferData.getChecksum(buffer); + Entry entry = new Entry(blockNumber, blockFilePath, buffer.limit(), checksum); + blocks.put(blockNumber, entry); + // Update stream_read_blocks_in_cache stats only after blocks map is updated with new file + // entry to avoid any discrepancy related to the value of stream_read_blocks_in_cache. + // If stream_read_blocks_in_cache is updated before updating the blocks map here, closing of + // the input stream can lead to the removal of the cache file even before blocks is added with + // the new cache file, leading to incorrect value of stream_read_blocks_in_cache. 
+ prefetchingStatistics.blockAddedToFileCache(); + } + + private static final Set CREATE_OPTIONS = + EnumSet.of(StandardOpenOption.WRITE, + StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + + protected void writeFile(Path path, ByteBuffer buffer) throws IOException { + buffer.rewind(); + WritableByteChannel writeChannel = Files.newByteChannel(path, CREATE_OPTIONS); + while (buffer.hasRemaining()) { + writeChannel.write(buffer); + } + writeChannel.close(); + } + + /** + * Return temporary file created based on the file path retrieved from local dir allocator. + * + * @param conf The configuration object. + * @param localDirAllocator Local dir allocator instance. + * @return Path of the temporary file created. + * @throws IOException if IO error occurs while local dir allocator tries to retrieve path + * from local FS or file creation fails or permission set fails. + */ + protected Path getCacheFilePath(final Configuration conf, + final LocalDirAllocator localDirAllocator) + throws IOException { + return getTempFilePath(conf, localDirAllocator); + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + + closed = true; + + LOG.info(getStats()); + int numFilesDeleted = 0; + + for (Entry entry : blocks.values()) { + boolean lockAcquired = entry.takeLock(Entry.LockType.WRITE, PREFETCH_WRITE_LOCK_TIMEOUT, + PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); + if (!lockAcquired) { + LOG.error("Cache file {} deletion would not be attempted as write lock could not" + + " be acquired within {} {}", entry.path, PREFETCH_WRITE_LOCK_TIMEOUT, + PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); + continue; + } + try { + Files.deleteIfExists(entry.path); + prefetchingStatistics.blockRemovedFromFileCache(); + numFilesDeleted++; + } catch (IOException e) { + LOG.debug("Failed to delete cache file {}", entry.path, e); + } finally { + entry.releaseLock(Entry.LockType.WRITE); + } + } + + if (numFilesDeleted > 0) { + LOG.info("Deleted {} cache files", 
numFilesDeleted); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("stats: "); + sb.append(getStats()); + sb.append(", blocks:["); + sb.append(getIntList(blocks())); + sb.append("]"); + return sb.toString(); + } + + private void validateEntry(Entry entry, ByteBuffer buffer) { + if (entry.size != buffer.limit()) { + String message = String.format( + "[%d] entry.size(%d) != buffer.limit(%d)", + entry.blockNumber, entry.size, buffer.limit()); + throw new IllegalStateException(message); + } + + long checksum = BufferData.getChecksum(buffer); + if (entry.checksum != checksum) { + String message = String.format( + "[%d] entry.checksum(%d) != buffer checksum(%d)", + entry.blockNumber, entry.checksum, checksum); + throw new IllegalStateException(message); + } + } + + /** + * Produces a human readable list of blocks for the purpose of logging. + * This method minimizes the length of returned list by converting + * a contiguous list of blocks into a range. 
+ * for example, + * 1, 3, 4, 5, 6, 8 becomes 1, 3~6, 8 + */ + private String getIntList(Iterable nums) { + List numList = new ArrayList<>(); + List numbers = new ArrayList(); + for (Integer n : nums) { + numbers.add(n); + } + Collections.sort(numbers); + + int index = 0; + while (index < numbers.size()) { + int start = numbers.get(index); + int prev = start; + int end = start; + while ((++index < numbers.size()) && ((end = numbers.get(index)) == prev + 1)) { + prev = end; + } + + if (start == prev) { + numList.add(Integer.toString(start)); + } else { + numList.add(String.format("%d~%d", start, prev)); + } + } + + return String.join(", ", numList); + } + + private String getStats() { + StringBuilder sb = new StringBuilder(); + sb.append(String.format( + "#entries = %d, #gets = %d", + blocks.size(), numGets)); + return sb.toString(); + } + + private static final String CACHE_FILE_PREFIX = "fs-cache-"; + + /** + * Determine if the cache space is available on the local FS. + * + * @param fileSize The size of the file. + * @param conf The configuration. + * @param localDirAllocator Local dir allocator instance. + * @return True if the given file size is less than the available free space on local FS, + * False otherwise. + */ + public static boolean isCacheSpaceAvailable(long fileSize, Configuration conf, + LocalDirAllocator localDirAllocator) { + try { + Path cacheFilePath = getTempFilePath(conf, localDirAllocator); + long freeSpace = new File(cacheFilePath.toString()).getUsableSpace(); + LOG.info("fileSize = {}, freeSpace = {}", fileSize, freeSpace); + Files.deleteIfExists(cacheFilePath); + return fileSize < freeSpace; + } catch (IOException e) { + LOG.error("isCacheSpaceAvailable", e); + return false; + } + } + + // The suffix (file extension) of each serialized index file. + private static final String BINARY_FILE_SUFFIX = ".bin"; + + /** + * Create temporary file based on the file path retrieved from local dir allocator + * instance. 
The file is created with .bin suffix. The created file has been granted + * posix file permissions available in TEMP_FILE_ATTRS. + * + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. + * @return path of the file created. + * @throws IOException if IO error occurs while local dir allocator tries to retrieve path + * from local FS or file creation fails or permission set fails. + */ + private static Path getTempFilePath(final Configuration conf, + final LocalDirAllocator localDirAllocator) throws IOException { + org.apache.hadoop.fs.Path path = + localDirAllocator.getLocalPathForWrite(CACHE_FILE_PREFIX, conf); + File dir = new File(path.getParent().toUri().getPath()); + String prefix = path.getName(); + File tmpFile = File.createTempFile(prefix, BINARY_FILE_SUFFIX, dir); + Path tmpFilePath = Paths.get(tmpFile.toURI()); + return Files.setPosixFilePermissions(tmpFilePath, TEMP_FILE_ATTRS); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Validate.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Validate.java new file mode 100644 index 0000000000000..17a668a0d3bc3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Validate.java @@ -0,0 +1,399 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collection; + +import static org.apache.hadoop.util.Preconditions.checkArgument; + +/** + * A superset of Validate class in Apache commons lang3. + *

    + * It provides consistent message strings for frequently encountered checks. + * That simplifies callers because they have to supply only the name of the argument + * that failed a check instead of having to supply the entire message. + */ +public final class Validate { + + private Validate() { + } + + /** + * Validates that the given reference argument is not null. + * @param obj the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNull(Object obj, String argName) { + checkArgument(obj != null, "'%s' must not be null.", argName); + } + + /** + * Validates that the given integer argument is not zero or negative. + * @param value the argument value to validate + * @param argName the name of the argument being validated. + */ + public static void checkPositiveInteger(long value, String argName) { + checkArgument(value > 0, "'%s' must be a positive integer.", argName); + } + + /** + * Validates that the given integer argument is not negative. + * @param value the argument value to validate + * @param argName the name of the argument being validated. + */ + public static void checkNotNegative(long value, String argName) { + checkArgument(value >= 0, "'%s' must not be negative.", argName); + } + + /** + * Validates that the expression (that checks a required field is present) is true. + * @param isPresent indicates whether the given argument is present. + * @param argName the name of the argument being validated. + */ + public static void checkRequired(boolean isPresent, String argName) { + checkArgument(isPresent, "'%s' is required.", argName); + } + + /** + * Validates that the expression (that checks a field is valid) is true. + * @param isValid indicates whether the given argument is valid. + * @param argName the name of the argument being validated. 
+ */ + public static void checkValid(boolean isValid, String argName) { + checkArgument(isValid, "'%s' is invalid.", argName); + } + + /** + * Validates that the expression (that checks a field is valid) is true. + * @param isValid indicates whether the given argument is valid. + * @param argName the name of the argument being validated. + * @param validValues the list of values that are allowed. + */ + public static void checkValid(boolean isValid, + String argName, + String validValues) { + checkArgument(isValid, "'%s' is invalid. Valid values are: %s.", argName, + validValues); + } + + /** + * Validates that the given string is not null and has non-zero length. + * @param arg the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNullAndNotEmpty(String arg, String argName) { + checkNotNull(arg, argName); + checkArgument( + !arg.isEmpty(), + "'%s' must not be empty.", + argName); + } + + /** + * Validates that the given array is not null and has at least one element. + * @param the type of array's elements. + * @param array the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNullAndNotEmpty(T[] array, String argName) { + checkNotNull(array, argName); + checkNotEmpty(array.length, argName); + } + + /** + * Validates that the given array is not null and has at least one element. + * @param array the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNullAndNotEmpty(byte[] array, String argName) { + checkNotNull(array, argName); + checkNotEmpty(array.length, argName); + } + + /** + * Validates that the given array is not null and has at least one element. + * @param array the argument reference to validate. + * @param argName the name of the argument being validated. 
+ */ + public static void checkNotNullAndNotEmpty(short[] array, String argName) { + checkNotNull(array, argName); + checkNotEmpty(array.length, argName); + } + + /** + * Validates that the given array is not null and has at least one element. + * @param array the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNullAndNotEmpty(int[] array, String argName) { + checkNotNull(array, argName); + checkNotEmpty(array.length, argName); + } + + /** + * Validates that the given array is not null and has at least one element. + * @param array the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNullAndNotEmpty(long[] array, String argName) { + checkNotNull(array, argName); + checkNotEmpty(array.length, argName); + } + + /** + * Validates that the given buffer is not null and has non-zero capacity. + * @param the type of iterable's elements. + * @param iter the argument reference to validate. + * @param argName the name of the argument being validated. + */ + public static void checkNotNullAndNotEmpty(Iterable iter, + String argName) { + checkNotNull(iter, argName); + int minNumElements = iter.iterator().hasNext() ? 1 : 0; + checkNotEmpty(minNumElements, argName); + } + + /** + * Validates that the given set is not null and has an exact number of items. + * @param the type of collection's elements. + * @param collection the argument reference to validate. + * @param numElements the expected number of elements in the collection. + * @param argName the name of the argument being validated. 
+ */ + public static void checkNotNullAndNumberOfElements( + Collection collection, int numElements, String argName) { + checkNotNull(collection, argName); + checkArgument( + collection.size() == numElements, + "Number of elements in '%s' must be exactly %s, %s given.", + argName, + numElements, + collection.size() + ); + } + + /** + * Validates that the given two values are equal. + * @param value1 the first value to check. + * @param value1Name the name of the first argument. + * @param value2 the second value to check. + * @param value2Name the name of the second argument. + */ + public static void checkValuesEqual( + long value1, + String value1Name, + long value2, + String value2Name) { + checkArgument( + value1 == value2, + "'%s' (%s) must equal '%s' (%s).", + value1Name, + value1, + value2Name, + value2); + } + + /** + * Validates that the first value is an integer multiple of the second value. + * @param value1 the first value to check. + * @param value1Name the name of the first argument. + * @param value2 the second value to check. + * @param value2Name the name of the second argument. + */ + public static void checkIntegerMultiple( + long value1, + String value1Name, + long value2, + String value2Name) { + checkArgument( + (value1 % value2) == 0, + "'%s' (%s) must be an integer multiple of '%s' (%s).", + value1Name, + value1, + value2Name, + value2); + } + + /** + * Validates that the first value is greater than the second value. + * @param value1 the first value to check. + * @param value1Name the name of the first argument. + * @param value2 the second value to check. + * @param value2Name the name of the second argument. 
+ */ + public static void checkGreater( + long value1, + String value1Name, + long value2, + String value2Name) { + checkArgument( + value1 > value2, + "'%s' (%s) must be greater than '%s' (%s).", + value1Name, + value1, + value2Name, + value2); + } + + /** + * Validates that the first value is greater than or equal to the second value. + * @param value1 the first value to check. + * @param value1Name the name of the first argument. + * @param value2 the second value to check. + * @param value2Name the name of the second argument. + */ + public static void checkGreaterOrEqual( + long value1, + String value1Name, + long value2, + String value2Name) { + checkArgument( + value1 >= value2, + "'%s' (%s) must be greater than or equal to '%s' (%s).", + value1Name, + value1, + value2Name, + value2); + } + + /** + * Validates that the first value is less than or equal to the second value. + * @param value1 the first value to check. + * @param value1Name the name of the first argument. + * @param value2 the second value to check. + * @param value2Name the name of the second argument. + */ + public static void checkLessOrEqual( + long value1, + String value1Name, + long value2, + String value2Name) { + checkArgument( + value1 <= value2, + "'%s' (%s) must be less than or equal to '%s' (%s).", + value1Name, + value1, + value2Name, + value2); + } + + /** + * Validates that the given value is within the given range of values. + * @param value the value to check. + * @param valueName the name of the argument. + * @param minValueInclusive inclusive lower limit for the value. + * @param maxValueInclusive inclusive upper limit for the value. 
+ */ + public static void checkWithinRange( + long value, + String valueName, + long minValueInclusive, + long maxValueInclusive) { + checkArgument( + (value >= minValueInclusive) && (value <= maxValueInclusive), + "'%s' (%s) must be within the range [%s, %s].", + valueName, + value, + minValueInclusive, + maxValueInclusive); + } + + /** + * Validates that the given value is within the given range of values. + * @param value the value to check. + * @param valueName the name of the argument. + * @param minValueInclusive inclusive lower limit for the value. + * @param maxValueInclusive inclusive upper limit for the value. + */ + public static void checkWithinRange( + double value, + String valueName, + double minValueInclusive, + double maxValueInclusive) { + checkArgument( + (value >= minValueInclusive) && (value <= maxValueInclusive), + "'%s' (%s) must be within the range [%s, %s].", + valueName, + value, + minValueInclusive, + maxValueInclusive); + } + + /** + * Validates that the given path exists. + * @param path the path to check. + * @param argName the name of the argument being validated. + */ + public static void checkPathExists(Path path, String argName) { + checkNotNull(path, argName); + checkArgument(Files.exists(path), "Path %s (%s) does not exist.", argName, + path); + } + + /** + * Validates that the given path exists and is a directory. + * @param path the path to check. + * @param argName the name of the argument being validated. + */ + public static void checkPathExistsAsDir(Path path, String argName) { + checkPathExists(path, argName); + checkArgument( + Files.isDirectory(path), + "Path %s (%s) must point to a directory.", + argName, + path); + } + + /** + * Validates that the given path exists and is a file. + * @param path the path to check. + * @param argName the name of the argument being validated. 
+ */ + public static void checkPathExistsAsFile(Path path, String argName) { + checkPathExists(path, argName); + checkArgument(Files.isRegularFile(path), + "Path %s (%s) must point to a file.", argName, path); + } + + + /** + * Check state. + * @param expression expression which must hold. + * @param format format string + * @param args arguments for the error string + * @throws IllegalStateException if the state is not valid. + */ + public static void checkState(boolean expression, + String format, + Object... args) { + if (!expression) { + throw new IllegalStateException(String.format(format, args)); + } + } + + private static void checkNotEmpty(int arraySize, String argName) { + checkArgument( + arraySize > 0, + "'%s' must have at least one element.", + argName); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/package-info.java new file mode 100644 index 0000000000000..1b26da85d95fb --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/package-info.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * block caching for use in object store clients. + */ + +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.impl.prefetch; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclEntry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclEntry.java index a902488377239..04a045299b7ea 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclEntry.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclEntry.java @@ -21,7 +21,7 @@ import java.util.Collection; import java.util.List; -import com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclStatus.java index 385fed21d4194..f598210037967 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclStatus.java @@ -22,9 +22,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * An AclStatus contains the ACL information of a specific file. AclStatus @@ -185,7 +185,8 @@ public Builder stickyBit(boolean stickyBit) { /** * Sets the permission for the file. - * @param permission + * @param permission permission. + * @return Builder. */ public Builder setPermission(FsPermission permission) { this.permission = permission; @@ -224,6 +225,7 @@ private AclStatus(String owner, String group, boolean stickyBit, /** * Get the effective permission for the AclEntry * @param entry AclEntry to get the effective action + * @return FsAction. */ public FsAction getEffectivePermission(AclEntry entry) { return getEffectivePermission(entry, permission); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclUtil.java index 42492520dceaa..58b24f200429b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/AclUtil.java @@ -23,7 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * AclUtil contains utility methods for manipulating ACLs. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsAction.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsAction.java index 97dcf816c16ad..746e0e1e238f4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsAction.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsAction.java @@ -48,7 +48,8 @@ private FsAction(String s) { /** * Return true if this action implies that action. - * @param that + * @param that FsAction that. + * @return if implies true,not false. */ public boolean implies(FsAction that) { if (that != null) { @@ -57,15 +58,26 @@ public boolean implies(FsAction that) { return false; } - /** AND operation. */ + /** + * AND operation. + * @param that FsAction that. + * @return FsAction. + */ public FsAction and(FsAction that) { return vals[ordinal() & that.ordinal()]; } - /** OR operation. */ + /** + * OR operation. + * @param that FsAction that. + * @return FsAction. + */ public FsAction or(FsAction that) { return vals[ordinal() | that.ordinal()]; } - /** NOT operation. */ + /** + * NOT operation. + * @return FsAction. + */ public FsAction not() { return vals[7 - ordinal()]; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsCreateModes.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsCreateModes.java index 2bd6f1f3b9126..ff3b4f6d65a49 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsCreateModes.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsCreateModes.java @@ -35,7 +35,10 @@ public final class FsCreateModes extends FsPermission { /** * Create from unmasked mode and umask. * - * If the mode is already an FsCreateModes object, return it. + * @param mode mode. + * @param umask umask. 
+ * @return If the mode is already + * an FsCreateModes object, return it. */ public static FsPermission applyUMask(FsPermission mode, FsPermission umask) { @@ -47,6 +50,10 @@ public static FsPermission applyUMask(FsPermission mode, /** * Create from masked and unmasked modes. + * + * @param masked masked. + * @param unmasked unmasked. + * @return FsCreateModes. */ public static FsCreateModes create(FsPermission masked, FsPermission unmasked) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java index 51c113af2702e..33fed1d303990 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java @@ -56,7 +56,11 @@ public class FsPermission implements Writable, Serializable, /** Maximum acceptable length of a permission string to parse */ public static final int MAX_PERMISSION_LENGTH = 10; - /** Create an immutable {@link FsPermission} object. */ + /** + * Create an immutable {@link FsPermission} object. + * @param permission permission. + * @return FsPermission. + */ public static FsPermission createImmutable(short permission) { return new ImmutableFsPermission(permission); } @@ -85,7 +89,7 @@ public FsPermission(FsAction u, FsAction g, FsAction o, boolean sb) { /** * Construct by the given mode. - * @param mode + * @param mode mode. * @see #toShort() */ public FsPermission(short mode) { fromShort(mode); } @@ -145,13 +149,19 @@ public FsPermission(String mode) { this(new RawParser(mode).getPermission()); } - /** Return user {@link FsAction}. */ + /** + * @return Return user {@link FsAction}. + */ public FsAction getUserAction() {return useraction;} - /** Return group {@link FsAction}. */ + /** + * @return Return group {@link FsAction}. 
+ */ public FsAction getGroupAction() {return groupaction;} - /** Return other {@link FsAction}. */ + /** + * @return Return other {@link FsAction}. + */ public FsAction getOtherAction() {return otheraction;} private void set(FsAction u, FsAction g, FsAction o, boolean sb) { @@ -180,6 +190,7 @@ public void readFields(DataInput in) throws IOException { /** * Get masked permission if exists. + * @return masked. */ public FsPermission getMasked() { return null; @@ -187,6 +198,7 @@ public FsPermission getMasked() { /** * Get unmasked permission if exists. + * @return unmasked. */ public FsPermission getUnmasked() { return null; @@ -194,6 +206,10 @@ public FsPermission getUnmasked() { /** * Create and initialize a {@link FsPermission} from {@link DataInput}. + * + * @param in data input. + * @throws IOException raised on errors performing I/O. + * @return FsPermission. */ public static FsPermission read(DataInput in) throws IOException { FsPermission p = new FsPermission(); @@ -203,6 +219,7 @@ public static FsPermission read(DataInput in) throws IOException { /** * Encode the object to a short. + * @return object to a short. */ public short toShort() { int s = (stickyBit ? 1 << 9 : 0) | @@ -301,6 +318,9 @@ public FsPermission applyUMask(FsPermission umask) { * '-' sets bits in the mask. * * Octal umask, the specified bits are set in the file mode creation mask. + * + * @param conf configuration. + * @return FsPermission UMask. */ public static FsPermission getUMask(Configuration conf) { int umask = DEFAULT_UMASK; @@ -346,7 +366,11 @@ public boolean getAclBit() { } /** - * Returns true if the file is encrypted or directory is in an encryption zone + * Returns true if the file is encrypted or directory is in an encryption zone. + * + * @return if the file is encrypted or directory + * is in an encryption zone true, not false. + * * @deprecated Get encryption bit from the * {@link org.apache.hadoop.fs.FileStatus} object. 
*/ @@ -357,6 +381,9 @@ public boolean getEncryptedBit() { /** * Returns true if the file or directory is erasure coded. + * + * @return if the file or directory is + * erasure coded true, not false. * @deprecated Get ec bit from the {@link org.apache.hadoop.fs.FileStatus} * object. */ @@ -365,7 +392,11 @@ public boolean getErasureCodedBit() { return false; } - /** Set the user file creation mask (umask) */ + /** + * Set the user file creation mask (umask) + * @param conf configuration. + * @param umask umask. + */ public static void setUMask(Configuration conf, FsPermission umask) { conf.set(UMASK_LABEL, String.format("%1$03o", umask.toShort())); } @@ -379,6 +410,8 @@ public static void setUMask(Configuration conf, FsPermission umask) { * {@link FsPermission#getDirDefault()} for directory, and use * {@link FsPermission#getFileDefault()} for file. * This method is kept for compatibility. + * + * @return Default FsPermission. */ public static FsPermission getDefault() { return new FsPermission((short)00777); @@ -386,6 +419,8 @@ public static FsPermission getDefault() { /** * Get the default permission for directory. + * + * @return DirDefault FsPermission. */ public static FsPermission getDirDefault() { return new FsPermission((short)00777); @@ -393,6 +428,8 @@ public static FsPermission getDirDefault() { /** * Get the default permission for file. + * + * @return FileDefault FsPermission. */ public static FsPermission getFileDefault() { return new FsPermission((short)00666); @@ -400,6 +437,8 @@ public static FsPermission getFileDefault() { /** * Get the default permission for cache pools. + * + * @return CachePoolDefault FsPermission. */ public static FsPermission getCachePoolDefault() { return new FsPermission((short)00755); @@ -408,6 +447,7 @@ public static FsPermission getCachePoolDefault() { /** * Create a FsPermission from a Unix symbolic permission string * @param unixSymbolicPermission e.g. "-rw-rw-rw-" + * @return FsPermission. 
*/ public static FsPermission valueOf(String unixSymbolicPermission) { if (unixSymbolicPermission == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java index 3c3693f613baf..be4beb506a63f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java @@ -39,7 +39,13 @@ public class PermissionStatus implements Writable { WritableFactories.setFactory(PermissionStatus.class, FACTORY); } - /** Create an immutable {@link PermissionStatus} object. */ + /** + * Create an immutable {@link PermissionStatus} object. + * @param user user. + * @param group group. + * @param permission permission. + * @return PermissionStatus. + */ public static PermissionStatus createImmutable( String user, String group, FsPermission permission) { return new PermissionStatus(user, group, permission) { @@ -56,20 +62,35 @@ public void readFields(DataInput in) throws IOException { private PermissionStatus() {} - /** Constructor */ + /** + * Constructor. + * + * @param user user. + * @param group group. + * @param permission permission. + */ public PermissionStatus(String user, String group, FsPermission permission) { username = user; groupname = group; this.permission = permission; } - /** Return user name */ + /** + * Return user name. + * @return user name. + */ public String getUserName() {return username;} - /** Return group name */ + /** + * Return group name. + * @return group name. + */ public String getGroupName() {return groupname;} - /** Return permission */ + /** + * Return permission. + * @return FsPermission. 
+ */ public FsPermission getPermission() {return permission;} @Override @@ -86,6 +107,9 @@ public void write(DataOutput out) throws IOException { /** * Create and initialize a {@link PermissionStatus} from {@link DataInput}. + * @param in data input. + * @throws IOException raised on errors performing I/O. + * @return PermissionStatus. */ public static PermissionStatus read(DataInput in) throws IOException { PermissionStatus p = new PermissionStatus(); @@ -95,6 +119,11 @@ public static PermissionStatus read(DataInput in) throws IOException { /** * Serialize a {@link PermissionStatus} from its base components. + * @param out out. + * @param username username. + * @param groupname groupname. + * @param permission FsPermission. + * @throws IOException raised on errors performing I/O. */ public static void write(DataOutput out, String username, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPFileSystem.java index ed33357b51d2b..abfd24d68d08b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPFileSystem.java @@ -19,12 +19,12 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.net.URI; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Vector; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -35,7 +35,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Progressable; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.jcraft.jsch.ChannelSftp; import 
com.jcraft.jsch.ChannelSftp.LsEntry; import com.jcraft.jsch.SftpATTRS; @@ -51,6 +51,7 @@ public class SFTPFileSystem extends FileSystem { private SFTPConnectionPool connectionPool; private URI uri; + private final AtomicBoolean closed = new AtomicBoolean(false); private static final int DEFAULT_SFTP_PORT = 22; private static final int DEFAULT_MAX_CONNECTION = 5; @@ -84,6 +85,7 @@ public class SFTPFileSystem extends FileSystem { "Destination path %s already exist, cannot rename!"; public static final String E_FAILED_GETHOME = "Failed to get home directory"; public static final String E_FAILED_DISCONNECT = "Failed to disconnect"; + public static final String E_FS_CLOSED = "FileSystem is closed!"; /** * Set configuration from UI. @@ -139,8 +141,9 @@ private void setConfigurationFromURI(URI uriInfo, Configuration conf) * @throws IOException */ private ChannelSftp connect() throws IOException { - Configuration conf = getConf(); + checkNotClosed(); + Configuration conf = getConf(); String host = conf.get(FS_SFTP_HOST, null); int port = conf.getInt(FS_SFTP_HOST_PORT, DEFAULT_SFTP_PORT); String user = conf.get(FS_SFTP_USER_PREFIX + host, null); @@ -516,20 +519,21 @@ public FSDataInputStream open(Path f, int bufferSize) throws IOException { disconnect(channel); throw new IOException(String.format(E_PATH_DIR, f)); } - InputStream is; try { // the path could be a symbolic link, so get the real path absolute = new Path("/", channel.realpath(absolute.toUri().getPath())); - - is = channel.get(absolute.toUri().getPath()); } catch (SftpException e) { throw new IOException(e); } - return new FSDataInputStream(new SFTPInputStream(is, statistics)){ + return new FSDataInputStream( + new SFTPInputStream(channel, absolute, statistics)){ @Override public void close() throws IOException { - super.close(); - disconnect(channel); + try { + super.close(); + } finally { + disconnect(channel); + } } }; } @@ -703,6 +707,31 @@ public FileStatus getFileStatus(Path f) throws IOException { } } + 
@Override + public void close() throws IOException { + try { + super.close(); + if (closed.getAndSet(true)) { + return; + } + } finally { + if (connectionPool != null) { + connectionPool.shutdown(); + } + } + } + + /** + * Verify that the input stream is open. Non blocking; this gives + * the last state of the volatile {@link #closed} field. + * @throws IOException if the connection is closed. + */ + private void checkNotClosed() throws IOException { + if (closed.get()) { + throw new IOException(uri + ": " + E_FS_CLOSED); + } + } + @VisibleForTesting SFTPConnectionPool getConnectionPool() { return connectionPool; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPInputStream.java index 7af299bd113e1..d0f9a8d0887ca 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/sftp/SFTPInputStream.java @@ -15,62 +15,107 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.fs.sftp; +import java.io.EOFException; import java.io.IOException; import java.io.InputStream; +import com.jcraft.jsch.ChannelSftp; +import com.jcraft.jsch.SftpATTRS; +import com.jcraft.jsch.SftpException; + +import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; /** SFTP FileSystem input stream. 
*/ class SFTPInputStream extends FSInputStream { - public static final String E_SEEK_NOTSUPPORTED = "Seek not supported"; - public static final String E_NULL_INPUTSTREAM = "Null InputStream"; - public static final String E_STREAM_CLOSED = "Stream closed"; - + private final ChannelSftp channel; + private final Path path; private InputStream wrappedStream; private FileSystem.Statistics stats; private boolean closed; private long pos; + private long nextPos; + private long contentLength; - SFTPInputStream(InputStream stream, FileSystem.Statistics stats) { - - if (stream == null) { - throw new IllegalArgumentException(E_NULL_INPUTSTREAM); + SFTPInputStream(ChannelSftp channel, Path path, FileSystem.Statistics stats) + throws IOException { + try { + this.channel = channel; + this.path = path; + this.stats = stats; + this.wrappedStream = channel.get(path.toUri().getPath()); + SftpATTRS stat = channel.lstat(path.toString()); + this.contentLength = stat.getSize(); + } catch (SftpException e) { + throw new IOException(e); } - this.wrappedStream = stream; - this.stats = stats; + } - this.pos = 0; - this.closed = false; + @Override + public synchronized void seek(long position) throws IOException { + checkNotClosed(); + if (position < 0) { + throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); + } + nextPos = position; } @Override - public void seek(long position) throws IOException { - throw new IOException(E_SEEK_NOTSUPPORTED); + public synchronized int available() throws IOException { + checkNotClosed(); + long remaining = contentLength - nextPos; + if (remaining > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + return (int) remaining; + } + + private void seekInternal() throws IOException { + if (pos == nextPos) { + return; + } + if (nextPos > pos) { + long skipped = wrappedStream.skip(nextPos - pos); + pos = pos + skipped; + } + if (nextPos < pos) { + wrappedStream.close(); + try { + wrappedStream = channel.get(path.toUri().getPath()); + pos = 
wrappedStream.skip(nextPos); + } catch (SftpException e) { + throw new IOException(e); + } + } } @Override public boolean seekToNewSource(long targetPos) throws IOException { - throw new IOException(E_SEEK_NOTSUPPORTED); + return false; } @Override - public long getPos() throws IOException { - return pos; + public synchronized long getPos() throws IOException { + return nextPos; } @Override public synchronized int read() throws IOException { - if (closed) { - throw new IOException(E_STREAM_CLOSED); + checkNotClosed(); + if (this.contentLength == 0 || (nextPos >= contentLength)) { + return -1; } - + seekInternal(); int byteRead = wrappedStream.read(); if (byteRead >= 0) { pos++; + nextPos++; } if (stats != null & byteRead >= 0) { stats.incrementBytesRead(1); @@ -78,23 +123,6 @@ public synchronized int read() throws IOException { return byteRead; } - public synchronized int read(byte[] buf, int off, int len) - throws IOException { - if (closed) { - throw new IOException(E_STREAM_CLOSED); - } - - int result = wrappedStream.read(buf, off, len); - if (result > 0) { - pos += result; - } - if (stats != null & result > 0) { - stats.incrementBytesRead(result); - } - - return result; - } - public synchronized void close() throws IOException { if (closed) { return; @@ -103,4 +131,12 @@ public synchronized void close() throws IOException { wrappedStream.close(); closed = true; } + + private void checkNotClosed() throws IOException { + if (closed) { + throw new IOException( + path.toUri() + ": " + FSExceptionMessages.STREAM_IS_CLOSED + ); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java index 5a60ef2ae9b03..dcff0094eccf5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java @@ -22,7 +22,7 @@ import java.util.LinkedList; import java.util.List; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java index c81825776a613..7858238ee71fd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java @@ -38,6 +38,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.util.functional.RemoteIterators.cleanupRemoteIterator; + /** * An abstract class for the execution of a file system command */ @@ -75,7 +77,11 @@ protected Command() { err = System.err; } - /** Constructor */ + /** + * Constructor. + * + * @param conf configuration. + */ protected Command(Configuration conf) { super(conf); } @@ -107,7 +113,7 @@ protected int getDepth() { * Execute the command on the input path data. Commands can override to make * use of the resolved filesystem. * @param pathData The input path with resolved filesystem - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected void run(PathData pathData) throws IOException { run(pathData.path); @@ -134,11 +140,19 @@ public int runAll() { return exitCode; } - /** sets the command factory for later use */ + /** + * sets the command factory for later use. + * @param factory factory. + */ public void setCommandFactory(CommandFactory factory) { this.commandFactory = factory; } - /** retrieves the command factory */ + + /** + * retrieves the command factory. 
+ * + * @return command factory. + */ protected CommandFactory getCommandFactory() { return this.commandFactory; } @@ -199,7 +213,7 @@ public int run(String...argv) { * IllegalArgumentException is thrown, the FsShell object will print the * short usage of the command. * @param args the command line arguments - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected void processOptions(LinkedList args) throws IOException {} @@ -209,7 +223,7 @@ protected void processOptions(LinkedList args) throws IOException {} * {@link #expandArguments(LinkedList)} and pass the resulting list to * {@link #processArguments(LinkedList)} * @param args the list of argument strings - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected void processRawArguments(LinkedList args) throws IOException { @@ -361,6 +375,7 @@ protected void processPaths(PathData parent, } } } + cleanupRemoteIterator(itemsIterator); } private void processPathInternal(PathData item) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java index ca9961aeb65a0..69a418c1925eb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java @@ -54,6 +54,10 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; import static org.apache.hadoop.fs.CreateFlag.CREATE; import static org.apache.hadoop.fs.CreateFlag.LAZY_PERSIST; +import static org.apache.hadoop.fs.CreateFlag.OVERWRITE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static 
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; /** * Provides: argument processing to ensure the destination is valid @@ -115,6 +119,8 @@ protected void setDirectWrite(boolean flag) { * owner, group and permission information of the source * file will be preserved as far as target {@link FileSystem} * implementation allows. + * + * @param preserve preserve. */ protected void setPreserve(boolean preserve) { if (preserve) { @@ -171,6 +177,7 @@ protected void preserve(FileAttribute fileAttribute) { * The last arg is expected to be a local path, if only one argument is * given then the destination will be the current directory * @param args is the list of arguments + * @throws IOException raised on errors performing I/O. */ protected void getLocalDestination(LinkedList args) throws IOException { @@ -347,7 +354,11 @@ protected void copyFileToTarget(PathData src, PathData target) src.fs.setVerifyChecksum(verifyChecksum); InputStream in = null; try { - in = src.fs.open(src.path); + in = awaitFuture(src.fs.openFile(src.path) + .withFileStatus(src.stat) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .build()); copyStreamToTarget(in, target); preserveAttributes(src, target, preserveRawXattrs); } finally { @@ -396,11 +407,11 @@ private boolean checkPathsForReservedRaw(Path src, Path target) /** * If direct write is disabled ,copies the stream contents to a temporary - * file "._COPYING_". If the copy is - * successful, the temporary file will be renamed to the real path, - * else the temporary file will be deleted. + * file "target._COPYING_". If the copy is successful, the temporary file + * will be renamed to the real path, else the temporary file will be deleted. * if direct write is enabled , then creation temporary file is skipped. 
- * @param in the input stream for the copy + * + * @param in the input stream for the copy * @param target where to store the contents of the stream * @throws IOException if copy fails */ @@ -515,7 +526,8 @@ FSDataOutputStream create(PathData item, boolean lazyPersist) defaultBlockSize = getDefaultBlockSize(item.path); } - EnumSet createFlags = EnumSet.of(CREATE, LAZY_PERSIST); + EnumSet createFlags = + EnumSet.of(CREATE, LAZY_PERSIST, OVERWRITE); return create(item.path, FsPermission.getFileDefault().applyUMask( FsPermission.getUMask(getConf())), diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Concat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Concat.java new file mode 100644 index 0000000000000..f25b689e7ed17 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Concat.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.shell; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.LinkedList; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; + +/** + * Concat the given files. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class Concat extends FsCommand { + public static void registerCommands(CommandFactory factory) { + factory.addClass(Concat.class, "-concat"); + } + + public static final String NAME = "concat"; + public static final String USAGE = " ..."; + public static final String DESCRIPTION = "Concatenate existing source files" + + " into the target file. Target file and source files should be in the" + + " same directory."; + private static FileSystem testFs; // test only. + + @Override + protected void processArguments(LinkedList args) + throws IOException { + if (args.size() < 1) { + throw new IOException("Target path not specified. " + USAGE); + } + if (args.size() < 3) { + throw new IOException( + "The number of source paths is less than 2. 
" + USAGE); + } + PathData target = args.removeFirst(); + LinkedList srcList = args; + if (!target.exists || !target.stat.isFile()) { + throw new FileNotFoundException(String + .format("Target path %s does not exist or is" + " not file.", + target.path)); + } + Path[] srcArray = new Path[srcList.size()]; + for (int i = 0; i < args.size(); i++) { + PathData src = srcList.get(i); + if (!src.exists || !src.stat.isFile()) { + throw new FileNotFoundException( + String.format("%s does not exist or is not file.", src.path)); + } + srcArray[i] = src.path; + } + FileSystem fs = target.fs; + if (testFs != null) { + fs = testFs; + } + try { + fs.concat(target.path, srcArray); + } catch (UnsupportedOperationException exception) { + throw new PathIOException("Dest filesystem '" + fs.getUri().getScheme() + + "' doesn't support concat.", exception); + } + } + + @VisibleForTesting + static void setTestFs(FileSystem fs) { + testFs = fs; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommandWithMultiThread.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommandWithMultiThread.java new file mode 100644 index 0000000000000..aed4030540baf --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommandWithMultiThread.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.shell; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.classification.VisibleForTesting; + +/** + * Abstract command to enable sub copy commands run with multi-thread. + */ +public abstract class CopyCommandWithMultiThread + extends CommandWithDestination { + + private int threadCount = 1; + private ThreadPoolExecutor executor = null; + private int threadPoolQueueSize = DEFAULT_QUEUE_SIZE; + + public static final int DEFAULT_QUEUE_SIZE = 1024; + + /** + * set thread count by option value, if the value less than 1, + * use 1 instead. + * + * @param optValue option value + */ + protected void setThreadCount(String optValue) { + if (optValue != null) { + threadCount = Math.max(Integer.parseInt(optValue), 1); + } + } + + /** + * set thread pool queue size by option value, if the value less than 1, + * use DEFAULT_QUEUE_SIZE instead. + * + * @param optValue option value + */ + protected void setThreadPoolQueueSize(String optValue) { + if (optValue != null) { + int size = Integer.parseInt(optValue); + threadPoolQueueSize = size < 1 ? 
DEFAULT_QUEUE_SIZE : size; + } + } + + @VisibleForTesting + protected int getThreadCount() { + return this.threadCount; + } + + @VisibleForTesting + protected int getThreadPoolQueueSize() { + return this.threadPoolQueueSize; + } + + @VisibleForTesting + protected ThreadPoolExecutor getExecutor() { + return this.executor; + } + + @Override + protected void processArguments(LinkedList args) + throws IOException { + + if (isMultiThreadNecessary(args)) { + initThreadPoolExecutor(); + } + + super.processArguments(args); + + if (executor != null) { + waitForCompletion(); + } + } + + // if thread count is 1 or the source is only one single file, + // don't init executor to avoid threading overhead. + @VisibleForTesting + protected boolean isMultiThreadNecessary(LinkedList args) + throws IOException { + return this.threadCount > 1 && hasMoreThanOneSourcePaths(args); + } + + // check if source is only one single file. + private boolean hasMoreThanOneSourcePaths(LinkedList args) + throws IOException { + if (args.size() > 1) { + return true; + } + if (args.size() == 1) { + PathData src = args.get(0); + if (src.stat == null) { + src.refreshStatus(); + } + return isPathRecursable(src); + } + return false; + } + + private void initThreadPoolExecutor() { + executor = + new ThreadPoolExecutor(threadCount, threadCount, 1, TimeUnit.SECONDS, + new ArrayBlockingQueue<>(threadPoolQueueSize), + new ThreadPoolExecutor.CallerRunsPolicy()); + } + + private void waitForCompletion() { + if (executor != null) { + executor.shutdown(); + try { + executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES); + } catch (InterruptedException e) { + executor.shutdownNow(); + displayError(e); + Thread.currentThread().interrupt(); + } + } + } + + @Override + protected void copyFileToTarget(PathData src, PathData target) + throws IOException { + if (executor == null) { + super.copyFileToTarget(src, target); + } else { + executor.submit(() -> { + try { + super.copyFileToTarget(src, target); + } catch 
(IOException e) { + displayError(e); + } + }); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java index 4622c75fbd410..1ac204f5f8a82 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java @@ -26,11 +26,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FSDataInputStream; @@ -102,7 +98,8 @@ protected void processArguments(LinkedList items) try { for (PathData src : srcs) { if (src.stat.getLen() != 0) { - try (FSDataInputStream in = src.fs.open(src.path)) { + // Always do sequential reads. + try (FSDataInputStream in = src.openForSequentialIO()) { IOUtils.copyBytes(in, out, getConf(), false); writeDelimiter(out); } @@ -151,33 +148,40 @@ protected boolean isSorted() { } } - static class Cp extends CommandWithDestination { + static class Cp extends CopyCommandWithMultiThread { public static final String NAME = "cp"; public static final String USAGE = - "[-f] [-p | -p[topax]] [-d] ... "; + "[-f] [-p | -p[topax]] [-d] [-t ]" + + " [-q ] ... "; public static final String DESCRIPTION = - "Copy files that match the file pattern to a " + - "destination. When copying multiple files, the destination " + - "must be a directory. Passing -p preserves status " + - "[topax] (timestamps, ownership, permission, ACLs, XAttr). 
" + - "If -p is specified with no , then preserves " + - "timestamps, ownership, permission. If -pa is specified, " + - "then preserves permission also because ACL is a super-set of " + - "permission. Passing -f overwrites the destination if it " + - "already exists. raw namespace extended attributes are preserved " + - "if (1) they are supported (HDFS only) and, (2) all of the source and " + - "target pathnames are in the /.reserved/raw hierarchy. raw namespace " + - "xattr preservation is determined solely by the presence (or absence) " + - "of the /.reserved/raw prefix and not by the -p option. Passing -d "+ - "will skip creation of temporary file(._COPYING_).\n"; + "Copy files that match the file pattern to a destination." + + " When copying multiple files, the destination must be a " + + "directory.\nFlags :\n" + + " -p[topax] : Preserve file attributes [topx] (timestamps, " + + "ownership, permission, ACL, XAttr). If -p is specified with " + + "no arg, then preserves timestamps, ownership, permission. " + + "If -pa is specified, then preserves permission also because " + + "ACL is a super-set of permission. 
Determination of whether raw " + + "namespace extended attributes are preserved is independent of " + + "the -p flag.\n" + + " -f : Overwrite the destination if it already exists.\n" + + " -d : Skip creation of temporary file(._COPYING_).\n" + + " -t : Number of threads to be used, " + + "default is 1.\n" + + " -q : Thread pool queue size to be " + + "used, default is 1024.\n"; @Override protected void processOptions(LinkedList args) throws IOException { popPreserveOption(args); CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "f", "d"); + cf.addOptionWithValue("t"); + cf.addOptionWithValue("q"); cf.parse(args); setDirectWrite(cf.getOpt("d")); setOverwrite(cf.getOpt("f")); + setThreadCount(cf.getOptValue("t")); + setThreadPoolQueueSize(cf.getOptValue("q")); // should have a -r option setRecursive(true); getRemoteDestination(args); @@ -208,28 +212,37 @@ private void popPreserveOption(List args) { /** * Copy local files to a remote filesystem */ - public static class Get extends CommandWithDestination { + public static class Get extends CopyCommandWithMultiThread { public static final String NAME = "get"; public static final String USAGE = - "[-f] [-p] [-ignoreCrc] [-crc] ... "; + "[-f] [-p] [-crc] [-ignoreCrc] [-t ]" + + " [-q ] ... "; public static final String DESCRIPTION = - "Copy files that match the file pattern " + - "to the local name. is kept. When copying multiple " + - "files, the destination must be a directory. Passing " + - "-f overwrites the destination if it already exists and " + - "-p preserves access and modification times, " + - "ownership and the mode.\n"; + "Copy files that match the file pattern to the local name. 
" + + " is kept.\nWhen copying multiple files, the destination" + + " must be a directory.\nFlags:\n" + + " -p : Preserves timestamps, ownership and the mode.\n" + + " -f : Overwrites the destination if it already exists.\n" + + " -crc : write CRC checksums for the files downloaded.\n" + + " -ignoreCrc : Skip CRC checks on the file(s) downloaded.\n" + + " -t : Number of threads to be used," + + " default is 1.\n" + + " -q : Thread pool queue size to be" + + " used, default is 1024.\n"; @Override - protected void processOptions(LinkedList args) - throws IOException { - CommandFormat cf = new CommandFormat( - 1, Integer.MAX_VALUE, "crc", "ignoreCrc", "p", "f"); + protected void processOptions(LinkedList args) throws IOException { + CommandFormat cf = + new CommandFormat(1, Integer.MAX_VALUE, "crc", "ignoreCrc", "p", "f"); + cf.addOptionWithValue("t"); + cf.addOptionWithValue("q"); cf.parse(args); setWriteChecksum(cf.getOpt("crc")); setVerifyChecksum(!cf.getOpt("ignoreCrc")); setPreserve(cf.getOpt("p")); setOverwrite(cf.getOpt("f")); + setThreadCount(cf.getOptValue("t")); + setThreadPoolQueueSize(cf.getOptValue("q")); setRecursive(true); getLocalDestination(args); } @@ -238,27 +251,36 @@ protected void processOptions(LinkedList args) /** * Copy local files to a remote filesystem */ - public static class Put extends CommandWithDestination { + public static class Put extends CopyCommandWithMultiThread { + public static final String NAME = "put"; public static final String USAGE = - "[-f] [-p] [-l] [-d] ... "; + "[-f] [-p] [-l] [-d] [-t ] [-q ]" + + " ... "; public static final String DESCRIPTION = - "Copy files from the local file system " + - "into fs. Copying fails if the file already " + - "exists, unless the -f flag is given.\n" + - "Flags:\n" + - " -p : Preserves access and modification times, ownership and the mode.\n" + - " -f : Overwrites the destination if it already exists.\n" + - " -l : Allow DataNode to lazily persist the file to disk. 
Forces\n" + - " replication factor of 1. This flag will result in reduced\n" + - " durability. Use with care.\n" + + "Copy files from the local file system " + + "into fs. Copying fails if the file already " + + "exists, unless the -f flag is given.\n" + + "Flags:\n" + + " -p : Preserves timestamps, ownership and the mode.\n" + + " -f : Overwrites the destination if it already exists.\n" + + " -t : Number of threads to be used, default is 1.\n" + + " -q : Thread pool queue size to be used, " + + "default is 1024.\n" + + " -l : Allow DataNode to lazily persist the file to disk. Forces " + + "replication factor of 1. This flag will result in reduced " + + "durability. Use with care.\n" + " -d : Skip creation of temporary file(._COPYING_).\n"; @Override protected void processOptions(LinkedList args) throws IOException { CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE, "f", "p", "l", "d"); + cf.addOptionWithValue("t"); + cf.addOptionWithValue("q"); cf.parse(args); + setThreadCount(cf.getOptValue("t")); + setThreadPoolQueueSize(cf.getOptValue("q")); setOverwrite(cf.getOpt("f")); setPreserve(cf.getOpt("p")); setLazyPersist(cf.getOpt("l")); @@ -290,116 +312,13 @@ protected void processArguments(LinkedList args) } super.processArguments(args); } + } public static class CopyFromLocal extends Put { - private ThreadPoolExecutor executor = null; - private int numThreads = 1; - - private static final int MAX_THREADS = - Runtime.getRuntime().availableProcessors() * 2; public static final String NAME = "copyFromLocal"; - public static final String USAGE = - "[-f] [-p] [-l] [-d] [-t ] ... "; - public static final String DESCRIPTION = - "Copy files from the local file system " + - "into fs. 
Copying fails if the file already " + - "exists, unless the -f flag is given.\n" + - "Flags:\n" + - " -p : Preserves access and modification times, ownership and the" + - " mode.\n" + - " -f : Overwrites the destination if it already exists.\n" + - " -t : Number of threads to be used, default is 1.\n" + - " -l : Allow DataNode to lazily persist the file to disk. Forces" + - " replication factor of 1. This flag will result in reduced" + - " durability. Use with care.\n" + - " -d : Skip creation of temporary file(._COPYING_).\n"; - - private void setNumberThreads(String numberThreadsString) { - if (numberThreadsString == null) { - numThreads = 1; - } else { - int parsedValue = Integer.parseInt(numberThreadsString); - if (parsedValue <= 1) { - numThreads = 1; - } else if (parsedValue > MAX_THREADS) { - numThreads = MAX_THREADS; - } else { - numThreads = parsedValue; - } - } - } - - @Override - protected void processOptions(LinkedList args) throws IOException { - CommandFormat cf = - new CommandFormat(1, Integer.MAX_VALUE, "f", "p", "l", "d"); - cf.addOptionWithValue("t"); - cf.parse(args); - setNumberThreads(cf.getOptValue("t")); - setOverwrite(cf.getOpt("f")); - setPreserve(cf.getOpt("p")); - setLazyPersist(cf.getOpt("l")); - setDirectWrite(cf.getOpt("d")); - getRemoteDestination(args); - // should have a -r option - setRecursive(true); - } - - private void copyFile(PathData src, PathData target) throws IOException { - if (isPathRecursable(src)) { - throw new PathIsDirectoryException(src.toString()); - } - super.copyFileToTarget(src, target); - } - - @Override - protected void copyFileToTarget(PathData src, PathData target) - throws IOException { - // if number of thread is 1, mimic put and avoid threading overhead - if (numThreads == 1) { - copyFile(src, target); - return; - } - - Runnable task = () -> { - try { - copyFile(src, target); - } catch (IOException e) { - displayError(e); - } - }; - executor.submit(task); - } - - @Override - protected void 
processArguments(LinkedList args) - throws IOException { - executor = new ThreadPoolExecutor(numThreads, numThreads, 1, - TimeUnit.SECONDS, new ArrayBlockingQueue<>(1024), - new ThreadPoolExecutor.CallerRunsPolicy()); - super.processArguments(args); - - // issue the command and then wait for it to finish - executor.shutdown(); - try { - executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES); - } catch (InterruptedException e) { - executor.shutdownNow(); - displayError(e); - Thread.currentThread().interrupt(); - } - } - - @VisibleForTesting - public int getNumThreads() { - return numThreads; - } - - @VisibleForTesting - public ThreadPoolExecutor getExecutor() { - return executor; - } + public static final String USAGE = Put.USAGE; + public static final String DESCRIPTION = "Identical to the -put command."; } public static class CopyToLocal extends Get { @@ -414,15 +333,24 @@ public static class CopyToLocal extends Get { */ public static class AppendToFile extends CommandWithDestination { public static final String NAME = "appendToFile"; - public static final String USAGE = " ... "; + public static final String USAGE = "[-n] ... "; public static final String DESCRIPTION = "Appends the contents of all the given local files to the " + "given dst file. The dst file will be created if it does " + "not exist. If is -, then the input is read " + - "from stdin."; + "from stdin. 
Option -n represents that use NEW_BLOCK create flag to append file."; private static final int DEFAULT_IO_LENGTH = 1024 * 1024; boolean readStdin = false; + private boolean appendToNewBlock = false; + + public boolean isAppendToNewBlock() { + return appendToNewBlock; + } + + public void setAppendToNewBlock(boolean appendToNewBlock) { + this.appendToNewBlock = appendToNewBlock; + } // commands operating on local paths have no need for glob expansion @Override @@ -453,6 +381,9 @@ protected void processOptions(LinkedList args) throw new IOException("missing destination argument"); } + CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "n"); + cf.parse(args); + appendToNewBlock = cf.getOpt("n"); getRemoteDestination(args); super.processOptions(args); } @@ -466,7 +397,8 @@ protected void processArguments(LinkedList args) } InputStream is = null; - try (FSDataOutputStream fos = dst.fs.append(dst.path)) { + try (FSDataOutputStream fos = appendToNewBlock ? + dst.fs.append(dst.path, true) : dst.fs.append(dst.path)) { if (readStdin) { if (args.size() == 0) { IOUtils.copyBytes(System.in, fos, DEFAULT_IO_LENGTH); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Count.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Count.java index 22d8be53e97a6..ab7e1951bcd3f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Count.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Count.java @@ -56,13 +56,14 @@ public static void registerCommands(CommandFactory factory) { //return the quota, namespace count and disk space usage. 
private static final String OPTION_QUOTA_AND_USAGE = "u"; private static final String OPTION_ECPOLICY = "e"; + private static final String OPTION_SNAPSHOT_COUNT = "s"; public static final String NAME = "count"; public static final String USAGE = "[-" + OPTION_QUOTA + "] [-" + OPTION_HUMAN + "] [-" + OPTION_HEADER + "] [-" + OPTION_TYPE + " []] [-" + OPTION_QUOTA_AND_USAGE + "] [-" + OPTION_EXCLUDE_SNAPSHOT - + "] [-" + OPTION_ECPOLICY + + "] [-" + OPTION_ECPOLICY + "] [-" + OPTION_SNAPSHOT_COUNT + "] ..."; public static final String DESCRIPTION = "Count the number of directories, files and bytes under the paths\n" + @@ -93,7 +94,8 @@ public static void registerCommands(CommandFactory factory) { "the storage types.\n" + "The -" + OPTION_QUOTA_AND_USAGE + " option shows the quota and \n" + "the usage against the quota without the detailed content summary."+ - "The -"+ OPTION_ECPOLICY +" option shows the erasure coding policy."; + "The -" + OPTION_ECPOLICY + " option shows the erasure coding policy." 
+ + "The -" + OPTION_SNAPSHOT_COUNT + " option shows snapshot counts."; private boolean showQuotas; private boolean humanReadable; @@ -102,6 +104,7 @@ public static void registerCommands(CommandFactory factory) { private boolean showQuotasAndUsageOnly; private boolean excludeSnapshots; private boolean displayECPolicy; + private boolean showSnapshot; /** Constructor */ public Count() {} @@ -123,7 +126,7 @@ protected void processOptions(LinkedList args) { CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE, OPTION_QUOTA, OPTION_HUMAN, OPTION_HEADER, OPTION_QUOTA_AND_USAGE, OPTION_EXCLUDE_SNAPSHOT, - OPTION_ECPOLICY); + OPTION_ECPOLICY, OPTION_SNAPSHOT_COUNT); cf.addOptionWithValue(OPTION_TYPE); cf.parse(args); if (args.isEmpty()) { // default path is the current working directory @@ -134,6 +137,7 @@ protected void processOptions(LinkedList args) { showQuotasAndUsageOnly = cf.getOpt(OPTION_QUOTA_AND_USAGE); excludeSnapshots = cf.getOpt(OPTION_EXCLUDE_SNAPSHOT); displayECPolicy = cf.getOpt(OPTION_ECPOLICY); + showSnapshot = cf.getOpt(OPTION_SNAPSHOT_COUNT); if (showQuotas || showQuotasAndUsageOnly) { String types = cf.getOptValue(OPTION_TYPE); @@ -165,6 +169,9 @@ protected void processOptions(LinkedList args) { if(displayECPolicy){ headString.append("ERASURECODING_POLICY "); } + if (showSnapshot) { + headString.append(ContentSummary.getSnapshotHeader()); + } headString.append("PATHNAME"); out.println(headString.toString()); } @@ -205,6 +212,10 @@ protected void processPath(PathData src) throws IOException { outputString.append(summary.getErasureCodingPolicy()) .append(" "); } + if (showSnapshot) { + ContentSummary summary = src.fs.getContentSummary(src.path); + outputString.append(summary.toSnapshot(isHumanReadable())); + } outputString.append(src); out.println(outputString.toString()); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java index 670fa152f72ed..d3ca013a3f251 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java @@ -105,7 +105,8 @@ private void printToStdout(InputStream in) throws IOException { } protected InputStream getInputStream(PathData item) throws IOException { - return item.fs.open(item.path); + // Always do sequential reads; + return item.openForSequentialIO(); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java index 784bbf33f7826..9cafbb0f151a9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java @@ -70,6 +70,7 @@ public static void registerCommands(CommandFactory factory) { factory.registerCommands(Truncate.class); factory.registerCommands(SnapshotCommands.class); factory.registerCommands(XAttrCommands.class); + factory.registerCommands(Concat.class); } protected FsCommand() {} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsUsage.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsUsage.java index 6596527738058..64aade3df9539 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsUsage.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsUsage.java @@ -128,7 +128,8 @@ private void addToUsagesTable(URI uri, FsStatus fsStatus, @Override protected void processPath(PathData item) throws IOException { - if (ViewFileSystemUtil.isViewFileSystem(item.fs)) { + if 
(ViewFileSystemUtil.isViewFileSystem(item.fs) + || ViewFileSystemUtil.isViewFileSystemOverloadScheme(item.fs)) { ViewFileSystem viewFileSystem = (ViewFileSystem) item.fs; Map fsStatusMap = ViewFileSystemUtil.getStatus(viewFileSystem, item.path); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Head.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Head.java index 2280225b5ae32..7242f261801d6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Head.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Head.java @@ -28,6 +28,8 @@ import java.util.LinkedList; import java.util.List; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; + /** * Show the first 1KB of the file. */ @@ -68,11 +70,9 @@ protected void processPath(PathData item) throws IOException { } private void dumpToOffset(PathData item) throws IOException { - FSDataInputStream in = item.fs.open(item.path); - try { + try (FSDataInputStream in = item.openFile( + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)) { IOUtils.copyBytes(in, System.out, endingOffset, false); - } finally { - in.close(); } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java index efc541ccf81ee..b50eb69a26d70 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java @@ -25,7 +25,7 @@ import java.util.Date; import java.util.LinkedList; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java index 5ef42775ea58b..c20293e1a5adb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java @@ -25,7 +25,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.PathExistsException; -import org.apache.hadoop.fs.shell.CopyCommands.Put; +import org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal; /** Various commands for moving files */ @InterfaceAudience.Private @@ -41,12 +41,22 @@ public static void registerCommands(CommandFactory factory) { /** * Move local files to a remote filesystem */ - public static class MoveFromLocal extends Put { + public static class MoveFromLocal extends CopyFromLocal { public static final String NAME = "moveFromLocal"; - public static final String USAGE = " ... "; + public static final String USAGE = + "[-f] [-p] [-l] [-d] ... 
"; public static final String DESCRIPTION = - "Same as -put, except that the source is " + - "deleted after it's copied."; + "Same as -put, except that the source is " + + "deleted after it's copied\n" + + "and -t option has not yet implemented."; + + @Override + protected void processOptions(LinkedList args) throws IOException { + if(args.contains("-t")) { + throw new CommandFormat.UnknownOptionException("-t"); + } + super.processOptions(args); + } @Override protected void processPath(PathData src, PathData target) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java index dad54ea07bdf1..5e945ed8357e7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java @@ -29,6 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -39,6 +40,12 @@ import org.apache.hadoop.fs.PathNotFoundException; import org.apache.hadoop.fs.RemoteIterator; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; +import static org.apache.hadoop.util.functional.RemoteIterators.mappingRemoteIterator; + /** * Encapsulates a Path (path), its FileStatus (stat), and its FileSystem (fs). 
* PathData ensures that the returned path string will be the same as the @@ -287,20 +294,8 @@ public RemoteIterator getDirectoryContentsIterator() throws IOException { checkIfExists(FileTypeRequirement.SHOULD_BE_DIRECTORY); final RemoteIterator stats = this.fs.listStatusIterator(path); - return new RemoteIterator() { - - @Override - public boolean hasNext() throws IOException { - return stats.hasNext(); - } - - @Override - public PathData next() throws IOException { - FileStatus file = stats.next(); - String child = getStringForChildPath(file.getPath()); - return new PathData(fs, child, file); - } - }; + return mappingRemoteIterator(stats, + file -> new PathData(fs, getStringForChildPath(file.getPath()), file)); } /** @@ -611,4 +606,35 @@ public boolean equals(Object o) { public int hashCode() { return path.hashCode(); } + + + /** + * Open a file for sequential IO. + *

    + * This uses FileSystem.openFile() to request sequential IO; + * the file status is also passed in. + * Filesystems may use this information to optimize their IO. + *

    + * @return an input stream + * @throws IOException failure + */ + protected FSDataInputStream openForSequentialIO() + throws IOException { + return openFile(FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL); + } + + /** + * Open a file. + * @param policy fadvise policy. + * @return an input stream + * @throws IOException failure + */ + protected FSDataInputStream openFile(final String policy) throws IOException { + return awaitFuture(fs.openFile(path) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + policy) + .optLong(FS_OPTION_OPENFILE_LENGTH, + stat.getLen()) // file length hint for object stores + .build()); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/SnapshotCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/SnapshotCommands.java index 4bd596a40d678..75dc86ec87c18 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/SnapshotCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/SnapshotCommands.java @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIsNotDirectoryException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Snapshot related operations diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Tail.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Tail.java index 8a75a60f435ca..22b135f064ca5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Tail.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Tail.java @@ -28,7 +28,9 @@ import org.apache.hadoop.fs.PathIsDirectoryException; import org.apache.hadoop.io.IOUtils; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; /** * Get a listing of all files in that match the file patterns. @@ -107,16 +109,15 @@ private long dumpFromOffset(PathData item, long offset) throws IOException { if (offset < 0) { offset = Math.max(fileSize + offset, 0); } - - FSDataInputStream in = item.fs.open(item.path); - try { + // Always do sequential reads. + try (FSDataInputStream in = item.openFile( + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)) { in.seek(offset); // use conf so the system configured io block size is used IOUtils.copyBytes(in, System.out, getConf(), false); offset = in.getPos(); - } finally { - in.close(); } return offset; } + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java index be174b5e9cf68..872de306d287a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java @@ -31,7 +31,7 @@ import org.apache.hadoop.fs.PathNotFoundException; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Unix touch like commands @@ -102,8 +102,8 @@ public static class Touch extends TouchCommands { public static final String NAME = "touch"; public static final String USAGE = "[-" + OPTION_CHANGE_ONLY_ACCESS_TIME + "] [-" + OPTION_CHANGE_ONLY_MODIFICATION_TIME + "] [-" - + OPTION_USE_TIMESTAMP + " TIMESTAMP ] [-" + OPTION_DO_NOT_CREATE_FILE - + "] ..."; + + OPTION_USE_TIMESTAMP + " TIMESTAMP (yyyyMMdd:HHmmss) ] " + + "[-" + OPTION_DO_NOT_CREATE_FILE + "] ..."; public static final 
String DESCRIPTION = "Updates the access and modification times of the file specified by the" + " to the current time. If the file does not exist, then a zero" @@ -114,7 +114,8 @@ public static class Touch extends TouchCommands { + OPTION_CHANGE_ONLY_MODIFICATION_TIME + " Change only the modification time \n" + "-" + OPTION_USE_TIMESTAMP + " TIMESTAMP" - + " Use specified timestamp (in format yyyyMMddHHmmss) instead of current time \n" + + " Use specified timestamp instead of current time\n" + + " TIMESTAMP format yyyyMMdd:HHmmss\n" + "-" + OPTION_DO_NOT_CREATE_FILE + " Do not create any files"; private boolean changeModTime = false; @@ -137,7 +138,7 @@ protected void processOptions(LinkedList args) { CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE, OPTION_USE_TIMESTAMP, OPTION_CHANGE_ONLY_ACCESS_TIME, - OPTION_CHANGE_ONLY_MODIFICATION_TIME); + OPTION_CHANGE_ONLY_MODIFICATION_TIME, OPTION_DO_NOT_CREATE_FILE); cf.parse(args); this.changeModTime = cf.getOpt(OPTION_CHANGE_ONLY_MODIFICATION_TIME); this.changeAccessTime = cf.getOpt(OPTION_CHANGE_ONLY_ACCESS_TIME); @@ -183,7 +184,8 @@ private void updateTime(PathData item) throws IOException { time = dateFormat.parse(timestamp).getTime(); } catch (ParseException e) { throw new IllegalArgumentException( - "Unable to parse the specified timestamp " + timestamp, e); + "Unable to parse the specified timestamp "+ timestamp + + ". 
The expected format is " + dateFormat.toPattern(), e); } } if (changeModTime ^ changeAccessTime) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/XAttrCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/XAttrCommands.java index 630177611940e..2fe7c858e4e66 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/XAttrCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/XAttrCommands.java @@ -23,7 +23,7 @@ import java.util.Map; import java.util.Map.Entry; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/BaseExpression.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/BaseExpression.java index 5069d2d34e51c..cd9bbe2bc884e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/BaseExpression.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/BaseExpression.java @@ -38,12 +38,18 @@ public abstract class BaseExpression implements Expression, Configurable { private String[] usage = { "Not yet implemented" }; private String[] help = { "Not yet implemented" }; - /** Sets the usage text for this {@link Expression} */ + /** + * Sets the usage text for this {@link Expression} . + * @param usage usage array. + */ protected void setUsage(String[] usage) { this.usage = usage; } - /** Sets the help text for this {@link Expression} */ + /** + * Sets the help text for this {@link Expression} . + * @param help help. 
+ */ protected void setHelp(String[] help) { this.help = help; } @@ -92,7 +98,10 @@ public void finish() throws IOException { /** Children of this expression. */ private LinkedList children = new LinkedList(); - /** Return the options to be used by this expression. */ + /** + * Return the options to be used by this expression. + * @return options. + */ protected FindOptions getOptions() { return (this.options == null) ? new FindOptions() : this.options; } @@ -265,6 +274,7 @@ protected void addArgument(String arg) { * @param depth * current depth in the process directories * @return FileStatus + * @throws IOException raised on errors performing I/O. */ protected FileStatus getFileStatus(PathData item, int depth) throws IOException { @@ -285,6 +295,8 @@ protected FileStatus getFileStatus(PathData item, int depth) * @param item * PathData * @return Path + * + * @throws IOException raised on errors performing I/O. */ protected Path getPath(PathData item) throws IOException { return item.path; @@ -295,6 +307,7 @@ protected Path getPath(PathData item) throws IOException { * * @param item PathData * @return FileSystem + * @throws IOException raised on errors performing I/O. */ protected FileSystem getFileSystem(PathData item) throws IOException { return item.fs; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Expression.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Expression.java index ccad631028cc9..353fe685cc9cd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Expression.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Expression.java @@ -30,13 +30,15 @@ public interface Expression { /** * Set the options for this expression, called once before processing any * items. + * @param options options. + * @throws IOException raised on errors performing I/O. 
*/ public void setOptions(FindOptions options) throws IOException; /** * Prepares the expression for execution, called once after setting options * and before processing any options. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void prepare() throws IOException; @@ -46,13 +48,14 @@ public interface Expression { * @param item {@link PathData} item to be processed * @param depth distance of the item from the command line argument * @return {@link Result} of applying the expression to the item + * @throws IOException raised on errors performing I/O. */ public Result apply(PathData item, int depth) throws IOException; /** * Finishes the expression, called once after processing all items. * - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void finish() throws IOException; @@ -76,15 +79,21 @@ public interface Expression { /** * Indicates whether this expression performs an action, i.e. provides output * back to the user. + * @return if is action true, not false. */ public boolean isAction(); - /** Identifies the expression as an operator rather than a primary. */ + /** + * Identifies the expression as an operator rather than a primary. + * @return if is operator true, not false. + */ public boolean isOperator(); /** * Returns the precedence of this expression * (only applicable to operators). + * + * @return precedence. 
*/ public int getPrecedence(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/FindOptions.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/FindOptions.java index b0f1be5c35c93..c605186230590 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/FindOptions.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/FindOptions.java @@ -264,6 +264,7 @@ public void setConfiguration(Configuration configuration) { /** * Return the {@link Configuration} return configuration {@link Configuration} + * @return configuration. */ public Configuration getConfiguration() { return this.configuration; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Result.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Result.java index 2ef9cb4a801d6..a242681acd030 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Result.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/find/Result.java @@ -35,23 +35,36 @@ private Result(boolean success, boolean recurse) { this.descend = recurse; } - /** Should further directories be descended. */ + /** + * Should further directories be descended. + * @return true if further directories should be descended, false otherwise. + */ public boolean isDescend() { return this.descend; } - /** Should processing continue. */ + /** + * Should processing continue. + * @return true if processing should continue, false otherwise. + */ public boolean isPass() { return this.success; } - /** Returns the combination of this and another result. */ + /** + * Returns the combination of this and another result. + * @param other other. + * @return result. + */ public Result combine(Result other) { return new Result(this.isPass() && other.isPass(), this.isDescend() && other.isDescend()); } - /** Negate this result.
*/ + /** + * Negate this result. + * @return Result. + */ public Result negate() { return new Result(!this.isPass(), this.isDescend()); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/BufferedIOStatisticsInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/BufferedIOStatisticsInputStream.java new file mode 100644 index 0000000000000..bdc432570542b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/BufferedIOStatisticsInputStream.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.io.BufferedInputStream; +import java.io.InputStream; + +import org.apache.hadoop.fs.StreamCapabilities; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; + +/** + * An extension of {@code BufferedInputStream} which implements + * {@link IOStatisticsSource} and forwards requests for the + * {@link IOStatistics} to the wrapped stream. + * + * This should be used when any input stream needs buffering while + * allowing the inner stream to be a source of statistics. 
+ * + * It also implements {@link StreamCapabilities} and forwards the probe + * to the inner stream, if possible. + */ +public class BufferedIOStatisticsInputStream + extends BufferedInputStream + implements IOStatisticsSource, StreamCapabilities { + + /** + * Buffer an input stream with the default buffer size of 8k. + * @param in input stream + */ + public BufferedIOStatisticsInputStream(final InputStream in) { + super(in); + } + + /** + * Buffer an input stream with the chosen buffer size. + * @param in input stream + * @param size buffer size + */ + public BufferedIOStatisticsInputStream(final InputStream in, final int size) { + super(in, size); + } + + /** + * Return any IOStatistics offered by the inner stream. + * @return inner IOStatistics or null + */ + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(in); + } + + /** + * If the inner stream supports {@link StreamCapabilities}, + * forward the probe to it. + * Otherwise: return false. + * + * @param capability string to query the stream support for. + * @return true if a capability is known to be supported. + */ + @Override + public boolean hasCapability(final String capability) { + if (in instanceof StreamCapabilities) { + return ((StreamCapabilities) in).hasCapability(capability); + } else { + return false; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/BufferedIOStatisticsOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/BufferedIOStatisticsOutputStream.java new file mode 100644 index 0000000000000..88e73a0629b1d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/BufferedIOStatisticsOutputStream.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.fs.Syncable; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; + +/** + * An extension of {@code BufferedOutputStream} which implements + * {@link IOStatisticsSource} and forwards requests for the + * {@link IOStatistics} to the wrapped stream. + * + * This should be used when any output stream needs buffering while + * allowing the inner stream to be a source of statistics. + * + * It also implements {@link StreamCapabilities} + * and {@link Syncable} and forwards to the inner stream, + * if possible. + */ +public class BufferedIOStatisticsOutputStream + extends BufferedOutputStream + implements IOStatisticsSource, Syncable, StreamCapabilities { + + /** + * Should calls to Syncable downgrade to flush if the underlying + * stream does not support it? + * While that breaks a core contract requirement of Syncable: + * "Sync.sync() guarantees durability", downgrading is + * the default behavior of FsDataOutputStream. + */ + private final boolean downgradeSyncable; + + /** + * Construct with default buffer size.
+ * @param out output stream to buffer + * @param downgradeSyncable should Syncable calls downgrade? + */ + public BufferedIOStatisticsOutputStream( + final OutputStream out, + final boolean downgradeSyncable) { + super(out); + this.downgradeSyncable = downgradeSyncable; + } + + /** + * Construct with custom buffer size. + * + * @param out output stream to buffer + * @param size buffer. + * @param downgradeSyncable should Syncable calls downgrade? + */ + public BufferedIOStatisticsOutputStream( + final OutputStream out, + final int size, + final boolean downgradeSyncable) { + super(out, size); + this.downgradeSyncable = downgradeSyncable; + } + + /** + * Ask the inner stream for their IOStatistics. + * @return any IOStatistics offered by the inner stream. + */ + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(out); + } + + /** + * If the inner stream supports {@link StreamCapabilities}, + * forward the probe to it. + * Otherwise: return false. + * + * @param capability string to query the stream support for. + * @return true if a capability is known to be supported. + */ + @Override + public boolean hasCapability(final String capability) { + if (out instanceof StreamCapabilities) { + return ((StreamCapabilities) out).hasCapability(capability); + } else { + return false; + } + } + + /** + * If the inner stream is Syncable, flush the buffer and then + * invoke the inner stream's hflush() operation. + * + * Otherwise: throw an exception, unless the stream was constructed with + * {@link #downgradeSyncable} set to true, in which case the stream + * is just flushed. 
+ * @throws IOException IO Problem + * @throws UnsupportedOperationException if the inner class is not syncable + */ + @Override + public void hflush() throws IOException { + if (out instanceof Syncable) { + flush(); + ((Syncable) out).hflush(); + } else { + if (!downgradeSyncable) { + throw new UnsupportedOperationException("hflush not supported by " + + out); + } else { + flush(); + } + } + } + + /** + * If the inner stream is Syncable, flush the buffer and then + * invoke the inner stream's hsync() operation. + * + * Otherwise: throw an exception, unless the stream was constructed with + * {@link #downgradeSyncable} set to true, in which case the stream + * is just flushed. + * @throws IOException IO Problem + * @throws UnsupportedOperationException if the inner class is not syncable + */ + @Override + public void hsync() throws IOException { + if (out instanceof Syncable) { + flush(); + ((Syncable) out).hsync(); + } else { + if (!downgradeSyncable) { + throw new UnsupportedOperationException("hsync not supported by " + + out); + } else { + flush(); + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationStatisticSummary.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationStatisticSummary.java new file mode 100644 index 0000000000000..e1335d77d792a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationStatisticSummary.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import javax.annotation.Nullable; +import java.io.Serializable; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_FAILURES; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MAX; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MEAN; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MIN; + +/** + * Summary of duration tracking statistics + * as extracted from an IOStatistics instance. + *

    + * This is for reporting and testing. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class DurationStatisticSummary implements Serializable { + + private static final long serialVersionUID = 6776381340896518486L; + + /** Statistic key. */ + private final String key; + + /** Are these success or failure statistics. */ + private final boolean success; + + /** Count of operation invocations. */ + private final long count; + + /** Max duration; -1 if unknown. */ + private final long max; + + /** Min duration; -1 if unknown. */ + private final long min; + + /** Mean duration -may be null. */ + private final MeanStatistic mean; + + /** + * Constructor. + * @param key Statistic key. + * @param success Are these success or failure statistics. + * @param count Count of operation invocations. + * @param max Max duration; -1 if unknown. + * @param min Min duration; -1 if unknown. + * @param mean Mean duration -may be null. (will be cloned) + */ + public DurationStatisticSummary(final String key, + final boolean success, + final long count, + final long max, + final long min, + @Nullable final MeanStatistic mean) { + this.key = key; + this.success = success; + this.count = count; + this.max = max; + this.min = min; + this.mean = mean == null ? null : mean.clone(); + } + + public String getKey() { + return key; + } + + public boolean isSuccess() { + return success; + } + + public long getCount() { + return count; + } + + public long getMax() { + return max; + } + + public long getMin() { + return min; + } + + public MeanStatistic getMean() { + return mean; + } + + @Override + public String toString() { + return "DurationStatisticSummary{" + + "key='" + key + '\'' + + ", success=" + success + + ", counter=" + count + + ", max=" + max + + ", mean=" + mean + + '}'; + } + + /** + * Fetch the duration timing summary of success or failure operations + * from an IO Statistics source. + * If the duration key is unknown, the summary will be incomplete. 
+ * @param source source of data + * @param key duration statistic key + * @param success fetch success statistics, or if false, failure stats. + * @return a summary of the statistics. + */ + public static DurationStatisticSummary fetchDurationSummary( + IOStatistics source, + String key, + boolean success) { + String fullkey = success ? key : key + SUFFIX_FAILURES; + return new DurationStatisticSummary(key, success, + source.counters().getOrDefault(fullkey, 0L), + source.maximums().getOrDefault(fullkey + SUFFIX_MAX, -1L), + source.minimums().getOrDefault(fullkey + SUFFIX_MIN, -1L), + source.meanStatistics() + .get(fullkey + SUFFIX_MEAN)); + } + + /** + * Fetch the duration timing summary from an IOStatistics source. + * If the duration key is unknown, the summary will be incomplete. + * @param source source of data + * @param key duration statistic key + * @return a summary of the statistics. + */ + public static DurationStatisticSummary fetchSuccessSummary( + IOStatistics source, + String key) { + return fetchDurationSummary(source, key, true); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTracker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTracker.java new file mode 100644 index 0000000000000..5a15c7ad66c4f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTracker.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.time.Duration; + +/** + * Interface to be implemented by objects which can track duration. + * It extends AutoCloseable to fit into a try-with-resources statement, + * but then strips out the {@code throws Exception} aspect of the signature + * so it doesn't force code to add extra handling for any failures. + * + * If a duration is declared as "failed()" then the failure counters + * will be updated. + */ +public interface DurationTracker extends AutoCloseable { + + /** + * The operation failed. Failure statistics will be updated. + */ + void failed(); + + /** + * Finish tracking: update the statistics with the timings. + */ + void close(); + + /** + * Get the duration of an operation as a java Duration + * instance. If the duration tracker hasn't completed, + * or its duration tracking doesn't actually measure duration, + * returns Duration.ZERO. + * @return a duration, value of ZERO until close(). 
+ */ + default Duration asDuration() { + return Duration.ZERO; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTrackerFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTrackerFactory.java new file mode 100644 index 0000000000000..641d7e8368bb1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTrackerFactory.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTracker; + +/** + * Interface for a source of duration tracking. + * + * This is intended for uses where it can be passed into classes + * which update operation durations, without tying those + * classes to internal implementation details. + */ +public interface DurationTrackerFactory { + + /** + * Initiate a duration tracking operation by creating/returning + * an object whose {@code close()} call will + * update the statistics. + * + * The statistics counter with the key name will be incremented + * by the given count. 
+ * + * The expected use is within a try-with-resources clause. + * + * The default implementation returns a stub duration tracker. + * @param key statistic key prefix + * @param count #of times to increment the matching counter in this + * operation. + * @return an object to close after an operation completes. + */ + default DurationTracker trackDuration(String key, long count) { + return stubDurationTracker(); + } + + /** + * Initiate a duration tracking operation by creating/returning + * an object whose {@code close()} call will + * update the statistics. + * The expected use is within a try-with-resources clause. + * @param key statistic key + * @return an object to close after an operation completes. + */ + default DurationTracker trackDuration(String key) { + return trackDuration(key, 1); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatistics.java new file mode 100644 index 0000000000000..75d9965128101 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatistics.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * IO Statistics. + *

    + * These are low-cost per-instance statistics provided by any Hadoop + * I/O class instance. + *

    + * Consult the filesystem specification document for the requirements + * of an implementation of this interface. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface IOStatistics { + + /** + * Map of counters. + * @return the current map of counters. + */ + Map counters(); + + /** + * Map of gauges. + * @return the current map of gauges. + */ + Map gauges(); + + /** + * Map of minimums. + * @return the current map of minimums. + */ + Map minimums(); + + /** + * Map of maximums. + * @return the current map of maximums. + */ + Map maximums(); + + /** + * Map of meanStatistics. + * @return the current map of MeanStatistic statistics. + */ + Map meanStatistics(); + + /** + * Value when a minimum value has never been set. + */ + long MIN_UNSET_VALUE = -1; + + /** + * Value when a max value has never been set. + */ + long MAX_UNSET_VALUE = -1; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsAggregator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsAggregator.java new file mode 100644 index 0000000000000..1c5451c6f0e83 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsAggregator.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import javax.annotation.Nullable; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Interface exported by classes which support + * aggregation of {@link IOStatistics}. + * Implementations MAY aggregate all statistics + * exported by the IOStatistics reference passed in to + * {@link #aggregate(IOStatistics)}, or they + * may selectively aggregate specific values/classes + * of statistics. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface IOStatisticsAggregator { + + /** + * Aggregate the supplied statistics into the current + * set. + * + * @param statistics statistics; may be null + * @return true if the statistics reference was not null and + * so aggregated. + */ + boolean aggregate(@Nullable IOStatistics statistics); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsContext.java new file mode 100644 index 0000000000000..1876a48bc5122 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsContext.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.fs.statistics.impl.IOStatisticsContextIntegration; + +import static java.util.Objects.requireNonNull; + +/** + * An interface defined to capture thread-level IOStatistics by using per + * thread context. + *

    + * Statistics-generating classes should retrieve the aggregator in their + * constructor, and then use that aggregator for updates made from any thread. + *

    + * The {@link #snapshot()} call creates a snapshot of the statistics; + *

    + * The {@link #reset()} call resets the statistics in the context so + * that later snapshots will get the incremental data. + */ +public interface IOStatisticsContext extends IOStatisticsSource { + + /** + * Get the IOStatisticsAggregator for the context. + * + * @return return the aggregator for the context. + */ + IOStatisticsAggregator getAggregator(); + + /** + * Capture the snapshot of the context's IOStatistics. + * + * @return IOStatisticsSnapshot for the context. + */ + IOStatisticsSnapshot snapshot(); + + /** + * Get a unique ID for this context, for logging + * purposes. + * + * @return an ID unique for all contexts in this process. + */ + long getID(); + + /** + * Reset the context's IOStatistics. + */ + void reset(); + + /** + * Get the context's IOStatisticsContext. + * + * @return instance of IOStatisticsContext for the context. + */ + static IOStatisticsContext getCurrentIOStatisticsContext() { + // the null check is just a safety check to highlight exactly where a null value would + // be returned if HADOOP-18456 has resurfaced. + return requireNonNull( + IOStatisticsContextIntegration.getCurrentIOStatisticsContext(), + "Null IOStatisticsContext"); + } + + /** + * Set the IOStatisticsContext for the current thread. + * @param statisticsContext IOStatistics context instance for the + * current thread. If null, the context is reset. + */ + static void setThreadIOStatisticsContext( + IOStatisticsContext statisticsContext) { + IOStatisticsContextIntegration.setThreadIOStatisticsContext( + statisticsContext); + } + + /** + * Static probe to check if the thread-level IO statistics enabled. + * + * @return if the thread-level IO statistics enabled. 
+ */ + static boolean enabled() { + return IOStatisticsContextIntegration.isIOStatisticsThreadLevelEnabled(); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsLogging.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsLogging.java new file mode 100644 index 0000000000000..df063f1fa832b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsLogging.java @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import javax.annotation.Nullable; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; +import java.util.function.Predicate; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; + +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_ERROR; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_WARN; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; + +/** + * Utility operations convert IO Statistics sources/instances + * to strings, especially for robustly logging. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class IOStatisticsLogging { + + private static final Logger LOG = + LoggerFactory.getLogger(IOStatisticsLogging.class); + + private IOStatisticsLogging() { + } + + /** + * Extract the statistics from a source object -or "" + * if it is not an instance of {@link IOStatistics}, + * {@link IOStatisticsSource} or the retrieved + * statistics are null. + *

    + * Exceptions are caught and downgraded to debug logging. + * @param source source of statistics. + * @return a string for logging. + */ + public static String ioStatisticsSourceToString(@Nullable Object source) { + try { + return ioStatisticsToString(retrieveIOStatistics(source)); + } catch (RuntimeException e) { + LOG.debug("Ignoring", e); + return ""; + } + } + + /** + * Convert IOStatistics to a string form. + * @param statistics A statistics instance. + * @return string value or the empty string if null + */ + public static String ioStatisticsToString( + @Nullable final IOStatistics statistics) { + if (statistics != null) { + StringBuilder sb = new StringBuilder(); + mapToString(sb, "counters", statistics.counters(), " "); + mapToString(sb, "gauges", statistics.gauges(), " "); + mapToString(sb, "minimums", statistics.minimums(), " "); + mapToString(sb, "maximums", statistics.maximums(), " "); + mapToString(sb, "means", statistics.meanStatistics(), " "); + + return sb.toString(); + } else { + return ""; + } + } + + /** + * Convert IOStatistics to a string form, with all the metrics sorted + * and empty value stripped. + * This is more expensive than the simple conversion, so should only + * be used for logging/output where it's known/highly likely that the + * caller wants to see the values. Not for debug logging. + * @param statistics A statistics instance. 
+ * @return string value or the empty string if null + */ + public static String ioStatisticsToPrettyString( + @Nullable final IOStatistics statistics) { + if (statistics != null) { + StringBuilder sb = new StringBuilder(); + mapToSortedString(sb, "counters", statistics.counters(), + p -> p == 0); + mapToSortedString(sb, "\ngauges", statistics.gauges(), + p -> p == 0); + mapToSortedString(sb, "\nminimums", statistics.minimums(), + p -> p < 0); + mapToSortedString(sb, "\nmaximums", statistics.maximums(), + p -> p < 0); + mapToSortedString(sb, "\nmeans", statistics.meanStatistics(), + MeanStatistic::isEmpty); + + return sb.toString(); + } else { + return ""; + } + } + + /** + * Given a map, add its entryset to the string. + * The entries are only sorted if the source entryset + * iterator is sorted, such as from a TreeMap. + * @param sb string buffer to append to + * @param type type (for output) + * @param map map to evaluate + * @param separator separator + * @param type of values of the map + */ + private static void mapToString(StringBuilder sb, + final String type, + final Map map, + final String separator) { + int count = 0; + sb.append(type); + sb.append("=("); + for (Map.Entry entry : map.entrySet()) { + if (count > 0) { + sb.append(separator); + } + count++; + sb.append(IOStatisticsBinding.entryToString( + entry.getKey(), entry.getValue())); + } + sb.append(");\n"); + } + + /** + * Given a map, produce a string with all the values, sorted. + * Needs to create a treemap and insert all the entries. + * @param sb string buffer to append to + * @param type type (for output) + * @param map map to evaluate + * @param type of values of the map + */ + private static void mapToSortedString(StringBuilder sb, + final String type, + final Map map, + final Predicate isEmpty) { + mapToString(sb, type, sortedMap(map, isEmpty), "\n"); + } + + /** + * Create a sorted (tree) map from an unsorted map. 
+ * This incurs the cost of creating a map and that + * of inserting every object into the tree. + * @param source source map + * @param value type + * @return a treemap with all the entries. + */ + private static Map sortedMap( + final Map source, + final Predicate isEmpty) { + Map tm = new TreeMap<>(); + for (Map.Entry entry : source.entrySet()) { + if (!isEmpty.test(entry.getValue())) { + tm.put(entry.getKey(), entry.getValue()); + } + } + return tm; + } + + /** + * On demand stringifier of an IOStatisticsSource instance. + *

    + * Whenever this object's toString() method is called, it evaluates the + * statistics. + *

    + * This is designed to be affordable to use in log statements. + * @param source source of statistics -may be null. + * @return an object whose toString() operation returns the current values. + */ + public static Object demandStringifyIOStatisticsSource( + @Nullable IOStatisticsSource source) { + return new SourceToString(source); + } + + /** + * On demand stringifier of an IOStatistics instance. + *

    + * Whenever this object's toString() method is called, it evaluates the + * statistics. + *

    + * This is for use in log statements where for the cost of creation + * of this entry is low; it is affordable to use in log statements. + * @param statistics statistics to stringify -may be null. + * @return an object whose toString() operation returns the current values. + */ + public static Object demandStringifyIOStatistics( + @Nullable IOStatistics statistics) { + return new StatisticsToString(statistics); + } + + /** + * Extract any statistics from the source and log at debug, if + * the log is set to log at debug. + * No-op if logging is not at debug or the source is null/of + * the wrong type/doesn't provide statistics. + * @param log log to log to + * @param message message for log -this must contain "{}" for the + * statistics report to actually get logged. + * @param source source object + */ + public static void logIOStatisticsAtDebug( + Logger log, + String message, + Object source) { + if (log.isDebugEnabled()) { + // robust extract and convert to string + String stats = ioStatisticsSourceToString(source); + if (!stats.isEmpty()) { + log.debug(message, stats); + } + } + } + + /** + * Extract any statistics from the source and log to + * this class's log at debug, if + * the log is set to log at debug. + * No-op if logging is not at debug or the source is null/of + * the wrong type/doesn't provide statistics. + * @param message message for log -this must contain "{}" for the + * statistics report to actually get logged. + * @param source source object + */ + public static void logIOStatisticsAtDebug( + String message, + Object source) { + logIOStatisticsAtDebug(LOG, message, source); + } + + /** + * A method to log IOStatistics from a source at different levels. + * + * @param log Logger for logging. + * @param level LOG level. + * @param source Source to LOG. 
+ */ + public static void logIOStatisticsAtLevel(Logger log, String level, + Object source) { + IOStatistics stats = retrieveIOStatistics(source); + if (stats != null) { + switch (level.toLowerCase(Locale.US)) { + case IOSTATISTICS_LOGGING_LEVEL_INFO: + LOG.info("IOStatistics: {}", ioStatisticsToPrettyString(stats)); + break; + case IOSTATISTICS_LOGGING_LEVEL_ERROR: + LOG.error("IOStatistics: {}", ioStatisticsToPrettyString(stats)); + break; + case IOSTATISTICS_LOGGING_LEVEL_WARN: + LOG.warn("IOStatistics: {}", ioStatisticsToPrettyString(stats)); + break; + default: + logIOStatisticsAtDebug(log, "IOStatistics: {}", source); + } + } + } + + /** + * On demand stringifier. + *

    + * Whenever this object's toString() method is called, it + * retrieves the latest statistics instance and re-evaluates it. + */ + private static final class SourceToString { + + private final IOStatisticsSource source; + + private SourceToString(@Nullable IOStatisticsSource source) { + this.source = source; + } + + @Override + public String toString() { + return source != null + ? ioStatisticsSourceToString(source) + : IOStatisticsBinding.NULL_SOURCE; + } + } + + /** + * Stringifier of statistics: low cost to instantiate and every + * toString/logging will re-evaluate the statistics. + */ + private static final class StatisticsToString { + + private final IOStatistics statistics; + + /** + * Constructor. + * @param statistics statistics + */ + private StatisticsToString(@Nullable IOStatistics statistics) { + this.statistics = statistics; + } + + /** + * Evaluate and stringify the statistics. + * @return a string value. + */ + @Override + public String toString() { + return statistics != null + ? ioStatisticsToString(statistics) + : IOStatisticsBinding.NULL_SOURCE; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSetters.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSetters.java new file mode 100644 index 0000000000000..1d1cf9687e7ab --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSetters.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Setter for IOStatistics entries. + * These operations have been in the read/write API + * {@code IOStatisticsStore} since IOStatistics + * was added; extracting into its own interface allows for + * {@link IOStatisticsSnapshot} to also support it. + * These are the simple setters, they don't provide for increments, + * decrements, calculation of min/max/mean etc. + * @since The interface and IOStatisticsSnapshot support was added after Hadoop 3.3.5 + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface IOStatisticsSetters extends IOStatistics { + + /** + * Set a counter. + * + * No-op if the counter is unknown. + * @param key statistics key + * @param value value to set + */ + void setCounter(String key, long value); + + /** + * Set a gauge. + * + * @param key statistics key + * @param value value to set + */ + void setGauge(String key, long value); + + /** + * Set a maximum. + * @param key statistics key + * @param value value to set + */ + void setMaximum(String key, long value); + + /** + * Set a minimum. + * @param key statistics key + * @param value value to set + */ + void setMinimum(String key, long value); + + /** + * Set a mean statistic to a given value. + * @param key statistic key + * @param value new value. 
+ */ + void setMeanStatistic(String key, MeanStatistic value); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSnapshot.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSnapshot.java new file mode 100644 index 0000000000000..988d386e29877 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSnapshot.java @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import org.apache.hadoop.util.JsonSerialization; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToString; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.aggregateMaps; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.snapshotMap; + +/** + * Snapshot of statistics from a different source. + *

    + * It is serializable so that frameworks which can use java serialization + * to propagate data (Spark, Flink...) can send the statistics + * back. For this reason, TreeMaps are explicitly used as field types, + * even though IDEs can recommend use of Map instead. + * For security reasons, untrusted java object streams should never be + * deserialized. If for some reason this is required, use + * {@link #requiredSerializationClasses()} to get the list of classes + * used when deserializing instances of this object. + *

    + *

    + * It is annotated for correct serializations with jackson2. + *

    + */ +@SuppressWarnings("CollectionDeclaredAsConcreteClass") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class IOStatisticsSnapshot + implements IOStatistics, Serializable, IOStatisticsAggregator, + IOStatisticsSetters { + + private static final long serialVersionUID = -1762522703841538084L; + + /** + * List of classes needed to deserialize. + */ + private static final Class[] DESERIALIZATION_CLASSES = { + IOStatisticsSnapshot.class, + TreeMap.class, + Long.class, + MeanStatistic.class, + }; + + /** + * Counters. + */ + @JsonProperty + private transient Map counters; + + /** + * Gauges. + */ + @JsonProperty + private transient Map gauges; + + /** + * Minimum values. + */ + @JsonProperty + private transient Map minimums; + + /** + * Maximum values. + */ + @JsonProperty + private transient Map maximums; + + /** + * mean statistics. The JSON key is all lower case. + */ + @JsonProperty("meanstatistics") + private transient Map meanStatistics; + + /** + * Construct. + */ + public IOStatisticsSnapshot() { + createMaps(); + } + + /** + * Construct, taking a snapshot of the source statistics data + * if the source is non-null. + * If the source is null, empty maps are created. + * @param source statistics source. Nullable. + */ + public IOStatisticsSnapshot(IOStatistics source) { + if (source != null) { + snapshot(source); + } else { + createMaps(); + } + } + + /** + * Create the maps. + */ + private synchronized void createMaps() { + counters = new ConcurrentHashMap<>(); + gauges = new ConcurrentHashMap<>(); + minimums = new ConcurrentHashMap<>(); + maximums = new ConcurrentHashMap<>(); + meanStatistics = new ConcurrentHashMap<>(); + } + + /** + * Clear all the maps. + */ + public synchronized void clear() { + counters.clear(); + gauges.clear(); + minimums.clear(); + maximums.clear(); + meanStatistics.clear(); + } + + /** + * Take a snapshot. + * + * This completely overwrites the map data with the statistics + * from the source.
+ * @param source statistics source. + */ + public synchronized void snapshot(IOStatistics source) { + checkNotNull(source); + counters = snapshotMap(source.counters()); + gauges = snapshotMap(source.gauges()); + minimums = snapshotMap(source.minimums()); + maximums = snapshotMap(source.maximums()); + meanStatistics = snapshotMap(source.meanStatistics(), + MeanStatistic::copy); + } + + /** + * Aggregate the current statistics with the + * source reference passed in. + * + * The operation is synchronized. + * @param source source; may be null + * @return true if a merge took place. + */ + @Override + public synchronized boolean aggregate( + @Nullable IOStatistics source) { + if (source == null) { + return false; + } + aggregateMaps(counters, source.counters(), + IOStatisticsBinding::aggregateCounters, + IOStatisticsBinding::passthroughFn); + aggregateMaps(gauges, source.gauges(), + IOStatisticsBinding::aggregateGauges, + IOStatisticsBinding::passthroughFn); + aggregateMaps(minimums, source.minimums(), + IOStatisticsBinding::aggregateMinimums, + IOStatisticsBinding::passthroughFn); + aggregateMaps(maximums, source.maximums(), + IOStatisticsBinding::aggregateMaximums, + IOStatisticsBinding::passthroughFn); + aggregateMaps(meanStatistics, source.meanStatistics(), + IOStatisticsBinding::aggregateMeanStatistics, MeanStatistic::copy); + return true; + } + + @Override + public synchronized Map counters() { + return counters; + } + + @Override + public synchronized Map gauges() { + return gauges; + } + + @Override + public synchronized Map minimums() { + return minimums; + } + + @Override + public synchronized Map maximums() { + return maximums; + } + + @Override + public synchronized Map meanStatistics() { + return meanStatistics; + } + + @Override + public synchronized void setCounter(final String key, final long value) { + counters().put(key, value); + } + + @Override + public synchronized void setGauge(final String key, final long value) { + gauges().put(key, value); + 
+ } + + @Override + public synchronized void setMaximum(final String key, final long value) { + maximums().put(key, value); + + } + + @Override + public synchronized void setMinimum(final String key, final long value) { + minimums().put(key, value); + } + + @Override + public void setMeanStatistic(final String key, final MeanStatistic value) { + meanStatistics().put(key, value); + } + + @Override + public String toString() { + return ioStatisticsToString(this); + } + + /** + * Get a JSON serializer for this class. + * @return a serializer. + */ + public static JsonSerialization serializer() { + return new JsonSerialization<>(IOStatisticsSnapshot.class, false, true); + } + + /** + * Serialize by converting each map to a TreeMap, and saving that + * to the stream. + * @param s ObjectOutputStream. + * @throws IOException raised on errors performing I/O. + */ + private synchronized void writeObject(ObjectOutputStream s) + throws IOException { + // Write out the core + s.defaultWriteObject(); + s.writeObject(new TreeMap(counters)); + s.writeObject(new TreeMap(gauges)); + s.writeObject(new TreeMap(minimums)); + s.writeObject(new TreeMap(maximums)); + s.writeObject(new TreeMap(meanStatistics)); + } + + /** + * Deserialize by loading each TreeMap, and building concurrent + * hash maps from them. + * + * @param s ObjectInputStream. + * @throws IOException raised on errors performing I/O. + * @throws ClassNotFoundException class not found exception + */ + private void readObject(final ObjectInputStream s) + throws IOException, ClassNotFoundException { + // read in core + s.defaultReadObject(); + // and rebuild a concurrent hashmap from every serialized tree map + // read back from the stream. 
+ counters = new ConcurrentHashMap<>( + (TreeMap) s.readObject()); + gauges = new ConcurrentHashMap<>( + (TreeMap) s.readObject()); + minimums = new ConcurrentHashMap<>( + (TreeMap) s.readObject()); + maximums = new ConcurrentHashMap<>( + (TreeMap) s.readObject()); + meanStatistics = new ConcurrentHashMap<>( + (TreeMap) s.readObject()); + } + + /** + * What classes are needed to deserialize this class? + * Needed to securely unmarshall this from untrusted sources. + * @return a list of required classes to deserialize the data. + */ + public static List requiredSerializationClasses() { + return Arrays.stream(DESERIALIZATION_CLASSES) + .collect(Collectors.toList()); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSource.java new file mode 100644 index 0000000000000..67bf51fc0c3ae --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSource.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceStability; + +/** + * A source of IO statistics. + *

    + * These statistics MUST be instance specific, not thread local. + *

    + */ + +@InterfaceStability.Unstable +public interface IOStatisticsSource { + + /** + * Return a statistics instance. + *

    + * It is not a requirement that the same instance is returned every time. + * {@link IOStatisticsSource}. + *

    + * If the object implementing this is Closeable, this method + * may return null if invoked on a closed object, even if + * it returns a valid instance when called earlier. + * @return an IOStatistics instance or null + */ + default IOStatistics getIOStatistics() { + return null; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSupport.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSupport.java new file mode 100644 index 0000000000000..bb4d9a44587a2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSupport.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.impl.StubDurationTracker; +import org.apache.hadoop.fs.statistics.impl.StubDurationTrackerFactory; + +/** + * Support for working with IOStatistics. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class IOStatisticsSupport { + + private IOStatisticsSupport() { + } + + /** + * Take a snapshot of the current statistics state. + *

    + * This is not an atomic operation. + *

    + * The instance can be serialized, and its + * {@code toString()} method lists all the values. + * @param statistics statistics + * @return a snapshot of the current values. + */ + public static IOStatisticsSnapshot + snapshotIOStatistics(IOStatistics statistics) { + + return new IOStatisticsSnapshot(statistics); + } + + /** + * Create a snapshot statistics instance ready to aggregate data. + * + * The instance can be serialized, and its + * {@code toString()} method lists all the values. + * @return an empty snapshot + */ + public static IOStatisticsSnapshot + snapshotIOStatistics() { + + return new IOStatisticsSnapshot(); + } + + /** + * Get the IOStatistics of the source, casting it + * if it is of the relevant type, otherwise, + * if it implements {@link IOStatisticsSource} + * extracting the value. + * + * Returns null if the source isn't of the right type + * or the return value of + * {@link IOStatisticsSource#getIOStatistics()} was null. + * @param source source. + * @return an IOStatistics instance or null + */ + + public static IOStatistics retrieveIOStatistics( + final Object source) { + if (source instanceof IOStatistics) { + return (IOStatistics) source; + } else if (source instanceof IOStatisticsSource) { + return ((IOStatisticsSource) source).getIOStatistics(); + } else { + // null source or interface not implemented + return null; + } + } + + /** + * Return a stub duration tracker factory whose returned trackers + * are always no-ops. + * + * As singletons are returned, this is very low-cost to use. + * @return a duration tracker factory. + */ + public static DurationTrackerFactory stubDurationTrackerFactory() { + return StubDurationTrackerFactory.STUB_DURATION_TRACKER_FACTORY; + } + + /** + * Get a stub duration tracker. + * @return a stub tracker.
+ */ + public static DurationTracker stubDurationTracker() { + return StubDurationTracker.STUB_DURATION_TRACKER; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/MeanStatistic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/MeanStatistic.java new file mode 100644 index 0000000000000..369db49654382 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/MeanStatistic.java @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.io.Serializable; +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * A mean statistic represented as the sum and the sample count; + * the mean is calculated on demand. + *

    + * It can be used to accrue values so as to dynamically update + * the mean. If so, note that each update method is synchronized + * individually; a sequence of calls is not atomic as a whole. + *

    + *

    + * If a statistic has 0 samples then it is considered to be empty. + *

    + *

    + * All 'empty' statistics are equivalent, independent of the sum value. + *

    + *

    + * For non-empty statistics, sum and sample values must match + * for equality. + *

    + *

    + * It is serializable and annotated for correct serializations with jackson2. + *

    + *

    + * Thread safety: the operations to add/copy sample data are thread safe. + *

    + * <ol> + *   <li>{@link #add(MeanStatistic)}</li> + *   <li>{@link #addSample(long)}</li> + *   <li>{@link #clear()}</li> + *   <li>{@link #setSamplesAndSum(long, long)}</li> + *   <li>{@link #set(MeanStatistic)}</li> + *   <li>{@link #setSamples(long)} and {@link #setSum(long)}</li> + * </ol>
    + *

    + * So is the {@link #mean()} method. This ensures that when + * used to aggregate statistics, the aggregate value and sample + * count are set and evaluated consistently. + *

    + *

    + * Other methods marked as synchronized because Findbugs overreacts + * to the idea that some operations to update sum and sample count + * are synchronized, but that things like equals are not. + *

    + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class MeanStatistic implements Serializable, Cloneable { + + private static final long serialVersionUID = 567888327998615425L; + + /** + * Number of samples used to calculate + * the mean. + */ + private long samples; + + /** + * sum of the values. + */ + private long sum; + + /** + * Constructor, with some resilience against invalid sample counts. + * If the sample count is 0 or less, the sum is set to 0 and + * the sample count to 0. + * @param samples sample count. + * @param sum sum value + */ + public MeanStatistic(final long samples, final long sum) { + if (samples > 0) { + this.sum = sum; + this.samples = samples; + } + } + + /** + * Create from another statistic. + * @param that source + */ + public MeanStatistic(MeanStatistic that) { + synchronized (that) { + set(that); + } + } + + /** + * Create an empty statistic. + */ + public MeanStatistic() { + } + + /** + * Get the sum of samples. + * @return the sum + */ + public synchronized long getSum() { + return sum; + } + + /** + * Get the sample count. + * @return the sample count; 0 means empty + */ + public synchronized long getSamples() { + return samples; + } + + /** + * Is a statistic empty? + * @return true if the sample count is 0 + */ + @JsonIgnore + public synchronized boolean isEmpty() { + return samples == 0; + } + + /** + * Set the values to 0. + */ + public void clear() { + setSamplesAndSum(0, 0); + } + + /** + * Set the sum and samples. + * Synchronized. + * @param sampleCount new sample count. + * @param newSum new sum + */ + public synchronized void setSamplesAndSum(long sampleCount, + long newSum) { + setSamples(sampleCount); + setSum(newSum); + } + + /** + * Set the statistic to the values of another. + * Synchronized. + * @param other the source. + */ + public void set(final MeanStatistic other) { + setSamplesAndSum(other.getSamples(), other.getSum()); + } + + /** + * Set the sum. 
+ * @param sum new sum + */ + public synchronized void setSum(final long sum) { + this.sum = sum; + } + + /** + * Set the sample count. + * + * If this is less than zero, it is set to zero. + * This stops an ill-formed JSON entry from + * breaking deserialization, or get an invalid sample count + * into an entry. + * @param samples sample count. + */ + public synchronized void setSamples(final long samples) { + if (samples < 0) { + this.samples = 0; + } else { + this.samples = samples; + } + } + + /** + * Get the arithmetic mean value. + * @return the mean + */ + public synchronized double mean() { + return samples > 0 + ? ((double) sum) / samples + : 0.0d; + } + + /** + * Add another MeanStatistic. + * @param other other value + * @return mean statistic. + */ + public synchronized MeanStatistic add(final MeanStatistic other) { + if (other.isEmpty()) { + return this; + } + long otherSamples; + long otherSum; + synchronized (other) { + otherSamples = other.samples; + otherSum = other.sum; + } + if (isEmpty()) { + samples = otherSamples; + sum = otherSum; + return this; + } + samples += otherSamples; + sum += otherSum; + return this; + } + + /** + * Add a sample. + * Thread safe. + * @param value value to add to the sum + */ + public synchronized void addSample(long value) { + samples++; + sum += value; + } + + /** + * The hash code is derived from the mean + * and sample count: if either is changed + * the statistic cannot be used as a key + * for hash tables/maps. + * @return a hash value + */ + @Override + public synchronized int hashCode() { + return Objects.hash(sum, samples); + } + + @Override + public synchronized boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MeanStatistic that = (MeanStatistic) o; + if (isEmpty()) { + // if we are empty, then so must the other. 
+ return that.isEmpty(); + } + return getSum() == that.getSum() && + getSamples() == that.getSamples(); + } + + @Override + public MeanStatistic clone() { + return copy(); + } + + /** + * Create a copy of this instance. + * @return copy. + * + */ + public MeanStatistic copy() { + return new MeanStatistic(this); + } + + @Override + public String toString() { + return String.format("(samples=%d, sum=%d, mean=%.4f)", + samples, sum, mean()); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java new file mode 100644 index 0000000000000..c04c1bb47fcea --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Common statistic names for object store operations.. + *

    + * When adding new common statistic name constants, please make them unique. + * By convention: + *

    + * <ul> + *   <li>the names of the constants are uppercase, words separated by + *   underscores.</li> + *   <li>the values of the constants are the lowercase form of the constant names.</li> + * </ul>
    + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class StoreStatisticNames { + + /** {@value}. */ + public static final String OP_ABORT = "op_abort"; + + /** access() API call {@value}. */ + public static final String OP_ACCESS = "op_access"; + + /** {@value}. */ + public static final String OP_APPEND = "op_append"; + + /** {@value}. */ + public static final String OP_COPY_FROM_LOCAL_FILE = + "op_copy_from_local_file"; + + /** {@value}. */ + public static final String OP_CREATE = "op_create"; + + /** {@value}. */ + public static final String OP_CREATE_FILE = "op_createfile"; + + /** {@value}. */ + public static final String OP_CREATE_NON_RECURSIVE = + "op_create_non_recursive"; + + /** {@value}. */ + public static final String OP_DELETE = "op_delete"; + + /** {@value}. */ + public static final String OP_EXISTS = "op_exists"; + + /** {@value}. */ + public static final String OP_GET_CONTENT_SUMMARY = + "op_get_content_summary"; + + /** {@value}. */ + public static final String OP_GET_DELEGATION_TOKEN = + "op_get_delegation_token"; + + /** {@value}. */ + public static final String OP_GET_FILE_CHECKSUM = + "op_get_file_checksum"; + + /** {@value}. */ + public static final String OP_GET_FILE_STATUS = "op_get_file_status"; + + /** {@value}. */ + public static final String OP_GET_STATUS = "op_get_status"; + + /** {@value}. */ + public static final String OP_GLOB_STATUS = "op_glob_status"; + + /** {@value}. */ + public static final String OP_IS_FILE = "op_is_file"; + + /** {@value}. */ + public static final String OP_HFLUSH = "op_hflush"; + + /** {@value}. */ + public static final String OP_HSYNC = "op_hsync"; + + /** {@value}. */ + public static final String OP_IS_DIRECTORY = "op_is_directory"; + + /** {@value}. */ + public static final String OP_LIST_FILES = "op_list_files"; + + /** {@value}. */ + public static final String OP_LIST_LOCATED_STATUS = + "op_list_located_status"; + + /** {@value}. 
*/ + public static final String OP_LIST_STATUS = "op_list_status"; + + /** {@value}. */ + public static final String OP_MKDIRS = "op_mkdirs"; + + /** {@value}. */ + public static final String OP_MODIFY_ACL_ENTRIES = "op_modify_acl_entries"; + + /** {@value}. */ + public static final String OP_MSYNC = "op_msync"; + + /** {@value}. */ + public static final String OP_OPEN = "op_open"; + + /** Call to openFile() {@value}. */ + public static final String OP_OPENFILE = "op_openfile"; + + /** {@value}. */ + public static final String OP_REMOVE_ACL = "op_remove_acl"; + + /** {@value}. */ + public static final String OP_REMOVE_ACL_ENTRIES = "op_remove_acl_entries"; + + /** {@value}. */ + public static final String OP_REMOVE_DEFAULT_ACL = "op_remove_default_acl"; + + /** {@value}. */ + public static final String OP_RENAME = "op_rename"; + + /** {@value}. */ + public static final String OP_SET_ACL = "op_set_acl"; + + /** {@value}. */ + public static final String OP_SET_OWNER = "op_set_owner"; + + /** {@value}. */ + public static final String OP_SET_PERMISSION = "op_set_permission"; + + /** {@value}. */ + public static final String OP_SET_TIMES = "op_set_times"; + + /** {@value}. */ + public static final String OP_TRUNCATE = "op_truncate"; + + /* The XAttr API */ + + /** Invoke {@code getXAttrs(Path path)}: {@value}. */ + public static final String OP_XATTR_GET_MAP = "op_xattr_get_map"; + + /** Invoke {@code getXAttr(Path, String)}: {@value}. */ + public static final String OP_XATTR_GET_NAMED = "op_xattr_get_named"; + + /** + * Invoke {@code getXAttrs(Path path, List names)}: {@value}. + */ + public static final String OP_XATTR_GET_NAMED_MAP = + "op_xattr_get_named_map"; + + /** Invoke {@code listXAttrs(Path path)}: {@value}. */ + public static final String OP_XATTR_LIST = "op_xattr_list"; + + + /** {@value}. */ + public static final String DELEGATION_TOKENS_ISSUED + = "delegation_tokens_issued"; + + /** Probe for store existing: {@value}. 
*/ + public static final String STORE_EXISTS_PROBE + = "store_exists_probe"; + + /** Requests throttled and retried: {@value}. */ + public static final String STORE_IO_THROTTLED + = "store_io_throttled"; + + /** Rate limiting was reported {@value}. */ + public static final String STORE_IO_RATE_LIMITED = "store_io_rate_limited"; + + /** Requests made of a store: {@value}. */ + public static final String STORE_IO_REQUEST + = "store_io_request"; + + /** + * IO retried: {@value}. + */ + public static final String STORE_IO_RETRY + = "store_io_retry"; + + /** + * A store's equivalent of a paged LIST request was initiated: {@value}. + */ + public static final String OBJECT_LIST_REQUEST + = "object_list_request"; + + /** + * Number of continued object listings made. + * Value :{@value}. + */ + public static final String OBJECT_CONTINUE_LIST_REQUEST = + "object_continue_list_request"; + + /** + * A bulk DELETE request was made: {@value}. + * A separate statistic from {@link #OBJECT_DELETE_REQUEST} + * so that metrics on duration of the operations can + * be distinguished. + */ + public static final String OBJECT_BULK_DELETE_REQUEST + = "object_bulk_delete_request"; + + /** + * A store's equivalent of a DELETE request was made: {@value}. + * This may be an HTTP DELETE verb, or it may be some custom + * operation which takes a list of objects to delete. + */ + public static final String OBJECT_DELETE_REQUEST + = "object_delete_request"; + + /** + * The count of objects deleted in delete requests. + */ + public static final String OBJECT_DELETE_OBJECTS + = "object_delete_objects"; + + /** + * Object multipart upload initiated. + * Value :{@value}. + */ + public static final String OBJECT_MULTIPART_UPLOAD_INITIATED = + "object_multipart_initiated"; + + /** + * Object multipart upload aborted. + * Value :{@value}. + */ + public static final String OBJECT_MULTIPART_UPLOAD_ABORTED = + "object_multipart_aborted"; + + /** + * Object put/multipart upload count. + * Value :{@value}. 
+ */ + public static final String OBJECT_PUT_REQUEST = + "object_put_request"; + + /** + * Object put/multipart upload completed count. + * Value :{@value}. + */ + public static final String OBJECT_PUT_REQUEST_COMPLETED = + "object_put_request_completed"; + + /** + * Current number of active put requests. + * Value :{@value}. + */ + public static final String OBJECT_PUT_REQUEST_ACTIVE = + "object_put_request_active"; + + /** + * Number of bytes uploaded. + * Value :{@value}. + */ + public static final String OBJECT_PUT_BYTES = + "object_put_bytes"; + + /** + * Number of bytes queued for upload/being actively uploaded. + * Value :{@value}. + */ + public static final String OBJECT_PUT_BYTES_PENDING = + "object_put_bytes_pending"; + + /** + * Count of S3 Select (or similar) requests issued. + * Value :{@value}. + */ + public static final String OBJECT_SELECT_REQUESTS = + "object_select_requests"; + + /** + * Suffix to use for a minimum value when + * the same key is shared across min/mean/max + * statistics. + * + * Value {@value}. + */ + public static final String SUFFIX_MIN = ".min"; + + /** + * Suffix to use for a maximum value when + * the same key is shared across min/mean/max + * statistics. + * + * Value {@value}. + */ + public static final String SUFFIX_MAX = ".max"; + + /** + * Suffix to use for a mean statistic value when + * the same key is shared across min/mean/max + * statistics. + * + * Value {@value}. + */ + public static final String SUFFIX_MEAN = ".mean"; + + /** + * String to add to counters and other stats to track failures. + * This comes before the .min/.mean/.max suffixes. + * + * Value {@value}. + */ + public static final String SUFFIX_FAILURES = ".failures"; + + /** + * The name of the statistic collected for executor acquisition if + * a duration tracker factory is passed in to the constructor. + * {@value}. + */ + public static final String ACTION_EXECUTOR_ACQUIRED = + "action_executor_acquired"; + + /** + * A file was opened: {@value}. 
+ */ + public static final String ACTION_FILE_OPENED + = "action_file_opened"; + + /** + * An HTTP HEAD request was made: {@value}. + */ + public static final String ACTION_HTTP_HEAD_REQUEST + = "action_http_head_request"; + + /** + * An HTTP GET request was made: {@value}. + */ + public static final String ACTION_HTTP_GET_REQUEST + = "action_http_get_request"; + + /** + * An HTTP DELETE request was made: {@value}. + */ + public static final String ACTION_HTTP_DELETE_REQUEST + = "action_http_delete_request"; + + /** + * An HTTP PUT request was made: {@value}. + */ + public static final String ACTION_HTTP_PUT_REQUEST + = "action_http_put_request"; + + /** + * An HTTP PATCH request was made: {@value}. + */ + public static final String ACTION_HTTP_PATCH_REQUEST + = "action_http_patch_request"; + + /** + * An HTTP POST request was made: {@value}. + */ + public static final String ACTION_HTTP_POST_REQUEST + = "action_http_post_request"; + + /** + * An HTTP HEAD request was made: {@value}. + */ + public static final String OBJECT_METADATA_REQUESTS + = "object_metadata_request"; + + public static final String OBJECT_COPY_REQUESTS + = "object_copy_requests"; + + public static final String STORE_IO_THROTTLE_RATE + = "store_io_throttle_rate"; + + public static final String MULTIPART_UPLOAD_INSTANTIATED + = "multipart_instantiated"; + + public static final String MULTIPART_UPLOAD_PART_PUT + = "multipart_upload_part_put"; + + public static final String MULTIPART_UPLOAD_PART_PUT_BYTES + = "multipart_upload_part_put_bytes"; + + public static final String MULTIPART_UPLOAD_ABORTED + = "multipart_upload_aborted"; + + public static final String MULTIPART_UPLOAD_ABORT_UNDER_PATH_INVOKED + = "multipart_upload_abort_under_path_invoked"; + + public static final String MULTIPART_UPLOAD_COMPLETED + = "multipart_upload_completed"; + + public static final String MULTIPART_UPLOAD_STARTED + = "multipart_upload_started"; + + public static final String MULTIPART_UPLOAD_LIST + = 
"multipart_upload_list"; + + private StoreStatisticNames() { + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java new file mode 100644 index 0000000000000..50bbf45505cec --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java @@ -0,0 +1,461 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * These are common statistic names. + *

    + * When adding new common statistic name constants, please make them unique. + * By convention, they are implicitly unique: + * <ul> + *   <li> + *     The names of the constants are uppercase, words separated by + *     underscores. + *   </li> + *   <li> + *     The values of the constants are the lowercase form of the constant names. + *   </li> + * </ul>
    + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class StreamStatisticNames { + + /** + * Count of times the TCP stream was aborted. + * Value: {@value}. + */ + public static final String STREAM_READ_ABORTED = "stream_aborted"; + + /** + * Bytes read from an input stream in read()/readVectored() calls. + * Does not include bytes read and then discarded in seek/close etc. + * These are the bytes returned to the caller. + * Value: {@value}. + */ + public static final String STREAM_READ_BYTES + = "stream_read_bytes"; + + /** + * Count of bytes discarded by aborting an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_BYTES_DISCARDED_ABORT + = "stream_read_bytes_discarded_in_abort"; + + /** + * Count of bytes read and discarded when closing an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_BYTES_DISCARDED_CLOSE + = "stream_read_bytes_discarded_in_close"; + + /** + * Count of times the TCP stream was closed. + * Value: {@value}. + */ + public static final String STREAM_READ_CLOSED = "stream_read_closed"; + + /** + * Total count of times an attempt to close an input stream was made. + * Value: {@value}. + */ + public static final String STREAM_READ_CLOSE_OPERATIONS + = "stream_read_close_operations"; + + /** + * Total count of times an input stream was opened. + * For object stores, that means the number of times a GET request was initiated. + * Value: {@value}. + */ + public static final String STREAM_READ_OPENED = "stream_read_opened"; + + /** + * Count of exceptions raised during input stream reads. + * Value: {@value}. + */ + public static final String STREAM_READ_EXCEPTIONS = + "stream_read_exceptions"; + + /** + * Count of readFully() operations in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_FULLY_OPERATIONS + = "stream_read_fully_operations"; + + /** + * Count of read() operations in an input stream. + * Value: {@value}. 
+ */ + public static final String STREAM_READ_OPERATIONS = + "stream_read_operations"; + + /** + * Count of readVectored() operations in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_VECTORED_OPERATIONS = + "stream_read_vectored_operations"; + + /** + * Count of bytes discarded during readVectored() operation + * in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_VECTORED_READ_BYTES_DISCARDED = + "stream_read_vectored_read_bytes_discarded"; + + /** + * Count of incoming file ranges during readVectored() operation. + * Value: {@value}. + */ + public static final String STREAM_READ_VECTORED_INCOMING_RANGES = + "stream_read_vectored_incoming_ranges"; + /** + * Count of combined file ranges during readVectored() operation. + * Value: {@value}. + */ + public static final String STREAM_READ_VECTORED_COMBINED_RANGES = + "stream_read_vectored_combined_ranges"; + + /** + * Count of incomplete read() operations in an input stream, + * that is, when the bytes returned were less than that requested. + * Value: {@value}. + */ + public static final String STREAM_READ_OPERATIONS_INCOMPLETE + = "stream_read_operations_incomplete"; + + /** + * count/duration of aborting a remote stream during stream IO. + * Value: {@value}. + */ + public static final String STREAM_READ_REMOTE_STREAM_ABORTED + = "stream_read_remote_stream_aborted"; + + /** + * count/duration of closing a remote stream, + * possibly including draining the stream to recycle + * the HTTP connection. + * Value: {@value}. + */ + public static final String STREAM_READ_REMOTE_STREAM_DRAINED + = "stream_read_remote_stream_drain"; + + /** + * Count of version mismatches encountered while reading an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_VERSION_MISMATCHES + = "stream_read_version_mismatches"; + + /** + * Count of executed seek operations which went backwards in a stream. + * Value: {@value}. 
+ */ + public static final String STREAM_READ_SEEK_BACKWARD_OPERATIONS = + "stream_read_seek_backward_operations"; + + /** + * Count of bytes moved backwards during seek operations + * in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_SEEK_BYTES_BACKWARDS + = "stream_read_bytes_backwards_on_seek"; + + /** + * Count of bytes read and discarded during seek() in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_SEEK_BYTES_DISCARDED = + "stream_read_seek_bytes_discarded"; + + /** + * Count of bytes skipped during forward seek operations. + * Value: {@value}. + */ + public static final String STREAM_READ_SEEK_BYTES_SKIPPED + = "stream_read_seek_bytes_skipped"; + + /** + * Count of executed seek operations which went forward in + * an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_SEEK_FORWARD_OPERATIONS + = "stream_read_seek_forward_operations"; + + /** + * Count of times the seek policy was dynamically changed + * in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_SEEK_POLICY_CHANGED = + "stream_read_seek_policy_changed"; + + /** + * Count of seek operations in an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_SEEK_OPERATIONS = + "stream_read_seek_operations"; + + /** + * Count of {@code InputStream.skip()} calls. + * Value: {@value}. + */ + public static final String STREAM_READ_SKIP_OPERATIONS = + "stream_read_skip_operations"; + + /** + * Count bytes skipped in {@code InputStream.skip()} calls. + * Value: {@value}. + */ + public static final String STREAM_READ_SKIP_BYTES = + "stream_read_skip_bytes"; + + /** + * Total count of bytes read from an input stream. + * Value: {@value}. + */ + public static final String STREAM_READ_TOTAL_BYTES + = "stream_read_total_bytes"; + + /** + * Count of calls of {@code CanUnbuffer.unbuffer()}. + * Value: {@value}. 
+ */ + public static final String STREAM_READ_UNBUFFERED + = "stream_read_unbuffered"; + + /** + * Count of stream write failures reported. + * Value: {@value}. + */ + public static final String STREAM_WRITE_EXCEPTIONS = + "stream_write_exceptions"; + + /** + * Count of failures when finalizing a multipart upload: + * {@value}. + */ + public static final String STREAM_WRITE_EXCEPTIONS_COMPLETING_UPLOADS = + "stream_write_exceptions_completing_upload"; + + /** + * Count of block/partition uploads complete. + * Value: {@value}. + */ + public static final String STREAM_WRITE_BLOCK_UPLOADS + = "stream_write_block_uploads"; + + /** + * Count of block uploads aborted. + * Value: {@value}. + */ + public static final String STREAM_WRITE_BLOCK_UPLOADS_ABORTED + = "stream_write_block_uploads_aborted"; + + /** + * Count of block/partition uploads active. + * Value: {@value}. + */ + public static final String STREAM_WRITE_BLOCK_UPLOADS_ACTIVE + = "stream_write_block_uploads_active"; + + /** + * Gauge of data queued to be written. + * Value: {@value}. + */ + public static final String STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING = + "stream_write_block_uploads_data_pending"; + + /** + * Count of block uploads committed. + * Value: {@value}. + */ + public static final String STREAM_WRITE_BLOCK_UPLOADS_COMMITTED + = "stream_write_block_uploads_committed"; + + /** + * Gauge of block/partitions uploads queued to be written. + * Value: {@value}. + */ + public static final String STREAM_WRITE_BLOCK_UPLOADS_PENDING + = "stream_write_block_uploads_pending"; + + + /** + * Count of bytes written to output stream including all not yet uploaded. + * {@value}. + */ + public static final String STREAM_WRITE_BYTES + = "stream_write_bytes"; + + /** + * Count of total time taken for uploads to complete. + * {@value}. + */ + public static final String STREAM_WRITE_TOTAL_TIME + = "stream_write_total_time"; + + /** + * Total queue duration of all block uploads. + * {@value}. 
+ */ + public static final String STREAM_WRITE_QUEUE_DURATION + = "stream_write_queue_duration"; + + public static final String STREAM_WRITE_TOTAL_DATA + = "stream_write_total_data"; + + /** + * Number of bytes to upload from an OutputStream. + */ + public static final String BYTES_TO_UPLOAD + = "bytes_upload"; + + /** + * Number of bytes uploaded successfully to the object store. + */ + public static final String BYTES_UPLOAD_SUCCESSFUL + = "bytes_upload_successfully"; + + /** + * Number of bytes failed to upload to the object store. + */ + public static final String BYTES_UPLOAD_FAILED + = "bytes_upload_failed"; + + /** + * Total time spent on waiting for a task to complete. + */ + public static final String TIME_SPENT_ON_TASK_WAIT + = "time_spent_task_wait"; + + /** + * Number of task queue shrunk operations. + */ + public static final String QUEUE_SHRUNK_OPS + = "queue_shrunk_ops"; + + /** + * Number of times current buffer is written to the service. + */ + public static final String WRITE_CURRENT_BUFFER_OPERATIONS + = "write_current_buffer_ops"; + + /** + * Total time spent on completing a PUT request. + */ + public static final String TIME_SPENT_ON_PUT_REQUEST + = "time_spent_on_put_request"; + + /** + * Number of seeks in buffer. + */ + public static final String SEEK_IN_BUFFER + = "seek_in_buffer"; + + /** + * Number of bytes read from the buffer. + */ + public static final String BYTES_READ_BUFFER + = "bytes_read_buffer"; + + /** + * Total number of remote read operations performed. + */ + public static final String REMOTE_READ_OP + = "remote_read_op"; + + /** + * Total number of bytes read from readAhead. + */ + public static final String READ_AHEAD_BYTES_READ + = "read_ahead_bytes_read"; + + /** + * Total number of bytes read from remote operations. + */ + public static final String REMOTE_BYTES_READ + = "remote_bytes_read"; + + /** + * Total number of Data blocks allocated by an outputStream. 
+ */ + public static final String BLOCKS_ALLOCATED + = "blocks_allocated"; + + /** + * Total number of Data blocks released by an outputStream. + */ + public static final String BLOCKS_RELEASED + = "blocks_released"; + + /** + * Total number of prefetching operations executed. + */ + public static final String STREAM_READ_PREFETCH_OPERATIONS + = "stream_read_prefetch_operations"; + + /** + * Total number of block in disk cache. + */ + public static final String STREAM_READ_BLOCKS_IN_FILE_CACHE + = "stream_read_blocks_in_cache"; + + /** + * Total number of active prefetch operations. + */ + public static final String STREAM_READ_ACTIVE_PREFETCH_OPERATIONS + = "stream_read_active_prefetch_operations"; + + /** + * Total bytes of memory in use by this input stream. + */ + public static final String STREAM_READ_ACTIVE_MEMORY_IN_USE + = "stream_read_active_memory_in_use"; + + /** + * count/duration of reading a remote block. + * + * Value: {@value}. + */ + public static final String STREAM_READ_REMOTE_BLOCK_READ + = "stream_read_block_read"; + + /** + * count/duration of acquiring a buffer and reading to it. + * + * Value: {@value}. + */ + public static final String STREAM_READ_BLOCK_ACQUIRE_AND_READ + = "stream_read_block_acquire_read"; + + private StreamStatisticNames() { + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/AbstractIOStatisticsImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/AbstractIOStatisticsImpl.java new file mode 100644 index 0000000000000..c701a509d8951 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/AbstractIOStatisticsImpl.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import org.apache.hadoop.fs.statistics.IOStatistics; + +/** + * Base implementation in case common methods/fields need to be added + * in future. + */ +public abstract class AbstractIOStatisticsImpl implements IOStatistics { + + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatistics.java new file mode 100644 index 0000000000000..50c2625c3513d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatistics.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.util.Collections; +import java.util.Map; +import java.util.function.Function; + +import org.apache.hadoop.fs.statistics.MeanStatistic; + +/** + * These statistics are dynamically evaluated by the supplied + * String -> type functions. + * + * This allows statistic sources to supply a list of callbacks used to + * generate the statistics on demand; similar to some of the Coda Hale metrics. + * + * Evaluation takes place when a map value is looked up; requesting a map's + * values or entry set evaluates every entry. + */ +final class DynamicIOStatistics + extends AbstractIOStatisticsImpl { + + /** + * Counter evaluators.
+ */ + private final EvaluatingStatisticsMap counters + = new EvaluatingStatisticsMap<>(); + /** Gauge evaluators. */ + private final EvaluatingStatisticsMap gauges + = new EvaluatingStatisticsMap<>(); + /** Minimum evaluators. */ + private final EvaluatingStatisticsMap minimums + = new EvaluatingStatisticsMap<>(); + /** Maximum evaluators. */ + private final EvaluatingStatisticsMap maximums + = new EvaluatingStatisticsMap<>(); + /** Mean statistic evaluators; MeanStatistic::copy is the map's copy function. */ + private final EvaluatingStatisticsMap meanStatistics + = new EvaluatingStatisticsMap<>(MeanStatistic::copy); + + DynamicIOStatistics() { + } + + @Override + public Map counters() { + return Collections.unmodifiableMap(counters); + } + + @Override + public Map gauges() { + return Collections.unmodifiableMap(gauges); + } + + @Override + public Map minimums() { + return Collections.unmodifiableMap(minimums); + } + + @Override + public Map maximums() { + return Collections.unmodifiableMap(maximums); + } + + @Override + public Map meanStatistics() { + return Collections.unmodifiableMap(meanStatistics); + } + + /** + * Add a mapping of a key to a counter function. + * @param key the key + * @param eval the evaluator + */ + void addCounterFunction(String key, Function eval) { + counters.addFunction(key, eval); + } + + /** + * Add a mapping of a key to a gauge function. + * @param key the key + * @param eval the evaluator + */ + void addGaugeFunction(String key, Function eval) { + gauges.addFunction(key, eval); + } + + /** + * Add a mapping of a key to a minimum function. + * @param key the key + * @param eval the evaluator + */ + void addMinimumFunction(String key, Function eval) { + minimums.addFunction(key, eval); + } + + /** + * Add a mapping of a key to a maximum function. + * @param key the key + * @param eval the evaluator + */ + void addMaximumFunction(String key, Function eval) { + maximums.addFunction(key, eval); + } + + /** + * Add a mapping of a key to a meanStatistic function.
+ * @param key the key + * @param eval the evaluator + */ + void addMeanStatisticFunction(String key, + Function eval) { + meanStatistics.addFunction(key, eval); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatisticsBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatisticsBuilder.java new file mode 100644 index 0000000000000..47a317076dcf2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatisticsBuilder.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; +import java.util.function.ToLongFunction; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.MeanStatistic; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; + +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkState; + +/** + * Builder of {@link DynamicIOStatistics}.
+ * + * Instantiate through + * {@link IOStatisticsBinding#dynamicIOStatistics()}. + */ +public class DynamicIOStatisticsBuilder { + + /** + * The instance being built up; set to null by the (single) + * call to {@link #build()}. + */ + private DynamicIOStatistics instance = new DynamicIOStatistics(); + + /** + * Build the IOStatistics instance; the builder is unusable afterwards. + * @return the built instance. + * @throws IllegalStateException if the builder has already been built. + */ + public IOStatistics build() { + final DynamicIOStatistics stats = activeInstance(); + // stop the builder from working any more. + instance = null; + return stats; + } + + /** + * Get the instance being built, failing if build() was already called. + * @return the instance to build/return + * @throws IllegalStateException if the builder has already been built. + */ + private DynamicIOStatistics activeInstance() { + checkState(instance != null, "Already built"); + return instance; + } + + /** + * Add a new evaluator to the counter statistics. + * @param key key of this statistic + * @param eval evaluator for the statistic; it is passed the key + * @return the builder. + */ + public DynamicIOStatisticsBuilder withLongFunctionCounter(String key, + ToLongFunction eval) { + activeInstance().addCounterFunction(key, eval::applyAsLong); + return this; + } + + /** + * Add a counter statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic long counter + * @return the builder. + */ + public DynamicIOStatisticsBuilder withAtomicLongCounter(String key, + AtomicLong source) { + withLongFunctionCounter(key, s -> source.get()); + return this; + } + + /** + * Add a counter statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic int counter + * @return the builder.
+ */ + public DynamicIOStatisticsBuilder withAtomicIntegerCounter(String key, + AtomicInteger source) { + withLongFunctionCounter(key, s -> source.get()); + return this; + } + + /** + * Build a dynamic counter statistic from a + * {@link MutableCounterLong}. + * @param key key of this statistic + * @param source mutable long counter + * @return the builder. + */ + public DynamicIOStatisticsBuilder withMutableCounter(String key, + MutableCounterLong source) { + withLongFunctionCounter(key, s -> source.value()); + return this; + } + + /** + * Add a new evaluator to the gauge statistics. + * @param key key of this statistic + * @param eval evaluator for the statistic; it is passed the key + * @return the builder. + */ + public DynamicIOStatisticsBuilder withLongFunctionGauge(String key, + ToLongFunction eval) { + activeInstance().addGaugeFunction(key, eval::applyAsLong); + return this; + } + + /** + * Add a gauge statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic long gauge + * @return the builder. + */ + public DynamicIOStatisticsBuilder withAtomicLongGauge(String key, + AtomicLong source) { + withLongFunctionGauge(key, s -> source.get()); + return this; + } + + /** + * Add a gauge statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic int gauge + * @return the builder. + */ + public DynamicIOStatisticsBuilder withAtomicIntegerGauge(String key, + AtomicInteger source) { + withLongFunctionGauge(key, s -> source.get()); + return this; + } + + /** + * Add a new evaluator to the minimum statistics. + * @param key key of this statistic + * @param eval evaluator for the statistic; it is passed the key + * @return the builder.
+ */ + public DynamicIOStatisticsBuilder withLongFunctionMinimum(String key, + ToLongFunction eval) { + activeInstance().addMinimumFunction(key, eval::applyAsLong); + return this; + } + + /** + * Add a minimum statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic long minimum + * @return the builder. + */ + public DynamicIOStatisticsBuilder withAtomicLongMinimum(String key, + AtomicLong source) { + withLongFunctionMinimum(key, s -> source.get()); + return this; + } + + /** + * Add a minimum statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic int minimum + * @return the builder. + */ + public DynamicIOStatisticsBuilder withAtomicIntegerMinimum(String key, + AtomicInteger source) { + withLongFunctionMinimum(key, s -> source.get()); + return this; + } + + + /** + * Add a new evaluator to the maximum statistics. + * @param key key of this statistic + * @param eval evaluator for the statistic; it is passed the key + * @return the builder. + */ + public DynamicIOStatisticsBuilder withLongFunctionMaximum(String key, + ToLongFunction eval) { + activeInstance().addMaximumFunction(key, eval::applyAsLong); + return this; + } + + /** + * Add a maximum statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic long maximum + * @return the builder. + */ + public DynamicIOStatisticsBuilder withAtomicLongMaximum(String key, + AtomicLong source) { + withLongFunctionMaximum(key, s -> source.get()); + return this; + } + + /** + * Add a maximum statistic to dynamically return the + * latest value of the source. + * @param key key of this statistic + * @param source atomic int maximum + * @return the builder.
+ */ + public DynamicIOStatisticsBuilder withAtomicIntegerMaximum(String key, + AtomicInteger source) { + withLongFunctionMaximum(key, s -> source.get()); + return this; + } + + /** + * Add a new evaluator to the mean statistics. + * + * This is a function which must return the mean and the sample count. + * @param key key of this statistic + * @param eval evaluator for the statistic; it is passed the key + * @return the builder. + */ + public DynamicIOStatisticsBuilder withMeanStatisticFunction(String key, + Function eval) { + activeInstance().addMeanStatisticFunction(key, eval); + return this; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatistics.java new file mode 100644 index 0000000000000..f474fc209771c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatistics.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.util.Map; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +import static java.util.Collections.emptyMap; + +/** + * An empty IOStatistics implementation, every map of which is empty, for + * classes which always want to return a non-null set of statistics. + */ +final class EmptyIOStatistics extends AbstractIOStatisticsImpl { + + /** + * The sole instance of this class. + */ + private static final EmptyIOStatistics INSTANCE = new EmptyIOStatistics(); + + private EmptyIOStatistics() { + } + + @Override + public Map counters() { + return emptyMap(); + } + + @Override + public Map gauges() { + return emptyMap(); + } + + @Override + public Map minimums() { + return emptyMap(); + } + + @Override + public Map maximums() { + return emptyMap(); + } + + @Override + public Map meanStatistics() { + return emptyMap(); + } + + /** + * Get the single instance of this class. + * @return a shared, empty instance. + */ + public static IOStatistics getInstance() { + return INSTANCE; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsContextImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsContextImpl.java new file mode 100644 index 0000000000000..b672f6639cb93 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsContextImpl.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; + +/** + * Empty IOStatistics context: every operation is a no-op, the aggregator + * and statistics are the shared empty instances, and each call to + * {@link #snapshot()} returns a new, empty snapshot. + */ +final class EmptyIOStatisticsContextImpl implements IOStatisticsContext { + + private static final IOStatisticsContext EMPTY_CONTEXT = new EmptyIOStatisticsContextImpl(); + + private EmptyIOStatisticsContextImpl() { + } + + /** + * Create a new empty snapshot. + * A new one is always created for isolation. + * + * @return a statistics snapshot + */ + @Override + public IOStatisticsSnapshot snapshot() { + return new IOStatisticsSnapshot(); + } + + @Override + public IOStatisticsAggregator getAggregator() { + return EmptyIOStatisticsStore.getInstance(); + } + + @Override + public IOStatistics getIOStatistics() { + return EmptyIOStatistics.getInstance(); + } + + @Override + public void reset() {} + + /** + * The ID is always 0. + * As the real context implementation counter starts at 1, + * we are guaranteed to have unique IDs even between them and + * the empty context. + * @return 0 + */ + @Override + public long getID() { + return 0; + } + + /** + * Get the single instance. + * @return the shared empty context.
+ */ + static IOStatisticsContext getInstance() { + return EMPTY_CONTEXT; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsStore.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsStore.java new file mode 100644 index 0000000000000..c970546e6dcb8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsStore.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import javax.annotation.Nullable; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +import static java.util.Collections.emptyMap; + +/** + * Empty IOStatisticsStore: updates are no-ops; lookups yield 0, null or empty. + */ +final class EmptyIOStatisticsStore implements IOStatisticsStore { + + /** + * The sole instance of this class.
+ */ + private static final EmptyIOStatisticsStore INSTANCE = + new EmptyIOStatisticsStore(); + /** + * Get the single instance of this class. + * @return a shared, empty instance. + */ + static IOStatisticsStore getInstance() { + return INSTANCE; + } + + private EmptyIOStatisticsStore() { + } + + @Override + public Map counters() { + return emptyMap(); + } + + @Override + public Map gauges() { + return emptyMap(); + } + + @Override + public Map minimums() { + return emptyMap(); + } + + @Override + public Map maximums() { + return emptyMap(); + } + + @Override + public Map meanStatistics() { + return emptyMap(); + } + + @Override + public boolean aggregate(@Nullable final IOStatistics statistics) { + return false; + } + + @Override + public long incrementCounter(final String key, final long value) { + return 0; + } + + @Override + public void setCounter(final String key, final long value) { + + } + + @Override + public void setGauge(final String key, final long value) { + + } + + @Override + public long incrementGauge(final String key, final long value) { + return 0; + } + + @Override + public void setMaximum(final String key, final long value) { + + } + + @Override + public long incrementMaximum(final String key, final long value) { + return 0; + } + + @Override + public void setMinimum(final String key, final long value) { + + } + + @Override + public long incrementMinimum(final String key, final long value) { + return 0; + } + + @Override + public void addMinimumSample(final String key, final long value) { + + } + + @Override + public void addMaximumSample(final String key, final long value) { + + } + + @Override + public void setMeanStatistic(final String key, final MeanStatistic value) { + + } + + @Override + public void addMeanStatisticSample(final String key, final long value) { + + } + + @Override + public void reset() { + + } + + @Override + public AtomicLong getCounterReference(final String key) { + return null; + } + + @Override + public AtomicLong
getMaximumReference(final String key) { + return null; + } + + @Override + public AtomicLong getMinimumReference(final String key) { + return null; + } + + @Override + public AtomicLong getGaugeReference(final String key) { + return null; + } + + @Override + public MeanStatistic getMeanStatistic(final String key) { + return null; + } + + @Override + public void addTimedOperation(final String prefix, + final long durationMillis) { + + } + + @Override + public void addTimedOperation(final String prefix, final Duration duration) { + + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EvaluatingStatisticsMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EvaluatingStatisticsMap.java new file mode 100644 index 0000000000000..e4680f2d81fa0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EvaluatingStatisticsMap.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * A read-only map whose values are created on demand by invoking the + * function registered for each key; map mutators are unsupported. + * @param type of entry value. + */ +final class EvaluatingStatisticsMap implements + Map { + + /** + * Functions to invoke when evaluating keys. + */ + private final Map> evaluators + = new ConcurrentHashMap<>(); + + /** + * Function to use when copying map values. + */ + private final Function copyFn; + + /** + * Construct with the copy function being simple passthrough. + */ + EvaluatingStatisticsMap() { + this(IOStatisticsBinding::passthroughFn); + } + + /** + * Construct with the copy function being that supplied in. + * @param copyFn copy function. + */ + EvaluatingStatisticsMap(final Function copyFn) { + this.copyFn = copyFn; + } + + /** + * add a mapping of a key to a function. + * @param key the key + * @param eval the evaluator + */ + void addFunction(String key, Function eval) { + evaluators.put(key, eval); + } + + @Override + public int size() { + return evaluators.size(); + } + + @Override + public boolean isEmpty() { + return evaluators.isEmpty(); + } + + @Override + public boolean containsKey(final Object key) { + return evaluators.containsKey(key); + } + + @Override + public boolean containsValue(final Object value) { + throw new UnsupportedOperationException(); + } + + @Override + public E get(final Object key) { + Function fn = evaluators.get(key); + return fn != null + ? 
fn.apply((String) key) + : null; + } + + @Override + public E put(final String key, final E value) { + throw new UnsupportedOperationException(); + } + + @Override + public E remove(final Object key) { + throw new UnsupportedOperationException(); + } + + @Override + public void putAll(final Map m) { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() { + throw new UnsupportedOperationException(); + } + + @Override + public Set keySet() { + return evaluators.keySet(); + } + + /** + * Evaluate all the entries and provide a list of the results. + * + * This is not a snapshot, so if the evaluators actually return + * references to mutable objects (e.g. a MeanStatistic instance) + * then that value may still change. + * @return the current list of evaluated results. + */ + @Override + public Collection values() { + Set>> evalEntries = + evaluators.entrySet(); + return evalEntries.parallelStream().map((e) -> + e.getValue().apply(e.getKey())) + .collect(Collectors.toList()); + } + + /** + * Take a snapshot. + * @return a map snapshot. + */ + public Map snapshot() { + return IOStatisticsBinding.snapshotMap(this, copyFn); + } + + /** + * Creating the entry set forces an evaluation of the functions. + * + * This is not a snapshot, so if the evaluators actually return + * references to mutable objects (e.g. a MeanStatistic instance) + * then that value may still change. + * + * The evaluation may be parallelized. + * @return an evaluated set of values + */ + @Override + public synchronized Set> entrySet() { + Set>> evalEntries = + evaluators.entrySet(); + Set> r = evalEntries.parallelStream().map((e) -> + new EntryImpl<>(e.getKey(), e.getValue().apply(e.getKey()))) + .collect(Collectors.toSet()); + return r; + } + + /** + * Simple entry; setValue() returns the old value, as Map.Entry requires.
+ * @param entry type + */ + private static final class EntryImpl implements Entry { + + private String key; + private E value; + + private EntryImpl(final String key, final E value) { + this.key = key; + this.value = value; + } + + @Override + public String getKey() { + return key; + } + + @Override + public E getValue() { + return value; + } + + @Override + public E setValue(final E val) { + final E previous = this.value; + this.value = val; + return previous; + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/ForwardingIOStatisticsStore.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/ForwardingIOStatisticsStore.java new file mode 100644 index 0000000000000..dc6546ae17323 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/ForwardingIOStatisticsStore.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import javax.annotation.Nullable; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +/** + * An IOStatisticsStore which forwards every call to a wrapped store, + * always obtained through {@link #getInnerStatistics()}. + * It is designed to assist in subclassing of selective methods, such + * as those to increment counters, get durations etc. + */ +public class ForwardingIOStatisticsStore implements IOStatisticsStore { + + private final IOStatisticsStore innerStatistics; + + public ForwardingIOStatisticsStore( + final IOStatisticsStore innerStatistics) { + this.innerStatistics = innerStatistics; + } + + protected IOStatisticsStore getInnerStatistics() { + return innerStatistics; + } + + @Override + public Map counters() { + return getInnerStatistics().counters(); + } + + @Override + public Map gauges() { + return getInnerStatistics().gauges(); + } + + @Override + public Map minimums() { + return getInnerStatistics().minimums(); + } + + @Override + public Map maximums() { + return getInnerStatistics().maximums(); + } + + @Override + public Map meanStatistics() { + return getInnerStatistics().meanStatistics(); + } + + @Override + public boolean aggregate(@Nullable final IOStatistics statistics) { + return getInnerStatistics().aggregate(statistics); + } + + @Override + public long incrementCounter(final String key, final long value) { + return getInnerStatistics().incrementCounter(key, value); + } + + @Override + public void setCounter(final String key, final long value) { + getInnerStatistics().setCounter(key, value); + } + + @Override + public void setGauge(final String key, final long value) { + getInnerStatistics().setGauge(key, value); + } + + @Override + public long incrementGauge(final String key, final long value) { + return getInnerStatistics().incrementGauge(key, 
value); + } + + @Override + public void setMaximum(final String key, final long value) { + getInnerStatistics().setMaximum(key, value); + } + + @Override + public long incrementMaximum(final String key, final long value) { + return getInnerStatistics().incrementMaximum(key, value); + } + + @Override + public void setMinimum(final String key, final long value) { + getInnerStatistics().setMinimum(key, value); + + } + + @Override + public long incrementMinimum(final String key, final long value) { + return getInnerStatistics().incrementMinimum(key, value); + + } + + @Override + public void addMinimumSample(final String key, final long value) { + getInnerStatistics().addMinimumSample(key, value); + + } + + @Override + public void addMaximumSample(final String key, final long value) { + getInnerStatistics().addMaximumSample(key, value); + } + + @Override + public void setMeanStatistic(final String key, final MeanStatistic value) { + getInnerStatistics().setMeanStatistic(key, value); + + } + + @Override + public void addMeanStatisticSample(final String key, final long value) { + getInnerStatistics().addMeanStatisticSample(key, value); + + } + + @Override + public void reset() { + getInnerStatistics().reset(); + } + + @Override + public AtomicLong getCounterReference(final String key) { + return getInnerStatistics().getCounterReference(key); + } + + @Override + public AtomicLong getMaximumReference(final String key) { + return getInnerStatistics().getMaximumReference(key); + } + + @Override + public AtomicLong getMinimumReference(final String key) { + return getInnerStatistics().getMinimumReference(key); + } + + @Override + public AtomicLong getGaugeReference(final String key) { + return getInnerStatistics().getGaugeReference(key); + } + + @Override + public MeanStatistic getMeanStatistic(final String key) { + return getInnerStatistics().getMeanStatistic(key); + } + + @Override + public void addTimedOperation(final String prefix, + final long durationMillis) { + 
getInnerStatistics().addTimedOperation(prefix, durationMillis); + + } + + @Override + public void addTimedOperation(final String prefix, + final Duration duration) { + getInnerStatistics().addTimedOperation(prefix, duration); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsBinding.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsBinding.java new file mode 100644 index 0000000000000..8c53764aa758c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsBinding.java @@ -0,0 +1,705 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.Serializable; +import java.time.Duration; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.Supplier; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.fs.StorageStatistics; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.MeanStatistic; +import org.apache.hadoop.util.functional.CallableRaisingIOE; +import org.apache.hadoop.util.functional.ConsumerRaisingIOE; +import org.apache.hadoop.util.functional.FunctionRaisingIOE; +import org.apache.hadoop.util.functional.InvocationRaisingIOE; + +import static org.apache.hadoop.fs.statistics.IOStatistics.MIN_UNSET_VALUE; +import static org.apache.hadoop.fs.statistics.impl.StubDurationTracker.STUB_DURATION_TRACKER; + +/** + * Support for implementing IOStatistics interfaces. + */ +public final class IOStatisticsBinding { + + /** Pattern used for each entry. */ + public static final String ENTRY_PATTERN = "(%s=%s)"; + + /** String to return when a source is null. */ + @VisibleForTesting + public static final String NULL_SOURCE = "()"; + + private IOStatisticsBinding() { + } + + /** + * Create IOStatistics from a storage statistics instance. + * + * This will be updated as the storage statistics change. + * @param storageStatistics source data. + * @return an IO statistics source. 
+ */ + public static IOStatistics fromStorageStatistics( + StorageStatistics storageStatistics) { + DynamicIOStatisticsBuilder builder = dynamicIOStatistics(); + /* + * Register one counter per long statistic listed at call time; + * each counter looks its value up in storageStatistics by key. + */ + Iterator it = storageStatistics + .getLongStatistics(); + while (it.hasNext()) { + StorageStatistics.LongStatistic next = it.next(); + builder.withLongFunctionCounter(next.getName(), + k -> storageStatistics.getLong(k)); + } + return builder.build(); + } + + /** + * Create a builder for dynamic IO Statistics. + * @return a builder to be completed. + */ + public static DynamicIOStatisticsBuilder dynamicIOStatistics() { + return new DynamicIOStatisticsBuilder(); + } + + /** + * Get the shared instance of the immutable empty statistics + * object. + * @return an empty statistics object. + */ + public static IOStatistics emptyStatistics() { + return EmptyIOStatistics.getInstance(); + } + + /** + * Get the shared instance of the immutable empty statistics + * store. + * @return an empty statistics store. + */ + public static IOStatisticsStore emptyStatisticsStore() { + return EmptyIOStatisticsStore.getInstance(); + } + + /** + * Take an IOStatistics instance and wrap it in a source. + * @param statistics statistics. + * @return a source which will return the values + */ + public static IOStatisticsSource wrap(IOStatistics statistics) { + return new SourceWrappedStatistics(statistics); + } + + /** + * Create a builder for an {@link IOStatisticsStore}. + * + * @return a builder instance. + */ + public static IOStatisticsStoreBuilder iostatisticsStore() { + return new IOStatisticsStoreBuilderImpl(); + } + + /** + * Convert an entry to the string format used in logging. + * The entry's key and value are formatted with + * {@link #ENTRY_PATTERN}. + * + * @param entry entry to evaluate + * @param entry type + * @return formatted string + */ + public static String entryToString( + final Map.Entry entry) { + return entryToString(entry.getKey(), entry.getValue()); + } + + /** + * Convert entry values to the string format used in logging. + * + * @param type of values.
+ * @param name statistic name + * @param value stat value + * @return formatted string + */ + public static String entryToString( + final String name, final E value) { + return String.format( + ENTRY_PATTERN, + name, + value); + } + + /** + * Copy into the dest map all the source entries. + * The destination is cleared first. + * Each value is passed through the copy function before being stored. + * @param entry type + * @param dest destination of the copy + * @param source source + * @param copyFn function to copy entries + * @return the destination. + */ + private static Map copyMap( + Map dest, + Map source, + Function copyFn) { + // we have to clone the values so that they aren't + // bound to the original values + dest.clear(); + source.entrySet() + .forEach(entry -> + dest.put(entry.getKey(), copyFn.apply(entry.getValue()))); + return dest; + } + + /** + * A passthrough copy operation suitable for immutable + * types, including numbers. + * + * @param type of values. + * @param src source object + * @return the source object + */ + public static E passthroughFn(E src) { + return src; + } + + /** + * Take a snapshot of a supplied map, where the copy option simply + * uses the existing value. + * + * For this to be safe, the map must refer to immutable objects. + * @param source source map + * @param type of values. + * @return a new map referencing the same values. + */ + public static Map snapshotMap( + Map source) { + return snapshotMap(source, + IOStatisticsBinding::passthroughFn); + } + + /** + * Take a snapshot of a supplied map, using the copy function + * to replicate the source values. + * @param source source map + * @param copyFn function to copy the value + * @param type of values. + * @return a new concurrent hash map holding, for every source key, + * the value returned by the copy function. + */ + public static + ConcurrentHashMap snapshotMap( + Map source, + Function copyFn) { + ConcurrentHashMap dest = new ConcurrentHashMap<>(); + copyMap(dest, source, copyFn); + return dest; + } + + /** + * Aggregate two maps so that the destination holds the + * combined values of both.
+ * @param type of values + * @param dest destination map. + * @param other other map + * @param aggregateFn function to aggregate the values. + * @param copyFn function to copy the value + */ + public static void aggregateMaps( + Map dest, + Map other, + BiFunction aggregateFn, + Function copyFn) { + // scan through the other hand map; copy + // any values not in the left map, + // aggregate those for which there is already + // an entry + other.entrySet().forEach(entry -> { + String key = entry.getKey(); + E rVal = entry.getValue(); + E lVal = dest.get(key); + if (lVal == null) { + dest.put(key, copyFn.apply(rVal)); + } else { + dest.put(key, aggregateFn.apply(lVal, rVal)); + } + }); + } + + /** + * Aggregate two counters. + * Negative values are treated as zero before the addition. + * @param l left value + * @param r right value + * @return the aggregate value + */ + public static Long aggregateCounters(Long l, Long r) { + return Math.max(l, 0) + Math.max(r, 0); + } + + /** + * Add two gauges. + * @param l left value + * @param r right value + * @return aggregate value + */ + public static Long aggregateGauges(Long l, Long r) { + return l + r; + } + + + /** + * Aggregate two minimum values. + * If either value equals MIN_UNSET_VALUE it is considered unset + * and the other value is returned as is. + * @param l left + * @param r right + * @return the new minimum. + */ + public static Long aggregateMinimums(Long l, Long r) { + if (l == MIN_UNSET_VALUE) { + return r; + } else if (r == MIN_UNSET_VALUE) { + return l; + } else { + return Math.min(l, r); + } + } + + /** + * Aggregate two maximum values. + * If either value equals MIN_UNSET_VALUE it is considered unset + * and the other value is returned as is. + * NOTE(review): the unset check uses the sentinel for minimums; + * confirm that maximums share the same unset value. + * @param l left + * @param r right + * @return the new maximum. + */ + public static Long aggregateMaximums(Long l, Long r) { + if (l == MIN_UNSET_VALUE) { + return r; + } else if (r == MIN_UNSET_VALUE) { + return l; + } else { + return Math.max(l, r); + } + } + + /** + * Aggregate the mean statistics. + * This returns a new instance.
+ * @param l left value + * @param r right value + * @return aggregate value + */ + public static MeanStatistic aggregateMeanStatistics( + MeanStatistic l, MeanStatistic r) { + MeanStatistic res = l.copy(); + res.add(r); + return res; + } + + /** + * Update a maximum value tracked in an atomic long. + * The value is only changed if the sample is greater than + * the current value. + * This is thread safe -it uses compareAndSet to ensure + * that Thread T1 whose sample is greater than the current + * value never overwrites an update from thread T2 whose + * sample was also higher -and which completed first. + * @param dest destination for all changes. + * @param sample sample to update. + */ + public static void maybeUpdateMaximum(AtomicLong dest, long sample) { + boolean done; + do { + long current = dest.get(); + if (sample > current) { + done = dest.compareAndSet(current, sample); + } else { + done = true; + } + } while (!done); + } + + /** + * Update a minimum value tracked in an atomic long. + * The value is changed if the current value is MIN_UNSET_VALUE + * or if the sample is less than the current value. + * This is thread safe -it uses compareAndSet to ensure + * that Thread T1 whose sample is less than the current + * value never overwrites an update from thread T2 whose + * sample was also lower -and which completed first. + * @param dest destination for all changes. + * @param sample sample to update. + */ + public static void maybeUpdateMinimum(AtomicLong dest, long sample) { + boolean done; + do { + long current = dest.get(); + if (current == MIN_UNSET_VALUE || sample < current) { + done = dest.compareAndSet(current, sample); + } else { + done = true; + } + } while (!done); + } + + /** + * Given an IOException raising function/lambda expression, + * return a new one which wraps the inner and tracks + * the duration of the operation, including whether + * it passes/fails. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param inputFn input function + * @param type of argument to the input function. + * @param return type. + * @return a new function which tracks duration and failure.
+ */ + public static FunctionRaisingIOE trackFunctionDuration( + @Nullable DurationTrackerFactory factory, + String statistic, + FunctionRaisingIOE inputFn) { + return (x) -> { + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. + DurationTracker tracker = createTracker(factory, statistic); + try { + // exec the input function and return its value + return inputFn.apply(x); + } catch (IOException | RuntimeException e) { + // input function failed: note it + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. + // this is called after the catch() call would have + // set the failed flag. + tracker.close(); + } + }; + } + + /** + * Given a java function/lambda expression, + * return a new one which wraps the inner and tracks + * the duration of the operation, including whether + * it passes/fails. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param inputFn input function + * @param type of argument to the input function. + * @param return type. + * @return a new function which tracks duration and failure. + */ + public static Function trackJavaFunctionDuration( + @Nullable DurationTrackerFactory factory, + String statistic, + Function inputFn) { + return (x) -> { + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. + DurationTracker tracker = createTracker(factory, statistic); + try { + // exec the input function and return its value + return inputFn.apply(x); + } catch (RuntimeException e) { + // input function failed: note it + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. + // this is called after the catch() call would have + // set the failed flag. + tracker.close(); + } + }; + } + + /** + * Given an IOException raising callable/lambda expression, + * execute it and update the relevant statistic. 
+ * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @param return type. + * @return the result of the operation. + * @throws IOException raised on errors performing I/O. + */ + public static B trackDuration( + DurationTrackerFactory factory, + String statistic, + CallableRaisingIOE input) throws IOException { + return trackDurationOfOperation(factory, statistic, input).apply(); + } + + /** + * Given an IOException raising callable/lambda expression, + * execute it and update the relevant statistic. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @throws IOException IO failure. + */ + public static void trackDurationOfInvocation( + DurationTrackerFactory factory, + String statistic, + InvocationRaisingIOE input) throws IOException { + + measureDurationOfInvocation(factory, statistic, input); + } + + /** + * Given an IOException raising callable/lambda expression, + * execute it and update the relevant statistic, + * returning the measured duration. + * + * {@link #trackDurationOfInvocation(DurationTrackerFactory, String, InvocationRaisingIOE)} + * with the duration returned for logging etc.; added as a new + * method to avoid linking problems with any code calling the existing + * method. + * + * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @return the duration of the operation, as measured by the duration tracker. + * @throws IOException IO failure. + */ + public static Duration measureDurationOfInvocation( + DurationTrackerFactory factory, + String statistic, + InvocationRaisingIOE input) throws IOException { + + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. 
+ DurationTracker tracker = createTracker(factory, statistic); + try { + // exec the input function and return its value + input.apply(); + } catch (IOException | RuntimeException e) { + // input function failed: note it + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. + // this is called after the catch() call would have + // set the failed flag. + tracker.close(); + } + return tracker.asDuration(); + } + + /** + * Given an IOException raising callable/lambda expression, + * return a new one which wraps the inner and tracks + * the duration of the operation, including whether + * it passes/fails. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @param return type. + * @return a new callable which tracks duration and failure. + */ + public static CallableRaisingIOE trackDurationOfOperation( + @Nullable DurationTrackerFactory factory, + String statistic, + CallableRaisingIOE input) { + return () -> { + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. + DurationTracker tracker = createTracker(factory, statistic); + return invokeTrackingDuration(tracker, input); + }; + } + + /** + * Given an IOException raising callable/lambda expression, + * execute it, updating the tracker on success/failure. + * @param tracker duration tracker. + * @param input input callable. + * @param return type. + * @return the result of the invocation + * @throws IOException on failure. + */ + public static B invokeTrackingDuration( + final DurationTracker tracker, + final CallableRaisingIOE input) + throws IOException { + try { + // exec the input function and return its value + return input.apply(); + } catch (IOException | RuntimeException e) { + // input function failed: note it + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. 
+ // this is called after the catch() call would have + // set the failed flag. + tracker.close(); + } + } + + /** + * Given an IOException raising Consumer, + * return a new one which wraps the inner and tracks + * the duration of the operation, including whether + * it passes/fails. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @param return type. + * @return a new consumer which tracks duration and failure. + */ + public static ConsumerRaisingIOE trackDurationConsumer( + @Nullable DurationTrackerFactory factory, + String statistic, + ConsumerRaisingIOE input) { + return (B t) -> { + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. + DurationTracker tracker = createTracker(factory, statistic); + try { + // exec the input function and return its value + input.accept(t); + } catch (IOException | RuntimeException e) { + // input function failed: note it + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. + // this is called after the catch() call would have + // set the failed flag. + tracker.close(); + } + }; + } + + /** + * Given a callable/lambda expression, + * return a new one which wraps the inner and tracks + * the duration of the operation, including whether + * it passes/fails. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @param return type. + * @return a new callable which tracks duration and failure. + */ + public static Callable trackDurationOfCallable( + @Nullable DurationTrackerFactory factory, + String statistic, + Callable input) { + return () -> { + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. 
+ DurationTracker tracker = createTracker(factory, statistic); + try { + // exec the input function and return its value + return input.call(); + } catch (Exception e) { + // input function failed: note it. + // Callable.call() may raise checked exceptions as well as + // unchecked ones; all of them are failures of the operation. + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. + // this is called after any catch() call will have + // set the failed flag. + tracker.close(); + } + }; + } + + /** + * Given a Java supplier, evaluate it while + * tracking the duration of the operation and success/failure. + * @param factory factory of duration trackers + * @param statistic statistic key + * @param input input callable. + * @param return type. + * @return the output of the supplier. + */ + public static B trackDurationOfSupplier( + @Nullable DurationTrackerFactory factory, + String statistic, + Supplier input) { + // create the tracker outside try-with-resources so + // that failures can be set in the catcher. + DurationTracker tracker = createTracker(factory, statistic); + try { + // exec the input function and return its value + return input.get(); + } catch (RuntimeException e) { + // input function failed: note it + tracker.failed(); + // and rethrow + throw e; + } finally { + // update the tracker. + // this is called after any catch() call will have + // set the failed flag. + tracker.close(); + } + } + + /** + * Create the tracker. If the factory is null, a stub + * tracker is returned. + * @param factory tracker factory + * @param statistic statistic to track + * @return a duration tracker. + */ + public static DurationTracker createTracker( + @Nullable final DurationTrackerFactory factory, + final String statistic) { + return factory != null + ? factory.trackDuration(statistic) + : STUB_DURATION_TRACKER; + } + + /** + * Create a DurationTrackerFactory which aggregates the tracking + * of two other factories.
+ * @param first first tracker factory + * @param second second tracker factory + * @return a factory + */ + public static DurationTrackerFactory pairedTrackerFactory( + final DurationTrackerFactory first, + final DurationTrackerFactory second) { + return new PairedDurationTrackerFactory(first, second); + } + + /** + * Publish the IOStatistics as a set of storage statistics. + * This is dynamic. + * @param name storage statistics name. + * @param scheme FS scheme; may be null. + * @param source IOStatistics source. + * @return a dynamic storage statistics object. + */ + public static StorageStatistics publishAsStorageStatistics( + String name, String scheme, IOStatistics source) { + return new StorageStatisticsFromIOStatistics(name, scheme, source); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextImpl.java new file mode 100644 index 0000000000000..97a85281c4fb8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextImpl.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; + +/** + * Implementing the IOStatisticsContext. + * + * A Context defined for IOStatistics collection per thread which captures + * each worker thread's work in FS streams and stores it in the form of + * IOStatisticsSnapshot. + * + * For the current thread the IOStatisticsSnapshot can be used as a way to + * move the IOStatistics data between applications using the Serializable + * nature of the class. + */ +public final class IOStatisticsContextImpl implements IOStatisticsContext { + private static final Logger LOG = + LoggerFactory.getLogger(IOStatisticsContextImpl.class); + + /** + * Thread ID. + */ + private final long threadId; + + /** + * Unique ID. + */ + private final long id; + + /** + * IOStatistics to aggregate. + */ + private final IOStatisticsSnapshot ioStatistics = new IOStatisticsSnapshot(); + + /** + * Constructor. + * @param threadId thread ID + * @param id instance ID. + */ + public IOStatisticsContextImpl(final long threadId, final long id) { + this.threadId = threadId; + this.id = id; + } + + @Override + public String toString() { + return "IOStatisticsContextImpl{" + + "id=" + id + + ", threadId=" + threadId + + ", ioStatistics=" + ioStatistics + + '}'; + } + + /** + * Get the IOStatisticsAggregator of the context. + * @return the instance of IOStatisticsAggregator for this context. + */ + @Override + public IOStatisticsAggregator getAggregator() { + return ioStatistics; + } + + /** + * Returns a snapshot of the current thread's IOStatistics. 
+ * + * @return IOStatisticsSnapshot of the context. + */ + @Override + public IOStatisticsSnapshot snapshot() { + LOG.debug("Taking snapshot of IOStatisticsContext id {}", id); + return new IOStatisticsSnapshot(ioStatistics); + } + + /** + * Reset the context by clearing the statistics it has aggregated. + */ + @Override + public void reset() { + LOG.debug("clearing IOStatisticsContext id {}", id); + ioStatistics.clear(); + } + + /** + * Get the statistics of this context. + * This is the live instance which is also returned by + * {@link #getAggregator()}, not a copy. + * @return the IOStatistics of the context. + */ + @Override + public IOStatistics getIOStatistics() { + return ioStatistics; + } + + /** + * ID of this context. + * @return ID. + */ + @Override + public long getID() { + return id; + } + + /** + * Get the thread ID. + * @return thread ID. + */ + public long getThreadID() { + return threadId; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextIntegration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextIntegration.java new file mode 100644 index 0000000000000..71fdb1f17b1f5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextIntegration.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.lang.ref.WeakReference; +import java.util.concurrent.atomic.AtomicLong; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.impl.WeakReferenceThreadMap; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; + +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT; + +/** + * A Utility class for IOStatisticsContext, which helps in creating and + * getting the current active context. Static methods in this class allows to + * get the current context to start aggregating the IOStatistics. + * + * Static initializer is used to work out if the feature to collect + * thread-level IOStatistics is enabled or not and the corresponding + * implementation class is called for it. + * + * Weak Reference thread map to be used to keep track of different context's + * to avoid long-lived memory leakages as these references would be cleaned + * up at GC. + */ +public final class IOStatisticsContextIntegration { + + private static final Logger LOG = + LoggerFactory.getLogger(IOStatisticsContextIntegration.class); + + /** + * Is thread-level IO Statistics enabled? + */ + private static boolean isThreadIOStatsEnabled; + + /** + * ID for next instance to create. + */ + public static final AtomicLong INSTANCE_ID = new AtomicLong(1); + + /** + * Active IOStatistics Context containing different worker thread's + * statistics. Weak Reference so that it gets cleaned up during GC and we + * avoid any memory leak issues due to long lived references. 
+ */ + private static final WeakReferenceThreadMap + ACTIVE_IOSTATS_CONTEXT = + new WeakReferenceThreadMap<>( + IOStatisticsContextIntegration::createNewInstance, + IOStatisticsContextIntegration::referenceLostContext + ); + + static { + // Work out if the current context has thread level IOStatistics enabled. + final Configuration configuration = new Configuration(); + isThreadIOStatsEnabled = + configuration.getBoolean(IOSTATISTICS_THREAD_LEVEL_ENABLED, + IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT); + } + + /** + * Static probe to check if the thread-level IO statistics enabled. + * + * @return if the thread-level IO statistics enabled. + */ + public static boolean isIOStatisticsThreadLevelEnabled() { + return isThreadIOStatsEnabled; + } + + /** + * Private constructor for a utility class to be used in IOStatisticsContext. + */ + private IOStatisticsContextIntegration() {} + + /** + * Creating a new IOStatisticsContext instance for a FS to be used. + * @param key Thread ID that represents which thread the context belongs to. + * @return an instance of IOStatisticsContext. + */ + private static IOStatisticsContext createNewInstance(Long key) { + IOStatisticsContextImpl instance = + new IOStatisticsContextImpl(key, INSTANCE_ID.getAndIncrement()); + LOG.debug("Created instance {}", instance); + return instance; + } + + /** + * In case of reference loss for IOStatisticsContext. + * @param key ThreadID. + */ + private static void referenceLostContext(Long key) { + LOG.debug("Reference lost for threadID for the context: {}", key); + } + + /** + * Get the current thread's IOStatisticsContext instance. If no instance is + * present for this thread ID, create one using the factory. + * @return instance of IOStatisticsContext. + */ + public static IOStatisticsContext getCurrentIOStatisticsContext() { + return isThreadIOStatsEnabled + ? 
ACTIVE_IOSTATS_CONTEXT.getForCurrentThread() + : EmptyIOStatisticsContextImpl.getInstance(); + } + + /** + * Set the IOStatisticsContext for the current thread. + * @param statisticsContext IOStatistics context instance for the + * current thread. If null, the context is reset. + */ + public static void setThreadIOStatisticsContext( + IOStatisticsContext statisticsContext) { + if (isThreadIOStatsEnabled) { + if (statisticsContext == null) { + // new value is null, so remove it + ACTIVE_IOSTATS_CONTEXT.removeForCurrentThread(); + } else { + // the setter is efficient in that it does not create a new + // reference if the context is unchanged. + ACTIVE_IOSTATS_CONTEXT.setForCurrentThread(statisticsContext); + } + } + } + + /** + * Get thread ID specific IOStatistics values if + * statistics are enabled and the thread ID is in the map. + * @param testThreadId thread ID. + * @return IOStatisticsContext if found in the map. + */ + @VisibleForTesting + public static IOStatisticsContext getThreadSpecificIOStatisticsContext(long testThreadId) { + LOG.debug("IOStatsContext thread ID required: {}", testThreadId); + + if (!isThreadIOStatsEnabled) { + return null; + } + // lookup the weakRef IOStatisticsContext for the thread ID in the + // ThreadMap. + WeakReference ioStatisticsSnapshotWeakReference = + ACTIVE_IOSTATS_CONTEXT.lookup(testThreadId); + if (ioStatisticsSnapshotWeakReference != null) { + return ioStatisticsSnapshotWeakReference.get(); + } + return null; + } + + /** + * A method to enable IOStatisticsContext to override if set otherwise in + * the configurations for tests. 
+ */ + @VisibleForTesting + public static void enableIOStatisticsContext() { + if (!isThreadIOStatsEnabled) { + LOG.info("Enabling Thread IOStatistics.."); + isThreadIOStatsEnabled = true; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStore.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStore.java new file mode 100644 index 0000000000000..fed7c69ccd85d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStore.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.IOStatisticsSetters; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +/** + * Interface of an IOStatistics store intended for + * use in classes which track statistics for reporting. 
+ */ +public interface IOStatisticsStore extends IOStatistics, + IOStatisticsSetters, + IOStatisticsAggregator, + DurationTrackerFactory { + + /** + * Increment a counter by one. + * + * No-op if the counter is unknown. + * @param key statistics key + * @return the updated value or, if the counter is unknown: 0 + */ + default long incrementCounter(String key) { + return incrementCounter(key, 1); + } + + /** + * Increment a counter. + * + * No-op if the counter is unknown. + * If the value is negative, it is ignored. + * @param key statistics key + * @param value value to increment + * @return the updated value or, if the counter is unknown: 0 + */ + long incrementCounter(String key, long value); + + /** + * Increment a gauge. + *

    + * No-op if the gauge is unknown. + *

    + * @param key statistics key + * @param value value to increment + * @return new value or 0 if the key is unknown + */ + long incrementGauge(String key, long value); + + /** + * Increment a maximum. + *

    + * No-op if the maximum is unknown. + *

    + * @param key statistics key + * @param value value to increment + * @return new value or 0 if the key is unknown + */ + long incrementMaximum(String key, long value); + + /** + * Increment a minimum. + *

    + * No-op if the minimum is unknown. + *

    + * @param key statistics key + * @param value value to increment + * @return new value or 0 if the key is unknown + */ + long incrementMinimum(String key, long value); + + /** + * Add a minimum sample: if less than the current value, + * updates the value. + *

    + * No-op if the minimum is unknown. + *

    + * @param key statistics key + * @param value sample value + */ + void addMinimumSample(String key, long value); + + /** + * Add a maximum sample: if greater than the current value, + * updates the value. + *

    + * No-op if the key is unknown. + *

    + * @param key statistics key + * @param value sample value + */ + void addMaximumSample(String key, long value); + + /** + * Add a sample to the mean statistics. + *

    + * No-op if the key is unknown. + *

    + * @param key key + * @param value sample value. + */ + void addMeanStatisticSample(String key, long value); + + /** + * Reset all statistics. + * Unsynchronized. + */ + void reset(); + + /** + * Get a reference to the atomic instance providing the + * value for a specific counter. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + AtomicLong getCounterReference(String key); + + /** + * Get a reference to the atomic instance providing the + * value for a specific maximum. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + AtomicLong getMaximumReference(String key); + + /** + * Get a reference to the atomic instance providing the + * value for a specific minimum. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + AtomicLong getMinimumReference(String key); + + /** + * Get a reference to the atomic instance providing the + * value for a specific gauge. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + AtomicLong getGaugeReference(String key); + + /** + * Get a reference to the atomic instance providing the + * value for a specific meanStatistic. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + MeanStatistic getMeanStatistic(String key); + + /** + * Add a duration to the min/mean/max statistics, using the + * given prefix and adding a suffix for each specific value. 
+ * + * The update is not-atomic, even though each individual statistic + * is updated thread-safely. If two threads update the values + * simultaneously, at the end of each operation the state will + * be correct. It is only during the sequence that the statistics + * may be observably inconsistent. + * @param prefix statistic prefix + * @param durationMillis duration in milliseconds. + */ + void addTimedOperation(String prefix, long durationMillis); + + /** + * Add a duration to the min/mean/max statistics, using the + * given prefix and adding a suffix for each specific value; + * increment the counter whose name == prefix. + * + * If any of the statistics are not registered, that part of + * the sequence will be omitted -the rest will proceed. + * + * The update is not-atomic, even though each individual statistic + * is updated thread-safely. If two threads update the values + * simultaneously, at the end of each operation the state will + * be correct. It is only during the sequence that the statistics + * may be observably inconsistent. + * @param prefix statistic prefix + * @param duration duration + */ + void addTimedOperation(String prefix, Duration duration); + + /** + * Add a statistics sample as a min, max and mean and count. + * @param key key to add. + * @param count count.
+ */ + default void addSample(String key, long count) { + incrementCounter(key, count); + addMeanStatisticSample(key, count); + addMaximumSample(key, count); + addMinimumSample(key, count); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreBuilder.java new file mode 100644 index 0000000000000..f1272d53ebbb2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreBuilder.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +/** + * Builder of the {@link IOStatisticsStore} implementation. + */ +public interface IOStatisticsStoreBuilder { + + /** + * Declare a varargs list of counters to add. + * @param keys names of statistics. + * @return this builder. + */ + IOStatisticsStoreBuilder withCounters(String... keys); + + /** + * Declare a varargs list of gauges to add. + * @param keys names of statistics. + * @return this builder. + */ + IOStatisticsStoreBuilder withGauges(String... 
keys); + + /** + * Declare a varargs list of maximums to add. + * @param keys names of statistics. + * @return this builder. + */ + IOStatisticsStoreBuilder withMaximums(String... keys); + + /** + * Declare a varargs list of minimums to add. + * @param keys names of statistics. + * @return this builder. + */ + IOStatisticsStoreBuilder withMinimums(String... keys); + + /** + * Declare a varargs list of means to add. + * @param keys names of statistics. + * @return this builder. + */ + IOStatisticsStoreBuilder withMeanStatistics(String... keys); + + /** + * Add a statistic in the counter, min, max and mean maps for each + * declared statistic prefix. + * @param prefixes prefixes for the stats. + * @return this + */ + IOStatisticsStoreBuilder withDurationTracking( + String... prefixes); + + /** + * A value which is tracked with counter/min/max/mean. + * Similar to {@link #withDurationTracking(String...)} + * but without the failure option and with the same name + * across all categories. + * @param prefixes prefixes to add. + * @return the builder + */ + IOStatisticsStoreBuilder withSampleTracking( + String... prefixes); + + /** + * Build the collector. + * @return a new collector. + */ + IOStatisticsStore build(); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreBuilderImpl.java new file mode 100644 index 0000000000000..70d4f6951d3d2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreBuilderImpl.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_FAILURES; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MAX; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MEAN; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MIN; + +/** + * Builder for an IOStatistics store.. + */ +final class IOStatisticsStoreBuilderImpl implements + IOStatisticsStoreBuilder { + + private final List counters = new ArrayList<>(); + + private final List gauges = new ArrayList<>(); + + private final List minimums = new ArrayList<>(); + + private final List maximums = new ArrayList<>(); + + private final List meanStatistics = new ArrayList<>(); + + @Override + public IOStatisticsStoreBuilderImpl withCounters(final String... keys) { + counters.addAll(Arrays.asList(keys)); + return this; + } + + @Override + public IOStatisticsStoreBuilderImpl withGauges(final String... keys) { + gauges.addAll(Arrays.asList(keys)); + return this; + } + + @Override + public IOStatisticsStoreBuilderImpl withMaximums(final String... keys) { + maximums.addAll(Arrays.asList(keys)); + return this; + } + + @Override + public IOStatisticsStoreBuilderImpl withMinimums(final String... 
keys) { + minimums.addAll(Arrays.asList(keys)); + return this; + } + + @Override + public IOStatisticsStoreBuilderImpl withMeanStatistics( + final String... keys) { + meanStatistics.addAll(Arrays.asList(keys)); + return this; + } + + @Override + public IOStatisticsStoreBuilderImpl withDurationTracking( + final String... prefixes) { + for (String p : prefixes) { + withCounters(p, p + SUFFIX_FAILURES); + withMinimums( + p + SUFFIX_MIN, + p + SUFFIX_FAILURES + SUFFIX_MIN); + withMaximums( + p + SUFFIX_MAX, + p + SUFFIX_FAILURES + SUFFIX_MAX); + withMeanStatistics( + p + SUFFIX_MEAN, + p + SUFFIX_FAILURES + SUFFIX_MEAN); + } + return this; + } + + @Override + public IOStatisticsStoreBuilderImpl withSampleTracking( + final String... prefixes) { + for (String p : prefixes) { + withCounters(p); + withMinimums(p); + withMaximums(p); + withMeanStatistics(p); + } + return this; + } + + @Override + public IOStatisticsStore build() { + return new IOStatisticsStoreImpl(counters, gauges, minimums, + maximums, meanStatistics); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreImpl.java new file mode 100644 index 0000000000000..6db3820891969 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsStoreImpl.java @@ -0,0 +1,469 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import javax.annotation.Nullable; +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTracker; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MAX; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MEAN; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MIN; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.aggregateMaximums; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.aggregateMinimums; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.dynamicIOStatistics; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.maybeUpdateMaximum; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.maybeUpdateMinimum; + +/** + * Implementation of {@link IOStatisticsStore}. + *

    + * A ConcurrentHashMap of each set of statistics is created; + * the AtomicLong/MeanStatistic entries are fetched as required. + * When the statistics are updated, the referenced objects + * are updated rather than new values set in the map. + *

    + */ +final class IOStatisticsStoreImpl extends WrappedIOStatistics + implements IOStatisticsStore { + + /** + * Log changes at debug. + * Noisy, but occasionally useful. + */ + private static final Logger LOG = + LoggerFactory.getLogger(IOStatisticsStoreImpl.class); + + /** All the counters are atomic longs. */ + private final Map counterMap = new ConcurrentHashMap<>(); + + /** All the gauges are atomic longs. */ + private final Map gaugeMap = new ConcurrentHashMap<>(); + + /** All the minimum values are atomic longs. */ + private final Map minimumMap = new ConcurrentHashMap<>(); + + /** All the maximum values are atomic longs. */ + private final Map maximumMap = new ConcurrentHashMap<>(); + + /** + * The mean statistics. + * Relies on the MeanStatistic operations being synchronized. + */ + private final Map meanStatisticMap + = new ConcurrentHashMap<>(); + + /** + * Constructor invoked via the builder. + * @param counters keys to use for the counter statistics. + * @param gauges names of gauges + * @param minimums names of minimums + * @param maximums names of maximums + * @param meanStatistics names of mean statistics. 
+ */ + IOStatisticsStoreImpl( + final List counters, + final List gauges, + final List minimums, + final List maximums, + final List meanStatistics) { + // initially create the superclass with no wrapped mapping; + super(null); + + // now construct a dynamic statistics source mapping to + // the various counters, gauges etc dynamically created + // into maps + DynamicIOStatisticsBuilder builder = dynamicIOStatistics(); + if (counters != null) { + for (String key : counters) { + AtomicLong counter = new AtomicLong(); + counterMap.put(key, counter); + builder.withAtomicLongCounter(key, counter); + } + } + if (gauges != null) { + for (String key : gauges) { + AtomicLong gauge = new AtomicLong(); + gaugeMap.put(key, gauge); + builder.withAtomicLongGauge(key, gauge); + } + } + if (maximums != null) { + for (String key : maximums) { + AtomicLong maximum = new AtomicLong(MAX_UNSET_VALUE); + maximumMap.put(key, maximum); + builder.withAtomicLongMaximum(key, maximum); + } + } + if (minimums != null) { + for (String key : minimums) { + AtomicLong minimum = new AtomicLong(MIN_UNSET_VALUE); + minimumMap.put(key, minimum); + builder.withAtomicLongMinimum(key, minimum); + } + } + if (meanStatistics != null) { + for (String key : meanStatistics) { + meanStatisticMap.put(key, new MeanStatistic()); + builder.withMeanStatisticFunction(key, k -> meanStatisticMap.get(k)); + } + } + setWrapped(builder.build()); + } + + /** + * Set an atomic long to a value. + * @param aLong atomic long; may be null + * @param value value to set to + */ + private void setAtomicLong(final AtomicLong aLong, final long value) { + if (aLong != null) { + aLong.set(value); + } + } + + /** + * Increment an atomic long and return its value; + * null long is no-op returning 0.
+ * @param aLong atomic long; may be null + * @param increment amount to increment; negative for a decrement + * @return final value or 0 if the long is null + */ + private long incAtomicLong(final AtomicLong aLong, + final long increment) { + if (aLong != null) { + // optimization: zero is a get rather than addAndGet() + return increment != 0 + ? aLong.addAndGet(increment) + : aLong.get(); + } else { + return 0; + } + } + + @Override + public void setCounter(final String key, final long value) { + setAtomicLong(counterMap.get(key), value); + LOG.debug("Setting counter {} to {}", key, value); + } + + @Override + public long incrementCounter(final String key, final long value) { + AtomicLong counter = counterMap.get(key); + if (counter == null) { + LOG.debug("Ignoring counter increment for unknown counter {}", + key); + return 0; + } + if (value < 0) { + LOG.debug("Ignoring negative increment value {} for counter {}", + value, key); + // returns old value + return counter.get(); + } else { + long l = incAtomicLong(counter, value); + LOG.trace("Incrementing counter {} by {} with final value {}", + key, value, l); + return l; + } + } + + @Override + public void setMaximum(final String key, final long value) { + setAtomicLong(maximumMap.get(key), value); + } + + @Override + public long incrementMaximum(final String key, final long value) { + return incAtomicLong(maximumMap.get(key), value); + } + + @Override + public void setMinimum(final String key, final long value) { + setAtomicLong(minimumMap.get(key), value); + } + + @Override + public long incrementMinimum(final String key, final long value) { + return incAtomicLong(minimumMap.get(key), value); + } + + @Override + public void addMinimumSample(final String key, final long value) { + AtomicLong min = minimumMap.get(key); + if (min != null) { + maybeUpdateMinimum(min, value); + } + } + + @Override + public void addMaximumSample(final String key, final long value) { + AtomicLong max = maximumMap.get(key); + if (max != 
null) { + maybeUpdateMaximum(max, value); + } + } + + @Override + public void setGauge(final String key, final long value) { + setAtomicLong(gaugeMap.get(key), value); + } + + @Override + public long incrementGauge(final String key, final long value) { + return incAtomicLong(gaugeMap.get(key), value); + } + + @Override + public void setMeanStatistic(final String key, final MeanStatistic value) { + final MeanStatistic ref = meanStatisticMap.get(key); + if (ref != null) { + ref.set(value); + } + } + + @Override + public void addMeanStatisticSample(final String key, final long value) { + final MeanStatistic ref = meanStatisticMap.get(key); + if (ref != null) { + ref.addSample(value); + } + } + + /** + * Reset all statistics. + */ + @Override + public synchronized void reset() { + counterMap.values().forEach(a -> a.set(0)); + gaugeMap.values().forEach(a -> a.set(0)); + minimumMap.values().forEach(a -> a.set(0)); + maximumMap.values().forEach(a -> a.set(0)); + meanStatisticMap.values().forEach(a -> a.clear()); + } + + /** + * Aggregate those statistics which the store is tracking; + * ignore the rest. + * + * @param source statistics; may be null + * @return true if a statistics reference was supplied/aggregated. + */ + @Override + public synchronized boolean aggregate( + @Nullable final IOStatistics source) { + + if (source == null) { + return false; + } + // counters: addition + Map sourceCounters = source.counters(); + counterMap.entrySet(). 
+ forEach(e -> { + Long sourceValue = lookupQuietly(sourceCounters, e.getKey()); + if (sourceValue != null) { + e.getValue().addAndGet(sourceValue); + } + }); + // gauge: add positive values only + Map sourceGauges = source.gauges(); + gaugeMap.entrySet().forEach(e -> { + Long sourceGauge = lookupQuietly(sourceGauges, e.getKey()); + if (sourceGauge != null && sourceGauge > 0) { + e.getValue().addAndGet(sourceGauge); + } + }); + // min: min of current and source + Map sourceMinimums = source.minimums(); + minimumMap.entrySet().forEach(e -> { + Long sourceValue = lookupQuietly(sourceMinimums, e.getKey()); + if (sourceValue != null) { + AtomicLong dest = e.getValue(); + // minimum only: an aggregateMaximums() pass here would overwrite a smaller dest value + dest.set(aggregateMinimums(dest.get(), sourceValue)); + } + }); + // max: max of current and source + Map sourceMaximums = source.maximums(); + maximumMap.entrySet().forEach(e -> { + Long sourceValue = lookupQuietly(sourceMaximums, e.getKey()); + if (sourceValue != null) { + AtomicLong dest = e.getValue(); + dest.set(aggregateMaximums(dest.get(), sourceValue)); + } + }); + // the most complex + Map sourceMeans = source.meanStatistics(); + meanStatisticMap.entrySet().forEach(e -> { + MeanStatistic current = e.getValue(); + MeanStatistic sourceValue = lookupQuietly( + sourceMeans, e.getKey()); + if (sourceValue != null) { + current.add(sourceValue); + } + }); + return true; + } + + /** + * Get a reference to the map type providing the + * value for a specific key, raising an exception if + * there is no entry for that key. + * @param type of map/return type.
+ * @param map map to look up + * @param key statistic name + * @return the value + * @throws NullPointerException if there is no entry of that name + */ + private static T lookup(final Map map, String key) { + T val = map.get(key); + requireNonNull(val, () -> ("unknown statistic " + key)); + return val; + } + + /** + * Get a reference to the map type providing the + * value for a specific key, returning null if it not found. + * @param type of map/return type. + * @param map map to look up + * @param key statistic name + * @return the value + */ + private static T lookupQuietly(final Map map, String key) { + return map.get(key); + } + + /** + * Get a reference to the atomic instance providing the + * value for a specific counter. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + @Override + public AtomicLong getCounterReference(String key) { + return lookup(counterMap, key); + } + + /** + * Get a reference to the atomic instance providing the + * value for a specific maximum. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + @Override + public AtomicLong getMaximumReference(String key) { + return lookup(maximumMap, key); + } + + /** + * Get a reference to the atomic instance providing the + * value for a specific minimum. This is useful if + * the value is passed around. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + @Override + public AtomicLong getMinimumReference(String key) { + return lookup(minimumMap, key); + } + + /** + * Get a reference to the atomic instance providing the + * value for a specific gauge. This is useful if + * the value is passed around. 
+ * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + @Override + public AtomicLong getGaugeReference(String key) { + return lookup(gaugeMap, key); + } + + /** + * Get a mean statistic. + * @param key statistic name + * @return the reference + * @throws NullPointerException if there is no entry of that name + */ + @Override + public MeanStatistic getMeanStatistic(String key) { + return lookup(meanStatisticMap, key); + } + + /** + * Add a duration to the min/mean/max statistics, using the + * given prefix and adding a suffix for each specific value. + *

    + * The update is non-atomic, even though each individual statistic + * is updated thread-safely. If two threads update the values + * simultaneously, at the end of each operation the state will + * be correct. It is only during the sequence that the statistics + * may be observably inconsistent. + *

    + * @param prefix statistic prefix + * @param durationMillis duration in milliseconds. + */ + @Override + public void addTimedOperation(String prefix, long durationMillis) { + addMeanStatisticSample(prefix + SUFFIX_MEAN, durationMillis); + addMinimumSample(prefix + SUFFIX_MIN, durationMillis); + addMaximumSample(prefix + SUFFIX_MAX, durationMillis); + } + + @Override + public void addTimedOperation(String prefix, Duration duration) { + addTimedOperation(prefix, duration.toMillis()); + } + + /** + * If the store is tracking the given key, return the + * duration tracker for it. If not tracked, return the + * stub tracker. + * @param key statistic key prefix + * @param count #of times to increment the matching counter in this + * operation. + * @return a tracker. + */ + @Override + public DurationTracker trackDuration(final String key, final long count) { + if (counterMap.containsKey(key)) { + return new StatisticDurationTracker(this, key, count); + } else { + return stubDurationTracker(); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/PairedDurationTrackerFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/PairedDurationTrackerFactory.java new file mode 100644 index 0000000000000..9bc01338a1497 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/PairedDurationTrackerFactory.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.time.Duration; + +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; + +/** + * A duration tracker factory which aggregates two other trackers + * to have the same lifecycle. + * + * This is to ease having instance-level tracking alongside global + * values, such as an input stream and a filesystem. + * + * It's got some inefficiencies -assuming system time is used for + * the tracking, System.currentTimeMillis will be invoked twice + * at each point of the process -and the results may actually be different. + * However, it enables multiple duration tracker factories to be given the + * opportunity to collect the statistics. + */ +final class PairedDurationTrackerFactory implements DurationTrackerFactory { + + private final DurationTrackerFactory local; + private final DurationTrackerFactory global; + + PairedDurationTrackerFactory(final DurationTrackerFactory local, + final DurationTrackerFactory global) { + this.local = local; + this.global = global; + } + + @Override + public DurationTracker trackDuration(final String key, final long count) { + return new PairedDurationTracker( + global.trackDuration(key, count), + local.trackDuration(key, count)); + } + + /** + * Tracker which wraps the two duration trackers created for the operation. 
+ */ + private static final class PairedDurationTracker + implements DurationTracker { + private final DurationTracker firstDuration; + private final DurationTracker secondDuration; + + private PairedDurationTracker( + final DurationTracker firstDuration, + final DurationTracker secondDuration) { + this.firstDuration = firstDuration; + this.secondDuration = secondDuration; + } + + @Override + public void failed() { + firstDuration.failed(); + secondDuration.failed(); + } + + @Override + public void close() { + firstDuration.close(); + secondDuration.close(); + } + + /** + * @return the global duration + */ + @Override + public Duration asDuration() { + return firstDuration.asDuration(); + } + + @Override + public String toString() { + return firstDuration.toString(); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/SourceWrappedStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/SourceWrappedStatistics.java new file mode 100644 index 0000000000000..5aced7c5cddbf --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/SourceWrappedStatistics.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +/** + * Wrap a statistics instance with an {@link IOStatisticsSource} + * instance which will then serve up the statistics when asked. + */ +public class SourceWrappedStatistics implements IOStatisticsSource { + + private final IOStatistics source; + + /** + * Constructor. + * @param source source of statistics. + */ + public SourceWrappedStatistics(final IOStatistics source) { + this.source = source; + } + + @Override + public IOStatistics getIOStatistics() { + return source; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StatisticDurationTracker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StatisticDurationTracker.java new file mode 100644 index 0000000000000..04d30135f6bd3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StatisticDurationTracker.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; +import org.apache.hadoop.util.OperationDuration; + +/** + * Track the duration of an object. + * + * When closed the + * min/max/mean statistics are updated. + * + * In the constructor, the counter with name of 'key' is + * incremented -default is by 1, but can be set to other + * values, including 0. + */ +public class StatisticDurationTracker extends OperationDuration + implements DurationTracker { + + /** + * Statistics to update. + */ + private final IOStatisticsStore iostats; + + /** + * Key to use as prefix of values. + */ + private final String key; + + /** + * Flag to indicate the operation failed. + */ + private boolean failed; + + /** + * Constructor -increments the counter by 1. + * @param iostats statistics to update + * @param key prefix of values. + */ + public StatisticDurationTracker( + final IOStatisticsStore iostats, + final String key) { + this(iostats, key, 1); + } + + /** + * Constructor. + * If the supplied count is greater than zero, the counter + * of the key name is updated. + * @param iostats statistics to update + * @param key Key to use as prefix of values. + * @param count #of times to increment the matching counter. + */ + public StatisticDurationTracker( + final IOStatisticsStore iostats, + final String key, + final long count) { + this.iostats = iostats; + this.key = key; + if (count > 0) { + iostats.incrementCounter(key, count); + } + } + + @Override + public void failed() { + failed = true; + } + + /** + * Set the finished time and then update the statistics. + * If the operation failed then the key + .failures counter will be + * incremented by one. 
+ * The operation min/mean/max values will be updated with the duration; + * on a failure these will all be the .failures metrics. + */ + @Override + public void close() { + finished(); + String name = key; + if (failed) { + // failure: + name = key + StoreStatisticNames.SUFFIX_FAILURES; + iostats.incrementCounter(name); + } + iostats.addTimedOperation(name, asDuration()); + } + + @Override + public String toString() { + return " Duration of " + + (failed? (key + StoreStatisticNames.SUFFIX_FAILURES) : key) + + ": " + super.toString(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StorageStatisticsFromIOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StorageStatisticsFromIOStatistics.java new file mode 100644 index 0000000000000..f586cd8d9bdd4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StorageStatisticsFromIOStatistics.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.hadoop.fs.StorageStatistics; +import org.apache.hadoop.fs.statistics.IOStatistics; + +/** + * Returns all the counters of an IOStatistics instance as StorageStatistics. + * This is dynamic. + * The {@link #reset()} is downgraded to a no-op. + */ +public class StorageStatisticsFromIOStatistics + extends StorageStatistics + implements Iterable { + + private final IOStatistics ioStatistics; + private final String scheme; + + /** + * Instantiate. + * @param name storage statistics name. + * @param scheme FS scheme; may be null. + * @param ioStatistics IOStatistics source. + */ + public StorageStatisticsFromIOStatistics( + final String name, + final String scheme, + final IOStatistics ioStatistics) { + super(name); + this.scheme = scheme; + this.ioStatistics = ioStatistics; + } + + @Override + public Iterator iterator() { + return getLongStatistics(); + } + + /** + * Take a snapshot of the current counter values + * and return an iterator over them. + * @return all the counter statistics. + */ + @Override + public Iterator getLongStatistics() { + final Set> counters = counters() + .entrySet(); + final Set statisticSet = counters.stream().map( + this::toLongStatistic) + .collect(Collectors.toSet()); + + // add the gauges + gauges().entrySet().forEach(entry -> + statisticSet.add(toLongStatistic(entry))); + return statisticSet.iterator(); + } + + /** + * Convert a counter/gauge entry to a long statistics. 
+ * @param e entry + * @return statistic + */ + private LongStatistic toLongStatistic(final Map.Entry e) { + return new LongStatistic(e.getKey(), e.getValue()); + } + + private Map counters() { + return ioStatistics.counters(); + } + + private Map gauges() { + return ioStatistics.gauges(); + } + + @Override + public Long getLong(final String key) { + Long l = counters().get(key); + if (l == null) { + l = gauges().get(key); + } + return l; + } + + @Override + public boolean isTracked(final String key) { + return counters().containsKey(key) + || gauges().containsKey(key); + } + + @Override + public void reset() { + /* no-op */ + } + + @Override + public String getScheme() { + return scheme; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StubDurationTracker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StubDurationTracker.java new file mode 100644 index 0000000000000..638a9da9c7b51 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StubDurationTracker.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.time.Duration; + +import org.apache.hadoop.fs.statistics.DurationTracker; + +/** + * A simple stub duration tracker which can be issued in interfaces + * and other places where full duration tracking is not implemented. + */ +public final class StubDurationTracker implements DurationTracker { + + public static final DurationTracker STUB_DURATION_TRACKER = + new StubDurationTracker(); + + private StubDurationTracker() { + } + + @Override + public void failed() { + + } + + @Override + public void close() { + + } + + @Override + public Duration asDuration() { + return Duration.ZERO; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StubDurationTrackerFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StubDurationTrackerFactory.java new file mode 100644 index 0000000000000..8856b6330cee6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StubDurationTrackerFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics.impl; + +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; + +/** + * This is a stub factory which always returns no-op duration + * trackers. Allows for code to always be handed a factory. + */ +public final class StubDurationTrackerFactory + implements DurationTrackerFactory { + + /** + * Single instance. + */ + public static final StubDurationTrackerFactory STUB_DURATION_TRACKER_FACTORY + = new StubDurationTrackerFactory(); + + private StubDurationTrackerFactory() { + } + + @Override + public DurationTracker trackDuration(final String key, final long count) { + return StubDurationTracker.STUB_DURATION_TRACKER; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/WrappedIOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/WrappedIOStatistics.java new file mode 100644 index 0000000000000..4e5fc6a6a1071 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/WrappedIOStatistics.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics.impl; + +import java.util.Map; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToString; + +/** + * Wrap IOStatistics source with another (dynamic) wrapper. + */ +public class WrappedIOStatistics extends AbstractIOStatisticsImpl { + + /** + * The wrapped statistics. + */ + private IOStatistics wrapped; + + /** + * Instantiate. + * @param wrapped nullable wrapped statistics. + */ + public WrappedIOStatistics(final IOStatistics wrapped) { + this.wrapped = wrapped; + } + + /** + * Instantiate without setting the statistics. + * This is for subclasses which build up the map during their own + * construction. + */ + protected WrappedIOStatistics() { + } + + @Override + public Map counters() { + return getWrapped().counters(); + } + + /** + * Get at the wrapped inner statistics. + * @return the wrapped value + */ + protected IOStatistics getWrapped() { + return wrapped; + } + + /** + * Set the wrapped statistics. + * Will fail if the field is already set. + * @param wrapped new value + */ + protected void setWrapped(final IOStatistics wrapped) { + Preconditions.checkState(this.wrapped == null, + "Attempted to overwrite existing wrapped statistics"); + this.wrapped = wrapped; + } + + @Override + public Map gauges() { + return getWrapped().gauges(); + } + + @Override + public Map minimums() { + return getWrapped().minimums(); + } + + @Override + public Map maximums() { + return getWrapped().maximums(); + } + + @Override + public Map meanStatistics() { + return getWrapped().meanStatistics(); + } + + /** + * Return the statistics dump of the wrapped statistics. + * @return the statistics for logging. 
+ */ + @Override + public String toString() { + return ioStatisticsToString(wrapped); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/package-info.java new file mode 100644 index 0000000000000..3ff7dacadce7a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/package-info.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Implementation support for statistics. + * For use internally; external filesystems MAY use this if the implementors + * accept that it is unstable and that incompatible changes may take + * place over minor point releases. 
+ */ + +@InterfaceAudience.LimitedPrivate("Filesystems") +@InterfaceStability.Unstable +package org.apache.hadoop.fs.statistics.impl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/package-info.java new file mode 100644 index 0000000000000..bf46b33a516c6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/package-info.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package contains support for statistic collection and reporting. + * This is the public API; implementation classes are to be kept elsewhere. + *

    + * This package defines two interfaces: + *

    + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource}: + * a source of statistics data, which can be retrieved + * through a call to + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource#getIOStatistics()}. + *

    + * {@link org.apache.hadoop.fs.statistics.IOStatistics}: the statistics retrieved + * from a statistics source. + *

    + * The retrieved statistics may be an immutable snapshot -in which case to get + * updated statistics another call to + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource#getIOStatistics()} + * must be made. Or they may be dynamic -in which case every time a specific + * statistic is retrieved, the latest version is returned. Callers should assume + * that if a statistics instance is dynamic, there is no atomicity when querying + * multiple statistics. If the statistics source was a closeable object (e.g. a + * stream), the statistics MUST remain valid after the stream is closed. + *

    + * Use pattern: + *

    + * An application probes an object (filesystem, stream etc) to see if it + * implements {@code IOStatisticsSource}, and, if it does, + * calls {@code getIOStatistics()} to get its statistics. + * If the result is non-null, the client has statistics describing the current + * state of the source. + *

    + * The expectation is that a statistics source is dynamic: when a value is + * looked up the most recent values are returned. + * When iterating through the set, the values of the iterator SHOULD + * be frozen at the time the iterator was requested. + *

    + * These statistics can be used to: log operations, profile applications, + * and make assertions about the state of the output. + *

    + * The names of statistics are a matter of choice of the specific source. + * However, {@link org.apache.hadoop.fs.statistics.StoreStatisticNames} + * contains a + * set of names recommended for object store operations. + * {@link org.apache.hadoop.fs.statistics.StreamStatisticNames} declares + * recommended names for statistics provided for + * input and output streams. + *

    + * Utility classes: + *

      + *
    • + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSupport}. + * General support, including the ability to take a serializable + * snapshot of the current state of an IOStatistics instance. + *
    • + *
    • + * {@link org.apache.hadoop.fs.statistics.IOStatisticsLogging}. + * Methods for robust/on-demand string conversion, designed + * for use in logging statements and {@code toString()} implementations. + *
    • + *
    • + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSnapshot}. + * A static snapshot of statistics which can be marshalled via + * Java serialization or as JSON via Jackson. It supports + * aggregation, so can be used to generate aggregate statistics. + *
    • + *
    + * + *

    + * Implementors notes: + *

      + *
    1. + * IOStatistics keys SHOULD be standard names where possible. + *
    2. + *
    3. + * An IOStatistics instance MUST be unique to that specific instance of + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource}. + * (i.e. not shared the way StorageStatistics are) + *
    4. + *
    5. + * MUST return the same values irrespective of the thread in which the statistics are + * retrieved or their keys evaluated. + *
    6. + *
    7. + * MUST NOT remove keys once a statistic instance has been created. + *
    8. + *
    9. + * MUST NOT add keys once a statistic instance has been created. + *
    10. + *
    11. + * MUST NOT block for long periods of time while blocking operations + * (reads, writes) are taking place in the source. + * That is: minimal synchronization points (AtomicLongs etc.) may be + * used to share values, but retrieval of statistics should + * be fast and return values even while slow/blocking remote IO is underway. + *
    12. + *
    13. + * MUST support value enumeration and retrieval after the source has been + * closed. + *
    14. + *
    15. + * SHOULD NOT have back-references to potentially expensive objects + * (filesystem instances etc.) + *
    16. + *
    17. + * SHOULD provide statistics which can be added to generate aggregate + * statistics. + *
    18. + *
    + */ + +@InterfaceAudience.Public +@InterfaceStability.Unstable +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/BlockUploadStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/BlockUploadStatistics.java new file mode 100644 index 0000000000000..bf7cbbbc5d5ef --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/BlockUploadStatistics.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.store; + +public interface BlockUploadStatistics { + + /** + * A block has been allocated. + */ + void blockAllocated(); + + /** + * A block has been released. 
+ */ + void blockReleased(); + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/DataBlocks.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/DataBlocks.java new file mode 100644 index 0000000000000..a267ce67660f5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/DataBlocks.java @@ -0,0 +1,1127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.store; + +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.EOFException; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.FSExceptionMessages; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.DirectBufferPool; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_TMP_DIR; +import static org.apache.hadoop.fs.store.DataBlocks.DataBlock.DestState.Closed; +import static org.apache.hadoop.fs.store.DataBlocks.DataBlock.DestState.Upload; +import static org.apache.hadoop.fs.store.DataBlocks.DataBlock.DestState.Writing; +import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; + +/** + * A class to provide disk, byteBuffer and byteArray option for Filesystem + * OutputStreams. + *
      + *
    • + * Disk: Uses disk space to write the blocks. Best suited to avoiding + * OutOfMemory errors in the Java heap space. + *
    • + *
    • + * byteBuffer: Uses DirectByteBuffer to allocate memory off-heap to + * provide faster writing of DataBlocks with some risk of running + * out of memory. + *
    • + *
    • + * byteArray: Uses byte[] to write a block of data. Held on the heap, so there is a + * risk of running out of memory fairly easily. + *
    • + *
    + *

    + * Implementation of DataBlocks taken from HADOOP-13560 to support huge file + * uploads in S3A with different options rather than one. + */ +public final class DataBlocks { + + private static final Logger LOG = + LoggerFactory.getLogger(DataBlocks.class); + + /** + * Buffer blocks to disk. + * Capacity is limited to available disk space. + */ + public static final String DATA_BLOCKS_BUFFER_DISK = "disk"; + + /** + * Use a byte buffer. + */ + public static final String DATA_BLOCKS_BYTEBUFFER = "bytebuffer"; + + /** + * Use an in-memory array. Fast but will run of heap rapidly. + */ + public static final String DATA_BLOCKS_BUFFER_ARRAY = "array"; + + private DataBlocks() { + } + + /** + * Validate args to a write command. These are the same validation checks + * expected for any implementation of {@code OutputStream.write()}. + * + * @param b byte array containing data. + * @param off offset in array where to start. + * @param len number of bytes to be written. + * @throws NullPointerException for a null buffer + * @throws IndexOutOfBoundsException if indices are out of range + * @throws IOException raised on errors performing I/O. + */ + public static void validateWriteArgs(byte[] b, int off, int len) + throws IOException { + Preconditions.checkNotNull(b); + if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length) || ((off + len) < 0)) { + throw new IndexOutOfBoundsException( + "write (b[" + b.length + "], " + off + ", " + len + ')'); + } + } + + /** + * Create a factory. + * + * @param keyToBufferDir Key to buffer directory config for a FS. + * @param configuration factory configurations. + * @param name factory name -the option from {@link CommonConfigurationKeys}. + * @return the factory, ready to be initialized. + * @throws IllegalArgumentException if the name is unknown. 
+ */ + public static BlockFactory createFactory(String keyToBufferDir, + Configuration configuration, + String name) { + LOG.debug("Creating DataFactory of type : {}", name); + switch (name) { + case DATA_BLOCKS_BUFFER_ARRAY: + return new ArrayBlockFactory(keyToBufferDir, configuration); + case DATA_BLOCKS_BUFFER_DISK: + return new DiskBlockFactory(keyToBufferDir, configuration); + case DATA_BLOCKS_BYTEBUFFER: + return new ByteBufferBlockFactory(keyToBufferDir, configuration); + default: + throw new IllegalArgumentException("Unsupported block buffer" + + " \"" + name + '"'); + } + } + + /** + * The output information for an upload. + * It can be one of a file, an input stream or a byteArray. + * {@link #toByteArray()} method to be used to convert the data into byte + * array to be done in this class as well. + * When closed, any stream is closed. Any source file is untouched. + */ + public static final class BlockUploadData implements Closeable { + private final File file; + private InputStream uploadStream; + private byte[] byteArray; + private boolean isClosed; + + /** + * Constructor for byteArray upload data block. File and uploadStream + * would be null. + * + * @param byteArray byteArray used to construct BlockUploadData. + */ + public BlockUploadData(byte[] byteArray) { + this.file = null; + this.uploadStream = null; + + this.byteArray = requireNonNull(byteArray); + } + + /** + * File constructor; input stream and byteArray will be null. + * + * @param file file to upload + */ + BlockUploadData(File file) { + Preconditions.checkArgument(file.exists(), "No file: %s", file); + this.file = file; + this.uploadStream = null; + this.byteArray = null; + } + + /** + * Stream constructor, file and byteArray field will be null. + * + * @param uploadStream stream to upload. 
+ */ + BlockUploadData(InputStream uploadStream) { + requireNonNull(uploadStream, "rawUploadStream"); + this.uploadStream = uploadStream; + this.file = null; + this.byteArray = null; + } + + /** + * Predicate: does this instance contain a file reference. + * + * @return true if there is a file. + */ + boolean hasFile() { + return file != null; + } + + /** + * Get the file, if there is one. + * + * @return the file for uploading, or null. + */ + File getFile() { + return file; + } + + /** + * Get the raw upload stream, if the object was + * created with one. + * + * @return the upload stream or null. + */ + InputStream getUploadStream() { + return uploadStream; + } + + /** + * Convert to a byte array. + * If the data is stored in a file, it will be read and returned. + * If the data was passed in via an input stream (which happens if the + * data is stored in a bytebuffer) then it will be converted to a byte + * array -which will then be cached for any subsequent use. + * + * @return byte[] after converting the uploadBlock. + * @throws IOException throw if an exception is caught while reading + * File/InputStream or closing InputStream. + */ + public byte[] toByteArray() throws IOException { + Preconditions.checkState(!isClosed, "Block is closed"); + if (byteArray != null) { + return byteArray; + } + if (file != null) { + // Need to save byteArray here so that we don't read File if + // byteArray() is called more than once on the same file. + byteArray = FileUtils.readFileToByteArray(file); + return byteArray; + } + byteArray = IOUtils.toByteArray(uploadStream); + IOUtils.close(uploadStream); + uploadStream = null; + return byteArray; + } + + /** + * Close: closes any upload stream and byteArray provided in the + * constructor. + * + * @throws IOException inherited exception. 
+ */ + @Override + public void close() throws IOException { + isClosed = true; + cleanupWithLogger(LOG, uploadStream); + byteArray = null; + if (file != null) { + LOG.debug("File deleted in BlockUploadData close: {}", file.delete()); + } + } + } + + /** + * Base class for block factories. + */ + public static abstract class BlockFactory implements Closeable { + + private final String keyToBufferDir; + private final Configuration conf; + + protected BlockFactory(String keyToBufferDir, Configuration conf) { + this.keyToBufferDir = keyToBufferDir; + this.conf = conf; + } + + /** + * Create a block. + * + * @param index index of block + * @param limit limit of the block. + * @param statistics stats to work with + * @return a new block. + * @throws IOException raised on errors performing I/O. + */ + public abstract DataBlock create(long index, int limit, + BlockUploadStatistics statistics) + throws IOException; + + /** + * Implement any close/cleanup operation. + * Base class is a no-op. + * + * @throws IOException Inherited exception; implementations should + * avoid raising it. + */ + @Override + public void close() throws IOException { + } + + /** + * Configuration. + * + * @return config passed to create the factory. + */ + protected Configuration getConf() { + return conf; + } + + /** + * Key to Buffer Directory config for a FS instance. + * + * @return String containing key to Buffer dir. + */ + public String getKeyToBufferDir() { + return keyToBufferDir; + } + } + + /** + * This represents a block being uploaded. + */ + public static abstract class DataBlock implements Closeable { + + enum DestState {Writing, Upload, Closed} + + private volatile DestState state = Writing; + private final long index; + private final BlockUploadStatistics statistics; + + protected DataBlock(long index, + BlockUploadStatistics statistics) { + this.index = index; + this.statistics = statistics; + } + + /** + * Atomically enter a state, verifying current state. 
+ * + * @param current current state. null means "no check" + * @param next next state + * @throws IllegalStateException if the current state is not as expected + */ + protected synchronized final void enterState(DestState current, + DestState next) + throws IllegalStateException { + verifyState(current); + LOG.debug("{}: entering state {}", this, next); + state = next; + } + + /** + * Verify that the block is in the declared state. + * + * @param expected expected state. + * @throws IllegalStateException if the DataBlock is in the wrong state + */ + protected final void verifyState(DestState expected) + throws IllegalStateException { + if (expected != null && state != expected) { + throw new IllegalStateException("Expected stream state " + expected + + " -but actual state is " + state + " in " + this); + } + } + + /** + * Current state. + * + * @return the current state. + */ + final DestState getState() { + return state; + } + + /** + * Return the current data size. + * + * @return the size of the data. + */ + public abstract int dataSize(); + + /** + * Predicate to verify that the block has the capacity to write + * the given set of bytes. + * + * @param bytes number of bytes desired to be written. + * @return true if there is enough space. + */ + abstract boolean hasCapacity(long bytes); + + /** + * Predicate to check if there is data in the block. + * + * @return true if there is + */ + public boolean hasData() { + return dataSize() > 0; + } + + /** + * The remaining capacity in the block before it is full. + * + * @return the number of bytes remaining. + */ + public abstract int remainingCapacity(); + + /** + * Write a series of bytes from the buffer, from the offset. + * Returns the number of bytes written. + * Only valid in the state {@code Writing}. + * Base class verifies the state but does no writing. + * + * @param buffer buffer. + * @param offset offset. + * @param length length of write. + * @return number of bytes written. 
+ * @throws IOException trouble + */ + public int write(byte[] buffer, int offset, int length) throws IOException { + verifyState(Writing); + Preconditions.checkArgument(buffer != null, "Null buffer"); + Preconditions.checkArgument(length >= 0, "length is negative"); + Preconditions.checkArgument(offset >= 0, "offset is negative"); + Preconditions.checkArgument( + !(buffer.length - offset < length), + "buffer shorter than amount of data to write"); + return 0; + } + + /** + * Flush the output. + * Only valid in the state {@code Writing}. + * In the base class, this is a no-op + * + * @throws IOException any IO problem. + */ + public void flush() throws IOException { + verifyState(Writing); + } + + /** + * Switch to the upload state and return a stream for uploading. + * Base class calls {@link #enterState(DestState, DestState)} to + * manage the state machine. + * + * @return the stream. + * @throws IOException trouble + */ + public BlockUploadData startUpload() throws IOException { + LOG.debug("Start datablock[{}] upload", index); + enterState(Writing, Upload); + return null; + } + + /** + * Enter the closed state. + * + * @return true if the class was in any other state, implying that + * the subclass should do its close operations. + */ + protected synchronized boolean enterClosedState() { + if (!state.equals(Closed)) { + enterState(null, Closed); + return true; + } else { + return false; + } + } + + @Override + public void close() throws IOException { + if (enterClosedState()) { + LOG.debug("Closed {}", this); + innerClose(); + } + } + + /** + * Inner close logic for subclasses to implement. + * + * @throws IOException raised on errors performing I/O. + */ + protected void innerClose() throws IOException { + + } + + /** + * A block has been allocated. + */ + protected void blockAllocated() { + if (statistics != null) { + statistics.blockAllocated(); + } + } + + /** + * A block has been released. 
+     */
+    protected void blockReleased() {
+      if (statistics != null) {
+        statistics.blockReleased();
+      }
+    }
+
+    protected BlockUploadStatistics getStatistics() {
+      return statistics;
+    }
+
+    public long getIndex() {
+      return index;
+    }
+  }
+
+  // ====================================================================
+
+  /**
+   * Use byte arrays on the heap for storage.
+   */
+  static class ArrayBlockFactory extends BlockFactory {
+
+    ArrayBlockFactory(String keyToBufferDir, Configuration conf) {
+      super(keyToBufferDir, conf);
+    }
+
+    /**
+     * Create a block which buffers its data in a byte array on the heap.
+     *
+     * @param index index of block
+     * @param limit limit of the block.
+     * @param statistics stats to work with
+     * @return a new {@link ByteArrayBlock}.
+     * @throws IOException raised on errors performing I/O.
+     */
+    @Override
+    public DataBlock create(long index, int limit,
+        BlockUploadStatistics statistics)
+        throws IOException {
+      // Pass the caller's index through: a hard-coded 0 made every
+      // array block report index 0 from getIndex() and in its log output.
+      return new ByteArrayBlock(index, limit, statistics);
+    }
+
+  }
+
+  /**
+   * A {@code ByteArrayOutputStream} which can hand its internal array
+   * over to an input stream.
+   */
+  static class DataBlockByteArrayOutputStream extends ByteArrayOutputStream {
+
+    DataBlockByteArrayOutputStream(int size) {
+      super(size);
+    }
+
+    /**
+     * InputStream backed by the internal byte array.
+     * The input stream wraps the first {@code count} bytes of the
+     * internal array; afterwards this stream is reset and its buffer
+     * reference is set to null, so it must not be written to again.
+     *
+     * @return ByteArrayInputStream instance.
+     */
+    ByteArrayInputStream getInputStream() {
+      ByteArrayInputStream bin = new ByteArrayInputStream(this.buf, 0, count);
+      this.reset();
+      this.buf = null;
+      return bin;
+    }
+  }
+
+  /**
+   * Stream to memory via a {@code ByteArrayOutputStream}.
+   *

    + * It can consume a lot of heap space + * proportional to the mismatch between writes to the stream and + * the JVM-wide upload bandwidth to a Store's endpoint. + * The memory consumption can be limited by tuning the filesystem settings + * to restrict the number of queued/active uploads. + */ + + static class ByteArrayBlock extends DataBlock { + private DataBlockByteArrayOutputStream buffer; + private final int limit; + // cache data size so that it is consistent after the buffer is reset. + private Integer dataSize; + + ByteArrayBlock(long index, + int limit, + BlockUploadStatistics statistics) { + super(index, statistics); + this.limit = limit; + this.buffer = new DataBlockByteArrayOutputStream(limit); + blockAllocated(); + } + + /** + * Get the amount of data; if there is no buffer then the size is 0. + * + * @return the amount of data available to upload. + */ + @Override + public int dataSize() { + return dataSize != null ? dataSize : buffer.size(); + } + + @Override + public BlockUploadData startUpload() throws IOException { + super.startUpload(); + dataSize = buffer.size(); + ByteArrayInputStream bufferData = buffer.getInputStream(); + buffer = null; + return new BlockUploadData(bufferData); + } + + @Override + boolean hasCapacity(long bytes) { + return dataSize() + bytes <= limit; + } + + @Override + public int remainingCapacity() { + return limit - dataSize(); + } + + @Override + public int write(byte[] b, int offset, int len) throws IOException { + super.write(b, offset, len); + int written = Math.min(remainingCapacity(), len); + buffer.write(b, offset, written); + return written; + } + + @Override + protected void innerClose() { + buffer = null; + blockReleased(); + } + + @Override + public String toString() { + return "ByteArrayBlock{" + + "index=" + getIndex() + + ", state=" + getState() + + ", limit=" + limit + + ", dataSize=" + dataSize + + '}'; + } + } + + // ==================================================================== + + /** + * 
Stream via Direct ByteBuffers; these are allocated off heap + * via {@link DirectBufferPool}. + */ + + static class ByteBufferBlockFactory extends BlockFactory { + + private final DirectBufferPool bufferPool = new DirectBufferPool(); + private final AtomicInteger buffersOutstanding = new AtomicInteger(0); + + ByteBufferBlockFactory(String keyToBufferDir, Configuration conf) { + super(keyToBufferDir, conf); + } + + @Override public ByteBufferBlock create(long index, int limit, + BlockUploadStatistics statistics) + throws IOException { + return new ByteBufferBlock(index, limit, statistics); + } + + private ByteBuffer requestBuffer(int limit) { + LOG.debug("Requesting buffer of size {}", limit); + buffersOutstanding.incrementAndGet(); + return bufferPool.getBuffer(limit); + } + + private void releaseBuffer(ByteBuffer buffer) { + LOG.debug("Releasing buffer"); + bufferPool.returnBuffer(buffer); + buffersOutstanding.decrementAndGet(); + } + + /** + * Get count of outstanding buffers. + * + * @return the current buffer count. + */ + public int getOutstandingBufferCount() { + return buffersOutstanding.get(); + } + + @Override + public String toString() { + return "ByteBufferBlockFactory{" + + "buffersOutstanding=" + buffersOutstanding + + '}'; + } + + /** + * A DataBlock which requests a buffer from pool on creation; returns + * it when it is closed. + */ + class ByteBufferBlock extends DataBlock { + private ByteBuffer blockBuffer; + private final int bufferSize; + // cache data size so that it is consistent after the buffer is reset. + private Integer dataSize; + + /** + * Instantiate. This will request a ByteBuffer of the desired size. + * + * @param index block index. + * @param bufferSize buffer size. + * @param statistics statistics to update. 
+       */
+      ByteBufferBlock(long index,
+          int bufferSize,
+          BlockUploadStatistics statistics) {
+        super(index, statistics);
+        this.bufferSize = bufferSize;
+        this.blockBuffer = requestBuffer(bufferSize);
+        blockAllocated();
+      }
+
+      /**
+       * Get the amount of data; if there is no buffer then the size is 0.
+       *
+       * @return the amount of data available to upload.
+       */
+      @Override public int dataSize() {
+        return dataSize != null ? dataSize : bufferCapacityUsed();
+      }
+
+      /**
+       * Cache the data size, then set the buffer up for reading from the
+       * start and wrap it in an input stream.
+       *
+       * @return the upload data wrapping the buffer.
+       * @throws IOException trouble
+       */
+      @Override
+      public BlockUploadData startUpload() throws IOException {
+        super.startUpload();
+        dataSize = bufferCapacityUsed();
+        // set the buffer up for reading from the beginning
+        blockBuffer.limit(blockBuffer.position());
+        blockBuffer.position(0);
+        return new BlockUploadData(
+            new ByteBufferInputStream(dataSize, blockBuffer));
+      }
+
+      @Override
+      public boolean hasCapacity(long bytes) {
+        return bytes <= remainingCapacity();
+      }
+
+      @Override
+      public int remainingCapacity() {
+        return blockBuffer != null ? blockBuffer.remaining() : 0;
+      }
+
+      /**
+       * Bytes of the buffer in use: capacity minus what remains.
+       * Once innerClose() has released the buffer this is 0; without the
+       * null check dataSize(), and so toString(), raised a
+       * NullPointerException on a block closed before any upload began,
+       * which also hid the IllegalStateException whose message includes
+       * the block's string value.
+       *
+       * @return the number of bytes used, or 0 if there is no buffer.
+       */
+      private int bufferCapacityUsed() {
+        final ByteBuffer current = blockBuffer;
+        return current != null
+            ? current.capacity() - current.remaining()
+            : 0;
+      }
+
+      /**
+       * Write as many of the bytes as fit within the remaining capacity.
+       *
+       * @param b buffer.
+       * @param offset offset.
+       * @param len length of write.
+       * @return number of bytes written, which may be less than {@code len}.
+       * @throws IOException trouble
+       */
+      @Override
+      public int write(byte[] b, int offset, int len) throws IOException {
+        super.write(b, offset, len);
+        int written = Math.min(remainingCapacity(), len);
+        blockBuffer.put(b, offset, written);
+        return written;
+      }
+
+      /**
+       * Closing the block will release the buffer.
+       */
+      @Override
+      protected void innerClose() {
+        if (blockBuffer != null) {
+          blockReleased();
+          releaseBuffer(blockBuffer);
+          blockBuffer = null;
+        }
+      }
+
+      @Override
+      public String toString() {
+        return "ByteBufferBlock{"
+            + "index=" + getIndex() +
+            ", state=" + getState() +
+            ", dataSize=" + dataSize() +
+            ", limit=" + bufferSize +
+            ", remainingCapacity=" + remainingCapacity() +
+            '}';
+      }
+
+      /**
+       * Provide an input stream from a byte buffer; supporting
+       * {@link #mark(int)}, which is required to enable replay of failed
+       * PUT attempts.
+       */
+      class ByteBufferInputStream extends InputStream {
+
+        private final int size;
+        private ByteBuffer byteBuffer;
+
+        /**
+         * Wrap a buffer which is already set up for reading.
+         *
+         * @param size amount of data; skip() will not go beyond it.
+         * @param byteBuffer buffer to read from.
+         */
+        ByteBufferInputStream(int size,
+            ByteBuffer byteBuffer) {
+          LOG.debug("Creating ByteBufferInputStream of size {}", size);
+          this.size = size;
+          this.byteBuffer = byteBuffer;
+        }
+
+        /**
+         * After the stream is closed, set the local reference to the byte
+         * buffer to null; this guarantees that future attempts to use
+         * stream methods will fail.
+         */
+        @Override
+        public synchronized void close() {
+          LOG.debug("ByteBufferInputStream.close() for {}",
+              ByteBufferBlock.super.toString());
+          byteBuffer = null;
+        }
+
+        /**
+         * Verify that the stream is open.
+         *
+         * @throws IOException if the stream is closed
+         */
+        private void verifyOpen() throws IOException {
+          if (byteBuffer == null) {
+            throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+          }
+        }
+
+        /**
+         * Read a single byte.
+         *
+         * @return the byte as a value in the range 0-255, or -1 if no
+         * data is left.
+         * @throws IOException declared by the superclass.
+         */
+        @Override
+        public synchronized int read() throws IOException {
+          if (available() > 0) {
+            return byteBuffer.get() & 0xFF;
+          } else {
+            return -1;
+          }
+        }
+
+        /**
+         * Move the read position by {@code offset} bytes.
+         *
+         * @param offset number of bytes to move by.
+         * @return the number of bytes skipped.
+         * @throws IOException if the stream is closed.
+         * @throws EOFException if the new position would be negative or
+         * beyond the size of the data.
+         */
+        @Override
+        public synchronized long skip(long offset) throws IOException {
+          verifyOpen();
+          long newPos = position() + offset;
+          if (newPos < 0) {
+            throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK);
+          }
+          if (newPos > size) {
+            throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
+          }
+          byteBuffer.position((int) newPos);
+          // InputStream.skip() reports how far the stream moved, not the
+          // absolute position: the two differ once any data has been read.
+          return offset;
+        }
+
+        @Override
+        public synchronized int available() {
+          Preconditions.checkState(byteBuffer != null,
+              FSExceptionMessages.STREAM_IS_CLOSED);
+          return byteBuffer.remaining();
+        }
+
+        /**
+         * Get the current buffer position.
+         *
+         * @return the buffer position
+         */
+        public synchronized int position() {
+          return byteBuffer.position();
+        }
+
+        /**
+         * Check if there is data left.
+         *
+         * @return true if there is data remaining in the buffer.
+ */ + public synchronized boolean hasRemaining() { + return byteBuffer.hasRemaining(); + } + + @Override + public synchronized void mark(int readlimit) { + LOG.debug("mark at {}", position()); + byteBuffer.mark(); + } + + @Override + public synchronized void reset() throws IOException { + LOG.debug("reset"); + byteBuffer.reset(); + } + + @Override + public boolean markSupported() { + return true; + } + + /** + * Read in data. + * + * @param b destination buffer. + * @param offset offset within the buffer. + * @param length length of bytes to read. + * @throws EOFException if the position is negative + * @throws IndexOutOfBoundsException if there isn't space for the + * amount of data requested. + * @throws IllegalArgumentException other arguments are invalid. + */ + @SuppressWarnings("NullableProblems") + public synchronized int read(byte[] b, int offset, int length) + throws IOException { + Preconditions.checkArgument(length >= 0, "length is negative"); + Preconditions.checkArgument(b != null, "Null buffer"); + if (b.length - offset < length) { + throw new IndexOutOfBoundsException( + FSExceptionMessages.TOO_MANY_BYTES_FOR_DEST_BUFFER + + ": request length =" + length + + ", with offset =" + offset + + "; buffer capacity =" + (b.length - offset)); + } + verifyOpen(); + if (!hasRemaining()) { + return -1; + } + + int toRead = Math.min(length, available()); + byteBuffer.get(b, offset, toRead); + return toRead; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "ByteBufferInputStream{"); + sb.append("size=").append(size); + ByteBuffer buf = this.byteBuffer; + if (buf != null) { + sb.append(", available=").append(buf.remaining()); + } + sb.append(", ").append(ByteBufferBlock.super.toString()); + sb.append('}'); + return sb.toString(); + } + } + } + } + + // ==================================================================== + + /** + * Buffer blocks to disk. 
+   */
+  static class DiskBlockFactory extends BlockFactory {
+
+    // Allocator used to choose where each buffer file goes;
+    // created once, in the constructor.
+    private LocalDirAllocator directoryAllocator;
+
+    /**
+     * Create the factory and its directory allocator.
+     * The allocator is constructed with {@code keyToBufferDir} when
+     * {@code conf} has a value for that key, otherwise with
+     * {@code HADOOP_TMP_DIR}.
+     *
+     * @param keyToBufferDir configuration key of the buffer directory.
+     * @param conf configuration to read.
+     */
+    DiskBlockFactory(String keyToBufferDir, Configuration conf) {
+      super(keyToBufferDir, conf);
+      String bufferDir = conf.get(keyToBufferDir) != null
+          ? keyToBufferDir : HADOOP_TMP_DIR;
+      directoryAllocator = new LocalDirAllocator(bufferDir);
+    }
+
+    /**
+     * Create a temp file and a {@link DiskBlock} instance to manage it.
+     * The file name prefix is built from the block index.
+     *
+     * @param index block index.
+     * @param limit limit of the block.
+     * @param statistics statistics to update.
+     * @return the new block.
+     * @throws IOException IO problems
+     */
+    @Override
+    public DataBlock create(long index,
+        int limit,
+        BlockUploadStatistics statistics)
+        throws IOException {
+      File destFile = createTmpFileForWrite(String.format("datablock-%04d-",
+          index),
+          limit, getConf());
+
+      return new DiskBlock(destFile, limit, index, statistics);
+    }
+
+    /**
+     * Ask the directory allocator for a local path to write to, then
+     * create a temporary file in that path's parent directory, using the
+     * final element of the path as the file name prefix.
+     * This does not mark the file for deletion when a process exits.
+     * {@link LocalDirAllocator#createTmpFileForWrite(String, long, Configuration)}.
+     *
+     * @param pathStr prefix for the temporary file.
+     * @param size the size of the file that is going to be written.
+     * @param conf the Configuration object.
+     * @return a unique temporary file.
+     * @throws IOException IO problems
+     */
+    File createTmpFileForWrite(String pathStr, long size,
+        Configuration conf) throws IOException {
+      Path path = directoryAllocator.getLocalPathForWrite(pathStr,
+          size, conf);
+      File dir = new File(path.getParent().toUri().getPath());
+      String prefix = path.getName();
+      // create a temp file on this directory
+      return File.createTempFile(prefix, null, dir);
+    }
+  }
+
+  /**
+   * Stream to a file.
+   * This will stop at the limit; the caller is expected to create a new block.
+ */ + static class DiskBlock extends DataBlock { + + private int bytesWritten; + private final File bufferFile; + private final int limit; + private BufferedOutputStream out; + private final AtomicBoolean closed = new AtomicBoolean(false); + + DiskBlock(File bufferFile, + int limit, + long index, + BlockUploadStatistics statistics) + throws FileNotFoundException { + super(index, statistics); + this.limit = limit; + this.bufferFile = bufferFile; + blockAllocated(); + out = new BufferedOutputStream(new FileOutputStream(bufferFile)); + } + + @Override public int dataSize() { + return bytesWritten; + } + + @Override + boolean hasCapacity(long bytes) { + return dataSize() + bytes <= limit; + } + + @Override public int remainingCapacity() { + return limit - bytesWritten; + } + + @Override + public int write(byte[] b, int offset, int len) throws IOException { + super.write(b, offset, len); + int written = Math.min(remainingCapacity(), len); + out.write(b, offset, written); + bytesWritten += written; + return written; + } + + @Override + public BlockUploadData startUpload() throws IOException { + super.startUpload(); + try { + out.flush(); + } finally { + out.close(); + out = null; + } + return new BlockUploadData(bufferFile); + } + + /** + * The close operation will delete the destination file if it still + * exists. 
+ * + * @throws IOException IO problems + */ + @SuppressWarnings("UnnecessaryDefault") + @Override + protected void innerClose() throws IOException { + final DestState state = getState(); + LOG.debug("Closing {}", this); + switch (state) { + case Writing: + if (bufferFile.exists()) { + // file was not uploaded + LOG.debug("Block[{}]: Deleting buffer file as upload did not start", + getIndex()); + closeBlock(); + } + break; + + case Upload: + LOG.debug("Block[{}]: Buffer file {} exists —close upload stream", + getIndex(), bufferFile); + break; + + case Closed: + closeBlock(); + break; + + default: + // this state can never be reached, but checkstyle complains, so + // it is here. + } + } + + /** + * Flush operation will flush to disk. + * + * @throws IOException IOE raised on FileOutputStream + */ + @Override public void flush() throws IOException { + super.flush(); + out.flush(); + } + + @Override + public String toString() { + String sb = "FileBlock{" + + "index=" + getIndex() + + ", destFile=" + bufferFile + + ", state=" + getState() + + ", dataSize=" + dataSize() + + ", limit=" + limit + + '}'; + return sb; + } + + /** + * Close the block. + * This will delete the block's buffer file if the block has + * not previously been closed. 
+     */
+    void closeBlock() {
+      LOG.debug("block[{}]: closeBlock()", getIndex());
+      if (!closed.getAndSet(true)) {
+        blockReleased();
+        // startUpload() closes the output stream and nulls the field; if
+        // the block is closed without an upload having started, the
+        // stream is still open here and would leak its file descriptor.
+        cleanupWithLogger(LOG, out);
+        out = null;
+        if (!bufferFile.delete() && bufferFile.exists()) {
+          LOG.warn("delete({}) returned false",
+              bufferFile.getAbsoluteFile());
+        }
+      } else {
+        LOG.debug("block[{}]: skipping re-entrant closeBlock()", getIndex());
+      }
+    }
+  }
+}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/LogExactlyOnce.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/LogExactlyOnce.java
similarity index 81%
rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/LogExactlyOnce.java
rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/LogExactlyOnce.java
index 54a8836d02ba4..04cd5111e90a2 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/LogExactlyOnce.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/LogExactlyOnce.java
@@ -16,7 +16,7 @@
  * limitations under the License.
*/ -package org.apache.hadoop.fs.s3a.impl; +package org.apache.hadoop.fs.store; import java.util.concurrent.atomic.AtomicBoolean; @@ -39,4 +39,14 @@ public void warn(String format, Object...args) { log.warn(format, args); } } + public void info(String format, Object...args) { + if (!logged.getAndSet(true)) { + log.info(format, args); + } + } + public void error(String format, Object...args) { + if (!logged.getAndSet(true)) { + log.error(format, args); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/ActiveThreadSpanSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/ActiveThreadSpanSource.java new file mode 100644 index 0000000000000..4ddb8e1f29072 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/ActiveThreadSpanSource.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.store.audit; + +/** + * Interface to get the active thread span. + * This can be used to collect the active span to + * propagate it into other threads. 
+ *
+ * FileSystems which track their active span may implement
+ * this and offer their active span.
+ */
+public interface ActiveThreadSpanSource<T extends AuditSpan> {
+
+  /**
+   * The active span. This may not be a valid span, i.e. there is no guarantee
+   * that {@code getActiveAuditSpan().isValidSpan()} is true, but
+   * implementations MUST always return a non-null span.
+   * @return the currently active span.
+   */
+  T getActiveAuditSpan();
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditEntryPoint.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditEntryPoint.java
new file mode 100644
index 0000000000000..6210dd0c8987c
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditEntryPoint.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.store.audit;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+/**
+ * A marker attribute simply to highlight which of the methods
+ * in a FileSystem are audit entry points.
+ *

+ * <ol>
+ *   <li>
+ *     A FS method is an AuditEntryPoint if, on invocation, it
+ *     creates and activates an Audit Span for that FS.
+ *   </li>
+ *   <li>
+ *     The audit span SHOULD be deactivated before returning.
+ *   </li>
+ *   <li>
+ *     Objects returned by the API call which go on
+ *     to make calls of the filesystem MUST perform
+ *     all IO within the same audit span.
+ *   </li>
+ *   <li>
+ *     Audit Entry points SHOULD NOT invoke other Audit Entry Points.
+ *     This is to ensure the original audit span information
+ *     is not replaced.
+ *   </li>
+ * </ol>
    + * FileSystem methods the entry point then invokes + * SHOULD NOT invoke audit entry points internally. + * + * All external methods MUST be audit entry points. + */ +@Documented +@Retention(RetentionPolicy.SOURCE) +public @interface AuditEntryPoint { +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditSpan.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditSpan.java new file mode 100644 index 0000000000000..ecdaf71c11132 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditSpan.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.store.audit; + +import java.io.Closeable; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * This is a span created by an {@link AuditSpanSource}. + * An implementation of a span may carry context which can be picked + * up by the filesystem when activated. + * Each FS can have one active span per thread. 
+ * Different filesystem instances SHALL have different active
+ * spans (if they support them).
+ * A span is activated in a thread when {@link #activate()}
+ * is called.
+ * The span stays active in that thread until {@link #deactivate()}
+ * is called.
+ * When deactivated in one thread, it MAY still be active in others.
+ * There's no explicit "end of span"; this is too hard to manage in
+ * terms of API lifecycle.
+ * Similarly, there's no stack of spans. Once a span is activated,
+ * the previous span is forgotten about.
+ * Therefore each FS will need a fallback "inactive span" which
+ * will be reverted to on deactivation of any other span.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public interface AuditSpan extends Closeable {
+
+  /**
+   * Return a span ID which must be unique across all spans,
+   * everywhere. That effectively means part of the
+   * span ID SHOULD be derived from a UUID.
+   * Callers MUST NOT make any assumptions about the actual
+   * contents or structure of this string other than the
+   * uniqueness.
+   * @return a non-empty string
+   */
+  String getSpanId();
+
+  /**
+   * Get the name of the operation.
+   * @return the operation name.
+   */
+  String getOperationName();
+
+  /**
+   * Timestamp in UTC of span creation.
+   * @return timestamp.
+   */
+  long getTimestamp();
+
+  /**
+   * Make this span active in the current thread.
+   * @return the activated span.
+   * This makes it easy to use in try-with-resources.
+   */
+  AuditSpan activate();
+
+  /**
+   * Deactivate the span in the current thread.
+   */
+  void deactivate();
+
+  /**
+   * Close calls {@link #deactivate()}; subclasses may override
+   * but the audit manager's wrapping span will always relay to
+   * {@link #deactivate()} rather
+   * than call this method on the wrapped span.
+   */
+  default void close() {
+    deactivate();
+  }
+
+  /**
+   * Is the span valid? False == this is a span to indicate unbonded.
+   * @return true if this span represents a real operation.
+   */
+  default boolean isValidSpan() {
+    return true;
+  }
+
+  /**
+   * Set an attribute.
+   * This may or may not be propagated to audit logs.
+   * @param key attribute name
+   * @param value value
+   */
+  default void set(String key, String value) { }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditSpanSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditSpanSource.java
new file mode 100644
index 0000000000000..4f9f5a64564c0
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditSpanSource.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.store.audit;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * A source of audit spans; the type parameter is the type of span created.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public interface AuditSpanSource<T extends AuditSpan> {
+
+  /**
+   * Create a span for an operation.
+   *
+   * All operation names SHOULD come from
+   * {@code StoreStatisticNames} or
+   * {@code StreamStatisticNames}.
+   * @param operation operation name.
+   * @param path1 first path of operation
+   * @param path2 second path of operation
+   * @return a span for the audit
+   * @throws IOException failure
+   */
+  T createSpan(String operation,
+      @Nullable String path1,
+      @Nullable String path2)
+      throws IOException;
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditingFunctions.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditingFunctions.java
new file mode 100644
index 0000000000000..9ad727f5a2df2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/AuditingFunctions.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.store.audit; + +import javax.annotation.Nullable; +import java.util.concurrent.Callable; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.functional.CallableRaisingIOE; +import org.apache.hadoop.util.functional.FunctionRaisingIOE; +import org.apache.hadoop.util.functional.InvocationRaisingIOE; + +/** + * Static methods to assist in working with Audit Spans. + * The {@code withinX} calls take a span and a closure/function etc. + * and return a new function of the same types but which will + * activate the span. + * They do not deactivate it afterwards to avoid accidentally deactivating + * the already-active span during a chain of operations in the same thread. + * All they do is ensure that the given span is guaranteed to be + * active when the passed in callable/function/invokable is evaluated. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class AuditingFunctions { + + private AuditingFunctions() { + } + + /** + * Given a callable, return a new callable which + * activates the span before the inner invocation, without deactivating it. + * @param auditSpan audit span + * @param operation operation + * @param type of result + * @return a new invocation, or the original operation if the span is null. + */ + public static CallableRaisingIOE withinAuditSpan( + @Nullable AuditSpan auditSpan, + CallableRaisingIOE operation) { + return auditSpan == null + ? operation + : () -> { + auditSpan.activate(); + return operation.apply(); + }; + } + + /** + * Given an invocation, return a new invocation which + * activates the span before the inner invocation, without deactivating it. + * @param auditSpan audit span + * @param operation operation + * @return a new invocation, or the original operation if the span is null. + */ + public static InvocationRaisingIOE withinAuditSpan( + @Nullable AuditSpan auditSpan, + InvocationRaisingIOE operation) { + return auditSpan == null + ?
operation + : () -> { + auditSpan.activate(); + operation.apply(); + }; + } + + /** + * Given a function, return a new function which + * activates the span before the inner one, without deactivating it. + * @param auditSpan audit span + * @param operation operation + * @param Generics Type T. + * @param Generics Type R. + * @return a new function, or the original operation if the span is null. + */ + public static FunctionRaisingIOE withinAuditSpan( + @Nullable AuditSpan auditSpan, + FunctionRaisingIOE operation) { + return auditSpan == null + ? operation + : (x) -> { + auditSpan.activate(); + return operation.apply(x); + }; + } + + /** + * Given a callable, return a new callable which + * activates the span before the inner invocation, without deactivating it. + * @param auditSpan audit span + * @param operation operation + * @param type of result + * @return a new callable, or the original operation if the span is null. + */ + public static Callable callableWithinAuditSpan( + @Nullable AuditSpan auditSpan, + Callable operation) { + return auditSpan == null + ? operation + : () -> { + auditSpan.activate(); + return operation.call(); + }; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/HttpReferrerAuditHeader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/HttpReferrerAuditHeader.java new file mode 100644 index 0000000000000..b2684e758892a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/HttpReferrerAuditHeader.java @@ -0,0 +1,503 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.store.audit; + +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.StringJoiner; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.audit.CommonAuditContext; +import org.apache.hadoop.fs.store.LogExactlyOnce; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_ID; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_OP; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PATH; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PATH2; +import static org.apache.hadoop.fs.audit.AuditConstants.REFERRER_ORIGIN_HOST; + +/** + * Contains all the logic for generating an HTTP "Referer" + * entry; includes escaping query params. + * Tests for this are in + * {@code org.apache.hadoop.fs.s3a.audit.TestHttpReferrerAuditHeader} + * so as to verify that header generation in the S3A auditors, and + * S3 log parsing, all work. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class HttpReferrerAuditHeader { + + /** + * Format of path to build: {@value}. + * the params passed in are (context ID, span ID, op). + * Update + * {@code TestHttpReferrerAuditHeader.SAMPLE_LOG_ENTRY} on changes + */ + public static final String REFERRER_PATH_FORMAT = "/hadoop/1/%3$s/%2$s/"; + + private static final Logger LOG = + LoggerFactory.getLogger(HttpReferrerAuditHeader.class); + + /** + * Log for warning of problems creating headers will only log of + * a problem once per process instance. + * This is to avoid logs being flooded with errors. + */ + private static final LogExactlyOnce WARN_OF_URL_CREATION = + new LogExactlyOnce(LOG); + + /** Context ID. */ + private final String contextId; + + /** operation name. */ + private final String operationName; + + /** Span ID. */ + private final String spanId; + + /** optional first path. */ + private final String path1; + + /** optional second path. */ + private final String path2; + + /** + * The header as created in the constructor; used in toString(). + * A new header is built on demand in {@link #buildHttpReferrer()} + * so that evaluated attributes are dynamically evaluated + * in the correct thread/place. + */ + private final String initialHeader; + + /** + * Map of simple attributes. + */ + private final Map attributes; + + /** + * Parameters dynamically evaluated on the thread just before + * the request is made. + */ + private final Map> evaluated; + + /** + * Elements to filter from the final header. + */ + private final Set filter; + + /** + * Instantiate. + * + * Context and operationId are expected to be well formed + * numeric/hex strings, at least adequate to be + * used as individual path elements in a URL. 
+ */ + private HttpReferrerAuditHeader( + final Builder builder) { + this.contextId = requireNonNull(builder.contextId); + this.evaluated = builder.evaluated; + this.filter = builder.filter; + this.operationName = requireNonNull(builder.operationName); + this.path1 = builder.path1; + this.path2 = builder.path2; + this.spanId = requireNonNull(builder.spanId); + + // share (not copy) the builder's attribute map, then extend it + attributes = builder.attributes; + + addAttribute(PARAM_OP, operationName); + addAttribute(PARAM_PATH, path1); + addAttribute(PARAM_PATH2, path2); + addAttribute(PARAM_ID, spanId); + + // patch in global context values where not set + Iterable> globalContextValues + = builder.globalContextValues; + if (globalContextValues != null) { + for (Map.Entry entry : globalContextValues) { + attributes.putIfAbsent(entry.getKey(), entry.getValue()); + } + } + + // build the referrer up now, so as to find/report problems early + initialHeader = buildHttpReferrer(); + } + + /** + * Build the referrer string. + * This includes dynamically evaluating all of the evaluated + * attributes. + * If there is an error creating the string it will be logged once + * per entry, and "" returned.
+ * @return a referrer string or "" + */ + public String buildHttpReferrer() { + + String header; + try { + String queries; + // Update any params which are dynamically evaluated + evaluated.forEach((key, eval) -> + addAttribute(key, eval.get())); + // now build the query parameters from all attributes, static and + // evaluated, stripping out any from the filter + queries = attributes.entrySet().stream() + .filter(e -> !filter.contains(e.getKey())) + .map(e -> e.getKey() + "=" + e.getValue()) + .collect(Collectors.joining("&")); + final URI uri = new URI("https", REFERRER_ORIGIN_HOST, + String.format(Locale.ENGLISH, REFERRER_PATH_FORMAT, + contextId, spanId, operationName), + queries, + null); + header = uri.toASCIIString(); + } catch (URISyntaxException e) { + WARN_OF_URL_CREATION.warn("Failed to build URI for auditor: " + e, e); + header = ""; + } + return header; + } + + /** + * Add a query parameter if not null/empty + * There's no need to escape here as it is done in the URI + * constructor. + * @param key query key + * @param value query value + */ + private void addAttribute(String key, + String value) { + if (StringUtils.isNotEmpty(value)) { + attributes.put(key, value); + } + } + + /** + * Set an attribute. If the value is non-null/empty, + * it will be used as a query parameter. + * + * @param key key to set + * @param value value. 
+ */ + public void set(final String key, final String value) { + addAttribute(requireNonNull(key), value); + } + + public String getContextId() { + return contextId; + } + + public String getOperationName() { + return operationName; + } + + public String getSpanId() { + return spanId; + } + + public String getPath1() { + return path1; + } + + public String getPath2() { + return path2; + } + + @Override + public String toString() { + return new StringJoiner(", ", + HttpReferrerAuditHeader.class.getSimpleName() + "[", "]") + .add(initialHeader) + .toString(); + } + + /** + * Perform any escaping to valid path elements in advance of + * new URI() doing this itself. Only '/' and '@' are + * converted (each to '+') at this point. + * @param source source string + * @return an escaped path element. + */ + public static String escapeToPathElement(CharSequence source) { + int len = source.length(); + StringBuilder r = new StringBuilder(len); + for (int i = 0; i < len; i++) { + char c = source.charAt(i); + String s = Character.toString(c); + switch (c) { + case '/': + case '@': + s = "+"; + break; + default: + break; + } + r.append(s); + } + return r.toString(); + + } + + /** + * Strip all leading and trailing double quotes from a header. + * This is needed when processing log entries. + * @param header field. + * @return field without quotes. + */ + public static String maybeStripWrappedQuotes(String header) { + String h = header; + // remove quotes if needed. + while (h.startsWith("\"")) { + h = h.substring(1); + } + while (h.endsWith("\"")) { + h = h.substring(0, h.length() - 1); + } + return h; + } + + /** + * Split up the string. Uses httpClient: make sure it is on the classpath. + * Any query param with a name but no value, e.g. ?something is + * returned in the map with an empty string as the value. + * @param header URI to parse + * @return a map of parameters. + * @throws URISyntaxException failure to build URI from header.
+ */ + public static Map extractQueryParameters(String header) + throws URISyntaxException { + URI uri = new URI(maybeStripWrappedQuotes(header)); + // get the decoded query + List params = URLEncodedUtils.parse(uri, + StandardCharsets.UTF_8); + Map result = new HashMap<>(params.size()); + for (NameValuePair param : params) { + String name = param.getName(); + String value = param.getValue(); + if (value == null) { + value = ""; + } + result.put(name, value); + } + return result; + } + + /** + * Get a builder. + * @return a new builder. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder. + * + * Context and operationId are expected to be well formed + * numeric/hex strings, at least adequate to be + * used as individual path elements in a URL. + */ + public static final class Builder { + + /** Context ID. */ + private String contextId; + + /** operation name. */ + private String operationName; + + /** operation ID. */ + private String spanId; + + /** optional first path. */ + private String path1; + + /** optional second path. */ + private String path2; + + /** Map of attributes to add as query parameters. */ + private final Map attributes = new HashMap<>(); + + /** + * Parameters dynamically evaluated on the thread just before + * the request is made. + */ + private final Map> evaluated = + new HashMap<>(); + + /** + * Global context values; defaults to that of + * {@link CommonAuditContext#getGlobalContextEntries()} and + * should not need to be changed. + */ + private Iterable> globalContextValues = + CommonAuditContext.getGlobalContextEntries(); + + /** + * Elements to filter from the final header. + */ + private Set filter = new HashSet<>(); + + private Builder() { + } + + /** + * Build. + * @return an HttpReferrerAuditHeader + */ + public HttpReferrerAuditHeader build() { + return new HttpReferrerAuditHeader(this); + } + + /** + * Set context ID. 
+ * @param value context + * @return the builder + */ + public Builder withContextId(final String value) { + contextId = value; + return this; + } + + /** + * Set Operation name. + * @param value new value + * @return the builder + */ + public Builder withOperationName(final String value) { + operationName = value; + return this; + } + + /** + * Set ID. + * @param value new value + * @return the builder + */ + public Builder withSpanId(final String value) { + spanId = value; + return this; + } + + /** + * Set Path1 of operation. + * @param value new value + * @return the builder + */ + public Builder withPath1(final String value) { + path1 = value; + return this; + } + + /** + * Set Path2 of operation. + * @param value new value + * @return the builder + */ + public Builder withPath2(final String value) { + path2 = value; + return this; + } + + /** + * Add all attributes to the current map. + * @param value new value + * @return the builder + */ + public Builder withAttributes(final Map value) { + attributes.putAll(value); + return this; + } + + /** + * Add an attribute to the current map. + * Replaces any with the existing key. + * @param key key to set/update + * @param value new value + * @return the builder + */ + public Builder withAttribute(String key, String value) { + attributes.put(key, value); + return this; + } + + /** + * Add all evaluated attributes to the current map. + * @param value new value + * @return the builder + */ + public Builder withEvaluated(final Map> value) { + evaluated.putAll(value); + return this; + } + + /** + * Add an evaluated attribute to the current map. + * Replaces any with the existing key. + * Set evaluated methods. 
+ * @param key key + * @param value new value + * @return the builder + */ + public Builder withEvaluated(String key, Supplier value) { + evaluated.put(key, value); + return this; + } + + /** + * Set the global context values (replaces the default binding + * to {@link CommonAuditContext#getGlobalContextEntries()}). + * @param value new value + * @return the builder + */ + public Builder withGlobalContextValues( + final Iterable> value) { + globalContextValues = value; + return this; + } + + /** + * Declare the fields to filter. + * @param fields iterable of field names. + * @return the builder + */ + public Builder withFilter(final Collection fields) { + this.filter = new HashSet<>(fields); + return this; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/package-info.java new file mode 100644 index 0000000000000..98fb5b59c3ac0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/store/audit/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * Auditing classes for internal + * use within the hadoop-* modules only. No stability guarantees. + * The public/evolving API is in {@code org.apache.hadoop.fs.audit}. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.store.audit; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ConfigUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ConfigUtil.java index 4c3dae9a9f99b..10caed7fd27d4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ConfigUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ConfigUtil.java @@ -48,7 +48,7 @@ public static String getConfigViewFsPrefix() { /** * Add a link to the config for the specified mount table * @param conf - add the link to this conf - * @param mountTableName + * @param mountTableName mountTable. * @param src - the src path name * @param target - the target URI link */ @@ -66,15 +66,15 @@ public static void addLink(Configuration conf, final String mountTableName, */ public static void addLink(final Configuration conf, final String src, final URI target) { - addLink( conf, Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE, - src, target); + addLink(conf, getDefaultMountTableName(conf), src, target); } /** * Add a LinkMergeSlash to the config for the specified mount table. - * @param conf - * @param mountTableName - * @param target + * + * @param conf configuration. + * @param mountTableName mountTable. + * @param target target. */ public static void addLinkMergeSlash(Configuration conf, final String mountTableName, final URI target) { @@ -84,19 +84,20 @@ public static void addLinkMergeSlash(Configuration conf, /** * Add a LinkMergeSlash to the config for the default mount table. 
- * @param conf - * @param target + * + * @param conf configuration. + * @param target targets. */ public static void addLinkMergeSlash(Configuration conf, final URI target) { - addLinkMergeSlash(conf, Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE, - target); + addLinkMergeSlash(conf, getDefaultMountTableName(conf), target); } /** * Add a LinkFallback to the config for the specified mount table. - * @param conf - * @param mountTableName - * @param target + * + * @param conf configuration. + * @param mountTableName mountTable. + * @param target targets. */ public static void addLinkFallback(Configuration conf, final String mountTableName, final URI target) { @@ -106,19 +107,20 @@ public static void addLinkFallback(Configuration conf, /** * Add a LinkFallback to the config for the default mount table. - * @param conf - * @param target + * + * @param conf configuration. + * @param target targets. */ public static void addLinkFallback(Configuration conf, final URI target) { - addLinkFallback(conf, Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE, - target); + addLinkFallback(conf, getDefaultMountTableName(conf), target); } /** * Add a LinkMerge to the config for the specified mount table. - * @param conf - * @param mountTableName - * @param targets + * + * @param conf configuration. + * @param mountTableName mountTable. + * @param targets targets. */ public static void addLinkMerge(Configuration conf, final String mountTableName, final URI[] targets) { @@ -128,20 +130,39 @@ public static void addLinkMerge(Configuration conf, /** * Add a LinkMerge to the config for the default mount table. - * @param conf - * @param targets + * + * @param conf configuration. + * @param targets targets array. */ public static void addLinkMerge(Configuration conf, final URI[] targets) { - addLinkMerge(conf, Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE, targets); + addLinkMerge(conf, getDefaultMountTableName(conf), targets); + } + + /** + * Add nfly link to configuration for the given mount table. 
+ * + * @param conf configuration. + * @param mountTableName mount table. + * @param src src. + * @param settings settings. + * @param targets targets. + */ + public static void addLinkNfly(Configuration conf, String mountTableName, + String src, String settings, final String targets) { + conf.set( + getConfigViewFsPrefix(mountTableName) + "." + + Constants.CONFIG_VIEWFS_LINK_NFLY + "." + settings + "." + src, + targets); } /** + * Add nfly link to configuration for the given mount table. * - * @param conf - * @param mountTableName - * @param src - * @param settings - * @param targets + * @param conf configuration. + * @param mountTableName mount table. + * @param src src. + * @param settings settings. + * @param targets targets. */ public static void addLinkNfly(Configuration conf, String mountTableName, String src, String settings, final URI ... targets) { @@ -149,16 +170,35 @@ public static void addLinkNfly(Configuration conf, String mountTableName, settings = settings == null ? "minReplication=2,repairOnRead=true" : settings; - - conf.set(getConfigViewFsPrefix(mountTableName) + "." + - Constants.CONFIG_VIEWFS_LINK_NFLY + "." + settings + "." + src, + addLinkNfly(conf, mountTableName, src, settings, StringUtils.uriToString(targets)); } public static void addLinkNfly(final Configuration conf, final String src, final URI ... targets) { - addLinkNfly(conf, Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE, src, null, - targets); + addLinkNfly(conf, getDefaultMountTableName(conf), src, null, targets); + } + + /** + * Add a LinkRegex to the config for the specified mount table. 
+ * @param conf - get mountable config from this conf + * @param mountTableName - the mountable name of the regex config item + * @param srcRegex - the src path regex expression that applies to this config + * @param targetStr - the string of target path + * @param interceptorSettings - the serialized interceptor string to be + * applied while resolving the mapping + */ + public static void addLinkRegex( + Configuration conf, final String mountTableName, final String srcRegex, + final String targetStr, final String interceptorSettings) { + String prefix = getConfigViewFsPrefix(mountTableName) + "." + + Constants.CONFIG_VIEWFS_LINK_REGEX + "."; + if ((interceptorSettings != null) && (!interceptorSettings.isEmpty())) { + prefix = prefix + interceptorSettings + + RegexMountPoint.SETTING_SRCREGEX_SEP; + } + String key = prefix + srcRegex; + conf.set(key, targetStr); } /** @@ -168,14 +208,14 @@ public static void addLinkNfly(final Configuration conf, final String src, */ public static void setHomeDirConf(final Configuration conf, final String homedir) { - setHomeDirConf( conf, - Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE, homedir); + setHomeDirConf(conf, getDefaultMountTableName(conf), homedir); } /** * Add config variable for homedir the specified mount table * @param conf - add to this conf * @param homedir - the home dir path starting with slash + * @param mountTableName - the mount table. 
*/ public static void setHomeDirConf(final Configuration conf, final String mountTableName, final String homedir) { @@ -193,7 +233,7 @@ public static void setHomeDirConf(final Configuration conf, * @return home dir value, null if variable is not in conf */ public static String getHomeDirValue(final Configuration conf) { - return getHomeDirValue(conf, Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE); + return getHomeDirValue(conf, getDefaultMountTableName(conf)); } /** @@ -207,4 +247,18 @@ public static String getHomeDirValue(final Configuration conf, return conf.get(getConfigViewFsPrefix(mountTableName) + "." + Constants.CONFIG_VIEWFS_HOMEDIR); } + + /** + * Get the name of the default mount table to use. If + * {@link Constants#CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE_NAME_KEY} is specified, + * it's value is returned. Otherwise, + * {@link Constants#CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE} is returned. + * + * @param conf Configuration to use. + * @return the name of the default mount table to use. + */ + public static String getDefaultMountTableName(final Configuration conf) { + return conf.get(Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE_NAME_KEY, + Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java index 37f1a16800e7d..ca0b5ec56b06a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java @@ -30,18 +30,29 @@ public interface Constants { * Prefix for the config variable prefix for the ViewFs mount-table */ public static final String CONFIG_VIEWFS_PREFIX = "fs.viewfs.mounttable"; + + /** + * Prefix for the config variable for the ViewFs mount-table path. 
+ */ + String CONFIG_VIEWFS_MOUNTTABLE_PATH = CONFIG_VIEWFS_PREFIX + ".path"; /** * Prefix for the home dir for the mount table - if not specified * then the hadoop default value (/user) is used. */ public static final String CONFIG_VIEWFS_HOMEDIR = "homedir"; - + + /** + * Config key to specify the name of the default mount table. + */ + String CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE_NAME_KEY = + "fs.viewfs.mounttable.default.name.key"; + /** * Config variable name for the default mount table. */ public static final String CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE = "default"; - + /** * Config variable full prefix for the default mount table. */ @@ -75,6 +86,14 @@ public interface Constants { */ String CONFIG_VIEWFS_LINK_MERGE_SLASH = "linkMergeSlash"; + /** + * Config variable for specifying a regex link which uses regular expressions + * as source and target could use group captured in src. + * E.g. (^/(?<firstDir>\\w+), /prefix-${firstDir}) => + * (/path1/file1 => /prefix-path1/file1) + */ + String CONFIG_VIEWFS_LINK_REGEX = "linkRegex"; + FsPermission PERMISSION_555 = new FsPermission((short) 0555); String CONFIG_VIEWFS_RENAME_STRATEGY = "fs.viewfs.rename.strategy"; @@ -85,4 +104,32 @@ public interface Constants { String CONFIG_VIEWFS_ENABLE_INNER_CACHE = "fs.viewfs.enable.inner.cache"; boolean CONFIG_VIEWFS_ENABLE_INNER_CACHE_DEFAULT = true; + + /** + * Enable ViewFileSystem to show mountlinks as symlinks. + */ + String CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS = + "fs.viewfs.mount.links.as.symlinks"; + + boolean CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT = true; + + /** + * When initializing the viewfs, authority will be used as the mount table + * name to find the mount link configurations. To make the mount table name + * unique, we may want to ignore port if initialized uri authority contains + * port number. 
By default, we will consider port number also in + * ViewFileSystem(This default value false, because to support existing + * deployments continue with the current behavior). + */ + String CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME = + "fs.viewfs.ignore.port.in.mount.table.name"; + + boolean CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT = false; + + /** + * Force ViewFileSystem to return a trashRoot that is inside a mount point. + */ + String CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT = + "fs.viewfs.trash.force-inside-mount-point"; + boolean CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT_DEFAULT = false; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/FsGetter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/FsGetter.java new file mode 100644 index 0000000000000..f723f238e199e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/FsGetter.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; +import java.net.URI; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +/** + * File system instance getter. + */ +@InterfaceAudience.LimitedPrivate({"Common"}) +@InterfaceStability.Unstable +public class FsGetter { + + /** + * Gets new file system instance of given uri. + * @param uri uri. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. + * @return file system. + */ + public FileSystem getNewInstance(URI uri, Configuration conf) + throws IOException { + return FileSystem.newInstance(uri, conf); + } + + /** + * Gets file system instance of given uri. + * + * @param uri uri. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. + * @return FileSystem. + */ + public FileSystem get(URI uri, Configuration conf) throws IOException { + return FileSystem.get(uri, conf); + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/HCFSMountTableConfigLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/HCFSMountTableConfigLoader.java new file mode 100644 index 0000000000000..72de9ee8a7e28 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/HCFSMountTableConfigLoader.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An implementation for Apache Hadoop compatible file system based mount-table + * file loading. + */ +public class HCFSMountTableConfigLoader implements MountTableConfigLoader { + private static final String REGEX_DOT = "[.]"; + private static final Logger LOGGER = + LoggerFactory.getLogger(HCFSMountTableConfigLoader.class); + private Path mountTable = null; + + /** + * Loads the mount-table configuration from hadoop compatible file system and + * add the configuration items to given configuration. Mount-table + * configuration format should be suffixed with version number. + * Format: mount-table.<versionNumber>.xml + * Example: mount-table.1.xml + * When user wants to update mount-table, the expectation is to upload new + * mount-table configuration file with monotonically increasing integer as + * version number. This API loads the highest version number file. We can + * also configure single file path directly. + * + * @param mountTableConfigPath : A directory path where mount-table files + * stored or a mount-table file path. We recommend to configure + * directory with the mount-table version files. 
+ * @param conf : to add the mount table as resource. + */ + @Override + public void load(String mountTableConfigPath, Configuration conf) + throws IOException { + this.mountTable = new Path(mountTableConfigPath); + String scheme = mountTable.toUri().getScheme(); + FsGetter fsGetter = new ViewFileSystemOverloadScheme.ChildFsGetter(scheme); + try (FileSystem fs = fsGetter.getNewInstance(mountTable.toUri(), conf)) { + RemoteIterator listFiles = + fs.listFiles(mountTable, false); + LocatedFileStatus lfs = null; + int higherVersion = -1; + while (listFiles.hasNext()) { + LocatedFileStatus curLfs = listFiles.next(); + String cur = curLfs.getPath().getName(); + String[] nameParts = cur.split(REGEX_DOT); + if (nameParts.length < 2) { + logInvalidFileNameFormat(cur); + continue; // invalid file name + } + int curVersion = higherVersion; + try { + curVersion = Integer.parseInt(nameParts[nameParts.length - 2]); + } catch (NumberFormatException nfe) { + logInvalidFileNameFormat(cur); + continue; + } + + if (curVersion > higherVersion) { + higherVersion = curVersion; + lfs = curLfs; + } + } + + if (lfs == null) { + // No valid mount table file found. + // TODO: Should we fail? Currently viewfs init will fail if no mount + // links anyway. + LOGGER.warn("No valid mount-table file exist at: {}. At least one " + + "mount-table file should present with the name format: " + + "mount-table..xml", mountTableConfigPath); + return; + } + // Latest version file. + Path latestVersionMountTable = lfs.getPath(); + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Loading the mount-table {} into configuration.", + latestVersionMountTable); + } + try (FSDataInputStream open = fs.open(latestVersionMountTable)) { + Configuration newConf = new Configuration(false); + newConf.addResource(open); + // This will add configuration props as resource, instead of stream + // itself. So, that stream can be closed now. 
+ conf.addResource(newConf); + } + } + } + + private void logInvalidFileNameFormat(String cur) { + LOGGER.warn("Invalid file name format for mount-table version file: {}. " + + "The valid file name format is mount-table-name..xml", + cur); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java index 69923438ecc20..05834718811eb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java @@ -17,7 +17,8 @@ */ package org.apache.hadoop.fs.viewfs; -import com.google.common.base.Preconditions; +import java.util.function.Function; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; @@ -34,10 +35,13 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * InodeTree implements a mount-table as a tree of inodes. @@ -45,19 +49,22 @@ * In order to use it the caller must subclass it and implement * the abstract methods {@link #getTargetFileSystem(INodeDir)}, etc. 
* - * The mountable is initialized from the config variables as + * The mountable is initialized from the config variables as * specified in {@link ViewFs} * * @param is AbstractFileSystem or FileSystem * * The two main methods are - * {@link #InodeTree(Configuration, String)} // constructor - * {@link #resolve(String, boolean)} + * {@link #InodeTree(Configuration, String, URI, boolean)} // constructor + * {@link #resolve(String, boolean)} */ @InterfaceAudience.Private @InterfaceStability.Unstable -abstract class InodeTree { +public abstract class InodeTree { + private static final Logger LOGGER = + LoggerFactory.getLogger(InodeTree.class.getName()); + enum ResultKind { INTERNAL_DIR, EXTERNAL_DIR @@ -67,12 +74,14 @@ enum ResultKind { // the root of the mount table private final INode root; // the fallback filesystem - private final INodeLink rootFallbackLink; + private INodeLink rootFallbackLink; // the homedir for this mount table private final String homedirPrefix; private List> mountPoints = new ArrayList>(); + private List> regexMountPointList = + new ArrayList>(); - static class MountPoint { + public static class MountPoint { String src; INodeLink target; @@ -80,6 +89,22 @@ static class MountPoint { src = srcPath; target = mountLink; } + + /** + * Returns the source of mount point. + * @return The source + */ + public String getSource() { + return this.src; + } + + /** + * Returns the target link. 
+ * @return The target INode link + */ + public INodeLink getTarget() { + return this.target; + } } /** @@ -121,8 +146,9 @@ boolean isLink() { */ static class INodeDir extends INode { private final Map> children = new HashMap<>(); - private T internalDirFs = null; //filesystem of this internal directory + private T internalDirFs = null; //filesystem of this internal directory private boolean isRoot = false; + private INodeLink fallbackLink = null; INodeDir(final String pathToNode, final UserGroupInformation aUgi) { super(pathToNode, aUgi); @@ -149,6 +175,17 @@ boolean isRoot() { return isRoot; } + INodeLink getFallbackLink() { + return fallbackLink; + } + + void addFallbackLink(INodeLink link) throws IOException { + if (!isRoot) { + throw new IOException("Fallback link can only be added for root"); + } + this.fallbackLink = link; + } + Map> getChildren() { return Collections.unmodifiableMap(children); } @@ -213,7 +250,14 @@ enum LinkType { * Config prefix: fs.viewfs.mounttable..linkNfly * Refer: {@link Constants#CONFIG_VIEWFS_LINK_NFLY} */ - NFLY; + NFLY, + /** + * Link entry whose source is a regex expression and whose target refers to + * matched groups from the source + * Config prefix: fs.viewfs.mounttable..linkRegex + * Refer: {@link Constants#CONFIG_VIEWFS_LINK_REGEX} + */ + REGEX; } /** @@ -222,21 +266,24 @@ enum LinkType { * A merge dir link is a merge (junction) of links to dirs: * example : merge of 2 dirs - * /users -> hdfs:nn1//users - * /users -> hdfs:nn2//users + * /users -> hdfs:nn1//users + * /users -> hdfs:nn2//users * * For a merge, each target is checked to be dir when created but if target * is changed later it is then ignored (a dir with null entries) */ - static class INodeLink extends INode { - final URI[] targetDirLinkList; - final T targetFileSystem; // file system object created from the link. 
+ // Function to initialize file system. Only applicable for simple links + private Function fileSystemInitMethod; + private final Object lock = new Object(); /** * Construct a mergeLink or nfly. */ INodeLink(final String pathToNode, final UserGroupInformation aUgi, - final T targetMergeFs, final URI[] aTargetDirLinkList) { + final T targetMergeFs, final String[] aTargetDirLinkList) { super(pathToNode, aUgi); targetFileSystem = targetMergeFs; targetDirLinkList = aTargetDirLinkList; @@ -246,18 +293,22 @@ static class INodeLink extends INode { * Construct a simple link (i.e. not a mergeLink). */ INodeLink(final String pathToNode, final UserGroupInformation aUgi, - final T targetFs, final URI aTargetDirLink) { + Function createFileSystemMethod, + final String aTargetDirLink) throws URISyntaxException { super(pathToNode, aUgi); - targetFileSystem = targetFs; - targetDirLinkList = new URI[1]; - targetDirLinkList[0] = aTargetDirLink; + targetFileSystem = null; + targetDirLinkList = new String[1]; + targetDirLinkList[0] = new URI(aTargetDirLink).toString(); + this.fileSystemInitMethod = createFileSystemMethod; } /** * Get the target of the link. If a merge link then it returned * as "," separated URI list. + * + * @return the path. */ - Path getTargetLink() { + public Path getTargetLink() { StringBuilder result = new StringBuilder(targetDirLinkList[0].toString()); // If merge link, use "," as separator between the merged URIs for (int i = 1; i < targetDirLinkList.length; ++i) { @@ -271,7 +322,31 @@ boolean isInternalDir() { return false; } - public T getTargetFileSystem() { + /** + * Get the instance of FileSystem to use, creating one if needed. + * @return An Initialized instance of T + * @throws IOException raised on errors performing I/O. 
+ */ + public T getTargetFileSystem() throws IOException { + if (targetFileSystem != null) { + return targetFileSystem; + } + // For non NFLY and MERGE links, we initialize the FileSystem when the + // corresponding mount path is accessed. + if (targetDirLinkList.length == 1) { + synchronized (lock) { + if (targetFileSystem != null) { + return targetFileSystem; + } + targetFileSystem = + fileSystemInitMethod.apply(URI.create(targetDirLinkList[0])); + if (targetFileSystem == null) { + throw new IOException( + "Could not initialize target File System for URI : " + + targetDirLinkList[0]); + } + } + } return targetFileSystem; } } @@ -332,7 +407,7 @@ private void createLink(final String src, final String target, switch (linkType) { case SINGLE: newLink = new INodeLink(fullPath, aUgi, - getTargetFileSystem(new URI(target)), new URI(target)); + initAndGetTargetFs(), target); break; case SINGLE_FALLBACK: case MERGE_SLASH: @@ -341,10 +416,10 @@ private void createLink(final String src, final String target, throw new IllegalArgumentException("Unexpected linkType: " + linkType); case MERGE: case NFLY: - final URI[] targetUris = StringUtils.stringToURI( - StringUtils.getStrings(target)); + final String[] targetUris = StringUtils.getStrings(target); newLink = new INodeLink(fullPath, aUgi, - getTargetFileSystem(settings, targetUris), targetUris); + getTargetFileSystem(settings, StringUtils.stringToURI(targetUris)), + targetUris); break; default: throw new IllegalArgumentException(linkType + ": Infeasible linkType"); @@ -356,13 +431,12 @@ private void createLink(final String src, final String target, /** * The user of this class must subclass and implement the following * 3 abstract methods. - * @throws IOException + * @return Function. 
+ */ + protected abstract Function initAndGetTargetFs(); protected abstract T getTargetFileSystem(INodeDir dir) - throws URISyntaxException; + throws URISyntaxException, IOException; protected abstract T getTargetFileSystem(String settings, URI[] mergeFsURIs) throws UnsupportedFileSystemException, URISyntaxException, IOException; @@ -381,7 +455,16 @@ private boolean hasFallbackLink() { return rootFallbackLink != null; } - private INodeLink getRootFallbackLink() { + /** + * @return true if the root is represented as an internalDir. With + * LinkMergeSlash there is a root-to-root mapping, so the root is not + * represented as an internalDir. + */ + public boolean isRootInternalDir() { + return root.isInternalDir(); + } + + public INodeLink getRootFallbackLink() { Preconditions.checkState(root.isInternalDir()); return rootFallbackLink; } @@ -439,21 +522,29 @@ Configuration getConfig() { } /** - * Create Inode Tree from the specified mount-table specified in Config - * @param config - the mount table keys are prefixed with - * FsConstants.CONFIG_VIEWFS_PREFIX - * @param viewName - the name of the mount table - if null use defaultMT name - * @throws UnsupportedFileSystemException - * @throws URISyntaxException - * @throws FileAlreadyExistsException - * @throws IOException + * Create Inode Tree from the mount-table specified in Config. + * + * @param config the mount table keys are prefixed with + * FsConstants.CONFIG_VIEWFS_PREFIX. + * @param viewName the name of the mount table + * if null use defaultMT name. + * @param theUri the URI the mount table is being initialized for. + * @param initingUriAsFallbackOnNoMounts use theUri as fallback if no mounts. + * @throws UnsupportedFileSystemException file system for uri is + * not found. + * @throws URISyntaxException if the URI does not have an authority + * it is badly formed. 
+ * @throws FileAlreadyExistsException there is a file at the path specified + * or is discovered on one of its ancestors. + * @throws IOException raised on errors performing I/O. */ - protected InodeTree(final Configuration config, final String viewName) + protected InodeTree(final Configuration config, final String viewName, + final URI theUri, boolean initingUriAsFallbackOnNoMounts) throws UnsupportedFileSystemException, URISyntaxException, FileAlreadyExistsException, IOException { String mountTableName = viewName; if (mountTableName == null) { - mountTableName = Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE; + mountTableName = ConfigUtil.getDefaultMountTableName(config); } homedirPrefix = ConfigUtil.getHomeDirValue(config, mountTableName); @@ -472,90 +563,87 @@ protected InodeTree(final Configuration config, final String viewName) final UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); for (Entry si : config) { final String key = si.getKey(); - if (key.startsWith(mountTablePrefix)) { - gotMountTableEntry = true; - LinkType linkType; - String src = key.substring(mountTablePrefix.length()); - String settings = null; - if (src.startsWith(linkPrefix)) { - src = src.substring(linkPrefix.length()); - if (src.equals(SlashPath.toString())) { - throw new UnsupportedFileSystemException("Unexpected mount table " - + "link entry '" + key + "'. Use " - + Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH + " instead!"); - } - linkType = LinkType.SINGLE; - } else if (src.startsWith(linkFallbackPrefix)) { - if (src.length() != linkFallbackPrefix.length()) { - throw new IOException("ViewFs: Mount points initialization error." 
+ - " Invalid " + Constants.CONFIG_VIEWFS_LINK_FALLBACK + - " entry in config: " + src); - } - linkType = LinkType.SINGLE_FALLBACK; - } else if (src.startsWith(linkMergePrefix)) { // A merge link - src = src.substring(linkMergePrefix.length()); - linkType = LinkType.MERGE; - } else if (src.startsWith(linkMergeSlashPrefix)) { - // This is a LinkMergeSlash entry. This entry should - // not have any additional source path. - if (src.length() != linkMergeSlashPrefix.length()) { - throw new IOException("ViewFs: Mount points initialization error." + - " Invalid " + Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH + - " entry in config: " + src); - } - linkType = LinkType.MERGE_SLASH; - } else if (src.startsWith(Constants.CONFIG_VIEWFS_LINK_NFLY)) { - // prefix.settings.src - src = src.substring(Constants.CONFIG_VIEWFS_LINK_NFLY.length() + 1); - // settings.src - settings = src.substring(0, src.indexOf('.')); - // settings - - // settings.src - src = src.substring(settings.length() + 1); - // src - - linkType = LinkType.NFLY; - } else if (src.startsWith(Constants.CONFIG_VIEWFS_HOMEDIR)) { - // ignore - we set home dir from config - continue; - } else { - throw new IOException("ViewFs: Cannot initialize: Invalid entry in " + - "Mount table in config: " + src); + if (!key.startsWith(mountTablePrefix)) { + continue; + } + + gotMountTableEntry = true; + LinkType linkType; + String src = key.substring(mountTablePrefix.length()); + String settings = null; + if (src.startsWith(linkPrefix)) { + src = src.substring(linkPrefix.length()); + if (src.equals(SlashPath.toString())) { + throw new UnsupportedFileSystemException("Unexpected mount table " + + "link entry '" + key + "'. 
Use " + + Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH + " instead!"); } + linkType = LinkType.SINGLE; + } else if (src.startsWith(linkFallbackPrefix)) { + checkMntEntryKeyEqualsTarget(src, linkFallbackPrefix); + linkType = LinkType.SINGLE_FALLBACK; + } else if (src.startsWith(linkMergePrefix)) { // A merge link + src = src.substring(linkMergePrefix.length()); + linkType = LinkType.MERGE; + } else if (src.startsWith(linkMergeSlashPrefix)) { + // This is a LinkMergeSlash entry. This entry should + // not have any additional source path. + checkMntEntryKeyEqualsTarget(src, linkMergeSlashPrefix); + linkType = LinkType.MERGE_SLASH; + } else if (src.startsWith(Constants.CONFIG_VIEWFS_LINK_NFLY)) { + // prefix.settings.src + src = src.substring(Constants.CONFIG_VIEWFS_LINK_NFLY.length() + 1); + // settings.src + settings = src.substring(0, src.indexOf('.')); + // settings + + // settings.src + src = src.substring(settings.length() + 1); + // src + + linkType = LinkType.NFLY; + } else if (src.startsWith(Constants.CONFIG_VIEWFS_LINK_REGEX)) { + linkEntries.add( + buildLinkRegexEntry(config, ugi, src, si.getValue())); + continue; + } else if (src.startsWith(Constants.CONFIG_VIEWFS_HOMEDIR)) { + // ignore - we set home dir from config + continue; + } else { + throw new IOException("ViewFs: Cannot initialize: Invalid entry in " + + "Mount table in config: " + src); + } - final String target = si.getValue(); - if (linkType != LinkType.MERGE_SLASH) { - if (isMergeSlashConfigured) { - throw new IOException("Mount table " + mountTableName - + " has already been configured with a merge slash link. " - + "A regular link should not be added."); - } - linkEntries.add( - new LinkEntry(src, target, linkType, settings, ugi, config)); - } else { - if (!linkEntries.isEmpty()) { - throw new IOException("Mount table " + mountTableName - + " has already been configured with regular links. 
" - + "A merge slash link should not be configured."); - } - if (isMergeSlashConfigured) { - throw new IOException("Mount table " + mountTableName - + " has already been configured with a merge slash link. " - + "Multiple merge slash links for the same mount table is " - + "not allowed."); - } - isMergeSlashConfigured = true; - mergeSlashTarget = target; + final String target = si.getValue(); + if (linkType != LinkType.MERGE_SLASH) { + if (isMergeSlashConfigured) { + throw new IOException("Mount table " + mountTableName + + " has already been configured with a merge slash link. " + + "A regular link should not be added."); + } + linkEntries.add( + new LinkEntry(src, target, linkType, settings, ugi, config)); + } else { + if (!linkEntries.isEmpty()) { + throw new IOException("Mount table " + mountTableName + + " has already been configured with regular links. " + + "A merge slash link should not be configured."); } + if (isMergeSlashConfigured) { + throw new IOException("Mount table " + mountTableName + + " has already been configured with a merge slash link. " + + "Multiple merge slash links for the same mount table is " + + "not allowed."); + } + isMergeSlashConfigured = true; + mergeSlashTarget = target; } - } + } // End of for loop. 
if (isMergeSlashConfigured) { Preconditions.checkNotNull(mergeSlashTarget); root = new INodeLink(mountTableName, ugi, - getTargetFileSystem(new URI(mergeSlashTarget)), - new URI(mergeSlashTarget)); + initAndGetTargetFs(), mergeSlashTarget); mountPoints.add(new MountPoint("/", (INodeLink) root)); rootFallbackLink = null; } else { @@ -564,7 +652,8 @@ protected InodeTree(final Configuration config, final String viewName) getRootDir().setRoot(true); INodeLink fallbackLink = null; for (LinkEntry le : linkEntries) { - if (le.isLinkType(LinkType.SINGLE_FALLBACK)) { + switch (le.getLinkType()) { + case SINGLE_FALLBACK: if (fallbackLink != null) { throw new IOException("Mount table " + mountTableName + " has already been configured with a link fallback. " @@ -572,23 +661,86 @@ protected InodeTree(final Configuration config, final String viewName) + "not allowed."); } fallbackLink = new INodeLink(mountTableName, ugi, - getTargetFileSystem(new URI(le.getTarget())), - new URI(le.getTarget())); - } else { + initAndGetTargetFs(), le.getTarget()); + continue; + case REGEX: + addRegexMountEntry(le); + continue; + default: createLink(le.getSrc(), le.getTarget(), le.getLinkType(), le.getSettings(), le.getUgi(), le.getConfig()); } } rootFallbackLink = fallbackLink; + getRootDir().addFallbackLink(rootFallbackLink); } if (!gotMountTableEntry) { - throw new IOException( - "ViewFs: Cannot initialize: Empty Mount table in config for " + - "viewfs://" + mountTableName + "/"); + if (!initingUriAsFallbackOnNoMounts) { + throw new IOException(new StringBuilder( + "ViewFs: Cannot initialize: Empty Mount table in config for ") + .append(theUri.getScheme()).append("://").append(mountTableName) + .append("/").toString()); + } + StringBuilder msg = + new StringBuilder("Empty mount table detected for ").append(theUri) + .append(" and considering itself as a linkFallback."); + FileSystem.LOG.info(msg.toString()); + rootFallbackLink = new INodeLink(mountTableName, ugi, + initAndGetTargetFs(), 
theUri.toString()); + getRootDir().addFallbackLink(rootFallbackLink); + } + } + + private void checkMntEntryKeyEqualsTarget( + String mntEntryKey, String targetMntEntryKey) throws IOException { + if (!mntEntryKey.equals(targetMntEntryKey)) { + throw new IOException("ViewFs: Mount points initialization error." + + " Invalid " + targetMntEntryKey + + " entry in config: " + mntEntryKey); } } + private void addRegexMountEntry(LinkEntry le) throws IOException { + LOGGER.info("Add regex mount point:" + le.getSrc() + + ", target:" + le.getTarget() + + ", interceptor settings:" + le.getSettings()); + RegexMountPoint regexMountPoint = + new RegexMountPoint( + this, le.getSrc(), le.getTarget(), le.getSettings()); + regexMountPoint.initialize(); + regexMountPointList.add(regexMountPoint); + } + + private LinkEntry buildLinkRegexEntry( + Configuration config, UserGroupInformation ugi, + String mntEntryStrippedKey, String mntEntryValue) { + String linkKeyPath = null; + String settings = null; + final String linkRegexPrefix = Constants.CONFIG_VIEWFS_LINK_REGEX + "."; + // settings#.linkKey + String settingsAndLinkKeyPath = + mntEntryStrippedKey.substring(linkRegexPrefix.length()); + int settingLinkKeySepIndex = settingsAndLinkKeyPath + .indexOf(RegexMountPoint.SETTING_SRCREGEX_SEP); + if (settingLinkKeySepIndex == -1) { + // There's no settings + linkKeyPath = settingsAndLinkKeyPath; + settings = null; + } else { + // settings#.linkKey style configuration + // settings from settings#.linkKey + settings = + settingsAndLinkKeyPath.substring(0, settingLinkKeySepIndex); + // linkKeyPath + linkKeyPath = settingsAndLinkKeyPath.substring( + settings.length() + RegexMountPoint.SETTING_SRCREGEX_SEP + .length()); + } + return new LinkEntry( + linkKeyPath, mntEntryValue, LinkType.REGEX, settings, ugi, config); + } + /** * Resolve returns ResolveResult. 
* The caller can continue the resolution of the remainingPath @@ -600,42 +752,52 @@ protected InodeTree(final Configuration config, final String viewName) * If the input pathname leads to an internal mount-table entry then * the target file system is one that represents the internal inode. */ - static class ResolveResult { + public static class ResolveResult { final ResultKind kind; final T targetFileSystem; final String resolvedPath; final Path remainingPath; // to resolve in the target FileSystem + private final boolean isLastInternalDirLink; ResolveResult(final ResultKind k, final T targetFs, final String resolveP, - final Path remainingP) { + final Path remainingP, boolean isLastIntenalDirLink) { kind = k; targetFileSystem = targetFs; resolvedPath = resolveP; remainingPath = remainingP; + this.isLastInternalDirLink = isLastIntenalDirLink; } // Internal dir path resolution completed within the mount table boolean isInternalDir() { return (kind == ResultKind.INTERNAL_DIR); } + + // Indicates whether the internal dir path resolution completed at the link + // or resolved due to fallback. + boolean isLastInternalDirLink() { + return this.isLastInternalDirLink; + } } /** - * Resolve the pathname p relative to root InodeDir + * Resolve the pathname p relative to root InodeDir. * @param p - input path - * @param resolveLastComponent + * @param resolveLastComponent resolveLastComponent. * @return ResolveResult which allows further resolution of the remaining path - * @throws FileNotFoundException + * @throws IOException raised on errors performing I/O. */ - ResolveResult resolve(final String p, final boolean resolveLastComponent) - throws FileNotFoundException { + public ResolveResult resolve(final String p, final boolean resolveLastComponent) + throws IOException { + ResolveResult resolveResult = null; String[] path = breakIntoPathComponents(p); if (path.length <= 1) { // special case for when path is "/" T targetFs = root.isInternalDir() ? 
- getRootDir().getInternalDirFs() : getRootLink().getTargetFileSystem(); - ResolveResult res = new ResolveResult(ResultKind.INTERNAL_DIR, - targetFs, root.fullPath, SlashPath); - return res; + getRootDir().getInternalDirFs() + : getRootLink().getTargetFileSystem(); + resolveResult = new ResolveResult(ResultKind.INTERNAL_DIR, + targetFs, root.fullPath, SlashPath, false); + return resolveResult; } /** @@ -651,22 +813,30 @@ ResolveResult resolve(final String p, final boolean resolveLastComponent) remainingPathStr.append("/").append(path[i]); } remainingPath = new Path(remainingPathStr.toString()); - ResolveResult res = new ResolveResult(ResultKind.EXTERNAL_DIR, - getRootLink().getTargetFileSystem(), root.fullPath, remainingPath); - return res; + resolveResult = new ResolveResult(ResultKind.EXTERNAL_DIR, + getRootLink().getTargetFileSystem(), root.fullPath, remainingPath, + true); + return resolveResult; } Preconditions.checkState(root.isInternalDir()); INodeDir curInode = getRootDir(); + // Try to resolve path in the regex mount point + resolveResult = tryResolveInRegexMountpoint(p, resolveLastComponent); + if (resolveResult != null) { + return resolveResult; + } + int i; // ignore first slash for (i = 1; i < path.length - (resolveLastComponent ? 
0 : 1); i++) { INode nextInode = curInode.resolveInternal(path[i]); if (nextInode == null) { if (hasFallbackLink()) { - return new ResolveResult(ResultKind.EXTERNAL_DIR, - getRootFallbackLink().getTargetFileSystem(), - root.fullPath, new Path(p)); + resolveResult = new ResolveResult(ResultKind.EXTERNAL_DIR, + getRootFallbackLink().getTargetFileSystem(), root.fullPath, + new Path(p), false); + return resolveResult; } else { StringBuilder failedAt = new StringBuilder(path[0]); for (int j = 1; j <= i; ++j) { @@ -683,16 +853,17 @@ ResolveResult resolve(final String p, final boolean resolveLastComponent) if (i >= path.length - 1) { remainingPath = SlashPath; } else { - StringBuilder remainingPathStr = new StringBuilder("/" + path[i + 1]); + StringBuilder remainingPathStr = + new StringBuilder("/" + path[i + 1]); for (int j = i + 2; j < path.length; ++j) { remainingPathStr.append('/').append(path[j]); } remainingPath = new Path(remainingPathStr.toString()); } - final ResolveResult res = - new ResolveResult(ResultKind.EXTERNAL_DIR, - link.getTargetFileSystem(), nextInode.fullPath, remainingPath); - return res; + resolveResult = new ResolveResult(ResultKind.EXTERNAL_DIR, + link.getTargetFileSystem(), nextInode.fullPath, remainingPath, + true); + return resolveResult; } else if (nextInode.isInternalDir()) { curInode = (INodeDir) nextInode; } @@ -713,13 +884,85 @@ ResolveResult resolve(final String p, final boolean resolveLastComponent) } remainingPath = new Path(remainingPathStr.toString()); } - final ResolveResult res = - new ResolveResult(ResultKind.INTERNAL_DIR, - curInode.getInternalDirFs(), curInode.fullPath, remainingPath); - return res; + resolveResult = new ResolveResult(ResultKind.INTERNAL_DIR, + curInode.getInternalDirFs(), curInode.fullPath, remainingPath, false); + return resolveResult; + } + + /** + * Walk through all regex mount points to see + * whether the path match any regex expressions. + * E.g. 
link: ^/user/(?<username>\\w+) => s3://$user.apache.com/_${user} + * srcPath: is /user/hadoop/dir1 + * resolveLastComponent: true + * then return value is s3://hadoop.apache.com/_hadoop + * + * @param srcPath srcPath. + * @param resolveLastComponent resolveLastComponent. + * @return ResolveResult. + */ + protected ResolveResult tryResolveInRegexMountpoint(final String srcPath, + final boolean resolveLastComponent) { + for (RegexMountPoint regexMountPoint : regexMountPointList) { + ResolveResult resolveResult = + regexMountPoint.resolve(srcPath, resolveLastComponent); + if (resolveResult != null) { + return resolveResult; + } + } + return null; + } + + /** + * Build resolve result. + * Here's an example + * Mountpoint: fs.viewfs.mounttable.mt + * .linkRegex.replaceresolveddstpath:_:-#.^/user/(??<username>\w+) + * Value: /targetTestRoot/$username + * Dir path to test: + * viewfs://mt/user/hadoop_user1/hadoop_dir1 + * Expect path: /targetTestRoot/hadoop-user1/hadoop_dir1 + * resolvedPathStr: /user/hadoop_user1 + * targetOfResolvedPathStr: /targetTestRoot/hadoop-user1 + * remainingPath: /hadoop_dir1 + * + * @param resultKind resultKind. + * @param resolvedPathStr resolvedPathStr. + * @param targetOfResolvedPathStr targetOfResolvedPathStr. + * @param remainingPath remainingPath. + * @return targetFileSystem or null on exceptions. + */ + protected ResolveResult buildResolveResultForRegexMountPoint( + ResultKind resultKind, String resolvedPathStr, + String targetOfResolvedPathStr, Path remainingPath) { + try { + T targetFs = initAndGetTargetFs() + .apply(new URI(targetOfResolvedPathStr)); + if (targetFs == null) { + LOGGER.error(String.format( + "Not able to initialize target file system." 
+ + " ResultKind:%s, resolvedPathStr:%s," + + " targetOfResolvedPathStr:%s, remainingPath:%s," + + " will return null.", + resultKind, resolvedPathStr, targetOfResolvedPathStr, + remainingPath)); + return null; + } + return new ResolveResult(resultKind, targetFs, resolvedPathStr, + remainingPath, true); + } catch (URISyntaxException uex) { + LOGGER.error(String.format( + "Got Exception while build resolve result." + + " ResultKind:%s, resolvedPathStr:%s," + + " targetOfResolvedPathStr:%s, remainingPath:%s," + + " will return null.", + resultKind, resolvedPathStr, targetOfResolvedPathStr, remainingPath), + uex); + return null; + } } - List> getMountPoints() { + public List> getMountPoints() { return mountPoints; } @@ -728,7 +971,7 @@ List> getMountPoints() { * @return home dir value from mount table; null if no config value * was found. */ - String getHomeDirPrefixValue() { + public String getHomeDirPrefixValue() { return homedirPrefix; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/MountTableConfigLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/MountTableConfigLoader.java new file mode 100644 index 0000000000000..5fcd77cd29155 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/MountTableConfigLoader.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; + +/** + * An interface for loading mount-table configuration. This class can have more + * APIs like refreshing mount tables automatically etc. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface MountTableConfigLoader { + + /** + * Loads the mount-table configuration into given configuration. + * + * @param mountTableConfigPath - Path of the mount table. It can be a file or + * a directory in the case of multiple versions of mount-table + * files(Recommended option). + * @param conf - Configuration object to add mount table. + * @throws IOException raised on errors performing I/O. 
+ */ + void load(String mountTableConfigPath, Configuration conf) + throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/NflyFSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/NflyFSystem.java index a406d77f2ef6c..85af68af31434 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/NflyFSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/NflyFSystem.java @@ -212,6 +212,21 @@ private static String getRack(String rackString) { */ private NflyFSystem(URI[] uris, Configuration conf, int minReplication, EnumSet nflyFlags) throws IOException { + this(uris, conf, minReplication, nflyFlags, null); + } + + /** + * Creates a new Nfly instance. + * + * @param uris the list of uris in the mount point + * @param conf configuration object + * @param minReplication minimum copies to commit a write op + * @param nflyFlags modes such readMostRecent + * @param fsGetter to get the file system instance with the given uri + * @throws IOException + */ + private NflyFSystem(URI[] uris, Configuration conf, int minReplication, + EnumSet nflyFlags, FsGetter fsGetter) throws IOException { if (uris.length < minReplication) { throw new IOException(minReplication + " < " + uris.length + ": Minimum replication < #destinations"); @@ -238,8 +253,14 @@ private NflyFSystem(URI[] uris, Configuration conf, int minReplication, nodes = new NflyNode[uris.length]; final Iterator rackIter = rackStrings.iterator(); for (int i = 0; i < nodes.length; i++) { - nodes[i] = new NflyNode(hostStrings.get(i), rackIter.next(), uris[i], - conf); + if (fsGetter != null) { + nodes[i] = new NflyNode(hostStrings.get(i), rackIter.next(), + new ChRootedFileSystem(fsGetter.getNewInstance(uris[i], conf), + uris[i])); + } else { + nodes[i] = + new NflyNode(hostStrings.get(i), rackIter.next(), uris[i], conf); + } } // sort all the uri's by distance 
from myNode, the local file system will // automatically be the the first one. @@ -921,7 +942,7 @@ private static void processThrowable(NflyNode nflyNode, String op, * @throws IOException */ static FileSystem createFileSystem(URI[] uris, Configuration conf, - String settings) throws IOException { + String settings, FsGetter fsGetter) throws IOException { // assert settings != null int minRepl = DEFAULT_MIN_REPLICATION; EnumSet nflyFlags = EnumSet.noneOf(NflyKey.class); @@ -946,6 +967,6 @@ static FileSystem createFileSystem(URI[] uris, Configuration conf, throw new IllegalArgumentException(nflyKey + ": Infeasible"); } } - return new NflyFSystem(uris, conf, minRepl, nflyFlags); + return new NflyFSystem(uris, conf, minRepl, nflyFlags, fsGetter); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPoint.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPoint.java new file mode 100644 index 0000000000000..aace7a2dba57d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPoint.java @@ -0,0 +1,289 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.viewfs.InodeTree.SlashPath; + +/** + * Regex mount point is build to implement regex based mount point. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class RegexMountPoint { + private static final Logger LOGGER = + LoggerFactory.getLogger(RegexMountPoint.class.getName()); + + private InodeTree inodeTree; + private String srcPathRegex; + private Pattern srcPattern; + private String dstPath; + private String interceptorSettingsString; + private List interceptorList; + + public static final String SETTING_SRCREGEX_SEP = "#."; + public static final char INTERCEPTOR_SEP = ';'; + public static final char INTERCEPTOR_INTERNAL_SEP = ':'; + // ${var},$var + public static final Pattern VAR_PATTERN_IN_DEST = + Pattern.compile("\\$((\\{\\w+\\})|(\\w+))"); + + // Same var might have different representations. + // e.g. 
+ // key => $key or key = > ${key} + private Map> varInDestPathMap; + + public Map> getVarInDestPathMap() { + return varInDestPathMap; + } + + RegexMountPoint(InodeTree inodeTree, String sourcePathRegex, + String destPath, String settingsStr) { + this.inodeTree = inodeTree; + this.srcPathRegex = sourcePathRegex; + this.dstPath = destPath; + this.interceptorSettingsString = settingsStr; + this.interceptorList = new ArrayList<>(); + } + + /** + * Initialize regex mount point. + * + * @throws IOException + */ + public void initialize() throws IOException { + try { + srcPattern = Pattern.compile(srcPathRegex); + } catch (PatternSyntaxException ex) { + throw new IOException( + "Failed to initialized mount point due to bad src path regex:" + + srcPathRegex + ", dstPath:" + dstPath, ex); + } + varInDestPathMap = getVarListInString(dstPath); + initializeInterceptors(); + } + + private void initializeInterceptors() throws IOException { + if (interceptorSettingsString == null + || interceptorSettingsString.isEmpty()) { + return; + } + String[] interceptorStrArray = + StringUtils.split(interceptorSettingsString, INTERCEPTOR_SEP); + for (String interceptorStr : interceptorStrArray) { + RegexMountPointInterceptor interceptor = + RegexMountPointInterceptorFactory.create(interceptorStr); + if (interceptor == null) { + throw new IOException( + "Illegal settings String " + interceptorSettingsString); + } + interceptor.initialize(); + interceptorList.add(interceptor); + } + } + + /** + * Get $var1 and $var2 style variables in string. + * + * @param input - the string to be process. 
+ * @return + */ + public static Map> getVarListInString(String input) { + Map> varMap = new HashMap<>(); + Matcher matcher = VAR_PATTERN_IN_DEST.matcher(input); + while (matcher.find()) { + // $var or ${var} + String varName = matcher.group(0); + // var or {var} + String strippedVarName = matcher.group(1); + if (strippedVarName.startsWith("{")) { + // {varName} = > varName + strippedVarName = + strippedVarName.substring(1, strippedVarName.length() - 1); + } + varMap.putIfAbsent(strippedVarName, new HashSet<>()); + varMap.get(strippedVarName).add(varName); + } + return varMap; + } + + public String getSrcPathRegex() { + return srcPathRegex; + } + + public Pattern getSrcPattern() { + return srcPattern; + } + + public String getDstPath() { + return dstPath; + } + + public static Pattern getVarPatternInDest() { + return VAR_PATTERN_IN_DEST; + } + + /** + * Get resolved path from regex mount points. + * E.g. link: ^/user/(?\\w+) => s3://$user.apache.com/_${user} + * srcPath: is /user/hadoop/dir1 + * resolveLastComponent: true + * then return value is s3://hadoop.apache.com/_hadoop + * @param srcPath - the src path to resolve + * @param resolveLastComponent - whether resolve the path after last `/` + * @return mapped path of the mount point. 
+ */ + public InodeTree.ResolveResult resolve(final String srcPath, + final boolean resolveLastComponent) { + String pathStrToResolve = getPathToResolve(srcPath, resolveLastComponent); + for (RegexMountPointInterceptor interceptor : interceptorList) { + pathStrToResolve = interceptor.interceptSource(pathStrToResolve); + } + LOGGER.debug("Path to resolve:" + pathStrToResolve + ", srcPattern:" + + getSrcPathRegex()); + Matcher srcMatcher = getSrcPattern().matcher(pathStrToResolve); + String parsedDestPath = getDstPath(); + int mappedCount = 0; + String resolvedPathStr = ""; + while (srcMatcher.find()) { + resolvedPathStr = pathStrToResolve.substring(0, srcMatcher.end()); + Map> varMap = getVarInDestPathMap(); + for (Map.Entry> entry : varMap.entrySet()) { + String regexGroupNameOrIndexStr = entry.getKey(); + Set groupRepresentationStrSetInDest = entry.getValue(); + parsedDestPath = replaceRegexCaptureGroupInPath( + parsedDestPath, srcMatcher, + regexGroupNameOrIndexStr, groupRepresentationStrSetInDest); + } + ++mappedCount; + } + if (0 == mappedCount) { + return null; + } + Path remainingPath = getRemainingPathStr(srcPath, resolvedPathStr); + for (RegexMountPointInterceptor interceptor : interceptorList) { + parsedDestPath = interceptor.interceptResolvedDestPathStr(parsedDestPath); + remainingPath = + interceptor.interceptRemainingPath(remainingPath); + } + InodeTree.ResolveResult resolveResult = inodeTree + .buildResolveResultForRegexMountPoint(InodeTree.ResultKind.EXTERNAL_DIR, + resolvedPathStr, parsedDestPath, remainingPath); + return resolveResult; + } + + private Path getRemainingPathStr( + String srcPath, + String resolvedPathStr) { + String remainingPathStr = srcPath.substring(resolvedPathStr.length()); + if (!remainingPathStr.startsWith("/")) { + remainingPathStr = "/" + remainingPathStr; + } + return new Path(remainingPathStr); + } + + private String getPathToResolve( + String srcPath, boolean resolveLastComponent) { + if (resolveLastComponent) { + return 
srcPath; + } + int lastSlashIndex = srcPath.lastIndexOf(SlashPath.toString()); + if (lastSlashIndex == -1) { + return null; + } + return srcPath.substring(0, lastSlashIndex); + } + + /** + * Use capture group named regexGroupNameOrIndexStr in mather to replace + * parsedDestPath. + * E.g. link: ^/user/(?\\w+) => s3://$user.apache.com/_${user} + * srcMatcher is from /user/hadoop. + * Then the params will be like following. + * parsedDestPath: s3://$user.apache.com/_${user}, + * regexGroupNameOrIndexStr: user + * groupRepresentationStrSetInDest: {user:$user; user:${user}} + * return value will be s3://hadoop.apache.com/_hadoop + * @param parsedDestPath + * @param srcMatcher + * @param regexGroupNameOrIndexStr + * @param groupRepresentationStrSetInDest + * @return return parsedDestPath while ${var},$var replaced or + * parsedDestPath nothing found. + */ + private String replaceRegexCaptureGroupInPath( + String parsedDestPath, + Matcher srcMatcher, + String regexGroupNameOrIndexStr, + Set groupRepresentationStrSetInDest) { + String groupValue = getRegexGroupValueFromMather( + srcMatcher, regexGroupNameOrIndexStr); + if (groupValue == null) { + return parsedDestPath; + } + for (String varName : groupRepresentationStrSetInDest) { + parsedDestPath = parsedDestPath.replace(varName, groupValue); + LOGGER.debug("parsedDestPath value is:" + parsedDestPath); + } + return parsedDestPath; + } + + /** + * Get matched capture group value from regex matched string. E.g. + * Regex: ^/user/(?\\w+), regexGroupNameOrIndexStr: userName + * then /user/hadoop should return hadoop while call + * getRegexGroupValueFromMather(matcher, usersName) + * or getRegexGroupValueFromMather(matcher, 1) + * + * @param srcMatcher - the matcher to be use + * @param regexGroupNameOrIndexStr - the regex group name or index + * @return - Null if no matched group named regexGroupNameOrIndexStr found. 
+ */ + private String getRegexGroupValueFromMather( + Matcher srcMatcher, String regexGroupNameOrIndexStr) { + if (regexGroupNameOrIndexStr.matches("\\d+")) { + // group index + int groupIndex = Integer.parseUnsignedInt(regexGroupNameOrIndexStr); + if (groupIndex >= 0 && groupIndex <= srcMatcher.groupCount()) { + return srcMatcher.group(groupIndex); + } + } else { + // named group in regex + return srcMatcher.group(regexGroupNameOrIndexStr); + } + return null; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptor.java new file mode 100644 index 0000000000000..37f44b0a51579 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptor.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; + +/** + * RegexMountPointInterceptor is a mechanism provided to intercept + * src and resolved path before/after resolution. + */ +@InterfaceAudience.LimitedPrivate("Common") +@InterfaceStability.Unstable +interface RegexMountPointInterceptor { + + /** + * Initialize interceptor and throws IOException if needed. + * @throws IOException + */ + void initialize() throws IOException; + + /** + * Intercept source before resolution. + * @param source + * @return + */ + String interceptSource(String source); + + /** + * Intercept parsed dest path and return a new one. + * @return intercepted string + */ + String interceptResolvedDestPathStr(String parsedDestPathStr); + + /** + * Intercept remaining path. + * @return intercepted string + */ + Path interceptRemainingPath(Path remainingPath); + + /** + * Get interceptor type. + * @return + */ + RegexMountPointInterceptorType getType(); + + /** + * Serialize the interceptor to a string. 
+ * @return + */ + String serializeToString(); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptorFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptorFactory.java new file mode 100644 index 0000000000000..fb564aa3a6e4d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptorFactory.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The interceptor factory used to create RegexMountPoint interceptors. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +final class RegexMountPointInterceptorFactory { + + private RegexMountPointInterceptorFactory() { + + } + + /** + * interceptorSettingsString string should be like ${type}:${string}, + * e.g. replaceresolveddstpath:word1,word2. + * + * @param interceptorSettingsString + * @return Return interceptor based on setting or null on bad/unknown config. + */ + public static RegexMountPointInterceptor create( + String interceptorSettingsString) { + int typeTagIndex = interceptorSettingsString + .indexOf(RegexMountPoint.INTERCEPTOR_INTERNAL_SEP); + if (typeTagIndex == -1 || (typeTagIndex == ( + interceptorSettingsString.length() - 1))) { + return null; + } + String typeTag = interceptorSettingsString.substring(0, typeTagIndex).trim() + .toLowerCase(); + RegexMountPointInterceptorType interceptorType = + RegexMountPointInterceptorType.get(typeTag); + if (interceptorType == null) { + return null; + } + switch (interceptorType) { + case REPLACE_RESOLVED_DST_PATH: + RegexMountPointInterceptor interceptor = + RegexMountPointResolvedDstPathReplaceInterceptor + .deserializeFromString(interceptorSettingsString); + return interceptor; + default: + // impossible now + return null; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptorType.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptorType.java new file mode 100644 index 0000000000000..ad953eba24ad9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointInterceptorType.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.util.HashMap; +import java.util.Map; + +/** + * RegexMountPointInterceptorType. + */ +public enum RegexMountPointInterceptorType { + REPLACE_RESOLVED_DST_PATH("replaceresolveddstpath"); + + private final String configName; + private static final Map + INTERCEPTOR_TYPE_MAP + = new HashMap(); + + static { + for (RegexMountPointInterceptorType interceptorType + : RegexMountPointInterceptorType.values()) { + INTERCEPTOR_TYPE_MAP.put( + interceptorType.getConfigName(), interceptorType); + } + } + + RegexMountPointInterceptorType(String configName) { + this.configName = configName; + } + + public String getConfigName() { + return configName; + } + + public static RegexMountPointInterceptorType get(String configName) { + return INTERCEPTOR_TYPE_MAP.get(configName); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointResolvedDstPathReplaceInterceptor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointResolvedDstPathReplaceInterceptor.java new file mode 100644 index 0000000000000..18490dc57c5ac --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/RegexMountPointResolvedDstPathReplaceInterceptor.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; + +import static org.apache.hadoop.fs.viewfs.RegexMountPointInterceptorType.REPLACE_RESOLVED_DST_PATH; + +/** + * Implementation of RegexMountPointResolvedDstPathReplaceInterceptor. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class RegexMountPointResolvedDstPathReplaceInterceptor + implements RegexMountPointInterceptor { + + private String srcRegexString; + private String replaceString; + private Pattern srcRegexPattern; + + RegexMountPointResolvedDstPathReplaceInterceptor(String srcRegex, + String replaceString) { + this.srcRegexString = srcRegex; + this.replaceString = replaceString; + this.srcRegexPattern = null; + } + + public String getSrcRegexString() { + return srcRegexString; + } + + public String getReplaceString() { + return replaceString; + } + + public Pattern getSrcRegexPattern() { + return srcRegexPattern; + } + + @Override + public void initialize() throws IOException { + try { + srcRegexPattern = Pattern.compile(srcRegexString); + } catch (PatternSyntaxException ex) { + throw new IOException( + "Initialize interceptor failed, srcRegx:" + srcRegexString, ex); + } + } + + /** + * Source won't be changed in the interceptor. + * + * @return source param string passed in. 
+ */ + @Override + public String interceptSource(String source) { + return source; + } + + /** + * Intercept resolved path, e.g. + * Mount point /^(\\w+)/, ${1}.hadoop.net + * If incoming path is /user1/home/tmp/job1, + * then the resolved path str will be user1. + * + * @return intercepted string + */ + @Override + public String interceptResolvedDestPathStr( + String parsedDestPathStr) { + Matcher matcher = srcRegexPattern.matcher(parsedDestPathStr); + return matcher.replaceAll(replaceString); + } + + /** + * The interceptRemainingPath will just return the remainingPath passed in. + * + */ + @Override + public Path interceptRemainingPath(Path remainingPath) { + return remainingPath; + } + + @Override + public RegexMountPointInterceptorType getType() { + return REPLACE_RESOLVED_DST_PATH; + } + + @Override + public String serializeToString() { + return REPLACE_RESOLVED_DST_PATH.getConfigName() + + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + srcRegexString + + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + replaceString; + } + + /** + * Create interceptor from config string. The string should be in + * replaceresolvedpath:wordToReplace:replaceString + * Note that we'll assume there's no ':' in the regex for the moment. + * + * @return Interceptor instance or null on bad config. + */ + public static RegexMountPointResolvedDstPathReplaceInterceptor + deserializeFromString(String serializedString) { + String[] strings = serializedString + .split(Character.toString(RegexMountPoint.INTERCEPTOR_INTERNAL_SEP)); + // We'll assume there's no ':' in the regex for the moment. 
+ if (strings.length != 3) { + return null; + } + //The format should be like replaceresolvedpath:wordToReplace:replaceString + return new RegexMountPointResolvedDstPathReplaceInterceptor(strings[1], + strings[2]); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index faa374a39789b..3d405e86f2be1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -18,27 +18,36 @@ package org.apache.hadoop.fs.viewfs; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; -import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE; import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE_DEFAULT; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT; +import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT_DEFAULT; +import java.util.function.Function; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import 
java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Set; -import java.util.Map.Entry; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -96,20 +105,48 @@ static AccessControlException readOnlyMountTable(final String operation, return readOnlyMountTable(operation, p.toString()); } + /** + * @return Gets file system creator instance. + */ + protected FsGetter fsGetter() { + return new FsGetter(); + } + /** * Caching children filesystems. HADOOP-15565. */ static class InnerCache { private Map map = new HashMap<>(); + private FsGetter fsCreator; + private ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); + + InnerCache(FsGetter fsCreator) { + this.fsCreator = fsCreator; + } FileSystem get(URI uri, Configuration config) throws IOException { Key key = new Key(uri); - if (map.get(key) == null) { - FileSystem fs = FileSystem.newInstance(uri, config); + FileSystem fs = null; + try { + rwLock.readLock().lock(); + fs = map.get(key); + if (fs != null) { + return fs; + } + } finally { + rwLock.readLock().unlock(); + } + try { + rwLock.writeLock().lock(); + fs = map.get(key); + if (fs != null) { + return fs; + } + fs = fsCreator.getNewInstance(uri, config); map.put(key, fs); return fs; - } else { - return map.get(key); + } finally { + rwLock.writeLock().unlock(); } } @@ -123,9 +160,13 @@ void closeAll() { } } - InnerCache unmodifiableCache() { - map = Collections.unmodifiableMap(map); - return this; + void clear() { + try { + rwLock.writeLock().lock(); + map.clear(); + } finally { + 
rwLock.writeLock().unlock(); + } } /** @@ -175,11 +216,11 @@ public static class MountPoint { /** * Array of target FileSystem URIs. */ - private final URI[] targetFileSystemURIs; + private final String[] targetFileSystemPaths; - MountPoint(Path srcPath, URI[] targetFs) { + MountPoint(Path srcPath, String[] targetFs) { mountedOnPath = srcPath; - targetFileSystemURIs = targetFs; + targetFileSystemPaths = targetFs; } public Path getMountedOnPath() { @@ -187,13 +228,21 @@ public Path getMountedOnPath() { } public URI[] getTargetFileSystemURIs() { - return targetFileSystemURIs; + URI[] targetUris = new URI[targetFileSystemPaths.length]; + for (int i = 0; i < targetFileSystemPaths.length; i++) { + targetUris[i] = URI.create(targetFileSystemPaths[i]); + } + return targetUris; + } + + public String[] getTargetFileSystemPaths() { + return targetFileSystemPaths; } } final long creationTime; // of the the mount table final UserGroupInformation ugi; // the user/group of user who created mtable - URI myUri; + private URI myUri; private Path workingDir; Configuration config; InodeTree fsState; // the fs state; ie the mount table @@ -224,7 +273,7 @@ private Path makeAbsolute(final Path f) { * {@link FileSystem#createFileSystem(URI, Configuration)} * * After this constructor is called initialize() is called. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public ViewFileSystem() throws IOException { ugi = UserGroupInformation.getCurrentUser(); @@ -241,6 +290,15 @@ public String getScheme() { return FsConstants.VIEWFS_SCHEME; } + /** + * Returns false as it does not support to add fallback link automatically on + * no mounts. + */ + boolean supportAutoAddingFallbackOnNoMounts() { + return false; + } + + /** * Called after a new FileSystem instance is constructed. * @param theUri a uri whose authority section names the host, port, etc. 
for @@ -255,35 +313,63 @@ public void initialize(final URI theUri, final Configuration conf) config = conf; enableInnerCache = config.getBoolean(CONFIG_VIEWFS_ENABLE_INNER_CACHE, CONFIG_VIEWFS_ENABLE_INNER_CACHE_DEFAULT); - final InnerCache innerCache = new InnerCache(); + FsGetter fsGetter = fsGetter(); + cache = new InnerCache(fsGetter); // Now build client side view (i.e. client side mount table) from config. final String authority = theUri.getAuthority(); + String tableName = authority; + if (theUri.getPort() != -1 && config + .getBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT)) { + tableName = theUri.getHost(); + } try { - myUri = new URI(FsConstants.VIEWFS_SCHEME, authority, "/", null, null); - fsState = new InodeTree(conf, authority) { - + myUri = new URI(getScheme(), authority, "/", null, null); + boolean initingUriAsFallbackOnNoMounts = + supportAutoAddingFallbackOnNoMounts(); + fsState = new InodeTree(conf, tableName, myUri, + initingUriAsFallbackOnNoMounts) { @Override - protected FileSystem getTargetFileSystem(final URI uri) - throws URISyntaxException, IOException { - FileSystem fs; - if (enableInnerCache) { - fs = innerCache.get(uri, config); - } else { - fs = FileSystem.get(uri, config); + protected Function initAndGetTargetFs() { + return new Function() { + @Override + public FileSystem apply(final URI uri) { + FileSystem fs; + try { + fs = ugi.doAs(new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws IOException { + if (enableInnerCache) { + synchronized (cache) { + return cache.get(uri, config); + } + } else { + return fsGetter().get(uri, config); + } + } + }); + return new ChRootedFileSystem(fs, uri); + } catch (IOException | InterruptedException ex) { + LOG.error("Could not initialize the underlying FileSystem " + + "object. 
Exception: " + ex.toString()); + } + return null; } - return new ChRootedFileSystem(fs, uri); + }; } @Override protected FileSystem getTargetFileSystem(final INodeDir dir) - throws URISyntaxException { - return new InternalDirOfViewFs(dir, creationTime, ugi, myUri, config); + throws URISyntaxException { + return new InternalDirOfViewFs(dir, creationTime, ugi, myUri, config, + this); } @Override protected FileSystem getTargetFileSystem(final String settings, final URI[] uris) throws URISyntaxException, IOException { - return NflyFSystem.createFileSystem(uris, config, settings); + return NflyFSystem.createFileSystem(uris, config, settings, + fsGetter); } }; workingDir = this.getHomeDirectory(); @@ -293,13 +379,6 @@ protected FileSystem getTargetFileSystem(final String settings, } catch (URISyntaxException e) { throw new IOException("URISyntax exception: " + theUri); } - - if (enableInnerCache) { - // All fs instances are created and cached on startup. The cache is - // readonly after the initialize() so the concurrent access of the cache - // is safe. - cache = innerCache.unmodifiableCache(); - } } /** @@ -315,9 +394,9 @@ protected FileSystem getTargetFileSystem(final String settings, } /** - * Convenience Constructor for apps to call directly - * @param conf - * @throws IOException + * Convenience Constructor for apps to call directly. + * @param conf input Configuration. + * @throws IOException raised on errors performing I/O. 
*/ public ViewFileSystem(final Configuration conf) throws IOException { this(FsConstants.VIEWFS_URI, conf); @@ -331,7 +410,7 @@ public URI getUri() { @Override public Path resolvePath(final Path f) throws IOException { final InodeTree.ResolveResult res; - res = fsState.resolve(getUriPath(f), true); + res = fsState.resolve(getUriPath(f), true); if (res.isInternalDir()) { return f; } @@ -370,7 +449,7 @@ public FSDataOutputStream append(final Path f, final int bufferSize, fsState.resolve(getUriPath(f), true); return res.targetFileSystem.append(res.remainingPath, bufferSize, progress); } - + @Override public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, @@ -475,6 +554,14 @@ private static FileStatus wrapLocalFileStatus(FileStatus orig, : new ViewFsFileStatus(orig, qualified); } + /** + * {@inheritDoc} + * + * If the given path is a symlink(mount link), the path will be resolved to a + * target path and it will get the resolved path's FileStatus object. It will + * not be represented as a symlink and isDirectory API returns true if the + * resolved path is a directory, false otherwise. + */ @Override public FileStatus getFileStatus(final Path f) throws AccessControlException, FileNotFoundException, IOException { @@ -492,6 +579,33 @@ public void access(Path path, FsAction mode) throws AccessControlException, res.targetFileSystem.access(res.remainingPath, mode); } + /** + * {@inheritDoc} + * + * Note: listStatus considers listing from fallbackLink if available. If the + * same directory path is present in configured mount path as well as in + * fallback fs, then only the fallback path will be listed in the returned + * result except for link. + * + * If any of the the immediate children of the given path f is a symlink(mount + * link), the returned FileStatus object of that children would be represented + * as a symlink. 
It will not be resolved to the target path and will not get + * the target path FileStatus object. The target path will be available via + * getSymlink on that children's FileStatus object. Since it represents as + * symlink, isDirectory on that children's FileStatus will return false. + * This behavior can be changed by setting an advanced configuration + * fs.viewfs.mount.links.as.symlinks to false. In this case, mount points will + * be represented as non-symlinks and all the file/directory attributes like + * permissions, isDirectory etc will be assigned from it's resolved target + * directory/file. + * + * If you want to get the FileStatus of target path for that children, you may + * want to use GetFileStatus API with that children's symlink path. Please see + * {@link ViewFileSystem#getFileStatus(Path f)} + * + * Note: In ViewFileSystem, by default the mount links are represented as + * symlinks. + */ @Override public FileStatus[] listStatus(final Path f) throws AccessControlException, FileNotFoundException, IOException { @@ -578,18 +692,52 @@ public FSDataInputStream open(final Path f, final int bufferSize) @Override public boolean rename(final Path src, final Path dst) throws IOException { // passing resolveLastComponet as false to catch renaming a mount point to - // itself. We need to catch this as an internal operation and fail. - InodeTree.ResolveResult resSrc = - fsState.resolve(getUriPath(src), false); - + // itself. We need to catch this as an internal operation and fail if no + // fallback. + InodeTree.ResolveResult resSrc = + fsState.resolve(getUriPath(src), false); + if (resSrc.isInternalDir()) { - throw readOnlyMountTable("rename", src); + if (fsState.getRootFallbackLink() == null) { + // If fallback is null, we can't rename from src. 
+ throw readOnlyMountTable("rename", src); + } + InodeTree.ResolveResult resSrcWithLastComp = + fsState.resolve(getUriPath(src), true); + if (resSrcWithLastComp.isInternalDir() || resSrcWithLastComp + .isLastInternalDirLink()) { + throw readOnlyMountTable("rename", src); + } else { + // This is fallback and let's set the src fs with this fallback + resSrc = resSrcWithLastComp; + } } - - InodeTree.ResolveResult resDst = - fsState.resolve(getUriPath(dst), false); + + InodeTree.ResolveResult resDst = + fsState.resolve(getUriPath(dst), false); + if (resDst.isInternalDir()) { - throw readOnlyMountTable("rename", dst); + if (fsState.getRootFallbackLink() == null) { + // If fallback is null, we can't rename to dst. + throw readOnlyMountTable("rename", dst); + } + // if the fallback exist, we may have chance to rename to fallback path + // where dst parent is matching to internalDir. + InodeTree.ResolveResult resDstWithLastComp = + fsState.resolve(getUriPath(dst), true); + if (resDstWithLastComp.isInternalDir()) { + // We need to get fallback here. If matching fallback path not exist, it + // will fail later. This is a very special case: Even though we are on + // internal directory, we should allow to rename, so that src files will + // moved under matching fallback dir. + resDst = new InodeTree.ResolveResult( + InodeTree.ResultKind.INTERNAL_DIR, + fsState.getRootFallbackLink().getTargetFileSystem(), "/", + new Path(resDstWithLastComp.resolvedPath), false); + } else { + // The link resolved to some target fs or fallback fs. 
+ resDst = resDstWithLastComp; + } } URI srcUri = resSrc.targetFileSystem.getUri(); @@ -779,12 +927,33 @@ public void removeXAttr(Path path, String name) throws IOException { } @Override - public void setVerifyChecksum(final boolean verifyChecksum) { - List> mountPoints = - fsState.getMountPoints(); + public void setVerifyChecksum(final boolean verifyChecksum) { + // This is a file system level operations, however ViewFileSystem + // points to many file systems. Noop for ViewFileSystem. + } + + /** + * Initialize the target filesystem for all mount points. + * @param mountPoints The mount points + * @return Mapping of mount point and the initialized target filesystems + * @throws RuntimeException when the target file system cannot be initialized + */ + private Map initializeMountedFileSystems( + List> mountPoints) { + FileSystem fs = null; + Map fsMap = new HashMap<>(mountPoints.size()); for (InodeTree.MountPoint mount : mountPoints) { - mount.target.targetFileSystem.setVerifyChecksum(verifyChecksum); + try { + fs = mount.target.getTargetFileSystem(); + fsMap.put(mount.src, fs); + } catch (IOException ex) { + String errMsg = "Not able to initialize FileSystem for mount path " + + mount.src + " with exception " + ex; + LOG.error(errMsg); + throw new RuntimeException(errMsg, ex); + } } + return fsMap; } @Override @@ -810,6 +979,9 @@ public long getDefaultBlockSize(Path f) { return res.targetFileSystem.getDefaultBlockSize(res.remainingPath); } catch (FileNotFoundException e) { throw new NotInMountpointException(f, "getDefaultBlockSize"); + } catch (IOException e) { + throw new RuntimeException("Not able to initialize fs in " + + " getDefaultBlockSize for path " + f + " with exception", e); } } @@ -821,6 +993,9 @@ public short getDefaultReplication(Path f) { return res.targetFileSystem.getDefaultReplication(res.remainingPath); } catch (FileNotFoundException e) { throw new NotInMountpointException(f, "getDefaultReplication"); + } catch (IOException e) { + throw new 
RuntimeException("Not able to initialize fs in " + + " getDefaultReplication for path " + f + " with exception", e); } } @@ -850,23 +1025,33 @@ public QuotaUsage getQuotaUsage(Path f) throws IOException { } @Override - public void setWriteChecksum(final boolean writeChecksum) { - List> mountPoints = - fsState.getMountPoints(); - for (InodeTree.MountPoint mount : mountPoints) { - mount.target.targetFileSystem.setWriteChecksum(writeChecksum); - } + public void setWriteChecksum(final boolean writeChecksum) { + // This is a file system level operations, however ViewFileSystem + // points to many file systems. Noop for ViewFileSystem. } @Override public FileSystem[] getChildFileSystems() { List> mountPoints = fsState.getMountPoints(); + Map fsMap = initializeMountedFileSystems(mountPoints); Set children = new HashSet(); for (InodeTree.MountPoint mountPoint : mountPoints) { - FileSystem targetFs = mountPoint.target.targetFileSystem; + FileSystem targetFs = fsMap.get(mountPoint.src); children.addAll(Arrays.asList(targetFs.getChildFileSystems())); } + + try { + if (fsState.isRootInternalDir() && + fsState.getRootFallbackLink() != null) { + children.addAll(Arrays.asList( + fsState.getRootFallbackLink().getTargetFileSystem() + .getChildFileSystems())); + } + } catch (IOException ex) { + LOG.error("Could not add child filesystems for source path " + + fsState.getRootFallbackLink().fullPath + " with exception " + ex); + } return children.toArray(new FileSystem[]{}); } @@ -955,16 +1140,79 @@ public Collection getAllStoragePolicies() * Get the trash root directory for current user when the path * specified is deleted. * + * If FORCE_INSIDE_MOUNT_POINT flag is not set, return the default trash root + * from targetFS. + * + * When FORCE_INSIDE_MOUNT_POINT is set to true, + *

      + *
    1. + * If the trash root for path p is in the same mount point as path p, + * and one of: + *
        + *
      1. The mount point isn't at the top of the target fs.
      2. + *
      3. The resolved path of path is root (in fallback FS).
      4. + *
      5. The trash isn't in user's target fs home directory + * get the corresponding viewFS path for the trash root and return + * it. + *
      6. + *
      + *
    2. + *
    3. + * else, return the trash root under the root of the mount point + * (/{mntpoint}/.Trash/{user}). + *
    4. + *
    + * + * These conditions handle several different important cases: + *
      + *
    • File systems may need to have more local trash roots, such as + * encryption zones or snapshot roots.
    • + *
    • The fallback mount should use the user's home directory.
    • + *
    • Cloud storage systems should not use trash in an implicity defined + * home directory, per a container, unless it is the fallback fs.
    • + *
    + * * @param path the trash root of the path to be determined. * @return the trash root path. */ @Override public Path getTrashRoot(Path path) { + try { InodeTree.ResolveResult res = fsState.resolve(getUriPath(path), true); - return res.targetFileSystem.getTrashRoot(res.remainingPath); - } catch (Exception e) { + Path targetFSTrashRoot = + res.targetFileSystem.getTrashRoot(res.remainingPath); + + // Allow clients to use old behavior of delegating to target fs. + if (!config.getBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, + CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT_DEFAULT)) { + return targetFSTrashRoot; + } + + // The trash root path from the target fs + String targetFSTrashRootPath = targetFSTrashRoot.toUri().getPath(); + // The mount point path in the target fs + String mountTargetPath = res.targetFileSystem.getUri().getPath(); + if (!mountTargetPath.endsWith("/")) { + mountTargetPath = mountTargetPath + "/"; + } + + Path targetFsUserHome = res.targetFileSystem.getHomeDirectory(); + if (targetFSTrashRootPath.startsWith(mountTargetPath) && + !(mountTargetPath.equals(ROOT_PATH.toString()) && + !res.resolvedPath.equals(ROOT_PATH.toString()) && + (targetFsUserHome != null && targetFSTrashRootPath.startsWith( + targetFsUserHome.toUri().getPath())))) { + String relativeTrashRoot = + targetFSTrashRootPath.substring(mountTargetPath.length()); + return makeQualified(new Path(res.resolvedPath, relativeTrashRoot)); + } else { + // Return the trash root for the mount point. + return makeQualified(new Path(res.resolvedPath, + TRASH_PREFIX + "/" + ugi.getShortUserName())); + } + } catch (IOException | IllegalArgumentException e) { throw new NotInMountpointException(path, "getTrashRoot"); } } @@ -972,16 +1220,78 @@ public Path getTrashRoot(Path path) { /** * Get all the trash roots for current user or all users. * + * When FORCE_INSIDE_MOUNT_POINT is set to true, we also return trash roots + * under the root of each mount point, with their viewFS paths. 
+ * * @param allUsers return trash roots for all users if true. * @return all Trash root directories. */ @Override public Collection getTrashRoots(boolean allUsers) { - List trashRoots = new ArrayList<>(); + // A map from targetFSPath -> FileStatus. + // FileStatus can be from targetFS or viewFS. + HashMap trashRoots = new HashMap<>(); for (FileSystem fs : getChildFileSystems()) { - trashRoots.addAll(fs.getTrashRoots(allUsers)); + for (FileStatus trash : fs.getTrashRoots(allUsers)) { + trashRoots.put(trash.getPath(), trash); + } + } + + // Return trashRoots if FORCE_INSIDE_MOUNT_POINT is disabled. + if (!config.getBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, + CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT_DEFAULT)) { + return trashRoots.values(); + } + + // Get trash roots in TRASH_PREFIX dir inside mount points and fallback FS. + List> mountPoints = + fsState.getMountPoints(); + // If we have a fallback FS, add a mount point for it as <"", fallback FS>. + // The source path of a mount point shall not end with '/', thus for + // fallback fs, we set its mount point src as "". 
+ if (fsState.getRootFallbackLink() != null) { + mountPoints.add(new InodeTree.MountPoint<>("", + fsState.getRootFallbackLink())); } - return trashRoots; + + try { + for (InodeTree.MountPoint mountPoint : mountPoints) { + + Path trashRoot = + makeQualified(new Path(mountPoint.src + "/" + TRASH_PREFIX)); + + // Continue if trashRoot does not exist for this mount point + if (!exists(trashRoot)) { + continue; + } + + FileSystem targetFS = mountPoint.target.getTargetFileSystem(); + if (!allUsers) { + Path userTrashRoot = new Path(trashRoot, ugi.getShortUserName()); + if (exists(userTrashRoot)) { + Path targetFSUserTrashRoot = targetFS.makeQualified( + new Path(targetFS.getUri().getPath(), + TRASH_PREFIX + "/" + ugi.getShortUserName())); + trashRoots.put(targetFSUserTrashRoot, getFileStatus(userTrashRoot)); + } + } else { + FileStatus[] mountPointTrashRoots = listStatus(trashRoot); + for (FileStatus trash : mountPointTrashRoots) { + // Remove the mountPoint and the leading '/' to get the + // relative targetFsTrash path + String targetFsTrash = trash.getPath().toUri().getPath() + .substring(mountPoint.src.length() + 1); + Path targetFsTrashPath = targetFS.makeQualified( + new Path(targetFS.getUri().getPath(), targetFsTrash)); + trashRoots.put(targetFsTrashPath, trash); + } + } + } + } catch (IOException e) { + LOG.warn("Exception in get all trash roots for mount points", e); + } + + return trashRoots.values(); } @Override @@ -1000,11 +1310,11 @@ public FsStatus getStatus(Path p) throws IOException { } /** - * Return the total size of all files under "/", if {@link + * @return Return the total size of all files under "/", if {@link * Constants#CONFIG_VIEWFS_LINK_MERGE_SLASH} is supported and is a valid * mount point. Else, throw NotInMountpointException. * - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public long getUsed() throws IOException { @@ -1074,11 +1384,14 @@ static class InternalDirOfViewFs extends FileSystem { final long creationTime; // of the the mount table final UserGroupInformation ugi; // the user/group of user who created mtable final URI myUri; + private final boolean showMountLinksAsSymlinks; + private InodeTree fsState; public InternalDirOfViewFs(final InodeTree.INodeDir dir, final long cTime, final UserGroupInformation ugi, URI uri, - Configuration config) throws URISyntaxException { + Configuration config, InodeTree fsState) throws URISyntaxException { myUri = uri; + this.fsState = fsState; try { initialize(myUri, config); } catch (IOException e) { @@ -1087,6 +1400,9 @@ public InternalDirOfViewFs(final InodeTree.INodeDir dir, theInternalDir = dir; creationTime = cTime; this.ugi = ugi; + showMountLinksAsSymlinks = config + .getBoolean(CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, + CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT); } static private void checkPathIsSlash(final Path f) throws IOException { @@ -1123,7 +1439,41 @@ public FSDataOutputStream append(final Path f, final int bufferSize, public FSDataOutputStream create(final Path f, final FsPermission permission, final boolean overwrite, final int bufferSize, final short replication, final long blockSize, - final Progressable progress) throws AccessControlException { + final Progressable progress) throws IOException { + Preconditions.checkNotNull(f, "File cannot be null."); + if (InodeTree.SlashPath.equals(f)) { + throw new FileAlreadyExistsException( + "/ is not a file. 
The directory / already exist at: " + + theInternalDir.fullPath); + } + + if (this.fsState.getRootFallbackLink() != null) { + + if (theInternalDir.getChildren().containsKey(f.getName())) { + throw new FileAlreadyExistsException( + "A mount path(file/dir) already exist with the requested path: " + + theInternalDir.getChildren().get(f.getName()).fullPath); + } + + FileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path parent = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + String leaf = f.getName(); + Path fileToCreate = new Path(parent, leaf); + + try { + return linkedFallbackFs + .create(fileToCreate, permission, overwrite, bufferSize, + replication, blockSize, progress); + } catch (IOException e) { + StringBuilder msg = + new StringBuilder("Failed to create file:").append(fileToCreate) + .append(" at fallback : ").append(linkedFallbackFs.getUri()); + LOG.error(msg.toString(), e); + throw e; + } + } throw readOnlyMountTable("create", f); } @@ -1145,6 +1495,23 @@ public boolean delete(final Path f) public BlockLocation[] getFileBlockLocations(final FileStatus fs, final long start, final long len) throws FileNotFoundException, IOException { + + // When application calls listFiles on internalDir, it would return + // RemoteIterator from InternalDirOfViewFs. If there is a fallBack, there + // is a chance of files exists under that internalDir in fallback. + // Iterator#next will call getFileBlockLocations with that files. So, we + // should return getFileBlockLocations on fallback. See HDFS-15532. 
+ if (!InodeTree.SlashPath.equals(fs.getPath()) && this.fsState + .getRootFallbackLink() != null) { + FileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path parent = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + Path pathToFallbackFs = new Path(parent, fs.getPath().getName()); + return linkedFallbackFs + .getFileBlockLocations(pathToFallbackFs, start, len); + } + checkPathIsSlash(fs.getPath()); throw new FileNotFoundException("Path points to dir not a file"); } @@ -1161,7 +1528,6 @@ public FileStatus getFileStatus(Path f) throws IOException { checkPathIsSlash(f); return new FileStatus(0, true, 0, 0, creationTime, creationTime, PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(), - new Path(theInternalDir.fullPath).makeQualified( myUri, ROOT_PATH)); } @@ -1171,34 +1537,147 @@ public FileStatus getFileStatus(Path f) throws IOException { public FileStatus[] listStatus(Path f) throws AccessControlException, FileNotFoundException, IOException { checkPathIsSlash(f); - FileStatus[] result = new FileStatus[theInternalDir.getChildren().size()]; + FileStatus[] fallbackStatuses = listStatusForFallbackLink(); + Set linkStatuses = new HashSet<>(); + Set internalDirStatuses = new HashSet<>(); int i = 0; for (Entry> iEntry : theInternalDir.getChildren().entrySet()) { INode inode = iEntry.getValue(); + Path path = new Path(inode.fullPath).makeQualified(myUri, null); if (inode.isLink()) { INodeLink link = (INodeLink) inode; - result[i++] = new FileStatus(0, false, 0, 0, - creationTime, creationTime, PERMISSION_555, - ugi.getShortUserName(), ugi.getPrimaryGroupName(), - link.getTargetLink(), - new Path(inode.fullPath).makeQualified( - myUri, null)); + if (showMountLinksAsSymlinks) { + // To maintain backward compatibility, with default option(showing + // mount links as symlinks), we will represent target link as + // symlink and rest other properties are belongs to mount link only. 
+ linkStatuses.add( + new FileStatus(0, false, 0, 0, creationTime, creationTime, + PERMISSION_555, ugi.getShortUserName(), + ugi.getPrimaryGroupName(), link.getTargetLink(), path)); + continue; + } + + // We will represent as non-symlinks. Here it will show target + // directory/file properties like permissions, isDirectory etc on + // mount path. The path will be a mount link path and isDirectory is + // true if target is dir, otherwise false. + String linkedPath = link.getTargetFileSystem().getUri().getPath(); + if ("".equals(linkedPath)) { + linkedPath = "/"; + } + try { + FileStatus status = + ((ChRootedFileSystem)link.getTargetFileSystem()) + .getMyFs().getFileStatus(new Path(linkedPath)); + linkStatuses.add( + new FileStatus(status.getLen(), status.isDirectory(), + status.getReplication(), status.getBlockSize(), + status.getModificationTime(), status.getAccessTime(), + status.getPermission(), status.getOwner(), + status.getGroup(), null, path)); + } catch (FileNotFoundException ex) { + LOG.warn("Cannot get one of the children's(" + path + + ") target path(" + link.getTargetFileSystem().getUri() + + ") file status.", ex); + throw ex; + } } else { - result[i++] = new FileStatus(0, true, 0, 0, - creationTime, creationTime, PERMISSION_555, - ugi.getShortUserName(), ugi.getGroupNames()[0], - new Path(inode.fullPath).makeQualified( - myUri, null)); + internalDirStatuses.add( + new FileStatus(0, true, 0, 0, creationTime, creationTime, + PERMISSION_555, ugi.getShortUserName(), + ugi.getPrimaryGroupName(), path)); + } + } + FileStatus[] internalDirStatusesMergedWithFallBack = internalDirStatuses + .toArray(new FileStatus[internalDirStatuses.size()]); + if (fallbackStatuses.length > 0) { + internalDirStatusesMergedWithFallBack = + merge(fallbackStatuses, internalDirStatusesMergedWithFallBack); + } + // Links will always have precedence than internalDir or fallback paths. 
+ return merge(linkStatuses.toArray(new FileStatus[linkStatuses.size()]), + internalDirStatusesMergedWithFallBack); + } + + private FileStatus[] merge(FileStatus[] toStatuses, + FileStatus[] fromStatuses) { + ArrayList result = new ArrayList<>(); + Set pathSet = new HashSet<>(); + for (FileStatus status : toStatuses) { + result.add(status); + pathSet.add(status.getPath().getName()); + } + for (FileStatus status : fromStatuses) { + if (!pathSet.contains(status.getPath().getName())) { + result.add(status); } } - return result; + return result.toArray(new FileStatus[result.size()]); + } + + private FileStatus[] listStatusForFallbackLink() throws IOException { + if (this.fsState.getRootFallbackLink() != null) { + FileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path p = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + if (theInternalDir.isRoot() || linkedFallbackFs.exists(p)) { + FileStatus[] statuses = linkedFallbackFs.listStatus(p); + for (FileStatus status : statuses) { + // Fix the path back to viewfs scheme + Path pathFromConfiguredFallbackRoot = + new Path(p, status.getPath().getName()); + status.setPath( + new Path(myUri.toString(), pathFromConfiguredFallbackRoot)); + } + return statuses; + } + } + return new FileStatus[0]; + } + + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + long[] summary = {0, 0, 1}; + for (FileStatus status : listStatus(f)) { + Path targetPath = + Path.getPathWithoutSchemeAndAuthority(status.getPath()); + InodeTree.ResolveResult res = + fsState.resolve(targetPath.toString(), true); + ContentSummary child = + res.targetFileSystem.getContentSummary(res.remainingPath); + summary[0] += child.getLength(); + summary[1] += child.getFileCount(); + summary[2] += child.getDirectoryCount(); + } + return new ContentSummary.Builder() + .length(summary[0]) + .fileCount(summary[1]) + .directoryCount(summary[2]) + .build(); + } + + @Override + 
public FsStatus getStatus(Path p) throws IOException { + long[] summary = {0, 0, 0}; + for (FileStatus status : listStatus(p)) { + Path targetPath = + Path.getPathWithoutSchemeAndAuthority(status.getPath()); + InodeTree.ResolveResult res = + fsState.resolve(targetPath.toString(), true); + FsStatus child = res.targetFileSystem.getStatus(res.remainingPath); + summary[0] += child.getCapacity(); + summary[1] += child.getUsed(); + summary[2] += child.getRemaining(); + } + return new FsStatus(summary[0], summary[1], summary[2]); } @Override public boolean mkdirs(Path dir, FsPermission permission) - throws AccessControlException, FileAlreadyExistsException { + throws IOException { if (theInternalDir.isRoot() && dir == null) { throw new FileAlreadyExistsException("/ already exits"); } @@ -1207,13 +1686,32 @@ public boolean mkdirs(Path dir, FsPermission permission) dir.toString().substring(1))) { return true; // this is the stupid semantics of FileSystem } - throw readOnlyMountTable("mkdirs", dir); - } - @Override - public boolean mkdirs(Path dir) - throws AccessControlException, FileAlreadyExistsException { - return mkdirs(dir, null); + if (this.fsState.getRootFallbackLink() != null) { + FileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path parent = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + String leafChild = (InodeTree.SlashPath.equals(dir)) ? 
+ InodeTree.SlashPath.toString() : + dir.getName(); + Path dirToCreate = new Path(parent, leafChild); + + try { + return linkedFallbackFs.mkdirs(dirToCreate, permission); + } catch (IOException e) { + if (LOG.isDebugEnabled()) { + StringBuilder msg = + new StringBuilder("Failed to create ").append(dirToCreate) + .append(" at fallback : ") + .append(linkedFallbackFs.getUri()); + LOG.debug(msg.toString(), e); + } + throw e; + } + } + + throw readOnlyMountTable("mkdirs", dir); } @Override @@ -1436,6 +1934,7 @@ public void close() throws IOException { super.close(); if (enableInnerCache && cache != null) { cache.closeAll(); + cache.clear(); } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java new file mode 100644 index 0000000000000..8e4bdf9d5782d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java @@ -0,0 +1,370 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.viewfs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.net.URI; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsConstants; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.UnsupportedFileSystemException; + +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME; + +/****************************************************************************** + * This class is extended from the ViewFileSystem for the overloaded scheme + * file system. Mount link configurations and in-memory mount table + * building behaviors are inherited from ViewFileSystem. Unlike ViewFileSystem + * scheme (viewfs://), the users would be able to use any scheme. + * + * To use this class, the following configurations need to be added in + * core-site.xml file. + * 1) fs.<scheme>.impl + * = org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme + * 2) fs.viewfs.overload.scheme.target.<scheme>.impl + * = <hadoop compatible file system implementation class name for the + * <scheme>" + * + * Here <scheme> can be any scheme, but with that scheme there should be a + * hadoop compatible file system available. Second configuration value should + * be the respective scheme's file system implementation class. + * Example: if scheme is configured with "hdfs", then the 2nd configuration + * class name will be org.apache.hadoop.hdfs.DistributedFileSystem. + * if scheme is configured with "s3a", then the 2nd configuration class name + * will be org.apache.hadoop.fs.s3a.S3AFileSystem. 
+ * + * Use Case 1: + * =========== + * If users want some of their existing cluster (hdfs://Cluster) + * data to mount with other hdfs and object store clusters(hdfs://NN1, + * o3fs://bucket1.volume1/, s3a://bucket1/) + * + * fs.viewfs.mounttable.Cluster.link./user = hdfs://NN1/user + * fs.viewfs.mounttable.Cluster.link./data = o3fs://bucket1.volume1/data + * fs.viewfs.mounttable.Cluster.link./backup = s3a://bucket1/backup/ + * + * Op1: Create file hdfs://Cluster/user/fileA will go to hdfs://NN1/user/fileA + * Op2: Create file hdfs://Cluster/data/datafile will go to + * o3fs://bucket1.volume1/data/datafile + * Op3: Create file hdfs://Cluster/backup/data.zip will go to + * s3a://bucket1/backup/data.zip + * + * Use Case 2: + * =========== + * If users want some of their existing cluster (s3a://bucketA/) + * data to mount with other hdfs and object store clusters + * (hdfs://NN1, o3fs://bucket1.volume1/) + * + * fs.viewfs.mounttable.bucketA.link./user = hdfs://NN1/user + * fs.viewfs.mounttable.bucketA.link./data = o3fs://bucket1.volume1/data + * fs.viewfs.mounttable.bucketA.link./salesDB = s3a://bucketA/salesDB/ + * + * Op1: Create file s3a://bucketA/user/fileA will go to hdfs://NN1/user/fileA + * Op2: Create file s3a://bucketA/data/datafile will go to + * o3fs://bucket1.volume1/data/datafile + * Op3: Create file s3a://bucketA/salesDB/dbfile will go to + * s3a://bucketA/salesDB/dbfile + * + * Note: + * (1) In ViewFileSystemOverloadScheme, by default the mount links will be + * represented as non-symlinks. If you want to change this behavior, please see + * {@link ViewFileSystem#listStatus(Path)} + * (2) In ViewFileSystemOverloadScheme, only the initialized uri's hostname will + * be considered as the mount table name. When the passed uri has hostname:port, + * it will simply ignore the port number and only hostname will be considered as + * the mount table name. 
+ * (3) If there are no mount links configured with the initializing uri's + * hostname as the mount table name, then it will automatically consider the + * current uri as fallback( ex: fs.viewfs.mounttable.<mycluster>.linkFallBack) + * target fs uri. + *****************************************************************************/ +@InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase", "Hive" }) +@InterfaceStability.Evolving +public class ViewFileSystemOverloadScheme extends ViewFileSystem { + private URI myUri; + private boolean supportAutoAddingFallbackOnNoMounts = true; + public ViewFileSystemOverloadScheme() throws IOException { + super(); + } + + @Override + public String getScheme() { + return myUri.getScheme(); + } + + /** + * By default returns false as ViewFileSystemOverloadScheme supports auto + * adding fallback on no mounts. + */ + public boolean supportAutoAddingFallbackOnNoMounts() { + return this.supportAutoAddingFallbackOnNoMounts; + } + + /** + * Sets whether to add fallback automatically when no mount points found. + * + * @param addAutoFallbackOnNoMounts addAutoFallbackOnNoMounts. 
+ */ + public void setSupportAutoAddingFallbackOnNoMounts( + boolean addAutoFallbackOnNoMounts) { + this.supportAutoAddingFallbackOnNoMounts = addAutoFallbackOnNoMounts; + } + + @Override + public void initialize(URI theUri, Configuration conf) throws IOException { + this.myUri = theUri; + if (LOG.isDebugEnabled()) { + LOG.debug("Initializing the ViewFileSystemOverloadScheme with the uri: " + + theUri); + } + String mountTableConfigPath = + conf.get(Constants.CONFIG_VIEWFS_MOUNTTABLE_PATH); + /* The default value to false in ViewFSOverloadScheme */ + conf.setBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, + conf.getBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, + false)); + /* the default value to true in ViewFSOverloadScheme */ + conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + conf.getBoolean(Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + true)); + if (null != mountTableConfigPath) { + MountTableConfigLoader loader = new HCFSMountTableConfigLoader(); + loader.load(mountTableConfigPath, conf); + } else { + // TODO: Should we fail here.? + if (LOG.isDebugEnabled()) { + LOG.debug( + "Missing configuration for fs.viewfs.mounttable.path. Proceeding" + + "with core-site.xml mount-table information if avaialable."); + } + } + super.initialize(theUri, conf); + } + + /** + * This method is overridden because in ViewFileSystemOverloadScheme if + * overloaded scheme matches with mounted target fs scheme, file system + * should be created without going into fs.<scheme>.impl based resolution. + * Otherwise it will end up in an infinite loop as the target will be + * resolved again to ViewFileSystemOverloadScheme as fs.<scheme>.impl points + * to ViewFileSystemOverloadScheme. So, below method will initialize the + * fs.viewfs.overload.scheme.target.<scheme>.impl. Other schemes can + * follow fs.newInstance. 
+ */ + @Override + protected FsGetter fsGetter() { + return new ChildFsGetter(getScheme()); + } + + /** + * This class checks whether the rooScheme is same as URI scheme. If both are + * same, then it will initialize file systems by using the configured + * fs.viewfs.overload.scheme.target..impl class. + */ + static class ChildFsGetter extends FsGetter { + + private final String rootScheme; + + ChildFsGetter(String rootScheme) { + this.rootScheme = rootScheme; + } + + @Override + public FileSystem getNewInstance(URI uri, Configuration conf) + throws IOException { + if (uri.getScheme().equals(this.rootScheme)) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "The file system initialized uri scheme is matching with the " + + "given target uri scheme. The target uri is: " + uri); + } + /* + * Avoid looping when target fs scheme is matching to overloaded scheme. + */ + return createFileSystem(uri, conf); + } else { + return FileSystem.newInstance(uri, conf); + } + } + + /** + * When ViewFileSystemOverloadScheme scheme and target uri scheme are + * matching, it will not take advantage of FileSystem cache as it will + * create instance directly. For caching needs please set + * "fs.viewfs.enable.inner.cache" to true. + */ + @Override + public FileSystem get(URI uri, Configuration conf) throws IOException { + if (uri.getScheme().equals(this.rootScheme)) { + // Avoid looping when target fs scheme is matching to overloaded + // scheme. + if (LOG.isDebugEnabled()) { + LOG.debug( + "The file system initialized uri scheme is matching with the " + + "given target uri scheme. So, the target file system " + + "instances will not be cached. To cache fs instances, " + + "please set fs.viewfs.enable.inner.cache to true. 
" + + "The target uri is: " + uri); + } + return createFileSystem(uri, conf); + } else { + return FileSystem.get(uri, conf); + } + } + + private FileSystem createFileSystem(URI uri, Configuration conf) + throws IOException { + final String fsImplConf = String.format( + FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, + uri.getScheme()); + Class clazz = conf.getClass(fsImplConf, null); + if (clazz == null) { + throw new UnsupportedFileSystemException( + String.format("%s=null: %s: %s", fsImplConf, + "No overload scheme fs configured", uri.getScheme())); + } + FileSystem fs = (FileSystem) newInstance(clazz, uri, conf); + fs.initialize(uri, conf); + return fs; + } + + private T newInstance(Class theClass, URI uri, Configuration conf) { + T result; + try { + Constructor meth = theClass.getConstructor(); + meth.setAccessible(true); + result = meth.newInstance(); + } catch (InvocationTargetException e) { + Throwable cause = e.getCause(); + if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else { + throw new RuntimeException(cause); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + return result; + } + + } + + /** + * This is an admin only API to give access to its child raw file system, if + * the path is link. If the given path is an internal directory(path is from + * mount paths tree), it will initialize the file system of given path uri + * directly. If path cannot be resolved to any internal directory or link, it + * will throw NotInMountpointException. Please note, this API will not return + * chrooted file system. Instead, this API will get actual raw file system + * instances. + * + * @param path - fs uri path + * @param conf - configuration + * @throws IOException raised on errors performing I/O. + * @return file system. 
+ */ + public FileSystem getRawFileSystem(Path path, Configuration conf) + throws IOException { + InodeTree.ResolveResult res; + try { + res = fsState.resolve(getUriPath(path), true); + return res.isInternalDir() ? fsGetter().get(path.toUri(), conf) + : ((ChRootedFileSystem) res.targetFileSystem).getMyFs(); + } catch (FileNotFoundException e) { + // No link configured with passed path. + throw new NotInMountpointException(path, + "No link found for the given path."); + } + } + + /** + * Gets the mount path info, which contains the target file system and + * remaining path to pass to the target file system. + * + * @param path the path. + * @param conf configuration. + * @return mount path info. + * @throws IOException raised on errors performing I/O. + */ + public MountPathInfo getMountPathInfo(Path path, + Configuration conf) throws IOException { + InodeTree.ResolveResult res; + try { + res = fsState.resolve(getUriPath(path), true); + FileSystem fs = res.isInternalDir() ? + (fsState.getRootFallbackLink() != null ? + ((ChRootedFileSystem) fsState + .getRootFallbackLink().getTargetFileSystem()).getMyFs() : + fsGetter().get(path.toUri(), conf)) : + ((ChRootedFileSystem) res.targetFileSystem).getMyFs(); + return new MountPathInfo(res.remainingPath, res.resolvedPath, + fs); + } catch (FileNotFoundException e) { + // No link configured with passed path. + throw new NotInMountpointException(path, + "No link found for the given path."); + } + } + + /** + * A class to maintain the target file system and a path to pass to the target + * file system. + */ + public static class MountPathInfo { + private Path pathOnTarget; + private T targetFs; + + public MountPathInfo(Path pathOnTarget, String resolvedPath, T targetFs) { + this.pathOnTarget = pathOnTarget; + this.targetFs = targetFs; + } + + public Path getPathOnTarget() { + return this.pathOnTarget; + } + + public T getTargetFs() { + return this.targetFs; + } + } + + /** + * @return Gets the fallback file system configured. 
Usually, this will be the + * default cluster. + */ + public FileSystem getFallbackFileSystem() { + if (fsState.getRootFallbackLink() == null) { + return null; + } + try { + return ((ChRootedFileSystem) fsState.getRootFallbackLink() + .getTargetFileSystem()).getMyFs(); + } catch (IOException ex) { + LOG.error("Could not get fallback filesystem "); + } + return null; + } + +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemUtil.java index c8a1d78cffd46..c9c6767097b87 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemUtil.java @@ -44,13 +44,24 @@ private ViewFileSystemUtil() { /** * Check if the FileSystem is a ViewFileSystem. * - * @param fileSystem + * @param fileSystem file system. * @return true if the fileSystem is ViewFileSystem */ public static boolean isViewFileSystem(final FileSystem fileSystem) { return fileSystem.getScheme().equals(FsConstants.VIEWFS_SCHEME); } + /** + * Check if the FileSystem is a ViewFileSystemOverloadScheme. + * + * @param fileSystem file system. + * @return true if the fileSystem is ViewFileSystemOverloadScheme + */ + public static boolean isViewFileSystemOverloadScheme( + final FileSystem fileSystem) { + return fileSystem instanceof ViewFileSystemOverloadScheme; + } + /** * Get FsStatus for all ViewFsMountPoints matching path for the given * ViewFileSystem. @@ -90,10 +101,12 @@ public static boolean isViewFileSystem(final FileSystem fileSystem) { * @param fileSystem - ViewFileSystem on which mount point exists * @param path - URI for which FsStatus is requested * @return Map of ViewFsMountPoint and FsStatus + * @throws IOException raised on errors performing I/O. 
*/ public static Map getStatus( FileSystem fileSystem, Path path) throws IOException { - if (!isViewFileSystem(fileSystem)) { + if (!(isViewFileSystem(fileSystem) + || isViewFileSystemOverloadScheme(fileSystem))) { throw new UnsupportedFileSystemException("FileSystem '" + fileSystem.getUri() + "'is not a ViewFileSystem."); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java index 2c8c1a538e433..da793f5d3e49d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java @@ -17,18 +17,26 @@ */ package org.apache.hadoop.fs.viewfs; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT; import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; +import java.util.function.Function; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.EnumSet; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -40,6 +48,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileChecksum; +import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FsConstants; import 
org.apache.hadoop.fs.FsServerDefaults; @@ -65,7 +74,8 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * ViewFs (extends the AbstractFileSystem interface) implements a client-side @@ -152,6 +162,7 @@ @InterfaceAudience.Public @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */ public class ViewFs extends AbstractFileSystem { + static final Logger LOG = LoggerFactory.getLogger(ViewFs.class); final long creationTime; // of the the mount table final UserGroupInformation ugi; // the user/group of user who created mtable final Configuration config; @@ -159,6 +170,7 @@ public class ViewFs extends AbstractFileSystem { Path homeDir = null; private ViewFileSystem.RenameStrategy renameStrategy = ViewFileSystem.RenameStrategy.SAME_MOUNTPOINT; + private static boolean showMountLinksAsSymlinks = true; static AccessControlException readOnlyMountTable(final String operation, final String p) { @@ -173,20 +185,31 @@ static AccessControlException readOnlyMountTable(final String operation, static public class MountPoint { - private Path src; // the src of the mount - private URI[] targets; // target of the mount; Multiple targets imply mergeMount - MountPoint(Path srcPath, URI[] targetURIs) { + // the src of the mount + private Path src; + // Target of the mount; Multiple targets imply mergeMount + private String[] targets; + MountPoint(Path srcPath, String[] targetURIs) { src = srcPath; targets = targetURIs; } Path getSrc() { return src; } - URI[] getTargets() { + String[] getTargets() { return targets; } } - + + /** + * Returns the ViewFileSystem type. 
+ * + * @return viewfs + */ + String getType() { + return FsConstants.VIEWFS_TYPE; + } + public ViewFs(final Configuration conf) throws IOException, URISyntaxException { this(FsConstants.VIEWFS_URI, conf); @@ -207,26 +230,50 @@ public ViewFs(final Configuration conf) throws IOException, creationTime = Time.now(); ugi = UserGroupInformation.getCurrentUser(); config = conf; + showMountLinksAsSymlinks = config + .getBoolean(CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, + CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT); // Now build client side view (i.e. client side mount table) from config. String authority = theUri.getAuthority(); - fsState = new InodeTree(conf, authority) { + boolean initingUriAsFallbackOnNoMounts = + !FsConstants.VIEWFS_TYPE.equals(getType()); + fsState = new InodeTree(conf, authority, theUri, + initingUriAsFallbackOnNoMounts) { @Override - protected AbstractFileSystem getTargetFileSystem(final URI uri) - throws URISyntaxException, UnsupportedFileSystemException { - String pathString = uri.getPath(); - if (pathString.isEmpty()) { - pathString = "/"; + protected Function initAndGetTargetFs() { + return new Function() { + @Override + public AbstractFileSystem apply(final URI uri) { + AbstractFileSystem fs; + try { + fs = ugi.doAs( + new PrivilegedExceptionAction() { + @Override + public AbstractFileSystem run() throws IOException { + return AbstractFileSystem.createFileSystem(uri, config); + } + }); + String pathString = uri.getPath(); + if (pathString.isEmpty()) { + pathString = "/"; + } + return new ChRootedFs(fs, new Path(pathString)); + } catch (IOException | URISyntaxException | + InterruptedException ex) { + LOG.error("Could not initialize underlying FileSystem object" + +" for uri " + uri + "with exception: " + ex.toString()); + } + return null; } - return new ChRootedFs( - AbstractFileSystem.createFileSystem(uri, config), - new Path(pathString)); + }; } @Override protected AbstractFileSystem getTargetFileSystem( final INodeDir dir) throws 
URISyntaxException { - return new InternalDirOfViewFs(dir, creationTime, ugi, getUri()); + return new InternalDirOfViewFs(dir, creationTime, ugi, getUri(), this, + config); } @Override @@ -349,6 +396,14 @@ public FileChecksum getFileChecksum(final Path f) return res.targetFileSystem.getFileChecksum(res.remainingPath); } + /** + * {@inheritDoc} + * + * If the given path is a symlink(mount link), the path will be resolved to a + * target path and it will get the resolved path's FileStatus object. It will + * not be represented as a symlink and isDirectory API returns true if the + * resolved path is a directory, false otherwise. + */ @Override public FileStatus getFileStatus(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { @@ -434,6 +489,32 @@ public LocatedFileStatus getViewFsFileStatus(LocatedFileStatus stat, }; } + /** + * {@inheritDoc} + * + * Note: listStatus considers listing from fallbackLink if available. If the + * same directory path is present in configured mount path as well as in + * fallback fs, then only the fallback path will be listed in the returned + * result except for link. + * + * If any of the the immediate children of the given path f is a symlink(mount + * link), the returned FileStatus object of that children would be represented + * as a symlink. It will not be resolved to the target path and will not get + * the target path FileStatus object. The target path will be available via + * getSymlink on that children's FileStatus object. Since it represents as + * symlink, isDirectory on that children's FileStatus will return false. + * This behavior can be changed by setting an advanced configuration + * fs.viewfs.mount.links.as.symlinks to false. In this case, mount points will + * be represented as non-symlinks and all the file/directory attributes like + * permissions, isDirectory etc will be assigned from it's resolved target + * directory/file. 
+ * + * If you want to get the FileStatus of target path for that children, you may + * want to use GetFileStatus API with that children's symlink path. Please see + * {@link ViewFs#getFileStatus(Path f)} + * + * Note: In ViewFs, by default the mount links are represented as symlinks. + */ @Override public FileStatus[] listStatus(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { @@ -488,23 +569,60 @@ public boolean truncate(final Path f, final long newLength) public void renameInternal(final Path src, final Path dst, final boolean overwrite) throws IOException, UnresolvedLinkException { // passing resolveLastComponet as false to catch renaming a mount point - // itself we need to catch this as an internal operation and fail. - InodeTree.ResolveResult resSrc = - fsState.resolve(getUriPath(src), false); - + // itself we need to catch this as an internal operation and fail if no + // fallback. + InodeTree.ResolveResult resSrc = + fsState.resolve(getUriPath(src), false); + if (resSrc.isInternalDir()) { - throw new AccessControlException( - "Cannot Rename within internal dirs of mount table: src=" + src - + " is readOnly"); + if (fsState.getRootFallbackLink() == null) { + // If fallback is null, we can't rename from src. 
+ throw new AccessControlException( + "Cannot Rename within internal dirs of mount table: src=" + src + + " is readOnly"); + } + InodeTree.ResolveResult resSrcWithLastComp = + fsState.resolve(getUriPath(src), true); + if (resSrcWithLastComp.isInternalDir() || resSrcWithLastComp + .isLastInternalDirLink()) { + throw new AccessControlException( + "Cannot Rename within internal dirs of mount table: src=" + src + + " is readOnly"); + } else { + // This is fallback and let's set the src fs with this fallback + resSrc = resSrcWithLastComp; + } } InodeTree.ResolveResult resDst = - fsState.resolve(getUriPath(dst), false); + fsState.resolve(getUriPath(dst), false); + if (resDst.isInternalDir()) { - throw new AccessControlException( - "Cannot Rename within internal dirs of mount table: dest=" + dst - + " is readOnly"); + if (fsState.getRootFallbackLink() == null) { + // If fallback is null, we can't rename to dst. + throw new AccessControlException( + "Cannot Rename within internal dirs of mount table: dest=" + dst + + " is readOnly"); + } + // if the fallback exist, we may have chance to rename to fallback path + // where dst parent is matching to internalDir. + InodeTree.ResolveResult resDstWithLastComp = + fsState.resolve(getUriPath(dst), true); + if (resDstWithLastComp.isInternalDir()) { + // We need to get fallback here. If matching fallback path not exist, it + // will fail later. This is a very special case: Even though we are on + // internal directory, we should allow to rename, so that src files will + // moved under matching fallback dir. + resDst = new InodeTree.ResolveResult( + InodeTree.ResultKind.INTERNAL_DIR, + fsState.getRootFallbackLink().getTargetFileSystem(), "/", + new Path(resDstWithLastComp.resolvedPath), false); + } else { + // The link resolved to some target fs or fallback fs. 
+ resDst = resDstWithLastComp; + } } + //Alternate 1: renames within same file system URI srcUri = resSrc.targetFileSystem.getUri(); URI dstUri = resDst.targetFileSystem.getUri(); @@ -622,11 +740,23 @@ public List> getDelegationTokens(String renewer) throws IOException { List> result = new ArrayList>(initialListSize); for ( int i = 0; i < mountPoints.size(); ++i ) { List> tokens = - mountPoints.get(i).target.targetFileSystem.getDelegationTokens(renewer); + mountPoints.get(i).target.getTargetFileSystem() + .getDelegationTokens(renewer); + if (tokens != null) { + result.addAll(tokens); + } + } + + // Add tokens from fallback FS + if (this.fsState.getRootFallbackLink() != null) { + AbstractFileSystem rootFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + List> tokens = rootFallbackFs.getDelegationTokens(renewer); if (tokens != null) { result.addAll(tokens); } } + return result; } @@ -779,7 +909,7 @@ public void unsetStoragePolicy(final Path src) * * @param src file or directory path. * @return storage policy for give file. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public BlockStoragePolicySpi getStoragePolicy(final Path src) throws IOException { @@ -841,15 +971,20 @@ static class InternalDirOfViewFs extends AbstractFileSystem { final long creationTime; // of the the mount table final UserGroupInformation ugi; // the user/group of user who created mtable final URI myUri; // the URI of the outer ViewFs - + private InodeTree fsState; + private Configuration conf; + public InternalDirOfViewFs(final InodeTree.INodeDir dir, - final long cTime, final UserGroupInformation ugi, final URI uri) + final long cTime, final UserGroupInformation ugi, final URI uri, + InodeTree fsState, Configuration conf) throws URISyntaxException { super(FsConstants.VIEWFS_URI, FsConstants.VIEWFS_SCHEME, false, -1); theInternalDir = dir; creationTime = cTime; this.ugi = ugi; myUri = uri; + this.fsState = fsState; + this.conf = conf; } static private void checkPathIsSlash(final Path f) throws IOException { @@ -868,6 +1003,41 @@ public FSDataOutputStream createInternal(final Path f, FileAlreadyExistsException, FileNotFoundException, ParentNotDirectoryException, UnsupportedFileSystemException, UnresolvedLinkException, IOException { + Preconditions.checkNotNull(f, "File cannot be null."); + if (InodeTree.SlashPath.equals(f)) { + throw new FileAlreadyExistsException( + "/ is not a file. 
The directory / already exist at: " + + theInternalDir.fullPath); + } + + if (this.fsState.getRootFallbackLink() != null) { + if (theInternalDir.getChildren().containsKey(f.getName())) { + throw new FileAlreadyExistsException( + "A mount path(file/dir) already exist with the requested path: " + + theInternalDir.getChildren().get(f.getName()).fullPath); + } + + AbstractFileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path parent = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + String leaf = f.getName(); + Path fileToCreate = new Path(parent, leaf); + + try { + return linkedFallbackFs + .createInternal(fileToCreate, flag, absolutePermission, + bufferSize, replication, blockSize, progress, checksumOpt, + true); + } catch (IOException e) { + StringBuilder msg = + new StringBuilder("Failed to create file:").append(fileToCreate) + .append(" at fallback : ").append(linkedFallbackFs.getUri()); + LOG.error(msg.toString(), e); + throw e; + } + } + throw readOnlyMountTable("create", f); } @@ -881,6 +1051,21 @@ public boolean delete(final Path f, final boolean recursive) @Override public BlockLocation[] getFileBlockLocations(final Path f, final long start, final long len) throws FileNotFoundException, IOException { + // When application calls listFiles on internalDir, it would return + // RemoteIterator from InternalDirOfViewFs. If there is a fallBack, there + // is a chance of files exists under that internalDir in fallback. + // Iterator#next will call getFileBlockLocations with that files. So, we + // should return getFileBlockLocations on fallback. See HDFS-15532. 
+ if (!InodeTree.SlashPath.equals(f) && this.fsState + .getRootFallbackLink() != null) { + AbstractFileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path parent = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + Path pathToFallbackFs = new Path(parent, f.getName()); + return linkedFallbackFs + .getFileBlockLocations(pathToFallbackFs, start, len); + } checkPathIsSlash(f); throw new FileNotFoundException("Path points to dir not a file"); } @@ -915,11 +1100,25 @@ public FileStatus getFileLinkStatus(final Path f) if (inode.isLink()) { INodeLink inodelink = (INodeLink) inode; - result = new FileStatus(0, false, 0, 0, creationTime, creationTime, + try { + String linkedPath = inodelink.getTargetFileSystem() + .getUri().getPath(); + FileStatus status = ((ChRootedFs)inodelink.getTargetFileSystem()) + .getMyFs().getFileStatus(new Path(linkedPath)); + result = new FileStatus(status.getLen(), false, + status.getReplication(), status.getBlockSize(), + status.getModificationTime(), status.getAccessTime(), + status.getPermission(), status.getOwner(), status.getGroup(), + inodelink.getTargetLink(), + new Path(inode.fullPath).makeQualified( + myUri, null)); + } catch (FileNotFoundException ex) { + result = new FileStatus(0, false, 0, 0, creationTime, creationTime, PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(), inodelink.getTargetLink(), new Path(inode.fullPath).makeQualified( myUri, null)); + } } else { result = new FileStatus(0, true, 0, 0, creationTime, creationTime, PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(), @@ -950,45 +1149,154 @@ public int getUriDefaultPort() { return -1; } + /** + * {@inheritDoc} + * + * Note: listStatus on root("/") considers listing from fallbackLink if + * available. If the same directory name is present in configured mount + * path as well as in fallback link, then only the configured mount path + * will be listed in the returned result. 
+ */ @Override - public FileStatus[] listStatus(final Path f) throws AccessControlException, - IOException { + public FileStatus[] listStatus(final Path f) throws IOException { checkPathIsSlash(f); - FileStatus[] result = new FileStatus[theInternalDir.getChildren().size()]; + FileStatus[] fallbackStatuses = listStatusForFallbackLink(); + Set linkStatuses = new HashSet<>(); + Set internalDirStatuses = new HashSet<>(); int i = 0; for (Entry> iEntry : theInternalDir.getChildren().entrySet()) { INode inode = iEntry.getValue(); - - + Path path = new Path(inode.fullPath).makeQualified(myUri, null); if (inode.isLink()) { INodeLink link = (INodeLink) inode; - result[i++] = new FileStatus(0, false, 0, 0, - creationTime, creationTime, - PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(), - link.getTargetLink(), - new Path(inode.fullPath).makeQualified( - myUri, null)); + if (showMountLinksAsSymlinks) { + // To maintain backward compatibility, with default option(showing + // mount links as symlinks), we will represent target link as + // symlink and rest other properties are belongs to mount link only. + linkStatuses.add( + new FileStatus(0, false, 0, 0, creationTime, creationTime, + PERMISSION_555, ugi.getShortUserName(), + ugi.getPrimaryGroupName(), link.getTargetLink(), path)); + continue; + } + + // We will represent as non-symlinks. Here it will show target + // directory/file properties like permissions, isDirectory etc on + // mount path. The path will be a mount link path and isDirectory is + // true if target is dir, otherwise false. 
+ String linkedPath = link.getTargetFileSystem().getUri().getPath(); + if ("".equals(linkedPath)) { + linkedPath = "/"; + } + try { + FileStatus status = + ((ChRootedFs) link.getTargetFileSystem()).getMyFs() + .getFileStatus(new Path(linkedPath)); + linkStatuses.add( + new FileStatus(status.getLen(), status.isDirectory(), + status.getReplication(), status.getBlockSize(), + status.getModificationTime(), status.getAccessTime(), + status.getPermission(), status.getOwner(), + status.getGroup(), null, path)); + } catch (FileNotFoundException ex) { + LOG.warn("Cannot get one of the children's(" + path + + ") target path(" + link.getTargetFileSystem().getUri() + + ") file status.", ex); + throw ex; + } } else { - result[i++] = new FileStatus(0, true, 0, 0, - creationTime, creationTime, - PERMISSION_555, ugi.getShortUserName(), ugi.getGroupNames()[0], - new Path(inode.fullPath).makeQualified( - myUri, null)); + internalDirStatuses.add( + new FileStatus(0, true, 0, 0, creationTime, creationTime, + PERMISSION_555, ugi.getShortUserName(), + ugi.getPrimaryGroupName(), path)); } } - return result; + + FileStatus[] internalDirStatusesMergedWithFallBack = internalDirStatuses + .toArray(new FileStatus[internalDirStatuses.size()]); + if (fallbackStatuses.length > 0) { + internalDirStatusesMergedWithFallBack = + merge(fallbackStatuses, internalDirStatusesMergedWithFallBack); + } + + // Links will always have precedence than internalDir or fallback paths. 
+ return merge(linkStatuses.toArray(new FileStatus[linkStatuses.size()]), + internalDirStatusesMergedWithFallBack); + } + + private FileStatus[] merge(FileStatus[] toStatuses, + FileStatus[] fromStatuses) { + ArrayList result = new ArrayList<>(); + Set pathSet = new HashSet<>(); + for (FileStatus status : toStatuses) { + result.add(status); + pathSet.add(status.getPath().getName()); + } + for (FileStatus status : fromStatuses) { + if (!pathSet.contains(status.getPath().getName())) { + result.add(status); + } + } + return result.toArray(new FileStatus[result.size()]); + } + + private FileStatus[] listStatusForFallbackLink() throws IOException { + if (fsState.getRootFallbackLink() != null) { + AbstractFileSystem linkedFallbackFs = + fsState.getRootFallbackLink().getTargetFileSystem(); + Path p = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + if (theInternalDir.isRoot() || FileContext + .getFileContext(linkedFallbackFs, conf).util().exists(p)) { + // Fallback link is only applicable for root + FileStatus[] statuses = linkedFallbackFs.listStatus(p); + for (FileStatus status : statuses) { + // Fix the path back to viewfs scheme + Path pathFromConfiguredFallbackRoot = + new Path(p, status.getPath().getName()); + status.setPath( + new Path(myUri.toString(), pathFromConfiguredFallbackRoot)); + } + return statuses; + } + } + return new FileStatus[0]; } @Override public void mkdir(final Path dir, final FsPermission permission, - final boolean createParent) throws AccessControlException, - FileAlreadyExistsException { + final boolean createParent) throws IOException { if (theInternalDir.isRoot() && dir == null) { throw new FileAlreadyExistsException("/ already exits"); } + + if (this.fsState.getRootFallbackLink() != null) { + AbstractFileSystem linkedFallbackFs = + this.fsState.getRootFallbackLink().getTargetFileSystem(); + Path parent = Path.getPathWithoutSchemeAndAuthority( + new Path(theInternalDir.fullPath)); + String leafChild = 
(InodeTree.SlashPath.equals(dir)) ? + InodeTree.SlashPath.toString() : + dir.getName(); + Path dirToCreate = new Path(parent, leafChild); + try { + // We are here because, the parent dir already exist in the mount + // table internal tree. So, let's create parent always in fallback. + linkedFallbackFs.mkdir(dirToCreate, permission, true); + return; + } catch (IOException e) { + if (LOG.isDebugEnabled()) { + StringBuilder msg = new StringBuilder("Failed to create {}") + .append(" at fallback fs : {}"); + LOG.debug(msg.toString(), dirToCreate, linkedFallbackFs.getUri()); + } + throw e; + } + } + throw readOnlyMountTable("mkdir", dir); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/package-info.java new file mode 100644 index 0000000000000..89986d0e5ef69 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * ViewFileSystem and ViewFileSystemOverloadScheme classes. 
+ */ +@InterfaceAudience.LimitedPrivate({"MapReduce", "HBase", "Hive" }) +@InterfaceStability.Stable +package org.apache.hadoop.fs.viewfs; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 828a17bcb972e..5e9503be135e3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -43,8 +43,8 @@ import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.KeeperException.Code; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,6 +91,8 @@ public interface ActiveStandbyElectorCallback { * * Callback implementations are expected to manage their own * timeouts (e.g. when making an RPC to a remote node). + * + * @throws ServiceFailedException Service Failed Exception. */ void becomeActive() throws ServiceFailedException; @@ -119,6 +121,8 @@ public interface ActiveStandbyElectorCallback { * If there is any fatal error (e.g. wrong ACL's, unexpected Zookeeper * errors or Zookeeper persistent unavailability) then notifyFatalError is * called to notify the app about it. + * + * @param errorMessage error message. */ void notifyFatalError(String errorMessage); @@ -204,8 +208,12 @@ enum State { * ZK connection * @param app * reference to callback interface object - * @throws IOException + * @param maxRetryNum maxRetryNum. 
+ * @throws IOException raised on errors performing I/O. * @throws HadoopIllegalArgumentException + * if valid data is not supplied. + * @throws KeeperException + * other zookeeper operation errors. */ public ActiveStandbyElector(String zookeeperHostPorts, int zookeeperSessionTimeout, String parentZnodeName, List acl, @@ -245,8 +253,13 @@ public ActiveStandbyElector(String zookeeperHostPorts, * reference to callback interface object * @param failFast * whether need to add the retry when establishing ZK connection. + * @param maxRetryNum max Retry Num * @throws IOException + * raised on errors performing I/O. * @throws HadoopIllegalArgumentException + * if valid data is not supplied. + * @throws KeeperException + * other zookeeper operation errors. */ public ActiveStandbyElector(String zookeeperHostPorts, int zookeeperSessionTimeout, String parentZnodeName, List acl, @@ -312,6 +325,8 @@ public synchronized void joinElection(byte[] data) /** * @return true if the configured parent znode exists + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException interrupted exception. */ public synchronized boolean parentZNodeExists() throws IOException, InterruptedException { @@ -327,6 +342,10 @@ public synchronized boolean parentZNodeExists() /** * Utility function to ensure that the configured base znode exists. * This recursively creates the znode as well as all of its parents. + * + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException interrupted exception. + * @throws KeeperException other zookeeper operation errors. */ public synchronized void ensureParentZNode() throws IOException, InterruptedException, KeeperException { @@ -371,6 +390,9 @@ public synchronized void ensureParentZNode() * This recursively deletes everything within the znode as well as the * parent znode itself. It should only be used when it's certain that * no electors are currently participating in the election. 
+ * + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException interrupted exception. */ public synchronized void clearParentZNode() throws IOException, InterruptedException { @@ -435,6 +457,7 @@ public static class ActiveNotFoundException extends Exception { * @throws KeeperException * other zookeeper operation errors * @throws InterruptedException + * interrupted exception. * @throws IOException * when ZooKeeper connection could not be established */ @@ -684,7 +707,7 @@ synchronized void processWatchEvent(ZooKeeper zk, WatchedEvent event) { * inherit and mock out the zookeeper instance * * @return new zookeeper client instance - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KeeperException zookeeper connectionloss exception */ protected synchronized ZooKeeper connectToZooKeeper() throws IOException, @@ -714,7 +737,7 @@ protected synchronized ZooKeeper connectToZooKeeper() throws IOException, * inherit and pass in a mock object for zookeeper * * @return new zookeeper client instance - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected ZooKeeper createZooKeeper() throws IOException { return new ZooKeeper(zkHostPort, zkSessionTimeout, watcher); @@ -781,6 +804,8 @@ private void reJoinElection(int sleepTime) { * Sleep for the given number of milliseconds. * This is non-static, and separated out, so that unit tests * can override the behavior not to sleep. + * + * @param sleepMs sleep ms. 
*/ @VisibleForTesting protected void sleepFor(int sleepMs) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index 4fc52d557cf9d..e7ed7304988cb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -28,7 +28,7 @@ import org.apache.hadoop.ha.HAServiceProtocol.RequestSource; import org.apache.hadoop.ipc.RPC; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -213,7 +213,7 @@ public void failover(HAServiceTarget fromSvc, // Fence fromSvc if it's required or forced by the user if (tryFence) { - if (!fromSvc.getFencer().fence(fromSvc)) { + if (!fromSvc.getFencer().fence(fromSvc, toSvc)) { throw new FailoverFailedException("Unable to fence " + fromSvc + ". 
Fencing failed."); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 0950ea7e01c57..c65950aeec509 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -39,7 +39,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -325,6 +325,9 @@ private int getServiceState(final CommandLine cmd) /** * Return the serviceId as is, we are assuming it was * given as a service address of form {@literal <}host:ipcport{@literal >}. + * + * @param serviceId serviceId. + * @return service addr. */ protected String getServiceAddr(String serviceId) { return serviceId; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 74a3d121a1abe..56c848617ffbc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -118,7 +118,8 @@ public void monitorHealth() throws HealthCheckFailedException, /** * Request service to transition to active state. No operation, if the * service is already in active state. - * + * + * @param reqInfo reqInfo. * @throws ServiceFailedException * if transition from standby to active fails. * @throws AccessControlException @@ -135,7 +136,8 @@ public void transitionToActive(StateChangeRequestInfo reqInfo) /** * Request service to transition to standby state. 
No operation, if the * service is already in standby state. - * + * + * @param reqInfo reqInfo. * @throws ServiceFailedException * if transition from active to standby fails. * @throws AccessControlException @@ -153,6 +155,7 @@ public void transitionToStandby(StateChangeRequestInfo reqInfo) * Request service to transition to observer state. No operation, if the * service is already in observer state. * + * @param reqInfo reqInfo. * @throws ServiceFailedException * if transition from standby to observer fails. * @throws AccessControlException @@ -176,6 +179,7 @@ void transitionToObserver(StateChangeRequestInfo reqInfo) * @throws IOException * if other errors happen * @see HAServiceStatus + * @return HAServiceStatus. */ @Idempotent public HAServiceStatus getServiceStatus() throws AccessControlException, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java index 9d5c8e7b7ea3b..288a9dcbe0e53 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java @@ -31,7 +31,7 @@ import org.apache.hadoop.ha.protocolPB.ZKFCProtocolClientSideTranslatorPB; import org.apache.hadoop.net.NetUtils; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * Represents a target of the client side HA administration commands. @@ -44,6 +44,12 @@ public abstract class HAServiceTarget { private static final String PORT_SUBST_KEY = "port"; private static final String ADDRESS_SUBST_KEY = "address"; + /** + * The HAState this service target is intended to be after transition + * is complete. + */ + private HAServiceProtocol.HAServiceState transitionTargetHAStatus; + /** * @return the IPC address of the target node. 
*/ @@ -87,12 +93,24 @@ public abstract void checkFencingConfigured() /** * @return a proxy to connect to the target HA Service. + * @param timeoutMs timeout in milliseconds. + * @param conf Configuration. + * @throws IOException raised on errors performing I/O. */ public HAServiceProtocol getProxy(Configuration conf, int timeoutMs) throws IOException { return getProxyForAddress(conf, timeoutMs, getAddress()); } + public void setTransitionTargetHAStatus( + HAServiceProtocol.HAServiceState status) { + this.transitionTargetHAStatus = status; + } + + public HAServiceProtocol.HAServiceState getTransitionTargetHAStatus() { + return this.transitionTargetHAStatus; + } + /** * Returns a proxy to connect to the target HA service for health monitoring. * If {@link #getHealthMonitorAddress()} is implemented to return a non-null @@ -100,7 +118,7 @@ public HAServiceProtocol getProxy(Configuration conf, int timeoutMs) * returned proxy defaults to using {@link #getAddress()}, which means this * method's behavior is identical to {@link #getProxy(Configuration, int)}. * - * @param conf Configuration + * @param conf configuration. * @param timeoutMs timeout in milliseconds * @return a proxy to connect to the target HA service for health monitoring * @throws IOException if there is an error @@ -139,6 +157,9 @@ private HAServiceProtocol getProxyForAddress(Configuration conf, /** * @return a proxy to the ZKFC which is associated with this HA service. + * @param conf configuration. + * @param timeoutMs timeout in milliseconds. + * @throws IOException raised on errors performing I/O. 
*/ public ZKFCProtocol getZKFCProxy(Configuration conf, int timeoutMs) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthMonitor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthMonitor.java index 16c30752edc20..1d76d0ab76e65 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthMonitor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthMonitor.java @@ -32,7 +32,7 @@ import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.util.Daemon; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -184,6 +184,9 @@ private void tryConnect() { /** * Connect to the service to be monitored. Stubbed out for easier testing. + * + * @throws IOException raised on errors performing I/O. + * @return HAServiceProtocol. 
*/ protected HAServiceProtocol createProxy() throws IOException { return targetToMonitor.getHealthMonitorProxy(conf, rpcTimeout, rpcConnectRetries); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java index 64e7315130257..7f4a0790a3bc1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java @@ -27,8 +27,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,15 +89,32 @@ public static NodeFencer create(Configuration conf, String confKey) } public boolean fence(HAServiceTarget fromSvc) { + return fence(fromSvc, null); + } + + public boolean fence(HAServiceTarget fromSvc, HAServiceTarget toSvc) { LOG.info("====== Beginning Service Fencing Process... ======"); int i = 0; for (FenceMethodWithArg method : methods) { LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method); try { - if (method.method.tryFence(fromSvc, method.arg)) { - LOG.info("====== Fencing successful by method " + method + " ======"); - return true; + // only true when target node is given, AND fencing on it failed + boolean toSvcFencingFailed = false; + // if target is given, try to fence on target first. Only if fencing + // on target succeeded, do fencing on source node. 
+ if (toSvc != null) { + toSvcFencingFailed = !method.method.tryFence(toSvc, method.arg); + } + if (toSvcFencingFailed) { + LOG.error("====== Fencing on target failed, skipping fencing " + + "on source ======"); + } else { + if (method.method.tryFence(fromSvc, method.arg)) { + LOG.info("====== Fencing successful by method " + + method + " ======"); + return true; + } } } catch (BadFencingConfigurationException e) { LOG.error("Fencing method " + method + " misconfigured", e); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java index 7e4a88f729fad..3ae8394b62342 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java @@ -19,11 +19,12 @@ import java.io.IOException; import java.lang.reflect.Field; +import java.util.Arrays; import java.util.Map; import org.apache.hadoop.conf.Configured; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.util.Shell; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,6 +61,11 @@ public class ShellCommandFencer /** Prefix for target parameters added to the environment */ private static final String TARGET_PREFIX = "target_"; + /** Prefix for source parameters added to the environment */ + private static final String SOURCE_PREFIX = "source_"; + + private static final String ARG_DELIMITER = ","; + @VisibleForTesting static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class); @@ -73,8 +79,9 @@ public void checkArgs(String args) throws BadFencingConfigurationException { } @Override - public boolean tryFence(HAServiceTarget target, String cmd) { + public boolean tryFence(HAServiceTarget target, String 
args) { ProcessBuilder builder; + String cmd = parseArgs(target.getTransitionTargetHAStatus(), args); if (!Shell.WINDOWS) { builder = new ProcessBuilder("bash", "-e", "-c", cmd); @@ -127,6 +134,28 @@ public boolean tryFence(HAServiceTarget target, String cmd) { return rc == 0; } + private String parseArgs(HAServiceProtocol.HAServiceState state, + String cmd) { + String[] args = cmd.split(ARG_DELIMITER); + if (args.length == 1) { + // only one command is given, assuming both src and dst + // will execute the same command/script. + return args[0]; + } + if (args.length > 2) { + throw new IllegalArgumentException("Expecting arguments size of at most " + + "two, getting " + Arrays.asList(args)); + } + if (HAServiceProtocol.HAServiceState.ACTIVE.equals(state)) { + return args[0]; + } else if (HAServiceProtocol.HAServiceState.STANDBY.equals(state)) { + return args[1]; + } else { + throw new IllegalArgumentException( + "Unexpected HA service state:" + state); + } + } + /** * Abbreviate a string by putting '...' in the middle of it, * in an attempt to keep logs from getting too messy. @@ -190,9 +219,24 @@ private void setConfAsEnvVars(Map env) { */ private void addTargetInfoAsEnvVars(HAServiceTarget target, Map environment) { + String prefix; + HAServiceProtocol.HAServiceState targetState = + target.getTransitionTargetHAStatus(); + if (targetState == null || + HAServiceProtocol.HAServiceState.ACTIVE.equals(targetState)) { + // null is assumed to be same as ACTIVE, this is to be compatible + // with existing tests/use cases where target state is not specified + // but assuming it's active. 
+ prefix = TARGET_PREFIX; + } else if (HAServiceProtocol.HAServiceState.STANDBY.equals(targetState)) { + prefix = SOURCE_PREFIX; + } else { + throw new IllegalArgumentException( + "Unexpected HA service state:" + targetState); + } for (Map.Entry e : target.getFencingParameters().entrySet()) { - String key = TARGET_PREFIX + e.getKey(); + String key = prefix + e.getKey(); key = key.replace('.', '_'); environment.put(key, e.getValue()); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java index e0c2f4d9e7b77..a13b592e5cb15 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java @@ -25,7 +25,7 @@ import org.apache.hadoop.conf.Configured; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.jcraft.jsch.ChannelExec; import com.jcraft.jsch.JSch; import com.jcraft.jsch.JSchException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFCRpcServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFCRpcServer.java index 86dd91ee1423d..1e5b27a6fea64 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFCRpcServer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFCRpcServer.java @@ -27,7 +27,7 @@ import org.apache.hadoop.ha.proto.ZKFCProtocolProtos.ZKFCProtocolService; import org.apache.hadoop.ha.protocolPB.ZKFCProtocolPB; import org.apache.hadoop.ha.protocolPB.ZKFCProtocolServerSideTranslatorPB; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import 
org.apache.hadoop.ipc.RPC.Server; import org.apache.hadoop.security.AccessControlException; @@ -50,7 +50,7 @@ public class ZKFCRpcServer implements ZKFCProtocol { this.zkfc = zkfc; RPC.setProtocolEngine(conf, ZKFCProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); ZKFCProtocolServerSideTranslatorPB translator = new ZKFCProtocolServerSideTranslatorPB(this); BlockingService service = ZKFCProtocolService diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java index 943d53dd3a823..0884a0a6d3e90 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java @@ -31,11 +31,14 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.ha.ActiveStandbyElector.ActiveNotFoundException; import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.ha.HAServiceProtocol.RequestSource; +import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.util.ZKUtil; import org.apache.hadoop.util.ZKUtil.ZKAuthInfo; import org.apache.hadoop.ha.HealthMonitor.State; @@ -50,9 +53,9 @@ import org.apache.hadoop.util.ToolRunner; import org.apache.zookeeper.data.ACL; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -150,6 +153,8 @@ protected abstract void checkRpcAdminAccess() * the ZKFC will do all of its work. This is so that multiple federated * nameservices can run on the same ZK quorum without having to manually * configure them to separate subdirectories. + * + * @return ScopeInsideParentNode. */ protected abstract String getScopeInsideParentNode(); @@ -318,9 +323,10 @@ private void initHM() { healthMonitor.addServiceStateCallback(new ServiceStateCallBacks()); healthMonitor.start(); } - + protected void initRPC() throws IOException { InetSocketAddress bindAddr = getRpcAddressToBindTo(); + LOG.info("ZKFC RpcServer binding to {}", bindAddr); rpcServer = new ZKFCRpcServer(conf, bindAddr, this, getPolicyProvider()); } @@ -342,8 +348,19 @@ private void initZK() throws HadoopIllegalArgumentException, IOException, zkAcls = Ids.CREATOR_ALL_ACL; } - // Parse authentication from configuration. - List zkAuths = SecurityUtil.getZKAuthInfos(conf, ZK_AUTH_KEY); + // Parse authentication from configuration. Exclude any Credential providers + // using the hdfs scheme to avoid a circular dependency. As HDFS is likely + // not started when ZKFC is started, we cannot read the credentials from it. + Configuration c = conf; + try { + c = ProviderUtils.excludeIncompatibleCredentialProviders( + conf, FileSystem.getFileSystemClass("hdfs", conf)); + } catch (UnsupportedFileSystemException e) { + // Should not happen in a real cluster, as the hdfs FS will always be + // present. Inside tests, the hdfs filesystem will not be present + LOG.debug("No filesystem found for the hdfs scheme", e); + } + List zkAuths = SecurityUtil.getZKAuthInfos(c, ZK_AUTH_KEY); // Sanity check configuration. 
Preconditions.checkArgument(zkQuorum != null, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java index e53820cd13107..2cbfd0d0ec030 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java @@ -38,7 +38,7 @@ import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToObserverRequestProto; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.UserGroupInformation; @@ -67,7 +67,7 @@ public class HAServiceProtocolClientSideTranslatorPB implements public HAServiceProtocolClientSideTranslatorPB(InetSocketAddress addr, Configuration conf) throws IOException { RPC.setProtocolEngine(conf, HAServiceProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); rpcProxy = RPC.getProxy(HAServiceProtocolPB.class, RPC.getProtocolVersion(HAServiceProtocolPB.class), addr, conf); } @@ -76,7 +76,7 @@ public HAServiceProtocolClientSideTranslatorPB( InetSocketAddress addr, Configuration conf, SocketFactory socketFactory, int timeout) throws IOException { RPC.setProtocolEngine(conf, HAServiceProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); rpcProxy = RPC.getProxy(HAServiceProtocolPB.class, RPC.getProtocolVersion(HAServiceProtocolPB.class), addr, UserGroupInformation.getCurrentUser(), conf, socketFactory, timeout); diff 
--git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/ZKFCProtocolClientSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/ZKFCProtocolClientSideTranslatorPB.java index 7001d93995f0f..3777207c7e45c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/ZKFCProtocolClientSideTranslatorPB.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/ZKFCProtocolClientSideTranslatorPB.java @@ -28,7 +28,7 @@ import org.apache.hadoop.ha.proto.ZKFCProtocolProtos.CedeActiveRequestProto; import org.apache.hadoop.ha.proto.ZKFCProtocolProtos.GracefulFailoverRequestProto; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.AccessControlException; @@ -48,7 +48,7 @@ public ZKFCProtocolClientSideTranslatorPB( InetSocketAddress addr, Configuration conf, SocketFactory socketFactory, int timeout) throws IOException { RPC.setProtocolEngine(conf, ZKFCProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); rpcProxy = RPC.getProxy(ZKFCProtocolPB.class, RPC.getProtocolVersion(ZKFCProtocolPB.class), addr, UserGroupInformation.getCurrentUser(), conf, socketFactory, timeout); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java index 51db21c185f20..5f47ddb339212 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HtmlQuoting.java @@ -80,6 +80,7 @@ public static boolean needsQuoting(String str) { * @param buffer the byte array to take the 
characters from * @param off the index of the first byte to quote * @param len the number of bytes to quote + * @throws IOException raised on errors performing I/O. */ public static void quoteHtmlChars(OutputStream output, byte[] buffer, int off, int len) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java index 3fd74f0e89a27..8dadbe390a56c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java @@ -27,14 +27,18 @@ import java.net.MalformedURLException; import java.net.URI; import java.net.URL; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Enumeration; -import java.util.HashMap; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; +import java.util.ArrayList; import java.util.Map; +import java.util.HashMap; +import java.util.Collections; +import java.util.Optional; import java.util.Properties; +import java.util.Enumeration; +import java.util.Arrays; +import java.util.Timer; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -50,9 +54,9 @@ import javax.servlet.http.HttpServletRequestWrapper; import javax.servlet.http.HttpServletResponse; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import com.sun.jersey.spi.container.servlet.ServletContainer; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; @@ 
-74,6 +78,8 @@ import org.apache.hadoop.security.authentication.server.PseudoAuthenticationHandler; import org.apache.hadoop.security.authentication.util.SignerSecretProvider; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory; +import org.apache.hadoop.security.ssl.FileMonitoringTimerTask; import org.apache.hadoop.security.ssl.SSLFactory; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Shell; @@ -89,7 +95,7 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.SslConnectionFactory; -import org.eclipse.jetty.server.handler.AllowSymLinkAliasChecker; +import org.eclipse.jetty.server.SymlinkAllowedResourceAliasChecker; import org.eclipse.jetty.server.handler.ContextHandlerCollection; import org.eclipse.jetty.server.handler.HandlerCollection; import org.eclipse.jetty.server.handler.RequestLogHandler; @@ -148,7 +154,7 @@ public final class HttpServer2 implements FilterContainer { // idle timeout in milliseconds public static final String HTTP_IDLE_TIMEOUT_MS_KEY = "hadoop.http.idle_timeout.ms"; - public static final int HTTP_IDLE_TIMEOUT_MS_DEFAULT = 10000; + public static final int HTTP_IDLE_TIMEOUT_MS_DEFAULT = 60000; public static final String HTTP_TEMP_DIR_KEY = "hadoop.http.temp.dir"; public static final String FILTER_INITIALIZER_PROPERTY @@ -184,6 +190,7 @@ public final class HttpServer2 implements FilterContainer { static final String STATE_DESCRIPTION_ALIVE = " - alive"; static final String STATE_DESCRIPTION_NOT_LIVE = " - not live"; private final SignerSecretProvider secretProvider; + private final Optional configurationChangeMonitor; private XFrameOption xFrameOption; private boolean xFrameOptionIsEnabled; public static final String HTTP_HEADER_PREFIX = "hadoop.http.header."; @@ -239,6 +246,8 @@ public static class Builder { private boolean sniHostCheckEnabled; + private Optional 
configurationChangeMonitor = Optional.empty(); + public Builder setName(String name){ this.name = name; return this; @@ -253,6 +262,7 @@ public Builder setName(String name){ * specifies the binding address, and the port specifies the * listening port. Unspecified or zero port means that the server * can listen to any port. + * @return Builder. */ public Builder addEndpoint(URI endpoint) { endpoints.add(endpoint); @@ -263,6 +273,9 @@ public Builder addEndpoint(URI endpoint) { * Set the hostname of the http server. The host name is used to resolve the * _HOST field in Kerberos principals. The hostname of the first listener * will be used if the name is unspecified. + * + * @param hostName hostName. + * @return Builder. */ public Builder hostName(String hostName) { this.hostName = hostName; @@ -291,6 +304,9 @@ public Builder keyPassword(String password) { /** * Specify whether the server should authorize the client in SSL * connections. + * + * @param value value. + * @return Builder. */ public Builder needsClientAuth(boolean value) { this.needsClientAuth = value; @@ -315,6 +331,9 @@ public Builder setConf(Configuration conf) { /** * Specify the SSL configuration to load. This API provides an alternative * to keyStore/keyPassword/trustStore. + * + * @param sslCnf sslCnf. + * @return Builder. 
*/ public Builder setSSLConf(Configuration sslCnf) { this.sslConf = sslCnf; @@ -569,12 +588,54 @@ private ServerConnector createHttpsChannelConnector( } setEnabledProtocols(sslContextFactory); + + long storesReloadInterval = + conf.getLong(FileBasedKeyStoresFactory.SSL_STORES_RELOAD_INTERVAL_TPL_KEY, + FileBasedKeyStoresFactory.DEFAULT_SSL_STORES_RELOAD_INTERVAL); + + if (storesReloadInterval > 0 && + (keyStore != null || trustStore != null)) { + this.configurationChangeMonitor = Optional.of( + this.makeConfigurationChangeMonitor(storesReloadInterval, sslContextFactory)); + } + conn.addFirstConnectionFactory(new SslConnectionFactory(sslContextFactory, HttpVersion.HTTP_1_1.asString())); return conn; } + private Timer makeConfigurationChangeMonitor(long reloadInterval, + SslContextFactory.Server sslContextFactory) { + java.util.Timer timer = new java.util.Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); + ArrayList locations = new ArrayList(); + if (keyStore != null) { + locations.add(Paths.get(keyStore)); + } + if (trustStore != null) { + locations.add(Paths.get(trustStore)); + } + // + // The Jetty SSLContextFactory provides a 'reload' method which will reload both + // truststore and keystore certificates. 
+ // + timer.schedule(new FileMonitoringTimerTask( + locations, + path -> { + LOG.info("Reloading keystore and truststore certificates."); + try { + sslContextFactory.reload(factory -> { }); + } catch (Exception ex) { + LOG.error("Failed to reload SSL keystore " + + "and truststore certificates", ex); + } + },null), + reloadInterval, + reloadInterval + ); + return timer; + } + private void setEnabledProtocols(SslContextFactory sslContextFactory) { String enabledProtocols = conf.get(SSLFactory.SSL_ENABLED_PROTOCOLS_KEY, SSLFactory.SSL_ENABLED_PROTOCOLS_DEFAULT); @@ -617,6 +678,7 @@ private HttpServer2(final Builder b) throws IOException { this.webAppContext = createWebAppContext(b, adminsAcl, appDir); this.xFrameOptionIsEnabled = b.xFrameEnabled; this.xFrameOption = b.xFrameOption; + this.configurationChangeMonitor = b.configurationChangeMonitor; try { this.secretProvider = @@ -779,8 +841,11 @@ private static FilterInitializer[] getFilterInitializers(Configuration conf) { /** * Add default apps. + * + * @param parent contexthandlercollection. * @param appDir The application directory - * @throws IOException + * @param conf configuration. + * @throws IOException raised on errors performing I/O. 
*/ protected void addDefaultApps(ContextHandlerCollection parent, final String appDir, Configuration conf) throws IOException { @@ -807,7 +872,7 @@ protected void addDefaultApps(ContextHandlerCollection parent, handler.setHttpOnly(true); handler.getSessionCookieConfig().setSecure(true); logContext.setSessionHandler(handler); - logContext.addAliasCheck(new AllowSymLinkAliasChecker()); + logContext.addAliasCheck(new SymlinkAllowedResourceAliasChecker(logContext)); setContextAttributes(logContext, conf); addNoCacheFilter(logContext); defaultContexts.put(logContext, true); @@ -826,7 +891,7 @@ protected void addDefaultApps(ContextHandlerCollection parent, handler.setHttpOnly(true); handler.getSessionCookieConfig().setSecure(true); staticContext.setSessionHandler(handler); - staticContext.addAliasCheck(new AllowSymLinkAliasChecker()); + staticContext.addAliasCheck(new SymlinkAllowedResourceAliasChecker(staticContext)); setContextAttributes(staticContext, conf); defaultContexts.put(staticContext, true); } @@ -1061,6 +1126,12 @@ public void addGlobalFilter(String name, String classname, /** * Define a filter for a context and set up default url mappings. + * + * @param ctx ctx. + * @param name name. + * @param classname classname. + * @param parameters parameters. + * @param urls urls. */ public static void defineFilter(ServletContextHandler ctx, String name, String classname, Map parameters, String[] urls) { @@ -1171,6 +1242,7 @@ public int getPort() { /** * Get the address that corresponds to a particular connector. * + * @param index index. * @return the corresponding address for the connector, or null if there's no * such connector or the connector is not bounded or was closed. */ @@ -1190,6 +1262,9 @@ public InetSocketAddress getConnectorAddress(int index) { /** * Set the min, max number of worker threads (simultaneous connections). + * + * @param min min. + * @param max max. 
*/ public void setThreads(int min, int max) { QueuedThreadPool pool = (QueuedThreadPool) webServer.getThreadPool(); @@ -1216,6 +1291,8 @@ private void initSpnego(Configuration conf, String hostName, /** * Start the server. Does not wait for the server to start. + * + * @throws IOException raised on errors performing I/O. */ public void start() throws IOException { try { @@ -1346,7 +1423,11 @@ private void bindForPortRange(ServerConnector listener, int startPort) try { bindListener(listener); return; - } catch (BindException ex) { + } catch (IOException ex) { + if (!(ex instanceof BindException) + && !(ex.getCause() instanceof BindException)) { + throw ex; + } // Ignore exception. Move to next port. ioException = ex; } @@ -1376,10 +1457,22 @@ void openListeners() throws Exception { } /** - * stop the server + * stop the server. + * + * @throws Exception exception. */ public void stop() throws Exception { MultiException exception = null; + if (this.configurationChangeMonitor.isPresent()) { + try { + this.configurationChangeMonitor.get().cancel(); + } catch (Exception e) { + LOG.error( + "Error while canceling configuration monitoring timer for webapp" + + webAppContext.getDisplayName(), e); + exception = addMultiException(exception, e); + } + } for (ServerConnector c : listeners) { try { c.close(); @@ -1464,6 +1557,7 @@ public String toString() { * @param request the servlet request. * @param response the servlet response. * @return TRUE/FALSE based on the logic decribed above. + * @throws IOException raised on errors performing I/O. */ public static boolean isInstrumentationAccessAllowed( ServletContext servletContext, HttpServletRequest request, @@ -1485,9 +1579,11 @@ public static boolean isInstrumentationAccessAllowed( * Does the user sending the HttpServletRequest has the administrator ACLs? If * it isn't the case, response will be modified to send an error to the user. * + * @param servletContext servletContext. + * @param request request. 
* @param response used to send the error response if user does not have admin access. * @return true if admin-authorized, false otherwise - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static boolean hasAdministratorAccess( ServletContext servletContext, HttpServletRequest request, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/AbstractMapWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/AbstractMapWritable.java index 44e0bdce5edd6..4df2d1d1f84ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/AbstractMapWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/AbstractMapWritable.java @@ -29,7 +29,7 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Abstract base class for MapWritable and SortedMapWritable @@ -84,7 +84,10 @@ private synchronized void addToMap(Class clazz, byte id) { idToClassMap.put(id, clazz); } - /** Add a Class to the maps if it is not already present. */ + /** + * Add a Class to the maps if it is not already present. + * @param clazz clazz. + */ protected synchronized void addToMap(Class clazz) { if (classToIdMap.containsKey(clazz)) { return; @@ -97,17 +100,28 @@ protected synchronized void addToMap(Class clazz) { addToMap(clazz, id); } - /** @return the Class class for the specified id */ + /** + * the Class class for the specified id. + * @param id id. + * @return the Class class for the specified id. + */ protected Class getClass(byte id) { return idToClassMap.get(id); } - /** @return the id for the specified Class */ + /** + * get id. + * @return the id for the specified Class. + * @param clazz clazz. 
+ */ protected byte getId(Class clazz) { return classToIdMap.containsKey(clazz) ? classToIdMap.get(clazz) : -1; } - /** Used by child copy constructors. */ + /** + * Used by child copy constructors. + * @param other other. + */ protected synchronized void copy(Writable other) { if (other != null) { try { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayFile.java index bee5fd2cb430c..313caa6360827 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayFile.java @@ -38,7 +38,15 @@ protected ArrayFile() {} // no public ctor public static class Writer extends MapFile.Writer { private LongWritable count = new LongWritable(0); - /** Create the named file for values of the named class. */ + /** + * Create the named file for values of the named class. + * + * @param conf configuration. + * @param fs file system. + * @param file file. + * @param valClass valClass. + * @throws IOException raised on errors performing I/O. + */ public Writer(Configuration conf, FileSystem fs, String file, Class valClass) throws IOException { @@ -46,7 +54,17 @@ public Writer(Configuration conf, FileSystem fs, valueClass(valClass)); } - /** Create the named file for values of the named class. */ + /** + * Create the named file for values of the named class. + * + * @param conf configuration. + * @param fs file system. + * @param file file. + * @param valClass valClass. + * @param compress compress. + * @param progress progress. + * @throws IOException raised on errors performing I/O. + */ public Writer(Configuration conf, FileSystem fs, String file, Class valClass, CompressionType compress, Progressable progress) @@ -58,7 +76,11 @@ public Writer(Configuration conf, FileSystem fs, progressable(progress)); } - /** Append a value to the file. 
*/ + /** + * Append a value to the file. + * @param value value. + * @throws IOException raised on errors performing I/O. + */ public synchronized void append(Writable value) throws IOException { super.append(count, value); // add to map count.set(count.get()+1); // increment count @@ -69,31 +91,59 @@ public synchronized void append(Writable value) throws IOException { public static class Reader extends MapFile.Reader { private LongWritable key = new LongWritable(); - /** Construct an array reader for the named file.*/ + /** + * Construct an array reader for the named file. + * @param fs FileSystem. + * @param file file. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. + */ public Reader(FileSystem fs, String file, Configuration conf) throws IOException { super(new Path(file), conf); } - /** Positions the reader before its nth value. */ + /** + * Positions the reader before its nth value. + * + * @param n n key. + * @throws IOException raised on errors performing I/O. + */ public synchronized void seek(long n) throws IOException { key.set(n); seek(key); } - /** Read and return the next value in the file. */ + /** + * Read and return the next value in the file. + * + * @param value value. + * @throws IOException raised on errors performing I/O. + * @return Writable. + */ public synchronized Writable next(Writable value) throws IOException { return next(key, value) ? value : null; } - /** Returns the key associated with the most recent call to {@link + /** + * Returns the key associated with the most recent call to {@link * #seek(long)}, {@link #next(Writable)}, or {@link - * #get(long,Writable)}. */ + * #get(long,Writable)}. + * + * @return key key. + * @throws IOException raised on errors performing I/O. + */ public synchronized long key() throws IOException { return key.get(); } - /** Return the nth value in the file. */ + /** + * Return the nth value in the file. + * @param n n key. + * @param value value. 
+ * @throws IOException raised on errors performing I/O. + * @return writable. + */ public synchronized Writable get(long n, Writable value) throws IOException { key.set(n); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayPrimitiveWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayPrimitiveWritable.java index 2b6f3166bc282..ce7813e7483a6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayPrimitiveWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ArrayPrimitiveWritable.java @@ -106,7 +106,9 @@ public ArrayPrimitiveWritable() { /** * Construct an instance of known type but no value yet - * for use with type-specific wrapper classes + * for use with type-specific wrapper classes. + * + * @param componentType componentType. */ public ArrayPrimitiveWritable(Class componentType) { checkPrimitive(componentType); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BinaryComparable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BinaryComparable.java index a32c44c8e5058..a78ff8b6c583e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BinaryComparable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BinaryComparable.java @@ -31,11 +31,15 @@ public abstract class BinaryComparable implements Comparable { /** * Return n st bytes 0..n-1 from {#getBytes()} are valid. + * + * @return length. */ public abstract int getLength(); /** * Return representative byte array for this instance. + * + * @return getBytes. */ public abstract byte[] getBytes(); @@ -53,6 +57,11 @@ public int compareTo(BinaryComparable other) { /** * Compare bytes from {#getBytes()} to those provided. + * + * @param other other. + * @param off off. + * @param len len. + * @return compareBytes. 
*/ public int compareTo(byte[] other, int off, int len) { return WritableComparator.compareBytes(getBytes(), 0, getLength(), diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BloomMapFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BloomMapFile.java index 519fcd74cbb71..91ea07d5de412 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BloomMapFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BloomMapFile.java @@ -259,7 +259,7 @@ private void initBloomFilter(Path dirName, * probability of false positives. * @param key key to check * @return false iff key doesn't exist, true if key probably exists. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public boolean probablyHasKey(WritableComparable key) throws IOException { if (bloomFilter == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BooleanWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BooleanWritable.java index 0079079a7921d..789b866255b01 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BooleanWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BooleanWritable.java @@ -35,21 +35,24 @@ public class BooleanWritable implements WritableComparable { */ public BooleanWritable() {}; - /** + /** + * @param value value. */ public BooleanWritable(boolean value) { set(value); } /** - * Set the value of the BooleanWritable + * Set the value of the BooleanWritable. + * @param value value. */ public void set(boolean value) { this.value = value; } /** - * Returns the value of the BooleanWritable + * Returns the value of the BooleanWritable. + * @return the value of the BooleanWritable. 
*/ public boolean get() { return value; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BoundedByteArrayOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BoundedByteArrayOutputStream.java index c27449d36189c..542721f318d0a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BoundedByteArrayOutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BoundedByteArrayOutputStream.java @@ -114,20 +114,28 @@ public void reset() { this.currentPointer = startOffset; } - /** Return the current limit */ + /** + * Return the current limit. + * @return limit. + */ public int getLimit() { return limit; } - /** Returns the underlying buffer. + /** + * Returns the underlying buffer. * Data is only valid to {@link #size()}. + * @return the underlying buffer. */ public byte[] getBuffer() { return buffer; } - /** Returns the length of the valid data + /** + * Returns the length of the valid data * currently in the buffer. + * + * @return the length of the valid data. */ public int size() { return currentPointer - startOffset; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java index aa5f8731c54a7..b30e7cfb9c5f0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteBufferPool.java @@ -45,4 +45,9 @@ public interface ByteBufferPool { * @param buffer a direct bytebuffer */ void putBuffer(ByteBuffer buffer); + + /** + * Clear the buffer pool thus releasing all the buffers. 
+ */ + default void release() { } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteWritable.java index ffcdea2c9a3ab..c4b88f4b5c98b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ByteWritable.java @@ -33,10 +33,16 @@ public ByteWritable() {} public ByteWritable(byte value) { set(value); } - /** Set the value of this ByteWritable. */ + /** + * Set the value of this ByteWritable. + * @param value value. + */ public void set(byte value) { this.value = value; } - /** Return the value of this ByteWritable. */ + /** + * Return the value of this ByteWritable. + * @return value bytes. + */ public byte get() { return value; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java index 7d7b75ba05a00..8085331530e12 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java @@ -69,6 +69,8 @@ public BytesWritable(byte[] bytes, int length) { /** * Get a copy of the bytes that is exactly the length of the data. * See {@link #getBytes()} for faster access to the underlying array. + * + * @return copyBytes. */ public byte[] copyBytes() { byte[] result = new byte[size]; @@ -89,6 +91,7 @@ public byte[] getBytes() { /** * Get the data from the BytesWritable. * @deprecated Use {@link #getBytes()} instead. + * @return data from the BytesWritable. */ @Deprecated public byte[] get() { @@ -106,6 +109,7 @@ public int getLength() { /** * Get the current size of the buffer. * @deprecated Use {@link #getLength()} instead. 
+ * @return current size of the buffer. */ @Deprecated public int getSize() { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/CompressedWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/CompressedWritable.java index 6550e1f2fde04..c0315ab828c3b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/CompressedWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/CompressedWritable.java @@ -67,7 +67,11 @@ protected void ensureInflated() { } } - /** Subclasses implement this instead of {@link #readFields(DataInput)}. */ + /** + * Subclasses implement this instead of {@link #readFields(DataInput)}. + * @param in data input. + * @throws IOException raised on errors performing I/O. + */ protected abstract void readFieldsCompressed(DataInput in) throws IOException; @@ -87,7 +91,12 @@ public final void write(DataOutput out) throws IOException { out.write(compressed); } - /** Subclasses implement this instead of {@link #write(DataOutput)}. */ + /** + * Subclasses implement this instead of {@link #write(DataOutput)}. + * + * @param out data output. + * @throws IOException raised on errors performing I/O. + */ protected abstract void writeCompressed(DataOutput out) throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataInputBuffer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataInputBuffer.java index 63c41c2e75008..85e905d870096 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataInputBuffer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataInputBuffer.java @@ -140,12 +140,23 @@ private DataInputBuffer(Buffer buffer) { this.buffer = buffer; } - /** Resets the data that the buffer reads. */ + /** + * Resets the data that the buffer reads. + * + * @param input input. 
+ * @param length length. + */ public void reset(byte[] input, int length) { buffer.reset(input, 0, length); } - /** Resets the data that the buffer reads. */ + /** + * Resets the data that the buffer reads. + * + * @param input input. + * @param start start. + * @param length length. + */ public void reset(byte[] input, int start, int length) { buffer.reset(input, start, length); } @@ -154,12 +165,18 @@ public byte[] getData() { return buffer.getData(); } - /** Returns the current position in the input. */ + /** + * Returns the current position in the input. + * + * @return position. + */ public int getPosition() { return buffer.getPosition(); } /** * Returns the index one greater than the last valid character in the input * stream buffer. + * + * @return length. */ public int getLength() { return buffer.getLength(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataOutputBuffer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataOutputBuffer.java index 4c2fa67f8f24c..c5746e6a3001a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataOutputBuffer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DataOutputBuffer.java @@ -23,7 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** A reusable {@link DataOutput} implementation that writes to an in-memory * buffer. @@ -100,27 +100,45 @@ private DataOutputBuffer(Buffer buffer) { this.buffer = buffer; } - /** Returns the current contents of the buffer. + /** + * Returns the current contents of the buffer. * Data is only valid to {@link #getLength()}. + * + * @return data byte. 
*/ public byte[] getData() { return buffer.getData(); } - /** Returns the length of the valid data currently in the buffer. */ + /** + * Returns the length of the valid data currently in the buffer. + * @return length. + */ public int getLength() { return buffer.getLength(); } - /** Resets the buffer to empty. */ + /** + * Resets the buffer to empty. + * @return DataOutputBuffer. + */ public DataOutputBuffer reset() { this.written = 0; buffer.reset(); return this; } - /** Writes bytes from a DataInput directly into the buffer. */ + /** + * Writes bytes from a DataInput directly into the buffer. + * @param in data input. + * @param length length. + * @throws IOException raised on errors performing I/O. + */ public void write(DataInput in, int length) throws IOException { buffer.write(in, length); } - /** Write to a file stream */ + /** + * Write to a file stream. + * @param out OutputStream. + * @throws IOException raised on errors performing I/O. + */ public void writeTo(OutputStream out) throws IOException { buffer.writeTo(out); } @@ -129,6 +147,10 @@ public void writeTo(OutputStream out) throws IOException { * Overwrite an integer into the internal buffer. Note that this call can only * be used to overwrite existing data in the buffer, i.e., buffer#count cannot * be increased, and DataOutputStream#written cannot be increased. + * + * @param v v. + * @param offset offset. + * @throws IOException raised on errors performing I/O. 
*/ public void writeInt(int v, int offset) throws IOException { Preconditions.checkState(offset + 4 <= buffer.getLength()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java index 7453996ecab1c..7be50b0c539b9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/DefaultStringifier.java @@ -158,6 +158,9 @@ public static K load(Configuration conf, String keyName, public static void storeArray(Configuration conf, K[] items, String keyName) throws IOException { + if (items.length == 0) { + throw new IndexOutOfBoundsException(); + } DefaultStringifier stringifier = new DefaultStringifier(conf, GenericsUtil.getClass(items[0])); try { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java index bbedf2a2dc371..c4c2940622729 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ElasticByteBufferPool.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.io; -import com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; import org.apache.commons.lang3.builder.HashCodeBuilder; import java.nio.ByteBuffer; @@ -36,8 +36,8 @@ */ @InterfaceAudience.Public @InterfaceStability.Stable -public final class ElasticByteBufferPool implements ByteBufferPool { - private static final class Key implements Comparable { +public class ElasticByteBufferPool implements ByteBufferPool { + protected static final class Key implements Comparable { private final int 
capacity; private final long insertionTime; @@ -96,6 +96,7 @@ public synchronized ByteBuffer getBuffer(boolean direct, int length) { ByteBuffer.allocate(length); } tree.remove(entry.getKey()); + entry.getValue().clear(); return entry.getValue(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java index be86159519b87..4b1dc7513d054 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java @@ -64,8 +64,8 @@ public boolean add(E e) { * the argument value's size is bigger than zero, the argument * elementType is not be used. * - * @param value - * @param elementType + * @param value enumSet value. + * @param elementType elementType. */ public EnumSetWritable(EnumSet value, Class elementType) { set(value, elementType); @@ -75,7 +75,7 @@ public EnumSetWritable(EnumSet value, Class elementType) { * Construct a new EnumSetWritable. Argument value should not be null * or empty. * - * @param value + * @param value enumSet value. */ public EnumSetWritable(EnumSet value) { this(value, null); @@ -88,8 +88,8 @@ public EnumSetWritable(EnumSet value) { * null. If the argument value's size is bigger than zero, the * argument elementType is not be used. * - * @param value - * @param elementType + * @param value enumSet Value. + * @param elementType elementType. */ public void set(EnumSet value, Class elementType) { if ((value == null || value.size() == 0) @@ -106,7 +106,10 @@ public void set(EnumSet value, Class elementType) { } } - /** Return the value of this EnumSetWritable. */ + /** + * Return the value of this EnumSetWritable. + * @return EnumSet. 
+ */ public EnumSet get() { return value; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java index 5af6602b87886..1ef2119b688fd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java @@ -26,7 +26,7 @@ import org.slf4j.LoggerFactory; import sun.misc.Unsafe; -import com.google.common.primitives.UnsignedBytes; +import org.apache.hadoop.thirdparty.com.google.common.primitives.UnsignedBytes; /** * Utility code to do optimized byte-array comparison. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FloatWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FloatWritable.java index 367fc946da135..864bb8752f5c4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FloatWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FloatWritable.java @@ -33,10 +33,16 @@ public FloatWritable() {} public FloatWritable(float value) { set(value); } - /** Set the value of this FloatWritable. */ + /** + * Set the value of this FloatWritable. + * @param value value. + */ public void set(float value) { this.value = value; } - /** Return the value of this FloatWritable. */ + /** + * Return the value of this FloatWritable. + * @return value. 
+ */ public float get() { return value; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/GenericWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/GenericWritable.java index 7cfeed7f931d7..6de927467e478 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/GenericWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/GenericWritable.java @@ -90,7 +90,7 @@ public abstract class GenericWritable implements Writable, Configurable { /** * Set the instance that is wrapped. * - * @param obj + * @param obj input obj. */ public void set(Writable obj) { instance = obj; @@ -109,6 +109,7 @@ public void set(Writable obj) { /** * Return the wrapped instance. + * @return the wrapped instance. */ public Writable get() { return instance; @@ -145,6 +146,7 @@ public void write(DataOutput out) throws IOException { /** * Return all classes that may be wrapped. Subclasses should implement this * to return a constant array of classes. + * @return all classes that may be wrapped. */ abstract protected Class[] getTypes(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IOUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IOUtils.java index 121af64b01182..f0a9b0b6952f2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IOUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IOUtils.java @@ -59,7 +59,8 @@ public class IOUtils { * @param out OutputStream to write to * @param buffSize the size of the buffer * @param close whether or not close the InputStream and - * OutputStream at the end. The streams are closed in the finally clause. + * OutputStream at the end. The streams are closed in the finally clause. + * @throws IOException raised on errors performing I/O. 
*/ public static void copyBytes(InputStream in, OutputStream out, int buffSize, boolean close) @@ -85,7 +86,8 @@ public static void copyBytes(InputStream in, OutputStream out, * * @param in InputStrem to read from * @param out OutputStream to write to - * @param buffSize the size of the buffer + * @param buffSize the size of the buffer. + * @throws IOException raised on errors performing I/O. */ public static void copyBytes(InputStream in, OutputStream out, int buffSize) throws IOException { @@ -107,7 +109,8 @@ public static void copyBytes(InputStream in, OutputStream out, int buffSize) * * @param in InputStrem to read from * @param out OutputStream to write to - * @param conf the Configuration object + * @param conf the Configuration object. + * @throws IOException raised on errors performing I/O. */ public static void copyBytes(InputStream in, OutputStream out, Configuration conf) throws IOException { @@ -123,6 +126,7 @@ public static void copyBytes(InputStream in, OutputStream out, Configuration con * @param conf the Configuration object * @param close whether or not close the InputStream and * OutputStream at the end. The streams are closed in the finally clause. + * @throws IOException raised on errors performing I/O. */ public static void copyBytes(InputStream in, OutputStream out, Configuration conf, boolean close) throws IOException { @@ -181,6 +185,7 @@ public static void copyBytes(InputStream in, OutputStream out, long count, * @param off - offset within buf * @param len - amount of data to be read * @return number of bytes read + * @throws IOException raised on errors performing I/O. */ public static int wrappedReadForCompressedData(InputStream is, byte[] buf, int off, int len) throws IOException { @@ -407,6 +412,7 @@ public static List listDirectory(File dir, FilenameFilter filter) * once the sync is done.
    * Borrowed from Uwe Schindler in LUCENE-5588 * @param fileToSync the file to fsync + * @throws IOException raised on errors performing I/O. */ public static void fsync(File fileToSync) throws IOException { if (!fileToSync.exists()) { @@ -440,7 +446,7 @@ public static void fsync(File fileToSync) throws IOException { * @param isDir if true, the given file is a directory (Channel should be * opened for read and ignore IOExceptions, because not all file * systems and operating systems allow to fsync on a directory) - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void fsync(FileChannel channel, boolean isDir) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/InputBuffer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/InputBuffer.java index 0d084b8396f16..686b359f57d32 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/InputBuffer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/InputBuffer.java @@ -75,20 +75,35 @@ private InputBuffer(Buffer buffer) { this.buffer = buffer; } - /** Resets the data that the buffer reads. */ + /** + * Resets the data that the buffer reads. + * @param input input. + * @param length length. + */ public void reset(byte[] input, int length) { buffer.reset(input, 0, length); } - /** Resets the data that the buffer reads. */ + /** + * Resets the data that the buffer reads. + * @param input input. + * @param start start. + * @param length length. + */ public void reset(byte[] input, int start, int length) { buffer.reset(input, start, length); } - /** Returns the current position in the input. */ + /** + * Returns the current position in the input. + * @return the current position in the input. + */ public int getPosition() { return buffer.getPosition(); } - /** Returns the length of the input. */ + /** + * Returns the length of the input. 
+ * @return length of the input. + */ public int getLength() { return buffer.getLength(); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IntWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IntWritable.java index f656d028cb054..ffcf93946d06a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IntWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/IntWritable.java @@ -36,10 +36,16 @@ public IntWritable() {} public IntWritable(int value) { set(value); } - /** Set the value of this IntWritable. */ + /** + * Set the value of this IntWritable. + * @param value input value. + */ public void set(int value) { this.value = value; } - /** Return the value of this IntWritable. */ + /** + * Return the value of this IntWritable. + * @return value of this IntWritable. + */ public int get() { return value; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/LongWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/LongWritable.java index b77ca6781a639..9262af87bc2e1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/LongWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/LongWritable.java @@ -36,10 +36,16 @@ public LongWritable() {} public LongWritable(long value) { set(value); } - /** Set the value of this LongWritable. */ + /** + * Set the value of this LongWritable. + * @param value value. + */ public void set(long value) { this.value = value; } - /** Return the value of this LongWritable. */ + /** + * Return the value of this LongWritable. + * @return value of this LongWritable. 
+ */ public long get() { return value; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MD5Hash.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MD5Hash.java index 99c17acdd43d2..edfcf6e1e7754 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MD5Hash.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MD5Hash.java @@ -54,12 +54,18 @@ public MD5Hash() { this.digest = new byte[MD5_LEN]; } - /** Constructs an MD5Hash from a hex string. */ + /** + * Constructs an MD5Hash from a hex string. + * @param hex input hex. + */ public MD5Hash(String hex) { setDigest(hex); } - /** Constructs an MD5Hash with a specified value. */ + /** + * Constructs an MD5Hash with a specified value. + * @param digest digest. + */ public MD5Hash(byte[] digest) { if (digest.length != MD5_LEN) throw new IllegalArgumentException("Wrong length: " + digest.length); @@ -72,7 +78,12 @@ public void readFields(DataInput in) throws IOException { in.readFully(digest); } - /** Constructs, reads and returns an instance. */ + /** + * Constructs, reads and returns an instance. + * @param in in. + * @throws IOException raised on errors performing I/O. + * @return MD5Hash. + */ public static MD5Hash read(DataInput in) throws IOException { MD5Hash result = new MD5Hash(); result.readFields(in); @@ -85,21 +96,32 @@ public void write(DataOutput out) throws IOException { out.write(digest); } - /** Copy the contents of another instance into this instance. */ + /** + * Copy the contents of another instance into this instance. + * @param that that. + */ public void set(MD5Hash that) { System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN); } - /** Returns the digest bytes. */ + /** + * Returns the digest bytes. + * @return digest. + */ public byte[] getDigest() { return digest; } - /** Construct a hash value for a byte array. */ + /** + * Construct a hash value for a byte array. 
+ * @param data data. + * @return MD5Hash. + */ public static MD5Hash digest(byte[] data) { return digest(data, 0, data.length); } /** - * Create a thread local MD5 digester + * Create a thread local MD5 digester. + * @return MessageDigest. */ public static MessageDigest getDigester() { MessageDigest digester = DIGESTER_FACTORY.get(); @@ -107,7 +129,12 @@ public static MessageDigest getDigester() { return digester; } - /** Construct a hash value for the content from the InputStream. */ + /** + * Construct a hash value for the content from the InputStream. + * @param in input stream. + * @return MD5Hash. + * @throws IOException raised on errors performing I/O. + */ public static MD5Hash digest(InputStream in) throws IOException { final byte[] buffer = new byte[4*1024]; @@ -119,7 +146,13 @@ public static MD5Hash digest(InputStream in) throws IOException { return new MD5Hash(digester.digest()); } - /** Construct a hash value for a byte array. */ + /** + * Construct a hash value for a byte array. + * @param data data. + * @param start start. + * @param len len. + * @return MD5Hash. + */ public static MD5Hash digest(byte[] data, int start, int len) { byte[] digest; MessageDigest digester = getDigester(); @@ -128,7 +161,13 @@ public static MD5Hash digest(byte[] data, int start, int len) { return new MD5Hash(digest); } - /** Construct a hash value for an array of byte array. */ + /** + * Construct a hash value for an array of byte array. + * @param dataArr dataArr. + * @param start start. + * @param len len. + * @return MD5Hash. + */ public static MD5Hash digest(byte[][] dataArr, int start, int len) { byte[] digest; MessageDigest digester = getDigester(); @@ -139,17 +178,28 @@ public static MD5Hash digest(byte[][] dataArr, int start, int len) { return new MD5Hash(digest); } - /** Construct a hash value for a String. */ + /** + * Construct a hash value for a String. + * @param string string. + * @return MD5Hash. 
+ */ public static MD5Hash digest(String string) { return digest(UTF8.getBytes(string)); } - /** Construct a hash value for a String. */ + /** + * Construct a hash value for a String. + * @param utf8 utf8. + * @return MD5Hash. + */ public static MD5Hash digest(UTF8 utf8) { return digest(utf8.getBytes(), 0, utf8.getLength()); } - /** Construct a half-sized version of this MD5. Fits in a long **/ + /** + * Construct a half-sized version of this MD5. Fits in a long. + * @return halfDigest. + */ public long halfDigest() { long value = 0; for (int i = 0; i < 8; i++) @@ -226,7 +276,10 @@ public String toString() { return buf.toString(); } - /** Sets the digest value from a hex string. */ + /** + * Sets the digest value from a hex string. + * @param hex hex. + */ public void setDigest(String hex) { if (hex.length() != MD5_LEN*2) throw new IllegalArgumentException("Wrong length: " + hex.length()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MapFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MapFile.java index 51db0b3f0afef..7b3cd78e3ccf3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MapFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MapFile.java @@ -98,8 +98,16 @@ public static class Writer implements java.io.Closeable { private long lastIndexKeyCount = Long.MIN_VALUE; - /** Create the named map for keys of the named class. + /** + * Create the named map for keys of the named class. * @deprecated Use Writer(Configuration, Path, Option...) instead. + * + * @param conf configuration. + * @param fs filesystem. + * @param dirName dirName. + * @param keyClass keyClass. + * @param valClass valClass. + * @throws IOException raised on errors performing I/O. 
*/ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -108,8 +116,18 @@ public Writer(Configuration conf, FileSystem fs, String dirName, this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass)); } - /** Create the named map for keys of the named class. + /** + * Create the named map for keys of the named class. * @deprecated Use Writer(Configuration, Path, Option...) instead. + * + * @param conf configuration. + * @param fs fs. + * @param dirName dirName. + * @param keyClass keyClass. + * @param valClass valClass. + * @param compress compress. + * @param progress progress. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -120,8 +138,19 @@ public Writer(Configuration conf, FileSystem fs, String dirName, compression(compress), progressable(progress)); } - /** Create the named map for keys of the named class. + /** + * Create the named map for keys of the named class. * @deprecated Use Writer(Configuration, Path, Option...) instead. + * + * @param conf configuration. + * @param fs FileSystem. + * @param dirName dirName. + * @param keyClass keyClass. + * @param valClass valClass. + * @param compress compress. + * @param codec codec. + * @param progress progress. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -132,8 +161,16 @@ public Writer(Configuration conf, FileSystem fs, String dirName, compression(compress, codec), progressable(progress)); } - /** Create the named map for keys of the named class. + /** + * Create the named map for keys of the named class. * @deprecated Use Writer(Configuration, Path, Option...) instead. + * @param conf configuration. + * @param fs fs. + * @param dirName dirName. + * @param keyClass keyClass. + * @param valClass valClass. + * @param compress compress. + * @throws IOException raised on errors performing I/O. 
*/ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -145,6 +182,12 @@ public Writer(Configuration conf, FileSystem fs, String dirName, /** Create the named map using the named key comparator. * @deprecated Use Writer(Configuration, Path, Option...) instead. + * @param conf configuration. + * @param fs fs. + * @param dirName dirName. + * @param comparator comparator. + * @param valClass valClass. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -154,7 +197,14 @@ public Writer(Configuration conf, FileSystem fs, String dirName, valueClass(valClass)); } - /** Create the named map using the named key comparator. + /** Create the named map using the named key comparator. + * @param conf configuration. + * @param fs filesystem. + * @param dirName dirName. + * @param comparator comparator. + * @param valClass valClass. + * @param compress compress. + * @throws IOException raised on errors performing I/O. * @deprecated Use Writer(Configuration, Path, Option...) instead. */ @Deprecated @@ -165,8 +215,18 @@ public Writer(Configuration conf, FileSystem fs, String dirName, valueClass(valClass), compression(compress)); } - /** Create the named map using the named key comparator. + /** + * Create the named map using the named key comparator. * @deprecated Use Writer(Configuration, Path, Option...)} instead. + * + * @param conf configuration. + * @param fs filesystem. + * @param dirName dirName. + * @param comparator comparator. + * @param valClass valClass. + * @param compress CompressionType. + * @param progress progress. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -178,8 +238,19 @@ public Writer(Configuration conf, FileSystem fs, String dirName, progressable(progress)); } - /** Create the named map using the named key comparator. 
+ /** + * Create the named map using the named key comparator. * @deprecated Use Writer(Configuration, Path, Option...) instead. + * + * @param conf configuration. + * @param fs FileSystem. + * @param dirName dirName. + * @param comparator comparator. + * @param valClass valClass. + * @param compress CompressionType. + * @param codec codec. + * @param progress progress. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(Configuration conf, FileSystem fs, String dirName, @@ -285,16 +356,26 @@ public Writer(Configuration conf, this.index = SequenceFile.createWriter(conf, indexOptions); } - /** The number of entries that are added before an index entry is added.*/ + /** + * The number of entries that are added before an index entry is added. + * @return indexInterval + */ public int getIndexInterval() { return indexInterval; } - /** Sets the index interval. + /** + * Sets the index interval. * @see #getIndexInterval() + * + * @param interval interval. */ public void setIndexInterval(int interval) { indexInterval = interval; } - /** Sets the index interval and stores it in conf + /** + * Sets the index interval and stores it in conf. * @see #getIndexInterval() + * + * @param conf configuration. + * @param interval interval. */ public static void setIndexInterval(Configuration conf, int interval) { conf.setInt(INDEX_INTERVAL, interval); @@ -307,8 +388,14 @@ public synchronized void close() throws IOException { index.close(); } - /** Append a key/value pair to the map. The key must be greater or equal - * to the previous key added to the map. */ + /** + * Append a key/value pair to the map. The key must be greater or equal + * to the previous key added to the map. + * + * @param key key. + * @param val value. + * @throws IOException raised on errors performing I/O. 
+ */ public synchronized void append(WritableComparable key, Writable val) throws IOException { @@ -370,10 +457,18 @@ public static class Reader implements java.io.Closeable { private WritableComparable[] keys; private long[] positions; - /** Returns the class of keys in this file. */ + /** + * Returns the class of keys in this file. + * + * @return keyClass. + */ public Class getKeyClass() { return data.getKeyClass(); } - /** Returns the class of values in this file. */ + /** + * Returns the class of values in this file. + * + * @return Value Class. + */ public Class getValueClass() { return data.getValueClass(); } public static interface Option extends SequenceFile.Reader.Option {} @@ -403,8 +498,14 @@ public Reader(Path dir, Configuration conf, open(dir, comparator, conf, opts); } - /** Construct a map reader for the named map. + /** + * Construct a map reader for the named map. * @deprecated + * + * @param fs FileSystem. + * @param dirName dirName. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Reader(FileSystem fs, String dirName, @@ -412,8 +513,15 @@ public Reader(FileSystem fs, String dirName, this(new Path(dirName), conf); } - /** Construct a map reader for the named map using the named comparator. + /** + * Construct a map reader for the named map using the named comparator. * @deprecated + * + * @param fs FileSystem. + * @param dirName dirName. + * @param comparator WritableComparator. + * @param conf Configuration. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Reader(FileSystem fs, String dirName, WritableComparator comparator, @@ -450,6 +558,12 @@ protected synchronized void open(Path dir, /** * Override this method to specialize the type of * {@link SequenceFile.Reader} returned. + * + * @param dataFile data file. + * @param conf configuration. + * @param options options. + * @throws IOException raised on errors performing I/O. 
+ * @return SequenceFile.Reader. */ protected SequenceFile.Reader createDataFileReader(Path dataFile, Configuration conf, @@ -516,13 +630,21 @@ private void readIndex() throws IOException { } } - /** Re-positions the reader before its first key. */ + /** + * Re-positions the reader before its first key. + * + * @throws IOException raised on errors performing I/O. + */ public synchronized void reset() throws IOException { data.seek(firstPosition); } - /** Get the key at approximately the middle of the file. Or null if the - * file is empty. + /** + * Get the key at approximately the middle of the file. Or null if the + * file is empty. + * + * @throws IOException raised on errors performing I/O. + * @return WritableComparable. */ public synchronized WritableComparable midKey() throws IOException { @@ -534,9 +656,11 @@ public synchronized WritableComparable midKey() throws IOException { return keys[(count - 1) / 2]; } - /** Reads the final key from the file. + /** + * Reads the final key from the file. * * @param key key to read into + * @throws IOException raised on errors performing I/O. */ public synchronized void finalKey(WritableComparable key) throws IOException { @@ -556,9 +680,14 @@ public synchronized void finalKey(WritableComparable key) } } - /** Positions the reader at the named key, or if none such exists, at the + /** + * Positions the reader at the named key, or if none such exists, at the * first entry after the named key. Returns true iff the named key exists * in this map. + * + * @param key key. + * @throws IOException raised on errors performing I/O. + * @return true iff the named key exists in this map, false otherwise. */ public synchronized boolean seek(WritableComparable key) throws IOException { return seekInternal(key) == 0; @@ -669,15 +798,28 @@ else if (cmp > 0) return -(low + 1); // key not found. } - /** Read the next key/value pair in the map into key and + /** + * Read the next key/value pair in the map into key and * val.
Returns true if such a pair exists and false when at - the end of the map */ + the end of the map. + * + * @param key WritableComparable. + * @param val Writable. + * @return true if such a pair exists, false when at the end of the map. + * @throws IOException raised on errors performing I/O. + */ public synchronized boolean next(WritableComparable key, Writable val) throws IOException { return data.next(key, val); } - /** Return the value for the named key, or null if none exists. */ + /** + * Return the value for the named key, or null if none exists. + * @param key key. + * @param val val. + * @return the value for the named key, or null if none exists. + * @throws IOException raised on errors performing I/O. + */ public synchronized Writable get(WritableComparable key, Writable val) throws IOException { if (seek(key)) { @@ -692,9 +834,10 @@ public synchronized Writable get(WritableComparable key, Writable val) * Returns key or if it does not exist, at the first entry * after the named key. * -- * @param key - key that we're trying to find -- * @param val - data value if key is found -- * @return - the key that was the closest match or null if eof. + * @param key key that we're trying to find. + * @param val data value if key is found. + * @return the key that was the closest match or null if eof. + * @throws IOException raised on errors performing I/O. */ public synchronized WritableComparable getClosest(WritableComparable key, Writable val) @@ -711,6 +854,7 @@ public synchronized WritableComparable getClosest(WritableComparable key, * the first entry that falls just before the key. Otherwise, * return the record that sorts just after. * @return - the key that was the closest match or null if eof. + * @throws IOException raised on errors performing I/O.
*/ public synchronized WritableComparable getClosest(WritableComparable key, Writable val, final boolean before) @@ -730,7 +874,10 @@ public synchronized WritableComparable getClosest(WritableComparable key, return nextKey; } - /** Close the map. */ + /** + * Close the map. + * @throws IOException raised on errors performing I/O. + */ @Override public synchronized void close() throws IOException { if (!indexClosed) { @@ -741,7 +888,13 @@ public synchronized void close() throws IOException { } - /** Renames an existing map directory. */ + /** + * Renames an existing map directory. + * @param fs fs. + * @param oldName oldName. + * @param newName newName. + * @throws IOException raised on errors performing I/O. + */ public static void rename(FileSystem fs, String oldName, String newName) throws IOException { Path oldDir = new Path(oldName); @@ -751,7 +904,12 @@ public static void rename(FileSystem fs, String oldName, String newName) } } - /** Deletes the named map file. */ + /** + * Deletes the named map file. + * @param fs input fs. + * @param name input name. + * @throws IOException raised on errors performing I/O. + */ public static void delete(FileSystem fs, String name) throws IOException { Path dir = new Path(name); Path data = new Path(dir, DATA_FILE_NAME); @@ -769,8 +927,9 @@ public static void delete(FileSystem fs, String name) throws IOException { * @param keyClass key class (has to be a subclass of Writable) * @param valueClass value class (has to be a subclass of Writable) * @param dryrun do not perform any changes, just report what needs to be done + * @param conf configuration. * @return number of valid entries in this MapFile, or -1 if no fixing was needed - * @throws Exception + * @throws Exception Exception. */ public static long fix(FileSystem fs, Path dir, Class keyClass, @@ -870,11 +1029,12 @@ public Merger(Configuration conf) throws IOException { } /** - * Merge multiple MapFiles to one Mapfile + * Merge multiple MapFiles to one Mapfile. 
* - * @param inMapFiles - * @param outMapFile - * @throws IOException + * @param inMapFiles input inMapFiles. + * @param deleteInputs deleteInputs. + * @param outMapFile input outMapFile. + * @throws IOException raised on errors performing I/O. */ public void merge(Path[] inMapFiles, boolean deleteInputs, Path outMapFile) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MultipleIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MultipleIOException.java index c9d7ade43064f..452965b7c8220 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MultipleIOException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MultipleIOException.java @@ -42,7 +42,11 @@ private MultipleIOException(List exceptions) { /** @return the underlying exceptions */ public List getExceptions() {return exceptions;} - /** A convenient method to create an {@link IOException}. */ + /** + * A convenient method to create an {@link IOException}. + * @param exceptions IOException List. + * @return IOException. + */ public static IOException createIOException(List exceptions) { if (exceptions == null || exceptions.isEmpty()) { return null; @@ -60,7 +64,10 @@ public static IOException createIOException(List exceptions) { public static class Builder { private List exceptions; - /** Add the given {@link Throwable} to the exception list. */ + /** + * Add the given {@link Throwable} to the exception list. + * @param t Throwable. 
+ */ public void add(Throwable t) { if (exceptions == null) { exceptions = new ArrayList<>(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/NullWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/NullWritable.java index 77c590fdb6344..d6e4846264f98 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/NullWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/NullWritable.java @@ -32,7 +32,10 @@ public class NullWritable implements WritableComparable { private NullWritable() {} // no public ctor - /** Returns the single instance of this class. */ + /** + * Returns the single instance of this class. + * @return the single instance of this class. + */ public static NullWritable get() { return THIS; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java index b35a32f288b4b..29c06a01ad6e3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java @@ -54,13 +54,22 @@ public ObjectWritable(Class declaredClass, Object instance) { this.instance = instance; } - /** Return the instance, or null if none. */ + /** + * Return the instance, or null if none. + * @return the instance, or null if none. + */ public Object get() { return instance; } - /** Return the class this is meant to be. */ + /** + * Return the class this is meant to be. + * @return the class this is meant to be. + */ public Class getDeclaredClass() { return declaredClass; } - /** Reset the instance. */ + /** + * Reset the instance. + * @param instance instance. 
+ */ public void set(Object instance) { this.declaredClass = instance.getClass(); this.instance = instance; @@ -120,8 +129,16 @@ public void write(DataOutput out) throws IOException { } } - /** Write a {@link Writable}, {@link String}, primitive type, or an array of - * the preceding. */ + /** + * Write a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. + * + * @param out DataOutput. + * @param instance instance. + * @param conf Configuration. + * @param declaredClass declaredClass. + * @throws IOException raised on errors performing I/O. + */ public static void writeObject(DataOutput out, Object instance, Class declaredClass, Configuration conf) throws IOException { @@ -137,6 +154,13 @@ public static void writeObject(DataOutput out, Object instance, * usages, to preserve the ability to interchange files with other clusters * that may not be running the same version of software. Sometime in ~2013 * we can consider removing this parameter and always using the compact format. + * + * @param conf configuration. + * @param out dataoutput. + * @param declaredClass declaredClass. + * @param instance instance. + * @throws IOException raised on errors performing I/O. + * */ public static void writeObject(DataOutput out, Object instance, Class declaredClass, Configuration conf, boolean allowCompactArrays) @@ -210,15 +234,30 @@ public static void writeObject(DataOutput out, Object instance, } - /** Read a {@link Writable}, {@link String}, primitive type, or an array of - * the preceding. */ + /** + * Read a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. + * + * @param conf configuration. + * @param in DataInput. + * @return Object. + * @throws IOException raised on errors performing I/O. 
+ */ public static Object readObject(DataInput in, Configuration conf) throws IOException { return readObject(in, null, conf); } - /** Read a {@link Writable}, {@link String}, primitive type, or an array of - * the preceding. */ + /** + * Read a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. + * + * @param in DataInput. + * @param objectWritable objectWritable. + * @param conf configuration. + * @return Object. + * @throws IOException raised on errors performing I/O. + */ @SuppressWarnings("unchecked") public static Object readObject(DataInput in, ObjectWritable objectWritable, Configuration conf) throws IOException { @@ -365,6 +404,10 @@ static Method getStaticProtobufMethod(Class declaredClass, String method, * Find and load the class with given name className by first finding * it in the specified conf. If the specified conf is null, * try load it directly. + * + * @param conf configuration. + * @param className classname. + * @return Class. */ public static Class loadClass(Configuration conf, String className) { Class declaredClass = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/OutputBuffer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/OutputBuffer.java index 15a396dc2bf55..f80c0a71883d6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/OutputBuffer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/OutputBuffer.java @@ -77,21 +77,33 @@ private OutputBuffer(Buffer buffer) { this.buffer = buffer; } - /** Returns the current contents of the buffer. + /** + * Returns the current contents of the buffer. * Data is only valid to {@link #getLength()}. + * + * @return the current contents of the buffer. */ public byte[] getData() { return buffer.getData(); } - /** Returns the length of the valid data currently in the buffer. 
*/ + /** + * Returns the length of the valid data currently in the buffer. + * @return the length of the valid data + * currently in the buffer. + */ public int getLength() { return buffer.getLength(); } - /** Resets the buffer to empty. */ + /** @return Resets the buffer to empty. */ public OutputBuffer reset() { buffer.reset(); return this; } - /** Writes bytes from a InputStream directly into the buffer. */ + /** + * Writes bytes from a InputStream directly into the buffer. + * @param in input in. + * @param length input length. + * @throws IOException raised on errors performing I/O. + */ public void write(InputStream in, int length) throws IOException { buffer.write(in, length); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/RawComparator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/RawComparator.java index a52190db5f4d1..354dda964e92b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/RawComparator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/RawComparator.java @@ -29,7 +29,7 @@ * A {@link Comparator} that operates directly on byte representations of * objects. *

    - * @param <T> + * @param <T> generic type. * @see DeserializerComparator */ @InterfaceAudience.Public diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ReadaheadPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ReadaheadPool.java index 804d365450692..dd9137b25a4b3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ReadaheadPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ReadaheadPool.java @@ -29,8 +29,9 @@ import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_WILLNEED; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,7 +50,7 @@ public class ReadaheadPool { private static ReadaheadPool instance; /** - * Return the singleton instance for the current process. + * @return Return the singleton instance for the current process. 
*/ public static ReadaheadPool getInstance() { synchronized (ReadaheadPool.class) { @@ -59,7 +60,17 @@ public static ReadaheadPool getInstance() { return instance; } } - + + @VisibleForTesting + public static void resetInstance() { + synchronized (ReadaheadPool.class) { + if (instance != null) { + instance.pool.shutdownNow(); + instance = null; + } + } + } + private ReadaheadPool() { pool = new ThreadPoolExecutor(POOL_SIZE, MAX_POOL_SIZE, 3L, TimeUnit.SECONDS, new ArrayBlockingQueue(CAPACITY)); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SecureIOUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SecureIOUtils.java index 9d3c3c1ceeaa7..e785a722b3c92 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SecureIOUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SecureIOUtils.java @@ -32,7 +32,7 @@ import org.apache.hadoop.io.nativeio.NativeIO.POSIX.Stat; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class provides secure APIs for opening and creating files on the local @@ -90,7 +90,7 @@ public class SecureIOUtils { private final static FileSystem rawFilesystem; /** - * Open the given File for random read access, verifying the expected user/ + * @return Open the given File for random read access, verifying the expected user/ * group constraints if security is enabled. * * Note that this function provides no additional security checks if hadoop @@ -114,8 +114,14 @@ public static RandomAccessFile openForRandomRead(File f, } /** - * Same as openForRandomRead except that it will run even if security is off. + * @return Same as openForRandomRead except that it will run even if security is off. * This is used by unit tests. + * + * @param f input f. 
+ * @param mode input mode. + * @param expectedOwner input expectedOwner. + * @param expectedGroup input expectedGroup. + * @throws IOException raised on errors performing I/O. */ @VisibleForTesting protected static RandomAccessFile forceSecureOpenForRandomRead(File f, @@ -145,6 +151,7 @@ protected static RandomAccessFile forceSecureOpenForRandomRead(File f, * @param expectedGroup the expected group owner for the file * @throws IOException if an IO Error occurred or the user/group does not * match if security is enabled + * @return FSDataInputStream. */ public static FSDataInputStream openFSDataInputStream(File file, String expectedOwner, String expectedGroup) throws IOException { @@ -157,6 +164,12 @@ public static FSDataInputStream openFSDataInputStream(File file, /** * Same as openFSDataInputStream except that it will run even if security is * off. This is used by unit tests. + * + * @param file input file. + * @param expectedOwner input expectedOwner. + * @param expectedGroup input expectedGroup. + * @throws IOException raised on errors performing I/O. + * @return FSDataInputStream. */ @VisibleForTesting protected static FSDataInputStream forceSecureOpenFSDataInputStream( @@ -182,7 +195,7 @@ protected static FSDataInputStream forceSecureOpenFSDataInputStream( * Open the given File for read access, verifying the expected user/group * constraints if security is enabled. * - * Note that this function provides no additional checks if Hadoop + * @return Note that this function provides no additional checks if Hadoop * security is disabled, since doing the checks would be too expensive * when native libraries are not available. * @@ -201,8 +214,12 @@ public static FileInputStream openForRead(File f, String expectedOwner, } /** - * Same as openForRead() except that it will run even if security is off. + * @return Same as openForRead() except that it will run even if security is off. * This is used by unit tests. + * @param f input f. 
+ * @param expectedOwner input expectedOwner. + * @param expectedGroup input expectedGroup. + * @throws IOException raised on errors performing I/O. */ @VisibleForTesting protected static FileInputStream forceSecureOpenForRead(File f, String expectedOwner, @@ -251,6 +268,7 @@ private static FileOutputStream insecureCreateForWrite(File f, * * @throws AlreadyExistsException if the file already exists * @throws IOException if any other error occurred + * @return createForWrite FileOutputStream. */ public static FileOutputStream createForWrite(File f, int permissions) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index fec0a4ac81f2f..3807868e7bd2f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -24,9 +24,10 @@ import java.rmi.server.UID; import java.security.MessageDigest; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.util.Options; import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Options.CreateOpts; import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; @@ -59,6 +60,11 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_SEQFILE_COMPRESS_BLOCKSIZE_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_SKIP_CHECKSUM_ERRORS_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_SKIP_CHECKSUM_ERRORS_KEY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE; +import static 
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; /** * SequenceFiles are flat files consisting of binary key/value @@ -266,7 +272,7 @@ static public void setDefaultCompressionType(Configuration job, * @param conf the configuration to use * @param opts the options to create the file with * @return a new Writer - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static Writer createWriter(Configuration conf, Writer.Option... opts ) throws IOException { @@ -298,7 +304,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param keyClass The 'key' type. * @param valClass The 'value' type. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -320,7 +326,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param valClass The 'value' type. * @param compressionType The compression type. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -345,7 +351,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param compressionType The compression type. * @param progress The Progressable object to track progress. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
* @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -372,7 +378,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param compressionType The compression type. * @param codec The compression codec. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -400,7 +406,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param progress The Progressable object to track progress. * @param metadata The metadata of the file. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -434,7 +440,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param progress The Progressable object to track progress. * @param metadata The metadata of the file. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -472,7 +478,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param codec The compression codec. * @param metadata The metadata of the file. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Deprecated public static Writer @@ -505,7 +511,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param createFlag gives the semantics of create: overwrite, append etc. * @param opts file creation options; see {@link CreateOpts}. 
* @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static Writer createWriter(FileContext fc, Configuration conf, Path name, @@ -529,7 +535,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param codec The compression codec. * @param progress The Progressable object to track progress. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -557,7 +563,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param codec The compression codec. * @param metadata The metadata of the file. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -582,7 +588,7 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts * @param compressionType The compression type. * @param codec The compression codec. * @return Returns the handle to the constructed SequenceFile Writer. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use {@link #createWriter(Configuration, Writer.Option...)} * instead. */ @@ -600,22 +606,26 @@ public static Writer createWriter(Configuration conf, Writer.Option... opts /** The interface to 'raw' values of SequenceFiles. */ public static interface ValueBytes { - /** Writes the uncompressed bytes to the outStream. + /** + * Writes the uncompressed bytes to the outStream. * @param outStream : Stream to write uncompressed bytes into. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void writeUncompressedBytes(DataOutputStream outStream) throws IOException; - /** Write compressed bytes to outStream. + /** + * Write compressed bytes to outStream. * Note: that it will NOT compress the bytes if they are not compressed. * @param outStream : Stream to write compressed bytes into. + * @throws IllegalArgumentException an illegal or inappropriate argument. + * @throws IOException raised on errors performing I/O. */ public void writeCompressedBytes(DataOutputStream outStream) throws IllegalArgumentException, IOException; /** - * Size of stored data. + * @return Size of stored data. */ public int getSize(); } @@ -834,7 +844,8 @@ public String toString() { } /** Write key/value pairs to a sequence-format file. */ - public static class Writer implements java.io.Closeable, Syncable { + public static class Writer implements java.io.Closeable, Syncable, + Flushable, StreamCapabilities { private Configuration conf; FSDataOutputStream out; boolean ownOutputStream = true; @@ -1194,10 +1205,17 @@ public static Option syncInterval(int value) { codec, metadata, syncInterval); } - /** Create the named file. + /** + * Create the named file. * @deprecated Use * {@link SequenceFile#createWriter(Configuration, Writer.Option...)} * instead. + * @param fs input filesystem. + * @param conf input configuration. + * @param name input name. + * @param keyClass input keyClass. + * @param valClass input valClass. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(FileSystem fs, Configuration conf, Path name, @@ -1207,10 +1225,19 @@ public Writer(FileSystem fs, Configuration conf, Path name, new Metadata(), SYNC_INTERVAL); } - /** Create the named file with write-progress reporter. + /** + * Create the named file with write-progress reporter. * @deprecated Use * {@link SequenceFile#createWriter(Configuration, Writer.Option...)} * instead. + * @param fs input filesystem. + * @param conf input configuration. + * @param name input name. 
+ * @param keyClass input keyClass. + * @param valClass input valClass. + * @param progress input progress. + * @param metadata input metadata. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(FileSystem fs, Configuration conf, Path name, @@ -1221,10 +1248,22 @@ public Writer(FileSystem fs, Configuration conf, Path name, null, metadata, SYNC_INTERVAL); } - /** Create the named file with write-progress reporter. + /** + * Create the named file with write-progress reporter. * @deprecated Use * {@link SequenceFile#createWriter(Configuration, Writer.Option...)} * instead. + * @param fs input filesystem. + * @param conf input configuration. + * @param name input name. + * @param keyClass input keyClass. + * @param valClass input valClass. + * @param bufferSize input bufferSize. + * @param replication input replication. + * @param blockSize input blockSize. + * @param progress input progress. + * @param metadata input metadata. + * @throws IOException raised on errors performing I/O. */ @Deprecated public Writer(FileSystem fs, Configuration conf, Path name, @@ -1325,16 +1364,19 @@ void init(Configuration config, FSDataOutputStream outStream, } } - /** Returns the class of keys in this file. */ + /** @return Returns the class of keys in this file. */ public Class getKeyClass() { return keyClass; } - /** Returns the class of values in this file. */ + /** @return Returns the class of values in this file. */ public Class getValueClass() { return valClass; } - /** Returns the compression codec of data in this file. */ + /** @return Returns the compression codec of data in this file. */ public CompressionCodec getCompressionCodec() { return codec; } - /** create a sync point */ + /** + * create a sync point. + * @throws IOException raised on errors performing I/O. 
+ */ public void sync() throws IOException { if (sync != null && lastSyncPos != out.getPos()) { out.writeInt(SYNC_ESCAPE); // mark the start of the sync @@ -1344,8 +1386,9 @@ public void sync() throws IOException { } /** - * flush all currently written data to the file system + * flush all currently written data to the file system. * @deprecated Use {@link #hsync()} or {@link #hflush()} instead + * @throws IOException raised on errors performing I/O. */ @Deprecated public void syncFs() throws IOException { @@ -1367,6 +1410,21 @@ public void hflush() throws IOException { out.hflush(); } } + + @Override + public void flush() throws IOException { + if (out != null) { + out.flush(); + } + } + + @Override + public boolean hasCapability(String capability) { + if (out !=null && capability != null) { + return out.hasCapability(capability); + } + return false; + } /** Returns the configuration of this file. */ Configuration getConf() { return conf; } @@ -1402,13 +1460,23 @@ synchronized void checkAndWriteSync() throws IOException { } } - /** Append a key/value pair. */ + /** + * Append a key/value pair. + * @param key input Writable key. + * @param val input Writable val. + * @throws IOException raised on errors performing I/O. + */ public void append(Writable key, Writable val) throws IOException { append((Object) key, (Object) val); } - /** Append a key/value pair. */ + /** + * Append a key/value pair. + * @param key input Object key. + * @param val input Object val. + * @throws IOException raised on errors performing I/O. + */ @SuppressWarnings("unchecked") public synchronized void append(Object key, Object val) throws IOException { @@ -1459,14 +1527,16 @@ public synchronized void appendRaw(byte[] keyData, int keyOffset, val.writeUncompressedBytes(out); // value } - /** Returns the current length of the output file. + /** @return Returns the current length of the output file. * *

    <p>This always returns a synchronized position. In other words, * immediately after calling {@link SequenceFile.Reader#seek(long)} with a position * returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However * the key may be earlier in the file than key last written when this * method was called (e.g., with block-compression, it may be the first key - * in the block that was being written when this method was called). + * in the block that was being written when this method was called).</p>

    + * + * @throws IOException raised on errors performing I/O. */ public synchronized long getLength() throws IOException { return out.getPos(); @@ -1877,7 +1947,7 @@ public Reader(Configuration conf, Option... opts) throws IOException { * @param fs The file system used to open the file. * @param file The file being read. * @param conf Configuration - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use Reader(Configuration, Option...) instead. */ @Deprecated @@ -1893,7 +1963,7 @@ public Reader(FileSystem fs, Path file, * @param start The starting position. * @param length The length being read. * @param conf Configuration - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated Use Reader(Configuration, Reader.Option...) instead. */ @Deprecated @@ -1938,11 +2008,18 @@ private void initialize(Path filename, FSDataInputStream in, * @param length The length being read if it is {@literal >=} 0. * Otherwise, the length is not available. * @return The opened stream. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected FSDataInputStream openFile(FileSystem fs, Path file, int bufferSize, long length) throws IOException { - return fs.open(file, bufferSize); + FutureDataInputStreamBuilder builder = fs.openFile(file) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL) + .opt(FS_OPTION_OPENFILE_BUFFER_SIZE, bufferSize); + if (length >= 0) { + builder.optLong(FS_OPTION_OPENFILE_LENGTH, length); + } + return awaitFuture(builder.build()); } /** @@ -2121,12 +2198,12 @@ public synchronized void close() throws IOException { in.close(); } - /** Returns the name of the key class. */ + /** @return Returns the name of the key class. */ public String getKeyClassName() { return keyClassName; } - /** Returns the class of keys in this file. */ + /** @return Returns the class of keys in this file. 
*/ public synchronized Class getKeyClass() { if (null == keyClass) { try { @@ -2138,12 +2215,12 @@ public synchronized Class getKeyClass() { return keyClass; } - /** Returns the name of the value class. */ + /** @return Returns the name of the value class. */ public String getValueClassName() { return valClassName; } - /** Returns the class of values in this file. */ + /** @return Returns the class of values in this file. */ public synchronized Class getValueClass() { if (null == valClass) { try { @@ -2155,13 +2232,22 @@ public synchronized Class getValueClass() { return valClass; } - /** Returns true if values are compressed. */ + /** + * Returns true if values are compressed. + * @return if values are compressed true, not false. + */ public boolean isCompressed() { return decompress; } - /** Returns true if records are block-compressed. */ + /** + * Returns true if records are block-compressed. + * @return if records are block-compressed true, not false. + */ public boolean isBlockCompressed() { return blockCompressed; } - /** Returns the compression codec of data in this file. */ + /** + * Returns the compression codec of data in this file. + * @return CompressionCodec. + */ public CompressionCodec getCompressionCodec() { return codec; } private byte[] getSync() { @@ -2184,7 +2270,10 @@ public CompressionType getCompressionType() { } } - /** Returns the metadata object of the file */ + /** + * Returns the metadata object of the file. + * @return metadata. + */ public Metadata getMetadata() { return this.metadata; } @@ -2293,7 +2382,7 @@ private synchronized void seekToCurrentValue() throws IOException { /** * Get the 'value' corresponding to the last read 'key'. * @param val : The 'value' to be read. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public synchronized void getCurrentValue(Writable val) throws IOException { @@ -2330,9 +2419,9 @@ public synchronized void getCurrentValue(Writable val) } /** - * Get the 'value' corresponding to the last read 'key'. + * @return Get the 'value' corresponding to the last read 'key'. * @param val : The 'value' to be read. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public synchronized Object getCurrentValue(Object val) throws IOException { @@ -2374,8 +2463,13 @@ private Object deserializeValue(Object val) throws IOException { return valDeserializer.deserialize(val); } - /** Read the next key in the file into key, skipping its - * value. True if another entry exists, and false at end of file. */ + /** + * @return Read the next key in the file into key, skipping its + * value.True if another entry exists, and false at end of file. + * + * @param key key. + * @throws IOException raised on errors performing I/O. + */ public synchronized boolean next(Writable key) throws IOException { if (key.getClass() != getKeyClass()) throw new IOException("wrong key class: "+key.getClass().getName() @@ -2422,9 +2516,16 @@ public synchronized boolean next(Writable key) throws IOException { return true; } - /** Read the next key/value pair in the file into key and - * val. Returns true if such a pair exists and false when at - * end of file */ + /** + * Read the next key/value pair in the file into key and + * val. + * @return Returns true if such a pair exists and false when at + * end of file. + * + * @param key input key. + * @param val input val. + * @throws IOException raised on errors performing I/O. 
+ */ public synchronized boolean next(Writable key, Writable val) throws IOException { if (val.getClass() != getValueClass()) @@ -2508,7 +2609,7 @@ public ValueBytes createValueBytes() { * @param key - The buffer into which the key is read * @param val - The 'raw' value * @return Returns the total record length or -1 for end of file - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public synchronized int nextRaw(DataOutputBuffer key, ValueBytes val) throws IOException { @@ -2567,7 +2668,7 @@ public synchronized int nextRaw(DataOutputBuffer key, ValueBytes val) * Read 'raw' keys. * @param key - The buffer into which the key is read * @return Returns the key length or -1 for end of file - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public synchronized int nextRawKey(DataOutputBuffer key) throws IOException { @@ -2606,8 +2707,14 @@ public synchronized int nextRawKey(DataOutputBuffer key) } - /** Read the next key in the file, skipping its - * value. Return null at end of file. */ + /** + * Read the next key in the file, skipping its + * value. + * + * @param key input Object key. + * @throws IOException raised on errors performing I/O. + * @return Return null at end of file. + */ public synchronized Object next(Object key) throws IOException { if (key != null && key.getClass() != getKeyClass()) { throw new IOException("wrong key class: "+key.getClass().getName() @@ -2664,7 +2771,7 @@ private Object deserializeKey(Object key) throws IOException { * Read 'raw' values. * @param val - The 'raw' value * @return Returns the value length - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public synchronized int nextRawValue(ValueBytes val) throws IOException { @@ -2704,16 +2811,20 @@ private void handleChecksumException(ChecksumException e) } } - /** disables sync. often invoked for tmp files */ + /** disables sync. often invoked for tmp files. 
*/ synchronized void ignoreSync() { sync = null; } - /** Set the current byte position in the input file. + /** + * Set the current byte position in the input file. * *

    The position passed must be a position returned by {@link * SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary - * position, use {@link SequenceFile.Reader#sync(long)}. + * position, use {@link SequenceFile.Reader#sync(long)}.

    + * + * @param position input position. + * @throws IOException raised on errors performing I/O. */ public synchronized void seek(long position) throws IOException { in.seek(position); @@ -2723,7 +2834,11 @@ public synchronized void seek(long position) throws IOException { } } - /** Seek to the next sync mark past a given position.*/ + /** + * Seek to the next sync mark past a given position. + * @param position position. + * @throws IOException raised on errors performing I/O. + */ public synchronized void sync(long position) throws IOException { if (position+SYNC_SIZE >= end) { seek(end); @@ -2759,10 +2874,13 @@ public synchronized void sync(long position) throws IOException { } } - /** Returns true iff the previous call to next passed a sync mark.*/ + /** @return Returns true iff the previous call to next passed a sync mark.*/ public synchronized boolean syncSeen() { return syncSeen; } - /** Return the current byte position in the input file. */ + /** + * @return Return the current byte position in the input file. + * @throws IOException raised on errors performing I/O. + */ public synchronized long getPosition() throws IOException { return in.getPos(); } @@ -2804,19 +2922,40 @@ public static class Sorter { private Progressable progressable = null; - /** Sort and merge files containing the named classes. */ + /** + * Sort and merge files containing the named classes. + * @param fs input FileSystem. + * @param keyClass input keyClass. + * @param valClass input valClass. + * @param conf input Configuration. + */ public Sorter(FileSystem fs, Class keyClass, Class valClass, Configuration conf) { this(fs, WritableComparator.get(keyClass, conf), keyClass, valClass, conf); } - /** Sort and merge using an arbitrary {@link RawComparator}. */ + /** + * Sort and merge using an arbitrary {@link RawComparator}. + * @param fs input FileSystem. + * @param comparator input RawComparator. + * @param keyClass input keyClass. + * @param valClass input valClass. 
+ * @param conf input Configuration. + */ public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf) { this(fs, comparator, keyClass, valClass, conf, new Metadata()); } - /** Sort and merge using an arbitrary {@link RawComparator}. */ + /** + * Sort and merge using an arbitrary {@link RawComparator}. + * @param fs input FileSystem. + * @param comparator input RawComparator. + * @param keyClass input keyClass. + * @param valClass input valClass. + * @param conf input Configuration. + * @param metadata input metadata. + */ @SuppressWarnings("deprecation") public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf, Metadata metadata) { @@ -2845,19 +2984,28 @@ public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, this.metadata = metadata; } - /** Set the number of streams to merge at once.*/ + /** + * Set the number of streams to merge at once. + * @param factor factor. + */ public void setFactor(int factor) { this.factor = factor; } - /** Get the number of streams to merge at once.*/ + /** @return Get the number of streams to merge at once.*/ public int getFactor() { return factor; } - /** Set the total amount of buffer memory, in bytes.*/ + /** + * Set the total amount of buffer memory, in bytes. + * @param memory buffer memory. + */ public void setMemory(int memory) { this.memory = memory; } - /** Get the total amount of buffer memory, in bytes.*/ + /** @return Get the total amount of buffer memory, in bytes.*/ public int getMemory() { return memory; } - /** Set the progressable object in order to report progress. */ + /** + * Set the progressable object in order to report progress. + * @param progressable input Progressable. 
+ */ public void setProgressable(Progressable progressable) { this.progressable = progressable; } @@ -2867,6 +3015,7 @@ public void setProgressable(Progressable progressable) { * @param inFiles the files to be sorted * @param outFile the sorted output file * @param deleteInput should the input files be deleted as they are read? + * @throws IOException raised on errors performing I/O. */ public void sort(Path[] inFiles, Path outFile, boolean deleteInput) throws IOException { @@ -2889,6 +3038,7 @@ public void sort(Path[] inFiles, Path outFile, * @param tempDir the directory where temp files are created during sort * @param deleteInput should the input files be deleted as they are read? * @return iterator the RawKeyValueIterator + * @throws IOException raised on errors performing I/O. */ public RawKeyValueIterator sortAndIterate(Path[] inFiles, Path tempDir, boolean deleteInput) throws IOException { @@ -2914,8 +3064,9 @@ else if (segments == 1) /** * The backwards compatible interface to sort. - * @param inFile the input file to sort - * @param outFile the sorted output file + * @param inFile the input file to sort. + * @param outFile the sorted output file. + * @throws IOException raised on errors performing I/O. */ public void sort(Path inFile, Path outFile) throws IOException { sort(new Path[]{inFile}, outFile, false); @@ -3133,27 +3284,32 @@ public void setProgressable(Progressable progressable) /** The interface to iterate over raw keys/values of SequenceFiles. */ public static interface RawKeyValueIterator { - /** Gets the current raw key + /** + * Gets the current raw key. * @return DataOutputBuffer - * @throws IOException + * @throws IOException raised on errors performing I/O. */ DataOutputBuffer getKey() throws IOException; - /** Gets the current raw value + /** + * Gets the current raw value. * @return ValueBytes - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ ValueBytes getValue() throws IOException; - /** Sets up the current key and value (for getKey and getValue) + /** + * Sets up the current key and value (for getKey and getValue). * @return true if there exists a key/value, false otherwise - * @throws IOException + * @throws IOException raised on errors performing I/O. */ boolean next() throws IOException; - /** closes the iterator so that the underlying streams can be closed - * @throws IOException + /** + * closes the iterator so that the underlying streams can be closed. + * @throws IOException raised on errors performing I/O. */ void close() throws IOException; - /** Gets the Progress object; this has a float (0.0 - 1.0) - * indicating the bytes processed by the iterator so far + /** + * @return Gets the Progress object; this has a float (0.0 - 1.0) + * indicating the bytes processed by the iterator so far. */ Progress getProgress(); } @@ -3163,7 +3319,7 @@ public static interface RawKeyValueIterator { * @param segments the list of SegmentDescriptors * @param tmpDir the directory to write temporary files into * @return RawKeyValueIterator - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public RawKeyValueIterator merge(List segments, Path tmpDir) @@ -3181,7 +3337,7 @@ public RawKeyValueIterator merge(List segments, * unnecessary * @param tmpDir the directory to write temporary files into * @return RawKeyValueIteratorMergeQueue - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, Path tmpDir) @@ -3199,7 +3355,7 @@ public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, * @param factor the factor that will be used as the maximum merge fan-in * @param tmpDir the directory to write temporary files into * @return RawKeyValueIteratorMergeQueue - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, int factor, Path tmpDir) @@ -3225,7 +3381,7 @@ public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, * @param deleteInputs true if the input files should be deleted when * unnecessary * @return RawKeyValueIteratorMergeQueue - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public RawKeyValueIterator merge(Path [] inNames, Path tempDir, boolean deleteInputs) @@ -3256,7 +3412,7 @@ public RawKeyValueIterator merge(Path [] inNames, Path tempDir, * @param outputFile the path of the output file * @param prog the Progressable to report status during the file write * @return Writer - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Writer cloneFileAttributes(Path inputFile, Path outputFile, Progressable prog) throws IOException { @@ -3278,10 +3434,10 @@ public Writer cloneFileAttributes(Path inputFile, Path outputFile, /** * Writes records from RawKeyValueIterator into a file represented by the - * passed writer + * passed writer. * @param records the RawKeyValueIterator * @param writer the Writer created earlier - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void writeFile(RawKeyValueIterator records, Writer writer) throws IOException { @@ -3295,7 +3451,7 @@ public void writeFile(RawKeyValueIterator records, Writer writer) /** Merge the provided files. * @param inFiles the array of input path names * @param outFile the final output file - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void merge(Path[] inFiles, Path outFile) throws IOException { if (fs.exists(outFile)) { @@ -3631,10 +3787,13 @@ public SegmentDescriptor (long segmentOffset, long segmentLength, this.segmentPathName = segmentPathName; } - /** Do the sync checks */ + /** Do the sync checks. 
*/ public void doSync() {ignoreSync = false;} - /** Whether to delete the files when no longer needed */ + /** + * Whether to delete the files when no longer needed. + * @param preserve input boolean preserve. + */ public void preserveInput(boolean preserve) { preserveInput = preserve; } @@ -3676,9 +3835,10 @@ public int hashCode() { return 37 * 17 + (int) (segmentOffset^(segmentOffset>>>32)); } - /** Fills up the rawKey object with the key returned by the Reader + /** + * Fills up the rawKey object with the key returned by the Reader. * @return true if there is a key returned; false, otherwise - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public boolean nextRawKey() throws IOException { if (in == null) { @@ -3707,18 +3867,19 @@ public boolean nextRawKey() throws IOException { return (keyLength >= 0); } - /** Fills up the passed rawValue with the value corresponding to the key - * read earlier - * @param rawValue + /** + * Fills up the passed rawValue with the value corresponding to the key + * read earlier. + * @param rawValue input ValueBytes rawValue. * @return the length of the value - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public int nextRawValue(ValueBytes rawValue) throws IOException { int valLength = in.nextRawValue(rawValue); return valLength; } - /** Returns the stored rawKey */ + /** @return Returns the stored rawKey */ public DataOutputBuffer getKey() { return rawKey; } @@ -3729,8 +3890,10 @@ private void close() throws IOException { this.in = null; } - /** The default cleanup. Subclasses can override this with a custom - * cleanup + /** + * The default cleanup. Subclasses can override this with a custom + * cleanup. + * @throws IOException raised on errors performing I/O. 
*/ public void cleanup() throws IOException { close(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SetFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SetFile.java index 118cce75136ed..de75810df0f70 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SetFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SetFile.java @@ -39,15 +39,29 @@ protected SetFile() {} // no public ctor */ public static class Writer extends MapFile.Writer { - /** Create the named set for keys of the named class. - * @deprecated pass a Configuration too + /** + * Create the named set for keys of the named class. + * @deprecated pass a Configuration too + * @param fs input FileSystem. + * @param dirName input dirName. + * @param keyClass input keyClass. + * @throws IOException raised on errors performing I/O. */ public Writer(FileSystem fs, String dirName, Class keyClass) throws IOException { super(new Configuration(), fs, dirName, keyClass, NullWritable.class); } - /** Create a set naming the element class and compression type. */ + /** + * Create a set naming the element class and compression type. + * + * @param conf input Configuration. + * @param fs input FileSystem. + * @param dirName input dirName. + * @param keyClass input keyClass. + * @param compress input compress. + * @throws IOException raised on errors performing I/O. + */ public Writer(Configuration conf, FileSystem fs, String dirName, Class keyClass, SequenceFile.CompressionType compress) @@ -55,7 +69,16 @@ public Writer(Configuration conf, FileSystem fs, String dirName, this(conf, fs, dirName, WritableComparator.get(keyClass, conf), compress); } - /** Create a set naming the element comparator and compression type. */ + /** + * Create a set naming the element comparator and compression type. + * + * @param conf input Configuration. + * @param fs input FileSystem. 
+ * @param dirName input dirName. + * @param comparator input comparator. + * @param compress input compress. + * @throws IOException raised on errors performing I/O. + */ public Writer(Configuration conf, FileSystem fs, String dirName, WritableComparator comparator, SequenceFile.CompressionType compress) throws IOException { @@ -65,8 +88,12 @@ public Writer(Configuration conf, FileSystem fs, String dirName, compression(compress)); } - /** Append a key to a set. The key must be strictly greater than the - * previous key added to the set. */ + /** + * Append a key to a set. The key must be strictly greater than the + * previous key added to the set. + * @param key input key. + * @throws IOException raised on errors performing I/O. + */ public void append(WritableComparable key) throws IOException{ append(key, NullWritable.get()); } @@ -75,12 +102,25 @@ public void append(WritableComparable key) throws IOException{ /** Provide access to an existing set file. */ public static class Reader extends MapFile.Reader { - /** Construct a set reader for the named set.*/ + /** + * Construct a set reader for the named set. + * @param fs input FileSystem. + * @param dirName input dirName. + * @param conf input Configuration. + * @throws IOException raised on errors performing I/O. + */ public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException { super(fs, dirName, conf); } - /** Construct a set reader for the named set using the named comparator.*/ + /** + * Construct a set reader for the named set using the named comparator. + * @param fs input FileSystem. + * @param dirName input dirName. + * @param comparator input comparator. + * @param conf input Configuration. + * @throws IOException raised on errors performing I/O. 
+ */ public Reader(FileSystem fs, String dirName, WritableComparator comparator, Configuration conf) throws IOException { super(new Path(dirName), conf, comparator(comparator)); @@ -93,15 +133,26 @@ public boolean seek(WritableComparable key) return super.seek(key); } - /** Read the next key in a set into key. Returns - * true if such a key exists and false when at the end of the set. */ + /** + * Read the next key in a set into key. + * + * @param key input key. + * @return Returns true if such a key exists + * and false when at the end of the set. + * @throws IOException raised on errors performing I/O. + */ public boolean next(WritableComparable key) throws IOException { return next(key, NullWritable.get()); } - /** Read the matching key from a set into key. - * Returns key, or null if no match exists. */ + /** + * Read the matching key from a set into key. + * + * @param key input key. + * @return Returns key, or null if no match exists. + * @throws IOException raised on errors performing I/O. + */ public WritableComparable get(WritableComparable key) throws IOException { if (seek(key)) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ShortWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ShortWritable.java index be09df18017b7..96e6cacae8773 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ShortWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ShortWritable.java @@ -38,12 +38,15 @@ public ShortWritable(short value) { set(value); } - /** Set the value of this ShortWritable. */ + /** + * Set the value of this ShortWritable. + * @param value input value. + */ public void set(short value) { this.value = value; } - /** Return the value of this ShortWritable. */ + /** @return Return the value of this ShortWritable. 
*/ public short get() { return value; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java index 3ab327fe76a30..d180c88bcbd37 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java @@ -82,25 +82,32 @@ public Text() { bytes = EMPTY_BYTES; } - /** Construct from a string. + /** + * Construct from a string. + * @param string input string. */ public Text(String string) { set(string); } - /** Construct from another text. */ + /** + * Construct from another text. + * @param utf8 input utf8. + */ public Text(Text utf8) { set(utf8); } - /** Construct from a byte array. + /** + * Construct from a byte array. + * @param utf8 input utf8. */ public Text(byte[] utf8) { set(utf8); } /** - * Get a copy of the bytes that is exactly the length of the data. + * @return Get a copy of the bytes that is exactly the length of the data. * See {@link #getBytes()} for faster access to the underlying array. */ public byte[] copyBytes() { @@ -128,10 +135,12 @@ public int getLength() { /** * Returns the Unicode Scalar Value (32-bit integer value) * for the character at position. Note that this - * method avoids using the converter or doing String instantiation + * method avoids using the converter or doing String instantiation. + * + * @param position input position. * @return the Unicode scalar value at position or -1 * if the position is invalid or points to a - * trailing byte + * trailing byte. */ public int charAt(int position) { if (position > this.length) return -1; // too long @@ -151,6 +160,9 @@ public int find(String what) { * position is measured in bytes and the return value is in * terms of byte position in the buffer. The backing buffer is * not converted to a string for this operation. + * + * @param what input what. 
+ * @param start input start. * @return byte position of the first occurrence of the search * string in the UTF-8 buffer or -1 if not found */ @@ -191,7 +203,9 @@ public int find(String what, int start) { return -1; } } - /** Set to contain the contents of a string. + /** + * Set to contain the contents of a string. + * @param string input string. */ public void set(String string) { try { @@ -203,13 +217,18 @@ public void set(String string) { } } - /** Set to a utf8 byte array + /** + * Set to a utf8 byte array. + * @param utf8 input utf8. */ public void set(byte[] utf8) { set(utf8, 0, utf8.length); } - /** copy a text. */ + /** + * copy a text. + * @param other input other. + */ public void set(Text other) { set(other.getBytes(), 0, other.getLength()); } @@ -303,7 +322,11 @@ public void readFields(DataInput in, int maxLength) throws IOException { readWithKnownLength(in, newLength); } - /** Skips over one Text in the input. */ + /** + * Skips over one Text in the input. + * @param in input in. + * @throws IOException raised on errors performing I/O. + */ public static void skip(DataInput in) throws IOException { int length = WritableUtils.readVInt(in); WritableUtils.skipFully(in, length); @@ -313,6 +336,10 @@ public static void skip(DataInput in) throws IOException { * Read a Text object whose length is already known. * This allows creating Text from a stream which uses a different serialization * format. + * + * @param in input in. + * @param len input len. + * @throws IOException raised on errors performing I/O. */ public void readWithKnownLength(DataInput in, int len) throws IOException { setCapacity(len, false); @@ -376,9 +403,13 @@ public int compare(byte[] b1, int s1, int l1, /// STATIC UTILITIES FROM HERE DOWN /** - * Converts the provided byte array to a String using the + * @return Converts the provided byte array to a String using the * UTF-8 encoding. If the input is malformed, * replace by a default value. + * + * @param utf8 input utf8. 
+ * @throws CharacterCodingException a character encoding or + * decoding error occurs. */ public static String decode(byte[] utf8) throws CharacterCodingException { return decode(ByteBuffer.wrap(utf8), true); @@ -390,11 +421,18 @@ public static String decode(byte[] utf8, int start, int length) } /** - * Converts the provided byte array to a String using the + * @return Converts the provided byte array to a String using the * UTF-8 encoding. If replace is true, then * malformed input is replaced with the * substitution character, which is U+FFFD. Otherwise the * method throws a MalformedInputException. + * + * @param utf8 input utf8. + * @param start input start. + * @param length input length. + * @param replace input replace. + * @throws CharacterCodingException a character encoding or + * decoding error occurs. */ public static String decode(byte[] utf8, int start, int length, boolean replace) throws CharacterCodingException { @@ -422,8 +460,12 @@ private static String decode(ByteBuffer utf8, boolean replace) * Converts the provided String to bytes using the * UTF-8 encoding. If the input is malformed, * invalid chars are replaced by a default value. + * + * @param string input string. * @return ByteBuffer: bytes stores at ByteBuffer.array() * and length is ByteBuffer.limit() + * @throws CharacterCodingException + * a character encoding or decoding error occurs. */ public static ByteBuffer encode(String string) @@ -437,8 +479,12 @@ public static ByteBuffer encode(String string) * malformed input is replaced with the * substitution character, which is U+FFFD. Otherwise the * method throws a MalformedInputException. + * + * @param string input string. + * @param replace input replace. * @return ByteBuffer: bytes stores at ByteBuffer.array() * and length is ByteBuffer.limit() + * @throws CharacterCodingException a character encoding or decoding error occurs. 
*/ public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException { @@ -458,13 +504,20 @@ public static ByteBuffer encode(String string, boolean replace) static final public int DEFAULT_MAX_LEN = 1024 * 1024; - /** Read a UTF8 encoded string from in + /** + * @return Read a UTF8 encoded string from in. + * @param in input in. + * @throws IOException raised on errors performing I/O. */ public static String readString(DataInput in) throws IOException { return readString(in, Integer.MAX_VALUE); } - /** Read a UTF8 encoded string with a maximum size + /** + * @return Read a UTF8 encoded string with a maximum size. + * @param in input in. + * @param maxLength input maxLength. + * @throws IOException raised on errors performing I/O. */ public static String readString(DataInput in, int maxLength) throws IOException { @@ -474,7 +527,12 @@ public static String readString(DataInput in, int maxLength) return decode(bytes); } - /** Write a UTF8 encoded string to out + /** + * @return Write a UTF8 encoded string to out. + * + * @param out input out. + * @param s input s. + * @throws IOException raised on errors performing I/O. */ public static int writeString(DataOutput out, String s) throws IOException { ByteBuffer bytes = encode(s); @@ -484,7 +542,13 @@ public static int writeString(DataOutput out, String s) throws IOException { return length; } - /** Write a UTF8 encoded string with a maximum size to out + /** + * @return Write a UTF8 encoded string with a maximum size to out. + * + * @param out input out. + * @param s input s. + * @param maxLength input maxLength. + * @throws IOException raised on errors performing I/O. 
*/ public static int writeString(DataOutput out, String s, int maxLength) throws IOException { @@ -616,9 +680,10 @@ public static void validateUTF8(byte[] utf8, int start, int len) 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; /** - * Returns the next code point at the current position in + * @return Returns the next code point at the current position in * the buffer. The buffer's position will be incremented. * Any mark set on this buffer will be changed by this method! + * @param bytes input bytes. */ public static int bytesToCodePoint(ByteBuffer bytes) { bytes.mark(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java index f5d33a13005d7..fdee830e6fea8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java @@ -63,27 +63,36 @@ public UTF8() { //set(""); } - /** Construct from a given string. */ + /** + * Construct from a given string. + * @param string input string. + */ public UTF8(String string) { set(string); } - /** Construct from a given string. */ + /** + * Construct from a given string. + * @param utf8 input utf8. + */ public UTF8(UTF8 utf8) { set(utf8); } - /** The raw bytes. */ + /** @return The raw bytes. */ public byte[] getBytes() { return bytes; } - /** The number of bytes in the encoded string. */ + /** @return The number of bytes in the encoded string. */ public int getLength() { return length; } - /** Set to contain the contents of a string. */ + /** + * Set to contain the contents of a string. + * @param string input string. + */ public void set(String string) { if (string.length() > 0xffff/3) { // maybe too long LOG.warn("truncating long string: " + string.length() @@ -108,7 +117,10 @@ public void set(String string) { } } - /** Set to contain the contents of a string. 
*/ + /** + * Set to contain the contents of a string. + * @param other input other. + */ public void set(UTF8 other) { length = other.length; if (bytes == null || length > bytes.length) // grow buffer @@ -124,7 +136,11 @@ public void readFields(DataInput in) throws IOException { in.readFully(bytes, 0, length); } - /** Skips over one UTF8 in the input. */ + /** + * Skips over one UTF8 in the input. + * @param in datainput. + * @throws IOException raised on errors performing I/O. + */ public static void skip(DataInput in) throws IOException { int length = in.readUnsignedShort(); WritableUtils.skipFully(in, length); @@ -214,8 +230,10 @@ public int compare(byte[] b1, int s1, int l1, /// These are probably not used much anymore, and might be removed... - /** Convert a string to a UTF-8 encoded byte array. + /** + * @return Convert a string to a UTF-8 encoded byte array. * @see String#getBytes(String) + * @param string input string. */ public static byte[] getBytes(String string) { byte[] result = new byte[utf8Length(string)]; @@ -231,8 +249,9 @@ public static byte[] getBytes(String string) { } /** - * Convert a UTF-8 encoded byte array back into a string. + * @return Convert a UTF-8 encoded byte array back into a string. * + * @param bytes input bytes. * @throws IOException if the byte array is invalid UTF8 */ public static String fromBytes(byte[] bytes) throws IOException { @@ -243,9 +262,12 @@ public static String fromBytes(byte[] bytes) throws IOException { return buf.toString(); } - /** Read a UTF-8 encoded string. + /** + * @return Read a UTF-8 encoded string. * * @see DataInput#readUTF() + * @param in DataInput. + * @throws IOException raised on errors performing I/O. */ public static String readString(DataInput in) throws IOException { int bytes = in.readUnsignedShort(); @@ -318,9 +340,13 @@ private static char lowSurrogate(int codePoint) { return (char) ((codePoint & 0x3ff) + Character.MIN_LOW_SURROGATE); } - /** Write a UTF-8 encoded string. 
+ /** + * @return Write a UTF-8 encoded string. * * @see DataOutput#writeUTF(String) + * @param out input out. + * @param s input s. + * @throws IOException raised on errors performing I/O. */ public static int writeString(DataOutput out, String s) throws IOException { if (s.length() > 0xffff/3) { // maybe too long diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VIntWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VIntWritable.java index f537524c4b40a..7d3f680858ec3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VIntWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VIntWritable.java @@ -37,10 +37,13 @@ public VIntWritable() {} public VIntWritable(int value) { set(value); } - /** Set the value of this VIntWritable. */ + /** + * Set the value of this VIntWritable. + * @param value input value. + */ public void set(int value) { this.value = value; } - /** Return the value of this VIntWritable. */ + /** @return Return the value of this VIntWritable. */ public int get() { return value; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VLongWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VLongWritable.java index a9fac30605be6..a72a7fc2fd39a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VLongWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VLongWritable.java @@ -37,10 +37,13 @@ public VLongWritable() {} public VLongWritable(long value) { set(value); } - /** Set the value of this LongWritable. */ + /** + * Set the value of this LongWritable. + * @param value input value. + */ public void set(long value) { this.value = value; } - /** Return the value of this LongWritable. */ + /** @return Return the value of this LongWritable. 
*/ public long get() { return value; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionedWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionedWritable.java index c2db55520c918..421b8daeeae8e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionedWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionedWritable.java @@ -36,7 +36,7 @@ @InterfaceStability.Stable public abstract class VersionedWritable implements Writable { - /** Return the version number of the current implementation. */ + /** @return Return the version number of the current implementation. */ public abstract byte getVersion(); // javadoc from Writable diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java new file mode 100644 index 0000000000000..c71c44e798a65 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WeakReferencedElasticByteBufferPool.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.lang.ref.WeakReference; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; + +/** + * Buffer pool implementation which uses weak references to store + * buffers in the pool, such that they are garbage collected when + * there are no references to the buffer during a gc run. This is + * important as direct buffers don't get garbage collected automatically + * during a gc run as they are not stored on heap memory. + * Also the buffers are stored in a tree map which helps in returning + * smallest buffer whose size is just greater than requested length. + * This is a thread safe implementation. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class WeakReferencedElasticByteBufferPool extends ElasticByteBufferPool { + + /** + * Map to store direct byte buffers of different sizes in the pool. + * Used tree map such that we can return next greater than capacity + * buffer if buffer with exact capacity is unavailable. + * This must be accessed in synchronized blocks. + */ + private final TreeMap> directBuffers = + new TreeMap<>(); + + /** + * Map to store heap based byte buffers of different sizes in the pool. + * Used tree map such that we can return next greater than capacity + * buffer if buffer with exact capacity is unavailable. + * This must be accessed in synchronized blocks. + */ + private final TreeMap> heapBuffers = + new TreeMap<>(); + + /** + * Method to get desired buffer tree. + * @param isDirect whether the buffer is heap based or direct. + * @return corresponding buffer tree. + */ + private TreeMap> getBufferTree(boolean isDirect) { + return isDirect + ? 
directBuffers + : heapBuffers; + } + + /** + * {@inheritDoc} + * + * @param direct whether we want a direct byte buffer or a heap one. + * @param length length of requested buffer. + * @return returns equal or next greater than capacity buffer from + * pool if already available and not garbage collected else creates + * a new buffer and return it. + */ + @Override + public synchronized ByteBuffer getBuffer(boolean direct, int length) { + TreeMap> buffersTree = getBufferTree(direct); + + // Scan the entire tree and remove all weak null references. + buffersTree.entrySet().removeIf(next -> next.getValue().get() == null); + + Map.Entry> entry = + buffersTree.ceilingEntry(new Key(length, 0)); + // If there is no buffer present in the pool with desired size. + if (entry == null) { + return direct ? ByteBuffer.allocateDirect(length) : + ByteBuffer.allocate(length); + } + // buffer is available in the pool and not garbage collected. + WeakReference bufferInPool = entry.getValue(); + buffersTree.remove(entry.getKey()); + ByteBuffer buffer = bufferInPool.get(); + if (buffer != null) { + return buffer; + } + // buffer was in pool but already got garbage collected. + return direct + ? ByteBuffer.allocateDirect(length) + : ByteBuffer.allocate(length); + } + + /** + * Return buffer to the pool. + * @param buffer buffer to be returned. + */ + @Override + public synchronized void putBuffer(ByteBuffer buffer) { + buffer.clear(); + TreeMap> buffersTree = getBufferTree(buffer.isDirect()); + // Buffers are indexed by (capacity, time). + // If our key is not unique on the first try, we try again, since the + // time will be different. Since we use nanoseconds, it's pretty + // unlikely that we'll loop even once, unless the system clock has a + // poor granularity or multi-socket systems have clocks slightly out + // of sync. 
+ while (true) { + Key keyToInsert = new Key(buffer.capacity(), System.nanoTime()); + if (!buffersTree.containsKey(keyToInsert)) { + buffersTree.put(keyToInsert, new WeakReference<>(buffer)); + return; + } + } + } + + /** + * Clear the buffer pool thus releasing all the buffers. + * The caller must remove all references of + * existing buffers before calling this method to avoid + * memory leaks. + */ + @Override + public synchronized void release() { + heapBuffers.clear(); + directBuffers.clear(); + } + + /** + * Get current buffers count in the pool. + * @param isDirect whether we want to count the heap or direct buffers. + * @return count of buffers. + */ + @VisibleForTesting + public synchronized int getCurrentBuffersCount(boolean isDirect) { + return isDirect + ? directBuffers.size() + : heapBuffers.size(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Writable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Writable.java index b94de6c3c72bd..56b46d554fee6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Writable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Writable.java @@ -71,7 +71,7 @@ public interface Writable { * Serialize the fields of this object to out. * * @param out DataOuput to serialize this object into. - * @throws IOException + * @throws IOException any other problem for write. */ void write(DataOutput out) throws IOException; @@ -82,7 +82,7 @@ public interface Writable { * existing object where possible.

    * * @param in DataInput to deseriablize this object from. - * @throws IOException + * @throws IOException any other problem for readFields. */ void readFields(DataInput in) throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableComparator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableComparator.java index 1754b8d06f6fa..05d4e3c5c533f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableComparator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableComparator.java @@ -46,12 +46,22 @@ public class WritableComparator implements RawComparator, Configurable { private Configuration conf; - /** For backwards compatibility. **/ + /** + * For backwards compatibility. + * + * @param c WritableComparable Type. + * @return WritableComparator. + */ public static WritableComparator get(Class c) { return get(c, null); } - /** Get a comparator for a {@link WritableComparable} implementation. */ + /** + * Get a comparator for a {@link WritableComparable} implementation. + * @param c class. + * @param conf configuration. + * @return WritableComparator. + */ public static WritableComparator get( Class c, Configuration conf) { WritableComparator comparator = comparators.get(c); @@ -95,9 +105,13 @@ private static void forceInit(Class cls) { } } - /** Register an optimized comparator for a {@link WritableComparable} + /** + * Register an optimized comparator for a {@link WritableComparable} * implementation. Comparators registered with this method must be - * thread-safe. */ + * thread-safe. + * @param c class. + * @param comparator WritableComparator. + */ public static void define(Class c, WritableComparator comparator) { comparators.put(c, comparator); } @@ -111,7 +125,10 @@ protected WritableComparator() { this(null); } - /** Construct for a {@link WritableComparable} implementation. 
*/ + /** + * Construct for a {@link WritableComparable} implementation. + * @param keyClass WritableComparable Class. + */ protected WritableComparator(Class keyClass) { this(keyClass, null, false); } @@ -136,10 +153,16 @@ protected WritableComparator(Class keyClass, } } - /** Returns the WritableComparable implementation class. */ + /** + * Returns the WritableComparable implementation class. + * @return WritableComparable. + */ public Class getKeyClass() { return keyClass; } - /** Construct a new {@link WritableComparable} instance. */ + /** + * Construct a new {@link WritableComparable} instance. + * @return WritableComparable. + */ public WritableComparable newKey() { return ReflectionUtils.newInstance(keyClass, conf); } @@ -168,27 +191,54 @@ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { return compare(key1, key2); // compare them } - /** Compare two WritableComparables. + /** + * Compare two WritableComparables. * - *

    The default implementation uses the natural ordering, calling {@link - * Comparable#compareTo(Object)}. */ + * The default implementation uses the natural ordering, calling {@link + * Comparable#compareTo(Object)}. + * @param a the first object to be compared. + * @param b the second object to be compared. + * @return compare result. + */ @SuppressWarnings("unchecked") public int compare(WritableComparable a, WritableComparable b) { return a.compareTo(b); } + /** + * Compare two Object. + * + * @param a the first object to be compared. + * @param b the second object to be compared. + * @return compare result. + */ @Override public int compare(Object a, Object b) { return compare((WritableComparable)a, (WritableComparable)b); } - /** Lexicographic order of binary data. */ + /** + * Lexicographic order of binary data. + * @param b1 b1. + * @param s1 s1. + * @param l1 l1. + * @param b2 b2. + * @param s2 s2. + * @param l2 l2. + * @return compare bytes. + */ public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { return FastByteComparisons.compareTo(b1, s1, l1, b2, s2, l2); } - /** Compute hash for binary data. */ + /** + * Compute hash for binary data. + * @param bytes bytes. + * @param offset offset. + * @param length length. + * @return hash for binary data. + */ public static int hashBytes(byte[] bytes, int offset, int length) { int hash = 1; for (int i = offset; i < offset + length; i++) @@ -196,18 +246,33 @@ public static int hashBytes(byte[] bytes, int offset, int length) { return hash; } - /** Compute hash for binary data. */ + /** + * Compute hash for binary data. + * @param bytes bytes. + * @param length length. + * @return hash for binary data. + */ public static int hashBytes(byte[] bytes, int length) { return hashBytes(bytes, 0, length); } - /** Parse an unsigned short from a byte array. */ + /** + * Parse an unsigned short from a byte array. + * @param bytes bytes. + * @param start start. 
+ * @return unsigned short from a byte array + */ public static int readUnsignedShort(byte[] bytes, int start) { return (((bytes[start] & 0xff) << 8) + ((bytes[start+1] & 0xff))); } - /** Parse an integer from a byte array. */ + /** + * Parse an integer from a byte array. + * @param bytes bytes. + * @param start start. + * @return integer from a byte array + */ public static int readInt(byte[] bytes, int start) { return (((bytes[start ] & 0xff) << 24) + ((bytes[start+1] & 0xff) << 16) + @@ -216,18 +281,33 @@ public static int readInt(byte[] bytes, int start) { } - /** Parse a float from a byte array. */ + /** + * Parse a float from a byte array. + * @param bytes bytes. + * @param start start. + * @return float from a byte array + */ public static float readFloat(byte[] bytes, int start) { return Float.intBitsToFloat(readInt(bytes, start)); } - /** Parse a long from a byte array. */ + /** + * Parse a long from a byte array. + * @param bytes bytes. + * @param start start. + * @return long from a byte array + */ public static long readLong(byte[] bytes, int start) { return ((long)(readInt(bytes, start)) << 32) + (readInt(bytes, start+4) & 0xFFFFFFFFL); } - /** Parse a double from a byte array. */ + /** + * Parse a double from a byte array. + * @param bytes bytes. + * @param start start. + * @return double from a byte array. + */ public static double readDouble(byte[] bytes, int start) { return Double.longBitsToDouble(readLong(bytes, start)); } @@ -236,7 +316,7 @@ public static double readDouble(byte[] bytes, int start) { * Reads a zero-compressed encoded long from a byte array and returns it. * @param bytes byte array with decode long * @param start starting index - * @throws java.io.IOException + * @throws IOException raised on errors performing I/O. 
* @return deserialized long */ public static long readVLong(byte[] bytes, int start) throws IOException { @@ -261,7 +341,7 @@ public static long readVLong(byte[] bytes, int start) throws IOException { * Reads a zero-compressed encoded integer from a byte array and returns it. * @param bytes byte array with the encoded integer * @param start start index - * @throws java.io.IOException + * @throws IOException raised on errors performing I/O. * @return deserialized integer */ public static int readVInt(byte[] bytes, int start) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactories.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactories.java index a8fdbfe98dfdc..9dd231e488780 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactories.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactories.java @@ -35,17 +35,31 @@ public class WritableFactories { private WritableFactories() {} // singleton - /** Define a factory for a class. */ + /** + * Define a factory for a class. + * @param c input c. + * @param factory input factory. + */ public static void setFactory(Class c, WritableFactory factory) { CLASS_TO_FACTORY.put(c, factory); } - /** Define a factory for a class. */ + /** + * Define a factory for a class. + * @param c input c. + * @return a factory for a class. + */ public static WritableFactory getFactory(Class c) { return CLASS_TO_FACTORY.get(c); } - /** Create a new instance of a class with a defined factory. */ + /** + * Create a new instance of a class with a defined factory. + * + * @param c input c. + * @param conf input configuration. + * @return a new instance of a class with a defined factory. 
+ */ public static Writable newInstance(Class c, Configuration conf) { WritableFactory factory = WritableFactories.getFactory(c); if (factory != null) { @@ -59,7 +73,11 @@ public static Writable newInstance(Class c, Configuration co } } - /** Create a new instance of a class with a defined factory. */ + /** + * Create a new instance of a class with a defined factory. + * @param c input c. + * @return a new instance of a class with a defined factory. + */ public static Writable newInstance(Class c) { return newInstance(c, null); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactory.java index bb8af974f051e..d9e9b543c7d96 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableFactory.java @@ -27,7 +27,7 @@ @InterfaceAudience.Public @InterfaceStability.Stable public interface WritableFactory { - /** Return a new instance. */ + /** @return Return a new instance. */ Writable newInstance(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java index 43d396edad7ab..683d6c099b540 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java @@ -45,19 +45,33 @@ public class WritableName { private WritableName() {} // no public ctor - /** Set the name that a class should be known as to something other than the - * class name. */ + /** + * Set the name that a class should be known as to something other than the + * class name. + * + * @param writableClass input writableClass. + * @param name input name. 
+ */ public static synchronized void setName(Class writableClass, String name) { CLASS_TO_NAME.put(writableClass, name); NAME_TO_CLASS.put(name, writableClass); } - /** Add an alternate name for a class. */ + /** + * Add an alternate name for a class. + * @param writableClass input writableClass. + * @param name input name. + */ public static synchronized void addName(Class writableClass, String name) { NAME_TO_CLASS.put(name, writableClass); } - /** Return the name for a class. Default is {@link Class#getName()}. */ + /** + * Return the name for a class. + * Default is {@link Class#getName()}. + * @param writableClass input writableClass. + * @return name for a class. + */ public static synchronized String getName(Class writableClass) { String name = CLASS_TO_NAME.get(writableClass); if (name != null) @@ -65,12 +79,20 @@ public static synchronized String getName(Class writableClass) { return writableClass.getName(); } - /** Return the class for a name. Default is {@link Class#forName(String)}.*/ + /** + * Return the class for a name. + * Default is {@link Class#forName(String)}. + * + * @param name input name. + * @param conf input configuration. + * @return class for a name. + * @throws IOException raised on errors performing I/O. 
+ */ public static synchronized Class getClass(String name, Configuration conf ) throws IOException { Class writableClass = NAME_TO_CLASS.get(name); if (writableClass != null) - return writableClass.asSubclass(Writable.class); + return writableClass; try { return conf.getClassByName(name); } catch (ClassNotFoundException e) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableUtils.java index 2062fb6fe3705..187398de0ec86 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableUtils.java @@ -208,7 +208,10 @@ public static void displayByteArray(byte[] record){ /** * Make a copy of a writable object using serialization to a buffer. + * + * @param Generics Type T. * @param orig The object to copy + * @param conf input Configuration. * @return The copied object */ public static T clone(T orig, Configuration conf) { @@ -223,10 +226,10 @@ public static T clone(T orig, Configuration conf) { } /** - * Make a copy of the writable object using serialization to a buffer + * Make a copy of the writable object using serialization to a buffer. * @param dst the object to copy from * @param src the object to copy into, which is destroyed - * @throws IOException + * @throws IOException raised on errors performing I/O. * @deprecated use ReflectionUtils.cloneInto instead. */ @Deprecated @@ -248,7 +251,7 @@ public static void cloneInto(Writable dst, Writable src) throws IOException { * * @param stream Binary output stream * @param i Integer to be serialized - * @throws java.io.IOException + * @throws IOException raised on errors performing I/O. 
*/ public static void writeVInt(DataOutput stream, int i) throws IOException { writeVLong(stream, i); @@ -268,7 +271,7 @@ public static void writeVInt(DataOutput stream, int i) throws IOException { * * @param stream Binary output stream * @param i Long to be serialized - * @throws java.io.IOException + * @throws IOException raised on errors performing I/O. */ public static void writeVLong(DataOutput stream, long i) throws IOException { if (i >= -112 && i <= 127) { @@ -303,7 +306,7 @@ public static void writeVLong(DataOutput stream, long i) throws IOException { /** * Reads a zero-compressed encoded long from input stream and returns it. * @param stream Binary input stream - * @throws java.io.IOException + * @throws IOException raised on errors performing I/O. * @return deserialized long from stream. */ public static long readVLong(DataInput stream) throws IOException { @@ -324,7 +327,7 @@ public static long readVLong(DataInput stream) throws IOException { /** * Reads a zero-compressed encoded integer from input stream and returns it. * @param stream Binary input stream - * @throws java.io.IOException + * @throws IOException raised on errors performing I/O. * @return deserialized integer from stream. */ public static int readVInt(DataInput stream) throws IOException { @@ -342,8 +345,10 @@ public static int readVInt(DataInput stream) throws IOException { * inclusive. * * @param stream Binary input stream - * @throws java.io.IOException - * @return deserialized integer from stream + * @param lower input lower. + * @param upper input upper. + * @throws IOException raised on errors performing I/O. + * @return deserialized integer from stream. */ public static int readVIntInRange(DataInput stream, int lower, int upper) throws IOException { @@ -387,7 +392,8 @@ public static int decodeVIntSize(byte value) { } /** - * Get the encoded length if an integer is stored in a variable-length format + * Get the encoded length if an integer is stored in a variable-length format. 
+ * @param i input i. * @return the encoded length */ public static int getVIntSize(long i) { @@ -410,7 +416,7 @@ public static int getVIntSize(long i) { * @param in DataInput to read from * @param enumType Class type of Enum * @return Enum represented by String read from DataInput - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static > T readEnum(DataInput in, Class enumType) throws IOException{ @@ -420,7 +426,7 @@ public static > T readEnum(DataInput in, Class enumType) * writes String value of enum to DataOutput. * @param out Dataoutput stream * @param enumVal enum value - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void writeEnum(DataOutput out, Enum enumVal) throws IOException{ @@ -446,7 +452,11 @@ public static void skipFully(DataInput in, int len) throws IOException { } } - /** Convert writables to a byte array */ + /** + * Convert writables to a byte array. + * @param writables input writables. + * @return ByteArray. + */ public static byte[] toByteArray(Writable... writables) { final DataOutputBuffer out = new DataOutputBuffer(); try { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/AlreadyClosedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/AlreadyClosedException.java new file mode 100644 index 0000000000000..104ad24577fd9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/AlreadyClosedException.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; + +/** + * An exception class for when a closed compressor/decopressor is being used. + * {@link org.apache.hadoop.io.compress.Compressor} + * {@link org.apache.hadoop.io.compress.Decompressor} + */ +public class AlreadyClosedException extends IOException { + + public AlreadyClosedException(String message) { + super(message); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java index 99590eda679af..7508def9a75c6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java @@ -24,6 +24,7 @@ import java.io.OutputStream; import java.nio.charset.StandardCharsets; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; @@ -99,7 +100,7 @@ public BZip2Codec() { } * @param out the location for the final output stream * @return a stream the user can write uncompressed data to, to have it * compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public CompressionOutputStream createOutputStream(OutputStream out) @@ -116,7 +117,7 @@ public CompressionOutputStream createOutputStream(OutputStream out) * @param compressor compressor to use * @return a stream the user can write uncompressed data to, to have it * compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionOutputStream createOutputStream(OutputStream out, @@ -154,7 +155,7 @@ public Compressor createCompressor() { * * @param in the stream to read compressed bytes from * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionInputStream createInputStream(InputStream in) @@ -171,7 +172,7 @@ public CompressionInputStream createInputStream(InputStream in) * @param in the stream to read compressed bytes from * @param decompressor decompressor to use * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionInputStream createInputStream(InputStream in, @@ -236,7 +237,7 @@ public Decompressor createDecompressor() { */ @Override public String getDefaultExtension() { - return ".bz2"; + return CodecConstants.BZIP2_CODEC_EXTENSION; } private static class BZip2CompressionOutputStream extends @@ -255,10 +256,7 @@ public BZip2CompressionOutputStream(OutputStream out) private void writeStreamHeader() throws IOException { if (super.out != null) { - // The compressed bzip2 stream should start with the - // identifying characters BZ. Caller of CBZip2OutputStream - // i.e. this class must write these characters. 
- out.write(HEADER.getBytes(StandardCharsets.UTF_8)); + writeHeader(out); } } @@ -337,6 +335,7 @@ private static class BZip2CompressionInputStream extends private boolean isSubHeaderStripped = false; private READ_MODE readMode = READ_MODE.CONTINUOUS; private long startingPos = 0L; + private boolean didInitialRead; // Following state machine handles different states of compressed stream // position @@ -482,24 +481,42 @@ public void close() throws IOException { */ public int read(byte[] b, int off, int len) throws IOException { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || len > b.length - off) { + throw new IndexOutOfBoundsException(); + } + if (len == 0) { + return 0; + } if (needsReset) { internalReset(); } - - int result = 0; - result = this.input.read(b, off, len); + // When startingPos > 0, the stream should be initialized at the end of + // one block (which would correspond to be the start of another block). + // Thus, the initial read would technically be reading one byte passed a + // BZip2 end of block marker. To be consistent, we should also be + // updating the position to be one byte after the end of an block on the + // initial read. + boolean initializedAtEndOfBlock = + !didInitialRead && startingPos > 0 && readMode == READ_MODE.BYBLOCK; + int result = initializedAtEndOfBlock + ? BZip2Constants.END_OF_BLOCK + : this.input.read(b, off, len); if (result == BZip2Constants.END_OF_BLOCK) { this.posSM = POS_ADVERTISEMENT_STATE_MACHINE.ADVERTISE; } if (this.posSM == POS_ADVERTISEMENT_STATE_MACHINE.ADVERTISE) { - result = this.input.read(b, off, off + 1); + result = this.input.read(b, off, 1); // This is the precise time to update compressed stream position // to the client of this code. 
this.updatePos(true); this.posSM = POS_ADVERTISEMENT_STATE_MACHINE.HOLD; } + didInitialRead = true; return result; } @@ -515,6 +532,7 @@ private void internalReset() throws IOException { needsReset = false; BufferedInputStream bufferedIn = readStreamHeader(); input = new CBZip2InputStream(bufferedIn, this.readMode); + didInitialRead = false; } } @@ -547,4 +565,11 @@ private void updatePos(boolean shouldAddOn) { }// end of BZip2CompressionInputStream + @VisibleForTesting + public static void writeHeader(OutputStream out) throws IOException { + // The compressed bzip2 stream should start with the + // identifying characters BZ. Caller of CBZip2OutputStream + // i.e. this class must write these characters. + out.write(HEADER.getBytes(StandardCharsets.UTF_8)); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java index de457d192400d..ff10332ea8d5a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java @@ -43,7 +43,7 @@ public class BlockDecompressorStream extends DecompressorStream { * @param in input stream * @param decompressor decompressor to use * @param bufferSize size of buffer - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public BlockDecompressorStream(InputStream in, Decompressor decompressor, int bufferSize) throws IOException { @@ -55,7 +55,7 @@ public BlockDecompressorStream(InputStream in, Decompressor decompressor, * * @param in input stream * @param decompressor decompressor to use - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public BlockDecompressorStream(InputStream in, Decompressor decompressor) throws IOException { super(in, decompressor); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecConstants.java new file mode 100644 index 0000000000000..96410a18ebcb5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecConstants.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Codec related constants. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class CodecConstants { + + private CodecConstants() { + } + /** + * Default extension for {@link org.apache.hadoop.io.compress.DefaultCodec}. + */ + public static final String DEFAULT_CODEC_EXTENSION = ".deflate"; + + /** + * Default extension for {@link org.apache.hadoop.io.compress.BZip2Codec}. 
+ */ + public static final String BZIP2_CODEC_EXTENSION = ".bz2"; + + /** + * Default extension for {@link org.apache.hadoop.io.compress.GzipCodec}. + */ + public static final String GZIP_CODEC_EXTENSION = ".gz"; + + /** + * Default extension for {@link org.apache.hadoop.io.compress.Lz4Codec}. + */ + public static final String LZ4_CODEC_EXTENSION = ".lz4"; + + /** + * Default extension for + * {@link org.apache.hadoop.io.compress.PassthroughCodec}. + */ + public static final String PASSTHROUGH_CODEC_EXTENSION = ".passthrough"; + + /** + * Default extension for {@link org.apache.hadoop.io.compress.SnappyCodec}. + */ + public static final String SNAPPY_CODEC_EXTENSION = ".snappy"; + + /** + * Default extension for {@link org.apache.hadoop.io.compress.ZStandardCodec}. + */ + public static final String ZSTANDARD_CODEC_EXTENSION = ".zst"; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java index f103aad4e8596..9a18f9c52c756 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java @@ -28,9 +28,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -205,6 +205,7 @@ public static void returnCompressor(Compressor compressor) { } // if the compressor can't be reused, don't pool it. 
if (compressor.getClass().isAnnotationPresent(DoNotPool.class)) { + compressor.end(); return; } compressor.reset(); @@ -225,6 +226,7 @@ public static void returnDecompressor(Decompressor decompressor) { } // if the decompressor can't be reused, don't pool it. if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) { + decompressor.end(); return; } decompressor.reset(); @@ -235,7 +237,10 @@ public static void returnDecompressor(Decompressor decompressor) { /** * Return the number of leased {@link Compressor}s for this - * {@link CompressionCodec} + * {@link CompressionCodec}. + * + * @param codec codec. + * @return the number of leased. */ public static int getLeasedCompressorsCount(CompressionCodec codec) { return (codec == null) ? 0 : getLeaseCount(compressorCounts, @@ -244,7 +249,10 @@ public static int getLeasedCompressorsCount(CompressionCodec codec) { /** * Return the number of leased {@link Decompressor}s for this - * {@link CompressionCodec} + * {@link CompressionCodec}. + * + * @param codec codec. + * @return the number of leased */ public static int getLeasedDecompressorsCount(CompressionCodec codec) { return (codec == null) ? 0 : getLeaseCount(decompressorCounts, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java index f37aadfcb57f3..d064e1b914707 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java @@ -39,7 +39,7 @@ public interface CompressionCodec { * * @param out the location for the final output stream * @return a stream the user can write uncompressed data to have it compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ CompressionOutputStream createOutputStream(OutputStream out) throws IOException; @@ -51,7 +51,7 @@ CompressionOutputStream createOutputStream(OutputStream out) * @param out the location for the final output stream * @param compressor compressor to use * @return a stream the user can write uncompressed data to have it compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. */ CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) @@ -77,7 +77,7 @@ CompressionOutputStream createOutputStream(OutputStream out, * * @param in the stream to read compressed bytes from * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ CompressionInputStream createInputStream(InputStream in) throws IOException; @@ -88,7 +88,7 @@ CompressionOutputStream createOutputStream(OutputStream out, * @param in the stream to read compressed bytes from * @param decompressor decompressor to use * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java index 1fa7fd4b52be5..d6a9df6382fe8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java @@ -171,6 +171,8 @@ public static void setCodecClasses(Configuration conf, /** * Find the codecs specified in the config value io.compression.codecs * and register them. Defaults to gzip and deflate. + * + * @param conf configuration. 
*/ public CompressionCodecFactory(Configuration conf) { codecs = new TreeMap(); @@ -291,7 +293,8 @@ public static String removeSuffix(String filename, String suffix) { /** * A little test program. - * @param args + * @param args arguments. + * @throws Exception exception. */ public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java index 2dfa30bf76ec4..5bfec01ec945d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java @@ -25,6 +25,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PositionedReadable; import org.apache.hadoop.fs.Seekable; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.IOStatisticsSupport; + /** * A compression input stream. * @@ -34,7 +38,8 @@ */ @InterfaceAudience.Public @InterfaceStability.Evolving -public abstract class CompressionInputStream extends InputStream implements Seekable { +public abstract class CompressionInputStream extends InputStream + implements Seekable, IOStatisticsSource { /** * The input stream to be compressed. */ @@ -48,7 +53,7 @@ public abstract class CompressionInputStream extends InputStream implements Seek * the decompressed bytes from the given stream. * * @param in The input stream to be compressed. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ protected CompressionInputStream(InputStream in) throws IOException { if (!(in instanceof Seekable) || !(in instanceof PositionedReadable)) { @@ -68,7 +73,16 @@ public void close() throws IOException { } } } - + + /** + * Return any IOStatistics provided by the underlying stream. + * @return IO stats from the inner stream. + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(in); + } + /** * Read bytes from the stream. * Made abstract to prevent leakage to underlying stream. @@ -79,6 +93,8 @@ public void close() throws IOException { /** * Reset the decompressor to its initial state and discard any buffered data, * as the underlying stream may have been repositioned. + * + * @throws IOException raised on errors performing I/O. */ public abstract void resetState() throws IOException; @@ -104,7 +120,7 @@ public long getPos() throws IOException { /** * This method is current not supported. * - * @throws UnsupportedOperationException + * @throws UnsupportedOperationException Unsupported Operation Exception. */ @Override @@ -115,7 +131,7 @@ public void seek(long pos) throws UnsupportedOperationException { /** * This method is current not supported. * - * @throws UnsupportedOperationException + * @throws UnsupportedOperationException Unsupported Operation Exception. 
*/ @Override public boolean seekToNewSource(long targetPos) throws UnsupportedOperationException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java index 71c7f32e665e5..2e412dcd58fce 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java @@ -23,13 +23,17 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.IOStatisticsSupport; /** * A compression output stream. */ @InterfaceAudience.Public @InterfaceStability.Evolving -public abstract class CompressionOutputStream extends OutputStream { +public abstract class CompressionOutputStream extends OutputStream + implements IOStatisticsSource { /** * The output stream to be compressed. */ @@ -44,7 +48,7 @@ public abstract class CompressionOutputStream extends OutputStream { /** * Create a compression output stream that writes * the compressed bytes to the given stream. - * @param out + * @param out out. */ protected CompressionOutputStream(OutputStream out) { this.out = out; @@ -85,13 +89,23 @@ public void flush() throws IOException { /** * Finishes writing compressed data to the output stream * without closing the underlying stream. + * @throws IOException raised on errors performing I/O. */ public abstract void finish() throws IOException; /** * Reset the compression to the initial state. * Does not reset the underlying stream. + * @throws IOException raised on errors performing I/O. 
*/ public abstract void resetState() throws IOException; + /** + * Return any IOStatistics provided by the underlying stream. + * @return IO stats from the inner stream. + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(out); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Compressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Compressor.java index 537837faa0a51..7e2a6e679f43c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Compressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Compressor.java @@ -65,11 +65,13 @@ public interface Compressor { /** * Return number of uncompressed bytes input so far. + * @return bytes read. */ public long getBytesRead(); /** * Return number of compressed bytes output so far. + * @return bytes written. */ public long getBytesWritten(); @@ -97,6 +99,7 @@ public interface Compressor { * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of compressed data. + * @throws IOException raised on errors performing I/O. */ public int compress(byte[] b, int off, int len) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java index 3808003de291d..30d4e29892eb7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java @@ -92,11 +92,11 @@ public interface Decompressor { * {@link #needsInput()} should be called in order to determine if more * input data is required. 
* - * @param b Buffer for the compressed data + * @param b Buffer for the uncompressed data * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of uncompressed data. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public int decompress(byte[] b, int off, int len) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DecompressorStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DecompressorStream.java index 756ccf3c8ed1f..05867214fca25 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DecompressorStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DecompressorStream.java @@ -22,7 +22,7 @@ import java.io.IOException; import java.io.InputStream; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -80,7 +80,7 @@ public DecompressorStream(InputStream in, Decompressor decompressor) * Allow derived classes to directly set the underlying stream. * * @param in Underlying input stream. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ protected DecompressorStream(InputStream in) throws IOException { super(in); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java index 33f39ef9297fb..d2ffb22eaafb3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java @@ -114,7 +114,7 @@ public DirectDecompressor createDirectDecompressor() { @Override public String getDefaultExtension() { - return ".deflate"; + return CodecConstants.DEFAULT_CODEC_EXTENSION; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java index 9bd861da9e890..1535e8c3d386e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java @@ -206,7 +206,7 @@ public DirectDecompressor createDirectDecompressor() { @Override public String getDefaultExtension() { - return ".gz"; + return CodecConstants.GZIP_CODEC_EXTENSION; } static final class GzipZlibCompressor extends ZlibCompressor { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java index 45b5e9cdabd28..a5afb706c99c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java @@ -27,17 +27,12 @@ import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import 
org.apache.hadoop.io.compress.lz4.Lz4Decompressor; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.util.NativeCodeLoader; /** * This class creates lz4 compressors/decompressors. */ public class Lz4Codec implements Configurable, CompressionCodec { - static { - NativeCodeLoader.isNativeCodeLoaded(); - } - Configuration conf; /** @@ -60,26 +55,13 @@ public Configuration getConf() { return conf; } - /** - * Are the native lz4 libraries loaded & initialized? - * - * @return true if loaded & initialized, otherwise false - */ - public static boolean isNativeCodeLoaded() { - return NativeCodeLoader.isNativeCodeLoaded(); - } - - public static String getLibraryName() { - return Lz4Compressor.getLibraryName(); - } - /** * Create a {@link CompressionOutputStream} that will write to the given * {@link OutputStream}. * * @param out the location for the final output stream * @return a stream the user can write uncompressed data to have it compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionOutputStream createOutputStream(OutputStream out) @@ -95,15 +77,12 @@ public CompressionOutputStream createOutputStream(OutputStream out) * @param out the location for the final output stream * @param compressor compressor to use * @return a stream the user can write uncompressed data to have it compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT); @@ -121,10 +100,6 @@ public CompressionOutputStream createOutputStream(OutputStream out, */ @Override public Class getCompressorType() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } - return Lz4Compressor.class; } @@ -135,9 +110,6 @@ public Class getCompressorType() { */ @Override public Compressor createCompressor() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT); @@ -153,7 +125,7 @@ public Compressor createCompressor() { * * @param in the stream to read compressed bytes from * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionInputStream createInputStream(InputStream in) @@ -169,16 +141,12 @@ public CompressionInputStream createInputStream(InputStream in) * @param in the stream to read compressed bytes from * @param decompressor decompressor to use * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } - return new BlockDecompressorStream(in, decompressor, conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT)); @@ -191,10 +159,6 @@ public CompressionInputStream createInputStream(InputStream in, */ @Override public Class getDecompressorType() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } - return Lz4Decompressor.class; } @@ -205,9 +169,6 @@ public Class getDecompressorType() { */ @Override public Decompressor createDecompressor() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT); @@ -221,6 +182,6 @@ public Decompressor createDecompressor() { */ @Override public String getDefaultExtension() { - return ".lz4"; + return CodecConstants.LZ4_CODEC_EXTENSION; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/PassthroughCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/PassthroughCodec.java index a3f0bffeebc0f..074762c0e8f7a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/PassthroughCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/PassthroughCodec.java @@ -77,7 +77,8 @@ public class PassthroughCodec * This default extension is here so that if no extension has been defined, * some value is still returned: {@value}.. 
*/ - public static final String DEFAULT_EXTENSION = ".passthrough"; + public static final String DEFAULT_EXTENSION = + CodecConstants.PASSTHROUGH_CODEC_EXTENSION; private Configuration conf; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java index cd0c7880376bf..d64c6e512f87c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java @@ -28,7 +28,6 @@ import org.apache.hadoop.io.compress.snappy.SnappyDecompressor; import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.util.NativeCodeLoader; /** * This class creates snappy compressors/decompressors. @@ -56,44 +55,13 @@ public Configuration getConf() { return conf; } - /** - * Are the native snappy libraries loaded & initialized? 
- */ - public static void checkNativeCodeLoaded() { - if (!NativeCodeLoader.buildSupportsSnappy()) { - throw new RuntimeException("native snappy library not available: " + - "this version of libhadoop was built without " + - "snappy support."); - } - if (!NativeCodeLoader.isNativeCodeLoaded()) { - throw new RuntimeException("Failed to load libhadoop."); - } - if (!SnappyCompressor.isNativeCodeLoaded()) { - throw new RuntimeException("native snappy library not available: " + - "SnappyCompressor has not been loaded."); - } - if (!SnappyDecompressor.isNativeCodeLoaded()) { - throw new RuntimeException("native snappy library not available: " + - "SnappyDecompressor has not been loaded."); - } - } - - public static boolean isNativeCodeLoaded() { - return SnappyCompressor.isNativeCodeLoaded() && - SnappyDecompressor.isNativeCodeLoaded(); - } - - public static String getLibraryName() { - return SnappyCompressor.getLibraryName(); - } - /** * Create a {@link CompressionOutputStream} that will write to the given * {@link OutputStream}. * * @param out the location for the final output stream * @return a stream the user can write uncompressed data to have it compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionOutputStream createOutputStream(OutputStream out) @@ -109,13 +77,12 @@ public CompressionOutputStream createOutputStream(OutputStream out) * @param out the location for the final output stream * @param compressor compressor to use * @return a stream the user can write uncompressed data to have it compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -133,7 +100,6 @@ public CompressionOutputStream createOutputStream(OutputStream out, */ @Override public Class getCompressorType() { - checkNativeCodeLoaded(); return SnappyCompressor.class; } @@ -144,7 +110,6 @@ public Class getCompressorType() { */ @Override public Compressor createCompressor() { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -157,7 +122,7 @@ public Compressor createCompressor() { * * @param in the stream to read compressed bytes from * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionInputStream createInputStream(InputStream in) @@ -173,13 +138,12 @@ public CompressionInputStream createInputStream(InputStream in) * @param in the stream to read compressed bytes from * @param decompressor decompressor to use * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { - checkNativeCodeLoaded(); return new BlockDecompressorStream(in, decompressor, conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT)); @@ -192,7 +156,6 @@ public CompressionInputStream createInputStream(InputStream in, */ @Override public Class getDecompressorType() { - checkNativeCodeLoaded(); return SnappyDecompressor.class; } @@ -203,7 +166,6 @@ public Class getDecompressorType() { */ @Override public Decompressor createDecompressor() { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -215,7 +177,7 @@ public Decompressor createDecompressor() { */ @Override public DirectDecompressor createDirectDecompressor() { - return isNativeCodeLoaded() ? new SnappyDirectDecompressor() : null; + return new SnappyDirectDecompressor(); } /** @@ -225,6 +187,6 @@ public DirectDecompressor createDirectDecompressor() { */ @Override public String getDefaultExtension() { - return ".snappy"; + return CodecConstants.SNAPPY_CODEC_EXTENSION; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SplittableCompressionCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SplittableCompressionCodec.java index a756f47260c33..f2e28774a46db 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SplittableCompressionCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SplittableCompressionCodec.java @@ -61,6 +61,7 @@ public enum READ_MODE {CONTINUOUS, BYBLOCK}; * Create a stream as dictated by the readMode. 
This method is used when * the codecs wants the ability to work with the underlying stream positions. * + * @param decompressor decompressor. * @param seekableIn The seekable input stream (seeks in compressed data) * @param start The start offset into the compressed stream. May be changed * by the underlying codec. @@ -69,6 +70,7 @@ public enum READ_MODE {CONTINUOUS, BYBLOCK}; * @param readMode Controls whether stream position is reported continuously * from the compressed stream only only at block boundaries. * @return a stream to read uncompressed bytes from + * @throws IOException raised on errors performing I/O. */ SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/ZStandardCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/ZStandardCodec.java index c56bbba3b5959..139e81eb73cc2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/ZStandardCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/ZStandardCodec.java @@ -116,7 +116,7 @@ private static int getBufferSize(Configuration conf) { * * @param out the location for the final output stream * @return a stream the user can write uncompressed data to have compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionOutputStream createOutputStream(OutputStream out) @@ -132,7 +132,7 @@ public CompressionOutputStream createOutputStream(OutputStream out) * @param out the location for the final output stream * @param compressor compressor to use * @return a stream the user can write uncompressed data to have compressed - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public CompressionOutputStream createOutputStream(OutputStream out, @@ -173,7 +173,7 @@ public Compressor createCompressor() { * * @param in the stream to read compressed bytes from * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionInputStream createInputStream(InputStream in) @@ -189,7 +189,7 @@ public CompressionInputStream createInputStream(InputStream in) * @param in the stream to read compressed bytes from * @param decompressor decompressor to use * @return a stream to read uncompressed bytes from - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override public CompressionInputStream createInputStream(InputStream in, @@ -230,7 +230,7 @@ public Decompressor createDecompressor() { */ @Override public String getDefaultExtension() { - return ".zst"; + return CodecConstants.ZSTANDARD_CODEC_EXTENSION; } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Compressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Compressor.java index 5713c56df6aef..9d1d85332489a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Compressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Compressor.java @@ -67,6 +67,7 @@ public Bzip2Compressor() { /** * Creates a new compressor, taking settings from the configuration. + * @param conf configuration. 
*/ public Bzip2Compressor(Configuration conf) { this(Bzip2Factory.getBlockSize(conf), diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Decompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Decompressor.java index 72ba97630e206..acd806b9b300a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Decompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/Bzip2Decompressor.java @@ -50,6 +50,8 @@ public class Bzip2Decompressor implements Decompressor { /** * Creates a new decompressor. + * @param conserveMemory conserveMemory. + * @param directBufferSize directBufferSize. */ public Bzip2Decompressor(boolean conserveMemory, int directBufferSize) { this.conserveMemory = conserveMemory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java index 8426d25c2950e..61e88d80d8ce4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.IOException; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE; @@ -152,6 +153,7 @@ public enum STATE { * This method reports the processed bytes so far. Please note that this * statistic is only updated on block boundaries and only when the stream is * initiated in BYBLOCK mode. + * @return ProcessedByteCount. 
*/ public long getProcessedByteCount() { return reportedBytesReadFromCompressedStream; @@ -209,7 +211,7 @@ private int readAByte(InputStream inStream) throws IOException { * @param marker The bit pattern to be found in the stream * @param markerBitLength No of bits in the marker * @return true if the marker was found otherwise false - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws IllegalArgumentException if marketBitLength is greater than 63 */ public boolean skipToNextMarker(long marker, int markerBitLength) @@ -282,7 +284,8 @@ private void makeMaps() { * the magic. Thus callers have to skip the first two bytes. Otherwise this * constructor will throw an exception. *

    - * + * @param in in. + * @param readMode READ_MODE. * @throws IOException * if the stream content is malformed or an I/O error occurs. * @throws NullPointerException @@ -310,13 +313,24 @@ private CBZip2InputStream(final InputStream in, READ_MODE readMode, boolean skip } } else if (readMode == READ_MODE.BYBLOCK) { this.currentState = STATE.NO_PROCESS_STATE; - skipResult = this.skipToNextMarker(CBZip2InputStream.BLOCK_DELIMITER,DELIMITER_BIT_LENGTH); + skipResult = skipToNextBlockMarker(); if(!skipDecompression){ changeStateToProcessABlock(); } } } + /** + * Skips bytes in the stream until the start marker of a block is reached + * or end of stream is reached. Used for testing purposes to identify the + * start offsets of blocks. + */ + @VisibleForTesting + boolean skipToNextBlockMarker() throws IOException { + return skipToNextMarker( + CBZip2InputStream.BLOCK_DELIMITER, DELIMITER_BIT_LENGTH); + } + /** * Returns the number of bytes between the current stream position * and the immediate next BZip2 block marker. @@ -326,7 +340,7 @@ private CBZip2InputStream(final InputStream in, READ_MODE readMode, boolean skip * * @return long Number of bytes between current stream position and the * next BZip2 block start marker. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
* */ public static long numberOfBytesTillNextMarker(final InputStream in) throws IOException{ @@ -426,7 +440,7 @@ public int read(final byte[] dest, final int offs, final int len) //report 'end of block' or 'end of stream' result = b; - skipResult = this.skipToNextMarker(CBZip2InputStream.BLOCK_DELIMITER, DELIMITER_BIT_LENGTH); + skipResult = skipToNextBlockMarker(); changeStateToProcessABlock(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java index 850fec77c5109..50bdddb8136fc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java @@ -27,6 +27,7 @@ import java.io.OutputStream; import java.io.IOException; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.io.IOUtils; /** @@ -64,13 +65,10 @@ * * * - * + * * * * - * - * - * * @@ -213,6 +211,10 @@ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { /** * This method is accessible by subclasses for historical purposes. If you * don't know what it does then you don't need it. + * @param len len. + * @param freq freq. + * @param alphaSize alphaSize. + * @param maxLen maxLen. 
*/ protected static void hbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen) { @@ -780,8 +782,7 @@ private void initBlock() { inUse[i] = false; } - /* 20 is just a paranoia constant */ - this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20; + this.allowableBlockSize = getAllowableBlockSize(this.blockSize100k); } private void endBlock() throws IOException { @@ -849,6 +850,7 @@ private void endCompression() throws IOException { /** * Returns the blocksize parameter specified at construction time. + * @return blocksize. */ public final int getBlockSize() { return this.blockSize100k; @@ -2091,4 +2093,9 @@ private static final class Data extends Object { } + @VisibleForTesting + static int getAllowableBlockSize(int blockSize100k) { + /* 20 is just a paranoia constant */ + return (blockSize100k * BZip2Constants.baseBlockSize) - 20; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java index 3792c365b4d98..607a802678bbd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java @@ -22,9 +22,11 @@ import java.nio.Buffer; import java.nio.ByteBuffer; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.lz4.LZ4Compressor; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,22 +50,7 @@ public class Lz4Compressor implements Compressor { private long bytesRead = 0L; private long bytesWritten = 0L; - private final boolean useLz4HC; - - static { - if (NativeCodeLoader.isNativeCodeLoaded()) { - // Initialize the native library - try { - 
initIDs(); - } catch (Throwable t) { - // Ignore failure to load/initialize lz4 - LOG.warn(t.toString()); - } - } else { - LOG.error("Cannot load " + Lz4Compressor.class.getName() + - " without native hadoop library!"); - } - } + private final LZ4Compressor lz4Compressor; /** * Creates a new compressor. @@ -73,9 +60,21 @@ public class Lz4Compressor implements Compressor { * which trades CPU for compression ratio. */ public Lz4Compressor(int directBufferSize, boolean useLz4HC) { - this.useLz4HC = useLz4HC; this.directBufferSize = directBufferSize; + try { + LZ4Factory lz4Factory = LZ4Factory.fastestInstance(); + if (useLz4HC) { + lz4Compressor = lz4Factory.highCompressor(); + } else { + lz4Compressor = lz4Factory.fastCompressor(); + } + } catch (AssertionError t) { + throw new RuntimeException("lz4-java library is not available: " + + "Lz4Compressor has not been loaded. You need to add " + + "lz4-java.jar to your CLASSPATH. " + t, t); + } + uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); compressedDirectBuf.position(directBufferSize); @@ -236,7 +235,7 @@ public synchronized int compress(byte[] b, int off, int len) } // Compress data - n = useLz4HC ? 
compressBytesDirectHC() : compressBytesDirect(); + n = compressDirectBuf(); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // lz4 consumes all buffer input @@ -302,11 +301,20 @@ public synchronized long getBytesWritten() { public synchronized void end() { } - private native static void initIDs(); - - private native int compressBytesDirect(); - - private native int compressBytesDirectHC(); - - public native static String getLibraryName(); + private int compressDirectBuf() { + if (uncompressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `uncompressedDirectBuf` for reading + uncompressedDirectBuf.limit(uncompressedDirectBufLen).position(0); + compressedDirectBuf.clear(); + lz4Compressor.compress((ByteBuffer) uncompressedDirectBuf, + (ByteBuffer) compressedDirectBuf); + uncompressedDirectBufLen = 0; + uncompressedDirectBuf.limit(directBufferSize).position(0); + int size = compressedDirectBuf.position(); + compressedDirectBuf.position(0); + return size; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java index f26ae8481c3f9..719d216abaed0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java @@ -22,8 +22,10 @@ import java.nio.Buffer; import java.nio.ByteBuffer; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.lz4.LZ4SafeDecompressor; + import org.apache.hadoop.io.compress.Decompressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,20 +46,7 @@ public class Lz4Decompressor implements Decompressor { private int userBufOff = 0, userBufLen = 0; private boolean finished; - static { - if 
(NativeCodeLoader.isNativeCodeLoaded()) { - // Initialize the native library - try { - initIDs(); - } catch (Throwable t) { - // Ignore failure to load/initialize lz4 - LOG.warn(t.toString()); - } - } else { - LOG.error("Cannot load " + Lz4Compressor.class.getName() + - " without native hadoop library!"); - } - } + private LZ4SafeDecompressor lz4Decompressor; /** * Creates a new compressor. @@ -67,6 +56,15 @@ public class Lz4Decompressor implements Decompressor { public Lz4Decompressor(int directBufferSize) { this.directBufferSize = directBufferSize; + try { + LZ4Factory lz4Factory = LZ4Factory.fastestInstance(); + lz4Decompressor = lz4Factory.safeDecompressor(); + } catch (AssertionError t) { + throw new RuntimeException("lz4-java library is not available: " + + "Lz4Decompressor has not been loaded. You need to add " + + "lz4-java.jar to your CLASSPATH. " + t, t); + } + compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); uncompressedDirectBuf.position(directBufferSize); @@ -200,8 +198,8 @@ public synchronized boolean finished() { * @param b Buffer for the compressed data * @param off Start offset of the data * @param len Size of the buffer - * @return The actual number of bytes of compressed data. - * @throws IOException + * @return The actual number of bytes of uncompressed data. + * @throws IOException raised on errors performing I/O. 
*/ @Override public synchronized int decompress(byte[] b, int off, int len) @@ -228,7 +226,7 @@ public synchronized int decompress(byte[] b, int off, int len) uncompressedDirectBuf.limit(directBufferSize); // Decompress data - n = decompressBytesDirect(); + n = decompressDirectBuf(); uncompressedDirectBuf.limit(n); if (userBufLen <= 0) { @@ -272,7 +270,18 @@ public synchronized void end() { // do nothing } - private native static void initIDs(); - - private native int decompressBytesDirect(); + private int decompressDirectBuf() { + if (compressedDirectBufLen == 0) { + return 0; + } else { + compressedDirectBuf.limit(compressedDirectBufLen).position(0); + lz4Decompressor.decompress((ByteBuffer) compressedDirectBuf, + (ByteBuffer) uncompressedDirectBuf); + compressedDirectBufLen = 0; + compressedDirectBuf.clear(); + int size = uncompressedDirectBuf.position(); + uncompressedDirectBuf.position(0); + return size; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 3d386800e4d87..2d514705d1e42 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -24,9 +24,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xerial.snappy.Snappy; /** * A {@link Compressor} based on the snappy compression algorithm. 
@@ -48,24 +48,6 @@ public class SnappyCompressor implements Compressor { private long bytesRead = 0L; private long bytesWritten = 0L; - private static boolean nativeSnappyLoaded = false; - - static { - if (NativeCodeLoader.isNativeCodeLoaded() && - NativeCodeLoader.buildSupportsSnappy()) { - try { - initIDs(); - nativeSnappyLoaded = true; - } catch (Throwable t) { - LOG.error("failed to load SnappyCompressor", t); - } - } - } - - public static boolean isNativeCodeLoaded() { - return nativeSnappyLoaded; - } - /** * Creates a new compressor. * @@ -225,7 +207,7 @@ public int compress(byte[] b, int off, int len) } // Compress data - n = compressBytesDirect(); + n = compressDirectBuf(); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // snappy consumes all buffer input @@ -291,9 +273,16 @@ public long getBytesWritten() { public void end() { } - private native static void initIDs(); - - private native int compressBytesDirect(); - - public native static String getLibraryName(); + private int compressDirectBuf() throws IOException { + if (uncompressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `uncompressedDirectBuf` for reading + uncompressedDirectBuf.limit(uncompressedDirectBufLen).position(0); + int size = Snappy.compress((ByteBuffer) uncompressedDirectBuf, + (ByteBuffer) compressedDirectBuf); + uncompressedDirectBufLen = 0; + return size; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index f31b76c347c5c..58987c4dda3de 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -24,9 +24,9 @@ import org.apache.hadoop.io.compress.Decompressor; 
import org.apache.hadoop.io.compress.DirectDecompressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xerial.snappy.Snappy; /** * A {@link Decompressor} based on the snappy compression algorithm. @@ -45,24 +45,6 @@ public class SnappyDecompressor implements Decompressor { private int userBufOff = 0, userBufLen = 0; private boolean finished; - private static boolean nativeSnappyLoaded = false; - - static { - if (NativeCodeLoader.isNativeCodeLoaded() && - NativeCodeLoader.buildSupportsSnappy()) { - try { - initIDs(); - nativeSnappyLoaded = true; - } catch (Throwable t) { - LOG.error("failed to load SnappyDecompressor", t); - } - } - } - - public static boolean isNativeCodeLoaded() { - return nativeSnappyLoaded; - } - /** * Creates a new compressor. * @@ -201,11 +183,11 @@ public boolean finished() { * {@link #needsInput()} should be called in order to determine if more * input data is required. * - * @param b Buffer for the compressed data + * @param b Buffer for the uncompressed data * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of compressed data. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public int decompress(byte[] b, int off, int len) @@ -232,7 +214,7 @@ public int decompress(byte[] b, int off, int len) uncompressedDirectBuf.limit(directBufferSize); // Decompress data - n = decompressBytesDirect(); + n = decompressDirectBuf(); uncompressedDirectBuf.limit(n); if (userBufLen <= 0) { @@ -276,10 +258,20 @@ public void end() { // do nothing } - private native static void initIDs(); + private int decompressDirectBuf() throws IOException { + if (compressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `compressedDirectBuf` for reading + compressedDirectBuf.limit(compressedDirectBufLen).position(0); + int size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, + (ByteBuffer) uncompressedDirectBuf); + compressedDirectBufLen = 0; + compressedDirectBuf.clear(); + return size; + } + } - private native int decompressBytesDirect(); - int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { assert (this instanceof SnappyDirectDecompressor); @@ -298,7 +290,7 @@ int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { directBufferSize = dst.remaining(); int n = 0; try { - n = decompressBytesDirect(); + n = decompressDirectBuf(); presliced.position(presliced.position() + n); // SNAPPY always consumes the whole buffer or throws an exception src.position(src.limit()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java index 896d35eb1808b..24b8c392f76eb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java @@ -23,6 +23,7 @@ import java.util.zip.DataFormatException; import java.util.zip.Inflater; +import 
org.apache.hadoop.io.compress.AlreadyClosedException; import org.apache.hadoop.io.compress.Decompressor; import org.apache.hadoop.io.compress.DoNotPool; import org.apache.hadoop.util.DataChecksum; @@ -105,7 +106,11 @@ private enum GzipStateLabel { * Immediately after the trailer (and potentially prior to the next gzip * member/substream header), without reset() having been called. */ - FINISHED; + FINISHED, + /** + * Immediately after end() has been called. + */ + ENDED; } /** @@ -182,6 +187,10 @@ public synchronized int decompress(byte[] b, int off, int len) throws IOException { int numAvailBytes = 0; + if (state == GzipStateLabel.ENDED) { + throw new AlreadyClosedException("decompress called on closed decompressor"); + } + if (state != GzipStateLabel.DEFLATE_STREAM) { executeHeaderState(); @@ -472,6 +481,8 @@ public synchronized void reset() { @Override public synchronized void end() { inflater.end(); + + state = GzipStateLabel.ENDED; } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java index da8a90bb3170e..89e05fc6d07be 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java @@ -240,6 +240,7 @@ public ZlibCompressor() { /** * Creates a new compressor, taking settings from the configuration. + * @param conf configuration. 
*/ public ZlibCompressor(Configuration conf) { this(ZlibFactory.getCompressionLevel(conf), diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java index f642d7713035d..c2615548d23ee 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java @@ -101,6 +101,8 @@ static boolean isNativeZlibLoaded() { /** * Creates a new decompressor. + * @param header header. + * @param directBufferSize directBufferSize. */ public ZlibDecompressor(CompressionHeader header, int directBufferSize) { this.header = header; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java index 07afbab7246b7..6982ed6d8471a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java @@ -26,7 +26,7 @@ import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy; import org.apache.hadoop.util.NativeCodeLoader; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,7 +66,7 @@ public static void loadNativeZLib() { /** * Set the flag whether to use native library. Used for testing non-native * libraries - * + * @param isLoaded isLoaded. 
*/ @VisibleForTesting public static void setNativeZlibLoaded(final boolean isLoaded) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.java index 7445502c989d8..6ba7320652de3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.java @@ -18,7 +18,7 @@ package org.apache.hadoop.io.compress.zstd; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -84,6 +84,8 @@ public static int getRecommendedBufferSize() { /** * Creates a new compressor with the default compression level. * Compressed data will be generated in ZStandard format. + * @param level level. + * @param bufferSize bufferSize. 
*/ public ZStandardCompressor(int level, int bufferSize) { this(level, bufferSize, bufferSize); @@ -159,7 +161,7 @@ public boolean needsInput() { } // have we consumed all input - if (keepUncompressedBuf && uncompressedDirectBufLen > 0) { + if (keepUncompressedBuf && uncompressedDirectBufLen - uncompressedDirectBufOff > 0) { return false; } @@ -223,7 +225,7 @@ public int compress(byte[] b, int off, int len) throws IOException { compressedDirectBuf.limit(n); // Check if we have consumed all input buffer - if (uncompressedDirectBufLen <= 0) { + if (uncompressedDirectBufLen - uncompressedDirectBufOff <= 0) { // consumed all input buffer keepUncompressedBuf = false; uncompressedDirectBuf.clear(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardDecompressor.java index bc9d29cb4f294..792547a62faea 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zstd/ZStandardDecompressor.java @@ -73,6 +73,7 @@ public ZStandardDecompressor() { /** * Creates a new decompressor. + * @param bufferSize bufferSize. */ public ZStandardDecompressor(int bufferSize) { this.directBufferSize = bufferSize; @@ -113,6 +114,12 @@ private void setInputFromSavedData() { compressedDirectBuf.put( userBuf, userBufOff, bytesInCompressedBuffer); + // Set the finished to false when compressedDirectBuf still + // contains some bytes. 
+ if (compressedDirectBuf.position() > 0 && finished) { + finished = false; + } + userBufOff += bytesInCompressedBuffer; userBufferBytesToConsume -= bytesInCompressedBuffer; } @@ -186,6 +193,13 @@ public int decompress(byte[] b, int off, int len) 0, directBufferSize ); + + // Set the finished to false when compressedDirectBuf still + // contains some bytes. + if (remaining > 0 && finished) { + finished = false; + } + uncompressedDirectBuf.limit(n); // Get at most 'len' bytes diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecRegistry.java index daf91e22ecc91..359e07e27fd2a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecRegistry.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecRegistry.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.io.erasurecode; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory; import org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawErasureCoderFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java index 5ba6e9c0dd1ed..85aebd8c74bf5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.io.erasurecode; -import com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.erasurecode.codec.ErasureCodec; @@ -83,6 +83,7 @@ private CodecUtil() { } /** * Create encoder corresponding to given codec. * @param options Erasure codec options + * @param conf configuration. * @return erasure encoder */ public static ErasureEncoder createEncoder(Configuration conf, @@ -100,6 +101,7 @@ public static ErasureEncoder createEncoder(Configuration conf, /** * Create decoder corresponding to given codec. * @param options Erasure codec options + * @param conf configuration. * @return erasure decoder */ public static ErasureDecoder createDecoder(Configuration conf, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java index ec317eee4dc3e..83a3151282096 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java @@ -61,6 +61,7 @@ private ErasureCodeNative() {} /** * Are native libraries loaded? + * @return if is native code loaded true,not false. */ public static boolean isNativeCodeLoaded() { return LOADING_FAILURE_REASON == null; @@ -82,6 +83,7 @@ public static void checkNativeCodeLoaded() { /** * Get the native library name that's available or supported. + * @return library name. 
*/ public static native String getLibraryName(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java index b5ae1f1e399a8..ab1775538bd5f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java @@ -68,6 +68,7 @@ public interface ErasureCoder extends Configurable { * * @param blockGroup the erasure coding block group containing all necessary * information for codec calculation + * @return ErasureCodingStep. */ ErasureCodingStep calculateCoding(ECBlockGroup blockGroup); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java index fb89d99a0540c..333647c982b9f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java @@ -46,8 +46,9 @@ public interface ErasureCodingStep { /** * Perform encoding or decoding given the input chunks, and generated results * will be written to the output chunks. - * @param inputChunks - * @param outputChunks + * @param inputChunks inputChunks. + * @param outputChunks outputChunks. + * @throws IOException raised on errors performing I/O. 
*/ void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecoder.java index faf44d944eeaf..a046d106f0ed1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecoder.java @@ -65,7 +65,7 @@ public ErasureCoderOptions getOptions() { /** * We have all the data blocks and parity blocks as input blocks for * recovering by default. It's codec specific - * @param blockGroup + * @param blockGroup blockGroup. * @return input blocks */ protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) { @@ -83,7 +83,7 @@ protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) { /** * Which blocks were erased ? - * @param blockGroup + * @param blockGroup blockGroup. * @return output blocks to recover */ protected ECBlock[] getOutputBlocks(ECBlockGroup blockGroup) { @@ -118,7 +118,7 @@ public void release() { /** * Perform decoding against a block blockGroup. - * @param blockGroup + * @param blockGroup blockGroup. * @return decoding step for caller to do the real work */ protected abstract ErasureCodingStep prepareDecodingStep( @@ -126,7 +126,7 @@ protected abstract ErasureCodingStep prepareDecodingStep( /** * Get the number of erased blocks in the block group. - * @param blockGroup + * @param blockGroup blockGroup. * @return number of erased blocks */ protected int getNumErasedBlocks(ECBlockGroup blockGroup) { @@ -153,7 +153,7 @@ protected static int getNumErasedBlocks(ECBlock[] inputBlocks) { /** * Get indexes of erased blocks from inputBlocks - * @param inputBlocks + * @param inputBlocks inputBlocks. 
* @return indexes of erased blocks from inputBlocks */ protected int[] getErasedIndexes(ECBlock[] inputBlocks) { @@ -174,8 +174,8 @@ protected int[] getErasedIndexes(ECBlock[] inputBlocks) { } /** - * Get erased input blocks from inputBlocks - * @param inputBlocks + * Get erased input blocks from inputBlocks. + * @param inputBlocks input inputBlocks. * @return an array of erased blocks from inputBlocks */ protected ECBlock[] getErasedBlocks(ECBlock[] inputBlocks) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java index 24f55470e1727..20a396d313678 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java @@ -37,10 +37,10 @@ public class ErasureDecodingStep implements ErasureCodingStep { /** * The constructor with all the necessary info. - * @param inputBlocks + * @param inputBlocks inputBlocks. * @param erasedIndexes the indexes of erased blocks in inputBlocks array - * @param outputBlocks - * @param rawDecoder + * @param outputBlocks outputBlocks. + * @param rawDecoder rawDecoder. 
*/ public ErasureDecodingStep(ECBlock[] inputBlocks, int[] erasedIndexes, ECBlock[] outputBlocks, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncoder.java index 81666e9b76b2e..cca272f69a28d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncoder.java @@ -83,7 +83,7 @@ public void release() { /** * Perform encoding against a block group. - * @param blockGroup + * @param blockGroup blockGroup. * @return encoding step for caller to do the real work */ protected abstract ErasureCodingStep prepareEncodingStep( diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java index 5fc5c7a09928f..9e696d2c58477 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java @@ -36,9 +36,9 @@ public class ErasureEncodingStep implements ErasureCodingStep { /** * The constructor with all the necessary info. - * @param inputBlocks - * @param outputBlocks - * @param rawEncoder + * @param inputBlocks inputBlocks. + * @param outputBlocks outputBlocks. + * @param rawEncoder rawEncoder. 
*/ public ErasureEncodingStep(ECBlock[] inputBlocks, ECBlock[] outputBlocks, RawErasureEncoder rawEncoder) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHErasureCodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHErasureCodingStep.java index a0f5b72710679..46f0a76da17df 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHErasureCodingStep.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHErasureCodingStep.java @@ -38,8 +38,8 @@ public abstract class HHErasureCodingStep /** * Constructor given input blocks and output blocks. * - * @param inputBlocks - * @param outputBlocks + * @param inputBlocks inputBlocks. + * @param outputBlocks outputBlocks. */ public HHErasureCodingStep(ECBlock[] inputBlocks, ECBlock[] outputBlocks) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureDecodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureDecodingStep.java index 16a3c0fa61c4b..4d594f476dfd9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureDecodingStep.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureDecodingStep.java @@ -43,9 +43,9 @@ public class HHXORErasureDecodingStep extends HHErasureCodingStep { /** * The constructor with all the necessary info. - * @param inputBlocks + * @param inputBlocks inputBlocks. * @param erasedIndexes the indexes of erased blocks in inputBlocks array - * @param outputBlocks + * @param outputBlocks outputBlocks. 
* @param rawDecoder underlying RS decoder for hitchhiker decoding * @param rawEncoder underlying XOR encoder for hitchhiker decoding */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureEncodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureEncodingStep.java index 6a5644270117b..f571e932b6a85 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureEncodingStep.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/HHXORErasureEncodingStep.java @@ -40,8 +40,8 @@ public class HHXORErasureEncodingStep extends HHErasureCodingStep { /** * The constructor with all the necessary info. * - * @param inputBlocks - * @param outputBlocks + * @param inputBlocks inputBlocks. + * @param outputBlocks outputBlocks. * @param rsRawEncoder underlying RS encoder for hitchhiker encoding * @param xorRawEncoder underlying XOR encoder for hitchhiker encoding */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/XORErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/XORErasureDecoder.java index 1a0e5c030e070..9aae5e43c86f7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/XORErasureDecoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/XORErasureDecoder.java @@ -53,7 +53,7 @@ protected ErasureCodingStep prepareDecodingStep( /** * Which blocks were erased ? For XOR it's simple we only allow and return one * erased block, either data or parity. - * @param blockGroup + * @param blockGroup blockGroup. 
* @return output blocks to recover */ @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/util/HHUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/util/HHUtil.java index 91d02415bfd93..7f771c9677da8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/util/HHUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/util/HHUtil.java @@ -202,6 +202,8 @@ public static ByteBuffer getPiggyBackForDecode(ByteBuffer[][] inputs, /** * Find the valid input from all the inputs. + * + * @param <T> Generics Type T. * @param inputs input buffers to look for valid input * @return the first valid input */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java index 3f1b0c22941bd..0407d16120819 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java @@ -33,7 +33,7 @@ public class BlockGrouper { /** * Set EC schema. - * @param schema + * @param schema schema. */ public void setSchema(ECSchema schema) { this.schema = schema; @@ -41,7 +41,7 @@ public void setSchema(ECSchema schema) { /** * Get EC schema. - * @return + * @return ECSchema. */ protected ECSchema getSchema() { return schema; @@ -67,7 +67,7 @@ public int getRequiredNumParityBlocks() { * Calculating and organizing BlockGroup, to be called by ECManager * @param dataBlocks Data blocks to compute parity blocks against * @param parityBlocks To be computed parity blocks - * @return + * @return ECBlockGroup.
*/ public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks, ECBlock[] parityBlocks) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/DecodingValidator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/DecodingValidator.java new file mode 100644 index 0000000000000..ab9cde18ff1ce --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/DecodingValidator.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.io.erasurecode.ECChunk; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * A utility class to validate decoding. 
+ */ +@InterfaceAudience.Private +public class DecodingValidator { + + private final RawErasureDecoder decoder; + private ByteBuffer buffer; + private int[] newValidIndexes; + private int newErasedIndex; + + public DecodingValidator(RawErasureDecoder decoder) { + this.decoder = decoder; + } + + /** + * Validate outputs decoded from inputs, by decoding an input back from + * the outputs and comparing it with the original one. + * + * For instance, in RS (6, 3), let (d0, d1, d2, d3, d4, d5) be sources + * and (p0, p1, p2) be parities, and assume + * inputs = [d0, null (d1), d2, d3, d4, d5, null (p0), p1, null (p2)]; + * erasedIndexes = [1, 6]; + * outputs = [d1, p0]. + * Then + * 1. Create new inputs, erasedIndexes and outputs for validation so that + * the inputs could contain the decoded outputs, and decode them: + * newInputs = [d1, d2, d3, d4, d5, p0] + * newErasedIndexes = [0] + * newOutputs = [d0'] + * 2. Compare d0 and d0'. The comparison will fail with high probability + * when the initial outputs are wrong. + * + * Note that the input buffers' positions must be the ones where data are + * read: If the input buffers have been processed by a decoder, the buffers' + * positions must be reset before being passed into this method. + * + * This method does not change outputs and erasedIndexes. + * + * @param inputs input buffers used for decoding. The buffers' position + * are moved to the end after this method. + * @param erasedIndexes indexes of erased units used for decoding + * @param outputs decoded output buffers, which are ready to be read after + * the call + * @throws IOException raised on errors performing I/O. 
+ */ + public void validate(ByteBuffer[] inputs, int[] erasedIndexes, + ByteBuffer[] outputs) throws IOException { + markBuffers(outputs); + + try { + ByteBuffer validInput = CoderUtil.findFirstValidInput(inputs); + boolean isDirect = validInput.isDirect(); + int capacity = validInput.capacity(); + int remaining = validInput.remaining(); + + // Init buffer + if (buffer == null || buffer.isDirect() != isDirect + || buffer.capacity() < remaining) { + buffer = allocateBuffer(isDirect, capacity); + } + buffer.clear().limit(remaining); + + // Create newInputs and newErasedIndex for validation + ByteBuffer[] newInputs = new ByteBuffer[inputs.length]; + int count = 0; + for (int i = 0; i < erasedIndexes.length; i++) { + newInputs[erasedIndexes[i]] = outputs[i]; + count++; + } + newErasedIndex = -1; + boolean selected = false; + int numValidIndexes = CoderUtil.getValidIndexes(inputs).length; + for (int i = 0; i < newInputs.length; i++) { + if (count == numValidIndexes) { + break; + } else if (!selected && inputs[i] != null) { + newErasedIndex = i; + newInputs[i] = null; + selected = true; + } else if (newInputs[i] == null) { + newInputs[i] = inputs[i]; + if (inputs[i] != null) { + count++; + } + } + } + + // Keep it for testing + newValidIndexes = CoderUtil.getValidIndexes(newInputs); + + decoder.decode(newInputs, new int[]{newErasedIndex}, + new ByteBuffer[]{buffer}); + + if (!buffer.equals(inputs[newErasedIndex])) { + throw new InvalidDecodingException("Failed to validate decoding"); + } + } finally { + toLimits(inputs); + resetBuffers(outputs); + } + } + + /** + * Validate outputs decoded from inputs, by decoding an input back from + * those outputs and comparing it with the original one. + * @param inputs input buffers used for decoding + * @param erasedIndexes indexes of erased units used for decoding + * @param outputs decoded output buffers + * @throws IOException raised on errors performing I/O. 
+ */ + public void validate(ECChunk[] inputs, int[] erasedIndexes, ECChunk[] outputs) + throws IOException { + ByteBuffer[] newInputs = CoderUtil.toBuffers(inputs); + ByteBuffer[] newOutputs = CoderUtil.toBuffers(outputs); + validate(newInputs, erasedIndexes, newOutputs); + } + + private ByteBuffer allocateBuffer(boolean direct, int capacity) { + if (direct) { + buffer = ByteBuffer.allocateDirect(capacity); + } else { + buffer = ByteBuffer.allocate(capacity); + } + return buffer; + } + + private static void markBuffers(ByteBuffer[] buffers) { + for (ByteBuffer buffer: buffers) { + if (buffer != null) { + buffer.mark(); + } + } + } + + private static void resetBuffers(ByteBuffer[] buffers) { + for (ByteBuffer buffer: buffers) { + if (buffer != null) { + buffer.reset(); + } + } + } + + private static void toLimits(ByteBuffer[] buffers) { + for (ByteBuffer buffer: buffers) { + if (buffer != null) { + buffer.position(buffer.limit()); + } + } + } + + @VisibleForTesting + protected int[] getNewValidIndexes() { + return newValidIndexes; + } + + @VisibleForTesting + protected int getNewErasedIndex() { + return newErasedIndex; + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/InvalidDecodingException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/InvalidDecodingException.java new file mode 100644 index 0000000000000..37869f8eeded0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/InvalidDecodingException.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.io.IOException; + +/** + * Thrown for invalid decoding. + */ +@InterfaceAudience.Private +public class InvalidDecodingException + extends IOException { + private static final long serialVersionUID = 0L; + + public InvalidDecodingException(String description) { + super(description); + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureDecoder.java index 249930ebe3f22..329bf7c3aaf7f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureDecoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureDecoder.java @@ -80,8 +80,9 @@ public RawErasureDecoder(ErasureCoderOptions coderOptions) { * @param erasedIndexes indexes of erased units in the inputs array * @param outputs output buffers to put decoded data into according to * erasedIndexes, ready for read after the call + * @throws IOException raised on errors performing I/O. 
*/ - public void decode(ByteBuffer[] inputs, int[] erasedIndexes, + public synchronized void decode(ByteBuffer[] inputs, int[] erasedIndexes, ByteBuffer[] outputs) throws IOException { ByteBufferDecodingState decodingState = new ByteBufferDecodingState(this, inputs, erasedIndexes, outputs); @@ -117,6 +118,7 @@ public void decode(ByteBuffer[] inputs, int[] erasedIndexes, /** * Perform the real decoding using Direct ByteBuffer. * @param decodingState the decoding state + * @throws IOException raised on errors performing I/O. */ protected abstract void doDecode(ByteBufferDecodingState decodingState) throws IOException; @@ -130,7 +132,7 @@ protected abstract void doDecode(ByteBufferDecodingState decodingState) * erasedIndexes, ready for read after the call * @throws IOException if the decoder is closed. */ - public void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) + public synchronized void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) throws IOException { ByteArrayDecodingState decodingState = new ByteArrayDecodingState(this, inputs, erasedIndexes, outputs); @@ -163,7 +165,7 @@ protected abstract void doDecode(ByteArrayDecodingState decodingState) * erasedIndexes, ready for read after the call * @throws IOException if the decoder is closed */ - public void decode(ECChunk[] inputs, int[] erasedIndexes, + public synchronized void decode(ECChunk[] inputs, int[] erasedIndexes, ECChunk[] outputs) throws IOException { ByteBuffer[] newInputs = CoderUtil.toBuffers(inputs); ByteBuffer[] newOutputs = CoderUtil.toBuffers(outputs); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureEncoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureEncoder.java index 6d2ecd20525f4..d5ccb12c9d6a2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureEncoder.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureEncoder.java @@ -100,7 +100,8 @@ public void encode(ByteBuffer[] inputs, ByteBuffer[] outputs) /** * Perform the real encoding work using direct ByteBuffer. - * @param encodingState the encoding state + * @param encodingState the encoding state. + * @throws IOException raised on errors performing I/O. */ protected abstract void doEncode(ByteBufferEncodingState encodingState) throws IOException; @@ -111,6 +112,7 @@ protected abstract void doEncode(ByteBufferEncodingState encodingState) * @param inputs input buffers to read data from * @param outputs output buffers to put the encoded data into, read to read * after the call + * @throws IOException raised on errors performing I/O. */ public void encode(byte[][] inputs, byte[][] outputs) throws IOException { ByteArrayEncodingState baeState = new ByteArrayEncodingState( @@ -128,6 +130,7 @@ public void encode(byte[][] inputs, byte[][] outputs) throws IOException { * Perform the real encoding work using bytes array, supporting offsets * and lengths. * @param encodingState the encoding state + * @throws IOException raised on errors performing I/O. */ protected abstract void doEncode(ByteArrayEncodingState encodingState) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java index 6de0716174319..90e57201c545b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java @@ -36,6 +36,10 @@ private DumpUtil() { /** * Convert bytes into format like 0x02 02 00 80. * If limit is negative or too large, then all bytes will be converted. 
+ * + * @param bytes bytes. + * @param limit limit. + * @return bytesToHex. */ public static String bytesToHex(byte[] bytes, int limit) { if (limit <= 0 || limit > bytes.length) { @@ -70,8 +74,8 @@ public static void dumpMatrix(byte[] matrix, /** * Print data in hex format in an array of chunks. - * @param header - * @param chunks + * @param header header. + * @param chunks chunks. */ public static void dumpChunks(String header, ECChunk[] chunks) { System.out.println(); @@ -84,7 +88,7 @@ public static void dumpChunks(String header, ECChunk[] chunks) { /** * Print data in hex format in a chunk. - * @param chunk + * @param chunk chunk. */ public static void dumpChunk(ECChunk chunk) { String str; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java index 35534f307a7a0..b48a23f8b7085 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java @@ -195,6 +195,10 @@ public static byte gfInv(byte a) { * Invert a matrix assuming it's invertible. * * Ported from Intel ISA-L library. + * + * @param inMatrix inMatrix. + * @param outMatrix outMatrix. + * @param n n */ public static void gfInvertMatrix(byte[] inMatrix, byte[] outMatrix, int n) { byte temp; @@ -262,7 +266,11 @@ public static void gfInvertMatrix(byte[] inMatrix, byte[] outMatrix, int n) { * * Calculates const table gftbl in GF(2^8) from single input A * gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, - * ... , A{f0} } -- from ISA-L implementation + * ... , A{f0} } -- from ISA-L implementation. + * + * @param c c. + * @param tbl tbl. + * @param offset offset. 
*/ public static void gfVectMulInit(byte c, byte[] tbl, int offset) { byte c2 = (byte) ((c << 1) ^ ((c & 0x80) != 0 ? 0x1d : 0)); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java index f80fceca94c34..6d22ff0f62eb3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java @@ -93,10 +93,11 @@ private GaloisField(int fieldSize, int primitivePolynomial) { } /** - * Get the object performs Galois field arithmetics + * Get the object performs Galois field arithmetics. * * @param fieldSize size of the field * @param primitivePolynomial a primitive polynomial corresponds to the size + * @return GaloisField. */ public static GaloisField getInstance(int fieldSize, int primitivePolynomial) { @@ -114,7 +115,8 @@ public static GaloisField getInstance(int fieldSize, } /** - * Get the object performs Galois field arithmetic with default setting + * Get the object performs Galois field arithmetic with default setting. + * @return GaloisField. */ public static GaloisField getInstance() { return getInstance(DEFAULT_FIELD_SIZE, DEFAULT_PRIMITIVE_POLYNOMIAL); @@ -236,7 +238,13 @@ public void solveVandermondeSystem(int[] x, int[] y, int len) { } /** - * A "bulk" version to the solving of Vandermonde System + * A "bulk" version to the solving of Vandermonde System. + * + * @param x input x. + * @param y input y. + * @param outputOffsets input outputOffsets. + * @param len input len. + * @param dataLen input dataLen. 
*/ public void solveVandermondeSystem(int[] x, byte[][] y, int[] outputOffsets, int len, int dataLen) { @@ -269,6 +277,10 @@ public void solveVandermondeSystem(int[] x, byte[][] y, int[] outputOffsets, /** * A "bulk" version of the solveVandermondeSystem, using ByteBuffer. + * + * @param x input x. + * @param y input y. + * @param len input len. */ public void solveVandermondeSystem(int[] x, ByteBuffer[] y, int len) { ByteBuffer p; @@ -413,10 +425,10 @@ public void substitute(byte[][] p, byte[] q, int x) { * Tends to be 2X faster than the "int" substitute in a loop. * * @param p input polynomial - * @param offsets - * @param len + * @param offsets input offset. + * @param len input len. * @param q store the return result - * @param offset + * @param offset input offset. * @param x input field */ public void substitute(byte[][] p, int[] offsets, @@ -440,6 +452,7 @@ public void substitute(byte[][] p, int[] offsets, * @param p input polynomial * @param q store the return result * @param x input field + * @param len input len. */ public void substitute(ByteBuffer[] p, int len, ByteBuffer q, int x) { int y = 1, iIdx, oIdx; @@ -459,6 +472,9 @@ public void substitute(ByteBuffer[] p, int len, ByteBuffer q, int x) { /** * The "bulk" version of the remainder. * Warning: This function will modify the "dividend" inputs. + * + * @param divisor divisor. + * @param dividend dividend. */ public void remainder(byte[][] dividend, int[] divisor) { for (int i = dividend.length - divisor.length; i >= 0; i--) { @@ -476,6 +492,11 @@ public void remainder(byte[][] dividend, int[] divisor) { /** * The "bulk" version of the remainder. * Warning: This function will modify the "dividend" inputs. + * + * @param dividend dividend. + * @param offsets offsets. + * @param len len. + * @param divisor divisor. 
*/ public void remainder(byte[][] dividend, int[] offsets, int len, int[] divisor) { @@ -497,6 +518,9 @@ public void remainder(byte[][] dividend, int[] offsets, /** * The "bulk" version of the remainder, using ByteBuffer. * Warning: This function will modify the "dividend" inputs. + * + * @param dividend dividend. + * @param divisor divisor. */ public void remainder(ByteBuffer[] dividend, int[] divisor) { int idx1, idx2; @@ -519,6 +543,8 @@ public void remainder(ByteBuffer[] dividend, int[] divisor) { /** * Perform Gaussian elimination on the given matrix. This matrix has to be a * fat matrix (number of rows > number of columns). + * + * @param matrix matrix. */ public void gaussianElimination(int[][] matrix) { assert(matrix != null && matrix.length > 0 && matrix[0].length > 0 diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java index 43823d0f8c300..b1fdc82a11628 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java @@ -59,6 +59,10 @@ public static void initTables(int k, int rows, byte[] codingMatrix, /** * Ported from Intel ISA-L library. + * + * @param k k. + * @param a a. + * @param m m. */ public static void genCauchyMatrix(byte[] a, int m, int k) { // Identity matrix in high position @@ -82,6 +86,13 @@ public static void genCauchyMatrix(byte[] a, int m, int k) { * * The algorithm is ported from Intel ISA-L library for compatible. It * leverages Java auto-vectorization support for performance. + * + * @param gfTables gfTables. + * @param dataLen dataLen. + * @param inputs inputs. + * @param inputOffsets inputOffsets. + * @param outputs outputs. + * @param outputOffsets outputOffsets. 
*/ public static void encodeData(byte[] gfTables, int dataLen, byte[][] inputs, int[] inputOffsets, byte[][] outputs, @@ -133,6 +144,10 @@ public static void encodeData(byte[] gfTables, int dataLen, byte[][] inputs, /** * See above. Try to use the byte[] version when possible. + * + * @param gfTables gfTables. + * @param inputs inputs. + * @param outputs outputs. */ public static void encodeData(byte[] gfTables, ByteBuffer[] inputs, ByteBuffer[] outputs) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/ByteArray.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/ByteArray.java index c6c8b3fe3e1fe..964fb04c1b976 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/ByteArray.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/ByteArray.java @@ -35,7 +35,7 @@ public final class ByteArray implements RawComparable { /** * Constructing a ByteArray from a {@link BytesWritable}. * - * @param other + * @param other other. 
*/ public ByteArray(BytesWritable other) { this(other.getBytes(), 0, other.getLength()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java index c4347e0c1afab..6eee025a2339d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java @@ -24,7 +24,7 @@ import java.io.OutputStream; import java.util.ArrayList; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFile.java index 09cd2825e3cf2..aeacc16a78f9b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/TFile.java @@ -276,7 +276,7 @@ private enum State { * * @param conf * The configuration object. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Writer(FSDataOutputStream fsdos, int minBlockSize, String compressName, String comparator, Configuration conf) @@ -350,7 +350,7 @@ public void close() throws IOException { * Buffer for key. * @param value * Buffer for value. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void append(byte[] key, byte[] value) throws IOException { append(key, 0, key.length, value, 0, value.length); @@ -521,7 +521,7 @@ public void close() throws IOException { * exactly as many bytes as specified here before calling close on * the returned output stream. * @return The key appending output stream. - * @throws IOException + * @throws IOException raised on errors performing I/O. * */ public DataOutputStream prepareAppendKey(int length) throws IOException { @@ -548,8 +548,8 @@ public DataOutputStream prepareAppendKey(int length) throws IOException { * the returned output stream. Advertising the value size up-front * guarantees that the value is encoded in one chunk, and avoids * intermediate chunk buffering. - * @throws IOException - * + * @throws IOException raised on errors performing I/O. + * @return DataOutputStream. */ public DataOutputStream prepareAppendValue(int length) throws IOException { if (state != State.END_KEY) { @@ -588,7 +588,7 @@ public DataOutputStream prepareAppendValue(int length) throws IOException { * {@link TFile#getSupportedCompressionAlgorithms()}. * @return A DataOutputStream that can be used to write Meta Block data. * Closing the stream would signal the ending of the block. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws MetaBlockAlreadyExists * the Meta Block with the same name already exists. */ @@ -616,7 +616,7 @@ public DataOutputStream prepareMetaBlock(String name, String compressName) * Name of the meta block. * @return A DataOutputStream that can be used to write Meta Block data. * Closing the stream would signal the ending of the block. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws MetaBlockAlreadyExists * the Meta Block with the same name already exists. */ @@ -796,8 +796,8 @@ public boolean equals(Object obj) { * The length of TFile. 
This is required because we have no easy * way of knowing the actual size of the input file through the * File input stream. - * @param conf - * @throws IOException + * @param conf configuration. + * @throws IOException raised on errors performing I/O. */ public Reader(FSDataInputStream fsdis, long fileLength, Configuration conf) throws IOException { @@ -896,7 +896,7 @@ synchronized void checkTFileDataIndex() throws IOException { * Get the first key in the TFile. * * @return The first key in the TFile. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public RawComparable getFirstKey() throws IOException { checkTFileDataIndex(); @@ -907,7 +907,7 @@ public RawComparable getFirstKey() throws IOException { * Get the last key in the TFile. * * @return The last key in the TFile. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public RawComparable getLastKey() throws IOException { checkTFileDataIndex(); @@ -1043,7 +1043,7 @@ Location getLocationNear(long offset) { * the user supplied offset. * @return the RecordNum to the corresponding entry. If no such entry * exists, it returns the total entry count. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public long getRecordNumNear(long offset) throws IOException { return getRecordNumByLocation(getLocationNear(offset)); @@ -1058,7 +1058,7 @@ public long getRecordNumNear(long offset) throws IOException { * @return the key that fits the requirement; or null if no such key exists * (which could happen if the offset is close to the end of the * TFile). - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public RawComparable getKeyNear(long offset) throws IOException { int blockIndex = readerBCF.getBlockIndexNear(offset); @@ -1072,7 +1072,7 @@ public RawComparable getKeyNear(long offset) throws IOException { * * @return The scanner object. 
A valid Scanner is always returned even if * the TFile is empty. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Scanner createScanner() throws IOException { return new Scanner(this, begin, end); @@ -1089,7 +1089,7 @@ public Scanner createScanner() throws IOException { * specified byte-region but always round up to the compression * block boundaries. It is possible that the returned scanner * contains zero key-value pairs even if length is positive. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Scanner createScannerByByteRange(long offset, long length) throws IOException { return new Scanner(this, offset, offset + length); @@ -1106,7 +1106,7 @@ public Scanner createScannerByByteRange(long offset, long length) throws IOExcep * key-value entry of the TFile. * @return The actual coverage of the returned scanner will cover all keys * greater than or equal to the beginKey and less than the endKey. - * @throws IOException + * @throws IOException raised on errors performing I/O. * * @deprecated Use {@link #createScannerByKey(byte[], byte[])} instead. */ @@ -1127,7 +1127,7 @@ public Scanner createScanner(byte[] beginKey, byte[] endKey) * key-value entry of the TFile. * @return The actual coverage of the returned scanner will cover all keys * greater than or equal to the beginKey and less than the endKey. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Scanner createScannerByKey(byte[] beginKey, byte[] endKey) throws IOException { @@ -1147,7 +1147,7 @@ public Scanner createScannerByKey(byte[] beginKey, byte[] endKey) * key-value entry of the TFile. * @return The actual coverage of the returned scanner will cover all keys * greater than or equal to the beginKey and less than the endKey. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
* * @deprecated Use {@link #createScannerByKey(RawComparable, RawComparable)} * instead. @@ -1169,7 +1169,7 @@ public Scanner createScanner(RawComparable beginKey, RawComparable endKey) * key-value entry of the TFile. * @return The actual coverage of the returned scanner will cover all keys * greater than or equal to the beginKey and less than the endKey. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Scanner createScannerByKey(RawComparable beginKey, RawComparable endKey) throws IOException { @@ -1189,7 +1189,7 @@ public Scanner createScannerByKey(RawComparable beginKey, RawComparable endKey) * The RecordNum for the last record (exclusive). To scan the whole * file, either specify endRecNum==-1 or endRecNum==getEntryCount(). * @return The TFile scanner that covers the specified range of records. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Scanner createScannerByRecordNum(long beginRecNum, long endRecNum) throws IOException { @@ -1313,7 +1313,7 @@ protected Scanner(Reader reader, long offBegin, long offEnd) * @param endKey * End key of the scan. If null, scan up to the last <K, V> * entry of the TFile. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected Scanner(Reader reader, RawComparable beginKey, RawComparable endKey) throws IOException { @@ -1338,7 +1338,7 @@ protected Scanner(Reader reader, RawComparable beginKey, * @param key * The input key * @return true if we find an equal key. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public boolean seekTo(byte[] key) throws IOException { return seekTo(key, 0, key.length); @@ -1356,7 +1356,7 @@ public boolean seekTo(byte[] key) throws IOException { * @param keyLen * key buffer length. * @return true if we find an equal key; false otherwise. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public boolean seekTo(byte[] key, int keyOffset, int keyLen) throws IOException { @@ -1432,7 +1432,7 @@ private void seekTo(Location l) throws IOException { * Rewind to the first entry in the scanner. The entry returned by the * previous entry() call will be invalid. * - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void rewind() throws IOException { seekTo(beginLocation); @@ -1442,7 +1442,7 @@ public void rewind() throws IOException { * Seek to the end of the scanner. The entry returned by the previous * entry() call will be invalid. * - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void seekToEnd() throws IOException { parkCursorAtEnd(); @@ -1455,7 +1455,7 @@ public void seekToEnd() throws IOException { * * @param key * The input key - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void lowerBound(byte[] key) throws IOException { lowerBound(key, 0, key.length); @@ -1472,7 +1472,7 @@ public void lowerBound(byte[] key) throws IOException { * offset in the key buffer. * @param keyLen * key buffer length. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void lowerBound(byte[] key, int keyOffset, int keyLen) throws IOException { @@ -1486,7 +1486,7 @@ public void lowerBound(byte[] key, int keyOffset, int keyLen) * * @param key * The input key - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void upperBound(byte[] key) throws IOException { upperBound(key, 0, key.length); @@ -1503,7 +1503,7 @@ public void upperBound(byte[] key) throws IOException { * offset in the key buffer. * @param keyLen * key buffer length. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void upperBound(byte[] key, int keyOffset, int keyLen) throws IOException { @@ -1516,7 +1516,7 @@ public void upperBound(byte[] key, int keyOffset, int keyLen) * * @return true if the cursor successfully moves. False when cursor is * already at the end location and cannot be advanced. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public boolean advance() throws IOException { if (atEnd()) { @@ -1614,7 +1614,7 @@ void checkKey() throws IOException { * Get an entry to access the key and value. * * @return The Entry object to access the key and value. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public Entry entry() throws IOException { checkKey(); @@ -1624,7 +1624,7 @@ public Entry entry() throws IOException { /** * Get the RecordNum corresponding to the entry pointed by the cursor. * @return The RecordNum corresponding to the entry pointed by the cursor. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public long getRecordNum() throws IOException { return reader.getRecordNumByLocation(currentLocation); @@ -1670,7 +1670,7 @@ byte[] getKeyBuffer() { * BytesWritable to hold key. * @param value * BytesWritable to hold value - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void get(BytesWritable key, BytesWritable value) throws IOException { @@ -1684,7 +1684,8 @@ public void get(BytesWritable key, BytesWritable value) * * @param key * BytesWritable to hold the key. - * @throws IOException + * @throws IOException raised on errors performing I/O. + * @return the key into BytesWritable. */ public int getKey(BytesWritable key) throws IOException { key.setSize(getKeyLength()); @@ -1698,8 +1699,9 @@ public int getKey(BytesWritable key) throws IOException { * directly uses the buffer inside BytesWritable for storing the value. * The call does not require the value length to be known. 
* - * @param value - * @throws IOException + * @param value value. + * @throws IOException raised on errors performing I/O. + * @return long value. */ public long getValue(BytesWritable value) throws IOException { DataInputStream dis = getValueStream(); @@ -1725,7 +1727,7 @@ public long getValue(BytesWritable value) throws IOException { * @param out * The output stream * @return the length of the key. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public int writeKey(OutputStream out) throws IOException { out.write(keyBuffer, 0, klen); @@ -1740,7 +1742,7 @@ public int writeKey(OutputStream out) throws IOException { * @param out * The output stream * @return the length of the value - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public long writeValue(OutputStream out) throws IOException { DataInputStream dis = getValueStream(); @@ -1768,7 +1770,7 @@ public long writeValue(OutputStream out) throws IOException { * not be shorter than the key length. * @return The length of the key. * - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public int getKey(byte[] buf) throws IOException { return getKey(buf, 0); @@ -1784,7 +1786,7 @@ public int getKey(byte[] buf) throws IOException { * the key into. Requiring the key-length + offset no greater * than the buffer length. * @return The length of the key. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public int getKey(byte[] buf, int offset) throws IOException { if ((offset | (buf.length - offset - klen)) < 0) { @@ -1828,10 +1830,11 @@ public int getValueLength() { * without moving the cursor will result in exception: * {@link #getValue(byte[])}, {@link #getValue(byte[], int)}, * {@link #getValueStream}. - * + * + * @param buf buf. * @return the length of the value. Does not require * isValueLengthKnown() to be true. 
- * @throws IOException + * @throws IOException raised on errors performing I/O. * */ public int getValue(byte[] buf) throws IOException { @@ -1846,10 +1849,12 @@ public int getValue(byte[] buf) throws IOException { * functions more than once without moving the cursor will result in * exception: {@link #getValue(byte[])}, {@link #getValue(byte[], int)}, * {@link #getValueStream}. - * + * + * @param buf buf. + * @param offset offset. * @return the length of the value. Does not require * isValueLengthKnown() to be true. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public int getValue(byte[] buf, int offset) throws IOException { DataInputStream dis = getValueStream(); @@ -1892,7 +1897,7 @@ public int getValue(byte[] buf, int offset) throws IOException { * {@link #getValue(byte[], int)}, {@link #getValueStream}. * * @return The input stream for reading the value. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public DataInputStream getValueStream() throws IOException { if (valueChecked == true) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Utils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Utils.java index 17a27f16b9a4a..714dc5a12acd2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Utils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Utils.java @@ -49,7 +49,7 @@ private Utils() { * output stream * @param n * The integer to be encoded - * @throws IOException + * @throws IOException raised on errors performing I/O. 
* @see Utils#writeVLong(DataOutput, long) */ public static void writeVInt(DataOutput out, int n) throws IOException { @@ -95,7 +95,7 @@ public static void writeVInt(DataOutput out, int n) throws IOException { * output stream * @param n * the integer number - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @SuppressWarnings("fallthrough") public static void writeVLong(DataOutput out, long n) throws IOException { @@ -170,7 +170,7 @@ public static void writeVLong(DataOutput out, long n) throws IOException { * @param in * input stream * @return the decoded integer - * @throws IOException + * @throws IOException raised on errors performing I/O. * * @see Utils#readVLong(DataInput) */ @@ -199,7 +199,7 @@ public static int readVInt(DataInput in) throws IOException { * @param in * input stream * @return the decoded long integer. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static long readVLong(DataInput in) throws IOException { @@ -249,9 +249,9 @@ public static long readVLong(DataInput in) throws IOException { /** * Write a String as a VInt n, followed by n Bytes as in Text format. * - * @param out - * @param s - * @throws IOException + * @param out out. + * @param s s. + * @throws IOException raised on errors performing I/O. */ public static void writeString(DataOutput out, String s) throws IOException { if (s != null) { @@ -271,7 +271,7 @@ public static void writeString(DataOutput out, String s) throws IOException { * @param in * The input stream. * @return The string - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static String readString(DataInput in) throws IOException { int length = readVInt(in); @@ -299,7 +299,7 @@ public static final class Version implements Comparable { * * @param in * input stream - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public Version(DataInput in) throws IOException { major = in.readShort(); @@ -326,7 +326,7 @@ public Version(short major, short minor) { * * @param out * The DataOutput object. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void write(DataOutput out) throws IOException { out.writeShort(major); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java index 160b8e029e56b..63620f21063df 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java @@ -47,7 +47,7 @@ import org.slf4j.LoggerFactory; import sun.misc.Unsafe; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * JNI wrappers for various native IO-related calls not available in Java. @@ -355,7 +355,7 @@ public boolean verifyCanMlock() { } /** - * Return true if the JNI-based native IO extensions are available. + * @return Return true if the JNI-based native IO extensions are available. */ public static boolean isAvailable() { return NativeCodeLoader.isNativeCodeLoaded() && nativeLoaded; @@ -367,7 +367,14 @@ private static void assertCodeLoaded() throws IOException { } } - /** Wrapper around open(2) */ + /** + * Wrapper around open(2) . + * @param path input path. + * @param flags input flags. + * @param mode input mode. + * @return FileDescriptor. + * @throws IOException raised on errors performing I/O. 
+ */ public static native FileDescriptor open(String path, int flags, int mode) throws IOException; /** Wrapper around fstat(2) */ private static native Stat fstat(FileDescriptor fd) throws IOException; @@ -428,6 +435,10 @@ static void posixFadviseIfPossible(String identifier, * for this syscall for more information. On systems where this * call is not available, does nothing. * + * @param fd input fd. + * @param offset input offset. + * @param nbytes input nbytes. + * @param flags input flag. * @throws NativeIOException if there is an error with the syscall */ public static void syncFileRangeIfPossible( @@ -712,7 +723,14 @@ public static void createDirectoryWithMode(File path, int mode) private static native void createDirectoryWithMode0(String path, int mode) throws NativeIOException; - /** Wrapper around CreateFile() on Windows */ + /** + * @return Wrapper around CreateFile() on Windows. + * @param path input path. + * @param desiredAccess input desiredAccess. + * @param shareMode input shareMode. + * @param creationDisposition input creationDisposition. + * @throws IOException raised on errors performing I/O. + */ public static native FileDescriptor createFile(String path, long desiredAccess, long shareMode, long creationDisposition) throws IOException; @@ -749,7 +767,13 @@ private static native FileDescriptor createFileWithMode0(String path, long desiredAccess, long shareMode, long creationDisposition, int mode) throws NativeIOException; - /** Wrapper around SetFilePointer() on Windows */ + /** + * @return Wrapper around SetFilePointer() on Windows. + * @param fd input fd. + * @param distanceToMove input distanceToMove. + * @param moveMethod input moveMethod. + * @throws IOException raised on errors performing I/O. 
+ */ public static native long setFilePointer(FileDescriptor fd, long distanceToMove, long moveMethod) throws IOException; @@ -840,7 +864,7 @@ public static boolean access(String path, AccessRight desiredAccess) } /** - * Return true if the JNI-based native IO extensions are available. + * @return Return true if the JNI-based native IO extensions are available. */ public static boolean isAvailable() { return NativeCodeLoader.isNativeCodeLoaded() && nativeLoaded; @@ -898,6 +922,7 @@ public CachedUid(String username, long timestamp) { * * @param name the full principal name containing the domain * @return name with domain removed + * @throws IOException raised on errors performing I/O. */ private static String stripDomain(String name) { int i = name.indexOf('\\'); @@ -933,6 +958,11 @@ public static String getOwner(FileDescriptor fd) throws IOException { * file opened at a given offset, i.e. other process can delete * the file the FileDescriptor is reading. Only Windows implementation * uses the native interface. + * + * @param f input f. + * @param seekOffset input seekOffset. + * @return FileDescriptor. + * @throws IOException raised on errors performing I/O. */ public static FileDescriptor getShareDeleteFileDescriptor( File f, long seekOffset) throws IOException { @@ -961,7 +991,7 @@ public static FileDescriptor getShareDeleteFileDescriptor( } /** - * Create the specified File for write access, ensuring that it does not exist. + * @return Create the specified File for write access, ensuring that it does not exist. * @param f the file that we want to create * @param permissions we want to have on the file (if security is enabled) * @@ -1045,7 +1075,7 @@ public static void renameTo(File src, File dst) * * @param src source file * @param dst hardlink location - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Deprecated public static void link(File src, File dst) throws IOException { @@ -1103,7 +1133,7 @@ private static native void link0(String src, String dst) * * @param src The source path * @param dst The destination path - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void copyFileUnbuffered(File src, File dst) throws IOException { if (nativeLoaded && Shell.WINDOWS) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/AsyncCallHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/AsyncCallHandler.java index 69e1233e16b07..b9129617c8964 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/AsyncCallHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/AsyncCallHandler.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.io.retry; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.ipc.Client; @@ -49,7 +49,11 @@ public class AsyncCallHandler { private static final ThreadLocal> ASYNC_RETURN = new ThreadLocal<>(); - /** @return the async return value from {@link AsyncCallHandler}. */ + /** + * @return the async return value from {@link AsyncCallHandler}. + * @param T. + * @param R. + */ @InterfaceStability.Unstable @SuppressWarnings("unchecked") public static AsyncGet getAsyncReturn() { @@ -62,7 +66,10 @@ public static AsyncGet getAsyncReturn() { } } - /** For the lower rpc layers to set the async return value. */ + /** + * For the lower rpc layers to set the async return value. + * @param asyncReturn asyncReturn. 
+ */ @InterfaceStability.Unstable public static void setLowerLayerAsyncReturn( AsyncGet asyncReturn) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/CallReturn.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/CallReturn.java index 022b78507fcee..7ccd6deb7f913 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/CallReturn.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/CallReturn.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.io.retry; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** The call return from a method invocation. */ class CallReturn { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index 64824a15cd89c..a0cf8b2ee0da9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.io.retry; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.retry.FailoverProxyProvider.ProxyInfo; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; @@ -35,6 +35,7 @@ import java.lang.reflect.Proxy; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.Map; /** @@ -312,6 +313,8 @@ public String toString() { private volatile boolean hasSuccessfulCall = false; + private HashSet failedAtLeastOnce = new HashSet<>(); + private final 
RetryPolicy defaultPolicy; private final Map methodNameToPolicyMap; @@ -384,28 +387,36 @@ private RetryInfo handleException(final Method method, final int callId, throw retryInfo.getFailException(); } - log(method, retryInfo.isFailover(), counters.failovers, retryInfo.delay, e); + log(method, retryInfo.isFailover(), counters.failovers, counters.retries, retryInfo.delay, e); return retryInfo; } - private void log(final Method method, final boolean isFailover, - final int failovers, final long delay, final Exception ex) { - // log info if this has made some successful calls or - // this is not the first failover - final boolean info = hasSuccessfulCall || failovers != 0 - || asyncCallHandler.hasSuccessfulCall(); - if (!info && !LOG.isDebugEnabled()) { - return; + private void log(final Method method, final boolean isFailover, final int failovers, + final int retries, final long delay, final Exception ex) { + boolean info = true; + // If this is the first failover to this proxy, skip logging at INFO level + if (!failedAtLeastOnce.contains(proxyDescriptor.getProxyInfo().toString())) + { + failedAtLeastOnce.add(proxyDescriptor.getProxyInfo().toString()); + + // If successful calls were made to this proxy, log info even for first + // failover + info = hasSuccessfulCall || asyncCallHandler.hasSuccessfulCall(); + if (!info && !LOG.isDebugEnabled()) { + return; + } } final StringBuilder b = new StringBuilder() - .append(ex + ", while invoking ") + .append(ex) + .append(", while invoking ") .append(proxyDescriptor.getProxyInfo().getString(method.getName())); if (failovers > 0) { b.append(" after ").append(failovers).append(" failover attempts"); } b.append(isFailover? ". Trying to failover ": ". Retrying "); b.append(delay > 0? 
"after sleeping for " + delay + "ms.": "immediately."); + b.append(" Current retry count: ").append(retries).append("."); if (info) { LOG.info(b.toString()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java index fcbcc868cf6dd..27fa2eedcfcbd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -43,7 +43,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.ietf.jgss.GSSException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,6 +75,10 @@ public class RetryPolicies { *

    * Keep trying forever with a fixed time between attempts. *

    + * + * @param sleepTime sleepTime. + * @param timeUnit timeUnit. + * @return RetryPolicy. */ public static final RetryPolicy retryForeverWithFixedSleep(long sleepTime, TimeUnit timeUnit) { @@ -87,6 +91,11 @@ public static final RetryPolicy retryForeverWithFixedSleep(long sleepTime, * Keep trying a limited number of times, waiting a fixed time between attempts, * and then fail by re-throwing the exception. *

    + * + * @param maxRetries maxRetries. + * @param sleepTime sleepTime. + * @param timeUnit timeUnit. + * @return RetryPolicy. */ public static final RetryPolicy retryUpToMaximumCountWithFixedSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { return new RetryUpToMaximumCountWithFixedSleep(maxRetries, sleepTime, timeUnit); @@ -97,6 +106,11 @@ public static final RetryPolicy retryUpToMaximumCountWithFixedSleep(int maxRetri * Keep trying for a maximum time, waiting a fixed time between attempts, * and then fail by re-throwing the exception. *

    + * + * @param timeUnit timeUnit. + * @param sleepTime sleepTime. + * @param maxTime maxTime. + * @return RetryPolicy. */ public static final RetryPolicy retryUpToMaximumTimeWithFixedSleep(long maxTime, long sleepTime, TimeUnit timeUnit) { return new RetryUpToMaximumTimeWithFixedSleep(maxTime, sleepTime, timeUnit); @@ -108,6 +122,11 @@ public static final RetryPolicy retryUpToMaximumTimeWithFixedSleep(long maxTime, * and then fail by re-throwing the exception. * The time between attempts is sleepTime mutliplied by the number of tries so far. *

    + * + * @param sleepTime sleepTime. + * @param maxRetries maxRetries. + * @param timeUnit timeUnit. + * @return RetryPolicy. */ public static final RetryPolicy retryUpToMaximumCountWithProportionalSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { return new RetryUpToMaximumCountWithProportionalSleep(maxRetries, sleepTime, timeUnit); @@ -120,6 +139,12 @@ public static final RetryPolicy retryUpToMaximumCountWithProportionalSleep(int m * The time between attempts is sleepTime mutliplied by a random * number in the range of [0, 2 to the number of retries) *

    + * + * + * @param timeUnit timeUnit. + * @param maxRetries maxRetries. + * @param sleepTime sleepTime. + * @return RetryPolicy. */ public static final RetryPolicy exponentialBackoffRetry( int maxRetries, long sleepTime, TimeUnit timeUnit) { @@ -130,6 +155,10 @@ public static final RetryPolicy exponentialBackoffRetry( *

    * Set a default policy with some explicit handlers for specific exceptions. *

    + * + * @param exceptionToPolicyMap exceptionToPolicyMap. + * @param defaultPolicy defaultPolicy. + * @return RetryPolicy. */ public static final RetryPolicy retryByException(RetryPolicy defaultPolicy, Map, RetryPolicy> exceptionToPolicyMap) { @@ -141,6 +170,10 @@ public static final RetryPolicy retryByException(RetryPolicy defaultPolicy, * A retry policy for RemoteException * Set a default policy with some explicit handlers for specific exceptions. *

    + * + * @param defaultPolicy defaultPolicy. + * @param exceptionToPolicyMap exceptionToPolicyMap. + * @return RetryPolicy. */ public static final RetryPolicy retryByRemoteException( RetryPolicy defaultPolicy, @@ -150,6 +183,9 @@ public static final RetryPolicy retryByRemoteException( /** * A retry policy for exceptions other than RemoteException. + * @param defaultPolicy defaultPolicy. + * @param exceptionToPolicyMap exceptionToPolicyMap. + * @return RetryPolicy. */ public static final RetryPolicy retryOtherThanRemoteException( RetryPolicy defaultPolicy, @@ -437,6 +473,7 @@ public String toString() { * where t_i and n_i are the i-th pair of sleep time and number of retries. * Note that the white spaces in the string are ignored. * + * @param s input string. * @return the parsed object, or null if the parsing fails. */ public static MultipleLinearRandomRetry parseCommaSeparatedString(String s) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryProxy.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryProxy.java index 7fcd5fd4b0080..eaff5bbd528d8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryProxy.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryProxy.java @@ -34,6 +34,7 @@ public class RetryProxy { * @param iface the interface that the retry will implement * @param implementation the instance whose methods should be retried * @param retryPolicy the policy for retrying method call failures + * @param T. 
* @return the retry proxy */ public static Object create(Class iface, T implementation, @@ -51,6 +52,7 @@ public static Object create(Class iface, T implementation, * @param iface the interface that the retry will implement * @param proxyProvider provides implementation instances whose methods should be retried * @param retryPolicy the policy for retrying or failing over method call failures + * @param T. * @return the retry proxy */ public static Object create(Class iface, @@ -69,6 +71,7 @@ public static Object create(Class iface, * {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. * * @param iface the interface that the retry will implement + * @param T. * @param implementation the instance whose methods should be retried * @param methodNameToPolicyMap a map of method names to retry policies * @return the retry proxy @@ -90,6 +93,8 @@ public static Object create(Class iface, T implementation, * @param iface the interface that the retry will implement * @param proxyProvider provides implementation instances whose methods should be retried * @param methodNameToPolicyMap map of method names to retry policies + * @param defaultPolicy defaultPolicy. + * @param T. * @return the retry proxy */ public static Object create(Class iface, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryUtils.java index c035a42d4a751..d2fb070ee2c25 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryUtils.java @@ -48,7 +48,7 @@ public class RetryUtils { * - non-IOException. * * - * @param conf + * @param conf configuration. 
* @param retryPolicyEnabledKey conf property key for enabling retry * @param defaultRetryPolicyEnabled default retryPolicyEnabledKey conf value * @param retryPolicySpecKey conf property key for retry policy spec @@ -168,7 +168,7 @@ public String toString() { * Retry policy spec: * N pairs of sleep-time and number-of-retries "s1,n1,s2,n2,..." * - * @param conf + * @param conf configuration. * @param retryPolicyEnabledKey conf property key for enabling retry * @param defaultRetryPolicyEnabled default retryPolicyEnabledKey conf value * @param retryPolicySpecKey conf property key for retry policy spec diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Deserializer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Deserializer.java index 3c8dfccafa8bb..4bdd60d90c382 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Deserializer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Deserializer.java @@ -35,13 +35,15 @@ * other producers may read from the input between calls to * {@link #deserialize(Object)}. *

    - * @param + * @param generic type. */ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public interface Deserializer { /** *

    Prepare the deserializer for reading.

    + * @param in input stream. + * @throws IOException raised on errors performing I/O. */ void open(InputStream in) throws IOException; @@ -53,12 +55,15 @@ public interface Deserializer { * stream. Otherwise, if the object t is null a new * deserialized object will be created. *

    + * @param t t. * @return the deserialized object + * @throws IOException raised on errors performing I/O. */ T deserialize(T t) throws IOException; /** *

    Close the underlying input stream and clear up any resources.

    + * @throws IOException raised on errors performing I/O. */ void close() throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/DeserializerComparator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/DeserializerComparator.java index 05205c5523cc6..29c04f66d4370 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/DeserializerComparator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/DeserializerComparator.java @@ -37,7 +37,7 @@ * implementation of {@link RawComparator} that operates directly * on byte representations. *

    - * @param + * @param generic type. */ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java index f9bf692f1fcc8..d53f7ab75c503 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java @@ -31,7 +31,7 @@ * {@link Deserializer} to deserialize objects that are then compared via * their {@link Comparable} interfaces. *

    - * @param + * @param generic type. * @see JavaSerialization */ @InterfaceAudience.Public diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serialization.java index 6f2097f7bf9da..0793dc1ca0184 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serialization.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serialization.java @@ -25,7 +25,7 @@ *

    * Encapsulates a {@link Serializer}/{@link Deserializer} pair. *

    - * @param + * @param generic type. */ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving @@ -34,16 +34,21 @@ public interface Serialization { /** * Allows clients to test whether this {@link Serialization} * supports the given class. + * + * @param c class. + * @return if accept true,not false. */ boolean accept(Class c); /** * @return a {@link Serializer} for the given class. + * @param c class. */ Serializer getSerializer(Class c); /** * @return a {@link Deserializer} for the given class. + * @param c class. */ Deserializer getDeserializer(Class c); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/SerializationFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/SerializationFactory.java index ce0c3fe398eed..b531ae85233e8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/SerializationFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/SerializationFactory.java @@ -52,6 +52,8 @@ public class SerializationFactory extends Configured { * property from conf, which is a comma-delimited list of * classnames. *

    + * + * @param conf configuration. */ public SerializationFactory(Configuration conf) { super(conf); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serializer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serializer.java index 5ada541370ee0..c44b3678fc3be 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serializer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/Serializer.java @@ -35,23 +35,28 @@ * other producers may write to the output between calls to * {@link #serialize(Object)}. *

    - * @param + * @param generic type. */ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public interface Serializer { /** *

    Prepare the serializer for writing.

    + * @param out output stream. + * @throws IOException raised on errors performing I/O. */ void open(OutputStream out) throws IOException; /** *

    Serialize t to the underlying output stream.

    + * @param t t. + * @throws IOException raised on errors performing I/O. */ void serialize(T t) throws IOException; /** *

    Close the underlying output stream and clear up any resources.

    + * @throws IOException raised on errors performing I/O. */ void close() throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java index f340cb3a98a44..2327fd2d55a2e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java @@ -61,18 +61,24 @@ public Serializer getSerializer(Class c) { /** * Return an Avro Schema instance for the given class. + * @param t Generics Type T. + * @return schema. */ @InterfaceAudience.Private public abstract Schema getSchema(T t); /** * Create and return Avro DatumWriter for the given class. + * @param clazz clazz. + * @return DatumWriter. */ @InterfaceAudience.Private public abstract DatumWriter getWriter(Class clazz); /** * Create and return Avro DatumReader for the given class. + * @param clazz clazz. + * @return DatumReader. */ @InterfaceAudience.Private public abstract DatumReader getReader(Class clazz); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java index fbf825bcb91b1..8d43fd74a843c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java @@ -46,7 +46,7 @@ public interface AlignmentContext { void updateResponseState(RpcResponseHeaderProto.Builder header); /** - * This is the intended client method call to implement to recieve state info + * This is the intended client method call to implement to receive state info * during RPC response processing. 
* * @param header The RPC response header. @@ -71,7 +71,7 @@ public interface AlignmentContext { * misaligned with the client state. * See implementation for more details. * @return state id required for the server to execute the call. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ long receiveRequestState(RpcRequestHeaderProto header, long threshold) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java index 81b7d34d0d1e0..fdd536a75e64d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java @@ -32,7 +32,8 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -208,6 +209,19 @@ int getPriorityLevel(Schedulable e) { return scheduler.getPriorityLevel(e); } + int getPriorityLevel(UserGroupInformation user) { + if (scheduler instanceof DecayRpcScheduler) { + return ((DecayRpcScheduler)scheduler).getPriorityLevel(user); + } + return 0; + } + + void setPriorityLevel(UserGroupInformation user, int priority) { + if (scheduler instanceof DecayRpcScheduler) { + ((DecayRpcScheduler)scheduler).setPriorityLevel(user, priority); + } + } + void setClientBackoffEnabled(boolean value) { clientBackOffEnabled = value; } @@ -346,6 +360,12 @@ private static int parseNumLevels(String ns, Configuration conf) { /** * Replaces active queue with the newly requested one and transfers * all 
calls to the newQ before returning. + * + * @param schedulerClass input schedulerClass. + * @param queueClassToUse input queueClassToUse. + * @param maxSize input maxSize. + * @param ns input ns. + * @param conf input configuration. */ public synchronized void swapQueue( Class schedulerClass, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java index dd14ba133e124..ba627adc2c4ff 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java @@ -21,10 +21,17 @@ import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_KEY; /** * A class defining the caller context for auditing coarse granularity @@ -36,6 +43,12 @@ @InterfaceStability.Evolving public final class CallerContext { public static final Charset SIGNATURE_ENCODING = StandardCharsets.UTF_8; + + // field names + public static final String CLIENT_IP_STR = "clientIp"; + public static final String CLIENT_PORT_STR = "clientPort"; + public static final String REAL_USER_STR = "realUser"; + /** The caller context. 
* * It will be truncated if it exceeds the maximum allowed length in @@ -54,8 +67,8 @@ public final class CallerContext { private final byte[] signature; private CallerContext(Builder builder) { - this.context = builder.context; - this.signature = builder.signature; + this.context = builder.getContext(); + this.signature = builder.getSignature(); } public String getContext() { @@ -109,11 +122,59 @@ public String toString() { /** The caller context builder. */ public static final class Builder { - private final String context; + public static final String KEY_VALUE_SEPARATOR = ":"; + /** + * The illegal separators include '\t', '\n', '='. + * User should not set illegal separator. + */ + private static final Set ILLEGAL_SEPARATORS = + Collections.unmodifiableSet( + new HashSet<>(Arrays.asList("\t", "\n", "="))); + private final String fieldSeparator; + private final StringBuilder sb = new StringBuilder(); private byte[] signature; public Builder(String context) { - this.context = context; + this(context, HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT); + } + + public Builder(String context, Configuration conf) { + if (isValid(context)) { + sb.append(context); + } + fieldSeparator = conf.get(HADOOP_CALLER_CONTEXT_SEPARATOR_KEY, + HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT); + checkFieldSeparator(fieldSeparator); + } + + public Builder(String context, String separator) { + if (isValid(context)) { + sb.append(context); + } + fieldSeparator = separator; + checkFieldSeparator(fieldSeparator); + } + + /** + * Check whether the separator is legal. + * The illegal separators include '\t', '\n', '='. + * Throw IllegalArgumentException if the separator is Illegal. + * @param separator the separator of fields. + */ + private void checkFieldSeparator(String separator) { + if (ILLEGAL_SEPARATORS.contains(separator)) { + throw new IllegalArgumentException("Illegal field separator: " + + separator); + } + } + + /** + * Whether the field is valid. 
+ * @param field one of the fields in context. + * @return true if the field is not null or empty. + */ + private boolean isValid(String field) { + return field != null && field.length() > 0; } public Builder setSignature(byte[] signature) { @@ -123,6 +184,74 @@ public Builder setSignature(byte[] signature) { return this; } + /** + * Get the context. + * For example, the context is "key1:value1,key2:value2". + * @return the valid context or null. + */ + public String getContext() { + return sb.length() > 0 ? sb.toString() : null; + } + + /** + * Get the signature. + * @return the signature. + */ + public byte[] getSignature() { + return signature; + } + + /** + * Append new field to the context. + * @param field one of fields to append. + * @return the builder. + */ + public Builder append(String field) { + if (isValid(field)) { + if (sb.length() > 0) { + sb.append(fieldSeparator); + } + sb.append(field); + } + return this; + } + + /** + * Append new field which contains key and value to the context. + * @param key the key of field. + * @param value the value of field. + * @return the builder. + */ + public Builder append(String key, String value) { + if (isValid(key) && isValid(value)) { + if (sb.length() > 0) { + sb.append(fieldSeparator); + } + sb.append(key).append(KEY_VALUE_SEPARATOR).append(value); + } + return this; + } + + /** + * Append new field which contains key and value to the context + * if the key("key:") is absent. + * @param key the key of field. + * @param value the value of field. + * @return the builder. 
+ */ + public Builder appendIfAbsent(String key, String value) { + if (sb.toString().contains(key + KEY_VALUE_SEPARATOR)) { + return this; + } + if (isValid(key) && isValid(value)) { + if (sb.length() > 0) { + sb.append(fieldSeparator); + } + sb.append(key).append(KEY_VALUE_SEPARATOR).append(value); + } + return this; + } + public CallerContext build() { return new CallerContext(this); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 688eed647c209..be0db11b02b52 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -18,9 +18,10 @@ package org.apache.hadoop.ipc; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability; @@ -53,8 +54,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.concurrent.AsyncGet; -import org.apache.htrace.core.Span; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,7 +112,12 @@ protected Boolean initialValue() { return (AsyncGet) ASYNC_RPC_RESPONSE.get(); } - /** Set call id and retry count for the next call. 
*/ + /** + * Set call id and retry count for the next call. + * @param cid input cid. + * @param rc input rc. + * @param externalHandler input externalHandler. + */ public static void setCallIdAndRetryCount(int cid, int rc, Object externalHandler) { Preconditions.checkArgument(cid != RpcConstants.INVALID_CALL_ID); @@ -143,73 +149,6 @@ public static void setCallIdAndRetryCount(int cid, int rc, private final int maxAsyncCalls; private final AtomicInteger asyncCallCounter = new AtomicInteger(0); - /** - * Executor on which IPC calls' parameters are sent. - * Deferring the sending of parameters to a separate - * thread isolates them from thread interruptions in the - * calling code. - */ - private final ExecutorService sendParamsExecutor; - private final static ClientExecutorServiceFactory clientExcecutorFactory = - new ClientExecutorServiceFactory(); - - private static class ClientExecutorServiceFactory { - private int executorRefCount = 0; - private ExecutorService clientExecutor = null; - - /** - * Get Executor on which IPC calls' parameters are sent. - * If the internal reference counter is zero, this method - * creates the instance of Executor. If not, this method - * just returns the reference of clientExecutor. - * - * @return An ExecutorService instance - */ - synchronized ExecutorService refAndGetInstance() { - if (executorRefCount == 0) { - clientExecutor = Executors.newCachedThreadPool( - new ThreadFactoryBuilder() - .setDaemon(true) - .setNameFormat("IPC Parameter Sending Thread #%d") - .build()); - } - executorRefCount++; - - return clientExecutor; - } - - /** - * Cleanup Executor on which IPC calls' parameters are sent. - * If reference counter is zero, this method discards the - * instance of the Executor. If not, this method - * just decrements the internal reference counter. - * - * @return An ExecutorService instance if it exists. - * Null is returned if not. 
- */ - synchronized ExecutorService unrefAndCleanup() { - executorRefCount--; - assert(executorRefCount >= 0); - - if (executorRefCount == 0) { - clientExecutor.shutdown(); - try { - if (!clientExecutor.awaitTermination(1, TimeUnit.MINUTES)) { - clientExecutor.shutdownNow(); - } - } catch (InterruptedException e) { - LOG.warn("Interrupted while waiting for clientExecutor" + - " to stop"); - clientExecutor.shutdownNow(); - Thread.currentThread().interrupt(); - } - clientExecutor = null; - } - - return clientExecutor; - } - } - /** * set the ping interval value in configuration * @@ -278,11 +217,6 @@ public static final void setConnectTimeout(Configuration conf, int timeout) { conf.setInt(CommonConfigurationKeys.IPC_CLIENT_CONNECT_TIMEOUT_KEY, timeout); } - @VisibleForTesting - public static final ExecutorService getClientExecutor() { - return Client.clientExcecutorFactory.clientExecutor; - } - /** * Increment this client's reference count */ @@ -412,7 +346,7 @@ public synchronized Writable getRpcResponse() { * socket: responses may be delivered out of order. 
*/ private class Connection extends Thread { private InetSocketAddress server; // server ip:port - private final ConnectionId remoteId; // connection id + private final ConnectionId remoteId; // connection id private AuthMethod authMethod; // authentication method private AuthProtocol authProtocol; private int serviceClass; @@ -439,8 +373,10 @@ private class Connection extends Thread { private AtomicLong lastActivity = new AtomicLong();// last I/O activity time private AtomicBoolean shouldCloseConnection = new AtomicBoolean(); // indicate if the connection is closed private IOException closeException; // close reason - - private final Object sendRpcRequestLock = new Object(); + + private final Thread rpcRequestThread; + private final SynchronousQueue> rpcRequestQueue = + new SynchronousQueue<>(true); private AtomicReference connectingThread = new AtomicReference<>(); private final Consumer removeMethod; @@ -449,6 +385,9 @@ private class Connection extends Thread { Consumer removeMethod) { this.remoteId = remoteId; this.server = remoteId.getAddress(); + this.rpcRequestThread = new Thread(new RpcRequestSender(), + "IPC Parameter Sending Thread for " + remoteId); + this.rpcRequestThread.setDaemon(true); this.maxResponseLength = remoteId.conf.getInt( CommonConfigurationKeys.IPC_MAXIMUM_RESPONSE_LENGTH, @@ -638,6 +577,9 @@ private synchronized boolean updateAddress() throws IOException { LOG.warn("Address change detected. Old: " + server.toString() + " New: " + currentAddr.toString()); server = currentAddr; + // Update the remote address so that reconnections are with the updated address. + // This avoids thrashing. 
+ remoteId.setAddress(currentAddr); UserGroupInformation ticket = remoteId.getTicket(); this.setName("IPC Client (" + socketFactory.hashCode() + ") connection to " + server.toString() + " from " @@ -649,10 +591,21 @@ private synchronized boolean updateAddress() throws IOException { private synchronized void setupConnection( UserGroupInformation ticket) throws IOException { + LOG.debug("Setup connection to " + server.toString()); short ioFailures = 0; short timeoutFailures = 0; while (true) { try { + if (server.isUnresolved()) { + // Jump into the catch block. updateAddress() will re-resolve + // the address if this is just a temporary DNS failure. If not, + // it will timeout after max ipc client retries + throw NetUtils.wrapException(server.getHostName(), + server.getPort(), + NetUtils.getHostname(), + 0, + new UnknownHostException()); + } this.socket = socketFactory.createSocket(); this.socket.setTcpNoDelay(tcpNoDelay); this.socket.setKeepAlive(true); @@ -711,8 +664,16 @@ private synchronized void setupConnection( } catch (IOException ie) { if (updateAddress()) { timeoutFailures = ioFailures = 0; + try { + // HADOOP-17068: when server changed, ignore the exception. + handleConnectionFailure(ioFailures++, ie); + } catch (IOException ioe) { + LOG.warn("Exception when handle ConnectionFailure: " + + ioe.getMessage()); + } + } else { + handleConnectionFailure(ioFailures++, ie); } - handleConnectionFailure(ioFailures++, ie); } } } @@ -726,7 +687,7 @@ private synchronized void setupConnection( * handle that, a relogin is attempted. 
*/ private synchronized void handleSaslConnectionFailure( - final int currRetries, final int maxRetries, final Exception ex, + final int currRetries, final int maxRetries, final IOException ex, final Random rand, final UserGroupInformation ugi) throws IOException, InterruptedException { ugi.doAs(new PrivilegedExceptionAction() { @@ -737,10 +698,7 @@ public Object run() throws IOException, InterruptedException { disposeSasl(); if (shouldAuthenticateOverKrb()) { if (currRetries < maxRetries) { - if(LOG.isDebugEnabled()) { - LOG.debug("Exception encountered while connecting to " - + "the server : " + ex); - } + LOG.debug("Exception encountered while connecting to the server {}", remoteId, ex); // try re-login if (UserGroupInformation.isLoginKeytabBased()) { UserGroupInformation.getLoginUser().reloginFromKeytab(); @@ -758,7 +716,11 @@ public Object run() throws IOException, InterruptedException { + UserGroupInformation.getLoginUser().getUserName() + " to " + remoteId; LOG.warn(msg, ex); - throw (IOException) new IOException(msg).initCause(ex); + throw NetUtils.wrapException(remoteId.getAddress().getHostName(), + remoteId.getAddress().getPort(), + NetUtils.getHostname(), + 0, + ex); } } else { // With RequestHedgingProxyProvider, one rpc call will send multiple @@ -766,11 +728,9 @@ public Object run() throws IOException, InterruptedException { // all other requests will be interrupted. It's not a big problem, // and should not print a warning log. 
if (ex instanceof InterruptedIOException) { - LOG.debug("Exception encountered while connecting to the server", - ex); + LOG.debug("Exception encountered while connecting to the server {}", remoteId, ex); } else { - LOG.warn("Exception encountered while connecting to the server ", - ex); + LOG.warn("Exception encountered while connecting to the server {}", remoteId, ex); } } if (ex instanceof RemoteException) @@ -787,17 +747,18 @@ public Object run() throws IOException, InterruptedException { */ private synchronized void setupIOstreams( AtomicBoolean fallbackToSimpleAuth) { - if (socket != null || shouldCloseConnection.get()) { - return; - } - UserGroupInformation ticket = remoteId.getTicket(); - if (ticket != null) { - final UserGroupInformation realUser = ticket.getRealUser(); - if (realUser != null) { - ticket = realUser; - } - } try { + if (socket != null || shouldCloseConnection.get()) { + setFallBackToSimpleAuth(fallbackToSimpleAuth); + return; + } + UserGroupInformation ticket = remoteId.getTicket(); + if (ticket != null) { + final UserGroupInformation realUser = ticket.getRealUser(); + if (realUser != null) { + ticket = realUser; + } + } connectingThread.set(Thread.currentThread()); if (LOG.isDebugEnabled()) { LOG.debug("Connecting to "+server); @@ -843,19 +804,8 @@ public AuthMethod run() remoteId.saslQop = (String)saslRpcClient.getNegotiatedProperty(Sasl.QOP); LOG.debug("Negotiated QOP is :" + remoteId.saslQop); - if (fallbackToSimpleAuth != null) { - fallbackToSimpleAuth.set(false); - } - } else if (UserGroupInformation.isSecurityEnabled()) { - if (!fallbackAllowed) { - throw new IOException("Server asks us to fall back to SIMPLE " + - "auth, but this client is configured to only allow secure " + - "connections."); - } - if (fallbackToSimpleAuth != null) { - fallbackToSimpleAuth.set(true); - } } + setFallBackToSimpleAuth(fallbackToSimpleAuth); } if (doPing) { @@ -888,7 +838,41 @@ public AuthMethod run() connectingThread.set(null); } } - + + private void 
setFallBackToSimpleAuth(AtomicBoolean fallbackToSimpleAuth) + throws AccessControlException { + if (authMethod == null || authProtocol != AuthProtocol.SASL) { + if (authProtocol == AuthProtocol.SASL) { + LOG.trace("Auth method is not set, yield from setting auth fallback."); + } + return; + } + if (fallbackToSimpleAuth == null) { + // this should happen only during testing. + LOG.trace("Connection {} will skip to set fallbackToSimpleAuth as it is null.", remoteId); + } else { + if (fallbackToSimpleAuth.get()) { + // we already set the value to true, we do not need to examine again. + return; + } + } + if (authMethod != AuthMethod.SIMPLE) { + if (fallbackToSimpleAuth != null) { + LOG.trace("Disabling fallbackToSimpleAuth, target does not use SIMPLE authentication."); + fallbackToSimpleAuth.set(false); + } + } else if (UserGroupInformation.isSecurityEnabled()) { + if (!fallbackAllowed) { + throw new AccessControlException("Server asks us to fall back to SIMPLE auth, but this " + + "client is configured to only allow secure connections."); + } + if (fallbackToSimpleAuth != null) { + LOG.trace("Enabling fallbackToSimpleAuth for target, as we are allowed to fall back."); + fallbackToSimpleAuth.set(true); + } + } + } + private void closeConnection() { if (socket == null) { return; @@ -1040,7 +1024,10 @@ private synchronized boolean waitForWork() { if (timeout>0) { try { wait(timeout); - } catch (InterruptedException e) {} + } catch (InterruptedException e) { + LOG.trace("Interrupted while waiting to retrieve RPC response."); + Thread.currentThread().interrupt(); + } } } @@ -1078,6 +1065,10 @@ private synchronized void sendPing() throws IOException { @Override public void run() { + // Don't start the ipc parameter sending thread until we start this + // thread, because the shutdown logic only gets triggered if this + // thread is started. 
+ rpcRequestThread.start(); if (LOG.isDebugEnabled()) LOG.debug(getName() + ": starting, having connections " + connections.size()); @@ -1101,9 +1092,52 @@ public void run() { + connections.size()); } + /** + * A thread to write rpc requests to the socket. + */ + private class RpcRequestSender implements Runnable { + @Override + public void run() { + while (!shouldCloseConnection.get()) { + ResponseBuffer buf = null; + try { + Pair pair = + rpcRequestQueue.poll(maxIdleTime, TimeUnit.MILLISECONDS); + if (pair == null || shouldCloseConnection.get()) { + continue; + } + buf = pair.getRight(); + synchronized (ipcStreams.out) { + if (LOG.isDebugEnabled()) { + Call call = pair.getLeft(); + LOG.debug(getName() + "{} sending #{} {}", getName(), call.id, + call.rpcRequest); + } + // RpcRequestHeader + RpcRequest + ipcStreams.sendRequest(buf.toByteArray()); + ipcStreams.flush(); + } + } catch (InterruptedException ie) { + // stop this thread + return; + } catch (IOException e) { + // exception at this point would leave the connection in an + // unrecoverable state (eg half a call left on the wire). + // So, close the connection, killing any outstanding calls + markClosed(e); + } finally { + //the buffer is just an in-memory buffer, but it is still polite to + // close early + IOUtils.closeStream(buf); + } + } + } + } + /** Initiates a rpc call by sending the rpc request to the remote server. - * Note: this is not called from the Connection thread, but by other - * threads. + * Note: this is not called from the current thread, but by another + * thread, so that if the current thread is interrupted that the socket + * state isn't corrupted with a partially written message. * @param call - the rpc request */ public void sendRpcRequest(final Call call) @@ -1113,8 +1147,7 @@ public void sendRpcRequest(final Call call) } // Serialize the call to be sent. 
This is done from the actual - // caller thread, rather than the sendParamsExecutor thread, - + // caller thread, rather than the rpcRequestThread in the connection, // so that if the serialization throws an error, it is reported // properly. This also parallelizes the serialization. // @@ -1131,49 +1164,12 @@ public void sendRpcRequest(final Call call) final ResponseBuffer buf = new ResponseBuffer(); header.writeDelimitedTo(buf); RpcWritable.wrap(call.rpcRequest).writeTo(buf); - - synchronized (sendRpcRequestLock) { - Future senderFuture = sendParamsExecutor.submit(new Runnable() { - @Override - public void run() { - try { - synchronized (ipcStreams.out) { - if (shouldCloseConnection.get()) { - return; - } - if (LOG.isDebugEnabled()) { - LOG.debug(getName() + " sending #" + call.id - + " " + call.rpcRequest); - } - // RpcRequestHeader + RpcRequest - ipcStreams.sendRequest(buf.toByteArray()); - ipcStreams.flush(); - } - } catch (IOException e) { - // exception at this point would leave the connection in an - // unrecoverable state (eg half a call left on the wire). - // So, close the connection, killing any outstanding calls - markClosed(e); - } finally { - //the buffer is just an in-memory buffer, but it is still polite to - // close early - IOUtils.closeStream(buf); - } - } - }); - - try { - senderFuture.get(); - } catch (ExecutionException e) { - Throwable cause = e.getCause(); - - // cause should only be a RuntimeException as the Runnable above - // catches IOException - if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else { - throw new RuntimeException("unexpected checked exception", cause); - } + // Wait for the message to be sent. 
We offer with timeout to + // prevent a race condition between checking the shouldCloseConnection + // and the stopping of the polling thread + while (!shouldCloseConnection.get()) { + if (rpcRequestQueue.offer(Pair.of(call, buf), 1, TimeUnit.SECONDS)) { + break; } } } @@ -1277,7 +1273,7 @@ private synchronized void close() { cleanupCalls(); } } else { - // log the info + // Log the newest server information if update address. if (LOG.isDebugEnabled()) { LOG.debug("closing ipc connection to " + server + ": " + closeException.getMessage(),closeException); @@ -1302,8 +1298,14 @@ private void cleanupCalls() { } } - /** Construct an IPC client whose values are of the given {@link Writable} - * class. */ + /** + * Construct an IPC client whose values are of the given {@link Writable} + * class. + * + * @param valueClass input valueClass. + * @param conf input configuration. + * @param factory input factory. + */ public Client(Class valueClass, Configuration conf, SocketFactory factory) { this.valueClass = valueClass; @@ -1318,16 +1320,15 @@ public Client(Class valueClass, Configuration conf, CommonConfigurationKeys.IPC_CLIENT_BIND_WILDCARD_ADDR_DEFAULT); this.clientId = ClientId.getClientId(); - this.sendParamsExecutor = clientExcecutorFactory.refAndGetInstance(); this.maxAsyncCalls = conf.getInt( CommonConfigurationKeys.IPC_CLIENT_ASYNC_CALLS_MAX_KEY, CommonConfigurationKeys.IPC_CLIENT_ASYNC_CALLS_MAX_DEFAULT); } /** - * Construct an IPC client with the default SocketFactory - * @param valueClass - * @param conf + * Construct an IPC client with the default SocketFactory. + * @param valueClass input valueClass. + * @param conf input Configuration. 
*/ public Client(Class valueClass, Configuration conf) { this(valueClass, conf, NetUtils.getDefaultSocketFactory(conf)); @@ -1362,6 +1363,7 @@ public void stop() { // wake up all connections for (Connection conn : connections.values()) { conn.interrupt(); + conn.rpcRequestThread.interrupt(); conn.interruptConnectingThread(); } @@ -1372,17 +1374,19 @@ public void stop() { try { emptyCondition.wait(); } catch (InterruptedException e) { + LOG.trace( + "Interrupted while waiting on all connections to be closed."); + Thread.currentThread().interrupt(); } } } - clientExcecutorFactory.unrefAndCleanup(); } /** * Make a call, passing rpcRequest, to the IPC server defined by * remoteId, returning the rpc respond. * - * @param rpcKind + * @param rpcKind - input rpcKind. * @param rpcRequest - contains serialized method and method parameters * @param remoteId - the target rpc server * @param fallbackToSimpleAuth - set to true or false during this method to @@ -1390,6 +1394,7 @@ public void stop() { * @return the rpc response * Throws exceptions if there are network problems or if the remote code * threw an exception. + * @throws IOException raised on errors performing I/O. 
*/ public Writable call(RPC.RpcKind rpcKind, Writable rpcRequest, ConnectionId remoteId, AtomicBoolean fallbackToSimpleAuth) @@ -1587,15 +1592,6 @@ Set getConnectionIds() { private Connection getConnection(ConnectionId remoteId, Call call, int serviceClass, AtomicBoolean fallbackToSimpleAuth) throws IOException { - final InetSocketAddress address = remoteId.getAddress(); - if (address.isUnresolved()) { - throw NetUtils.wrapException(address.getHostName(), - address.getPort(), - null, - 0, - new UnknownHostException()); - } - final Consumer removeMethod = c -> { final boolean removed = connections.remove(remoteId, c); if (removed && connections.isEmpty()) { @@ -1645,9 +1641,9 @@ private Connection getConnection(ConnectionId remoteId, @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public static class ConnectionId { - InetSocketAddress address; - UserGroupInformation ticket; - final Class protocol; + private InetSocketAddress address; + private final UserGroupInformation ticket; + private final Class protocol; private static final int PRIME = 16777619; private final int rpcTimeout; private final int maxIdleTime; //connections will be culled if it was idle for @@ -1663,7 +1659,7 @@ public static class ConnectionId { private String saslQop; // here for testing private final Configuration conf; // used to get the expected kerberos principal name - ConnectionId(InetSocketAddress address, Class protocol, + public ConnectionId(InetSocketAddress address, Class protocol, UserGroupInformation ticket, int rpcTimeout, RetryPolicy connectionRetryPolicy, Configuration conf) { this.protocol = protocol; @@ -1698,7 +1694,28 @@ public static class ConnectionId { InetSocketAddress getAddress() { return address; } - + + /** + * This is used to update the remote address when an address change is detected. This method + * ensures that the {@link #hashCode()} won't change. 
+ * + * @param address the updated address + * @throws IllegalArgumentException if the hostname or port doesn't match + * @see Connection#updateAddress() + */ + void setAddress(InetSocketAddress address) { + if (!Objects.equals(this.address.getHostName(), address.getHostName())) { + throw new IllegalArgumentException("Hostname must match: " + this.address + " vs " + + address); + } + if (this.address.getPort() != address.getPort()) { + throw new IllegalArgumentException("Port must match: " + this.address + " vs " + address); + } + + this.address = address; + } + + Class getProtocol() { return protocol; } @@ -1707,7 +1724,7 @@ UserGroupInformation getTicket() { return ticket; } - private int getRpcTimeout() { + int getRpcTimeout() { return rpcTimeout; } @@ -1719,7 +1736,7 @@ public int getMaxRetriesOnSasl() { return maxRetriesOnSasl; } - /** max connection retries on socket time outs */ + /** @return max connection retries on socket time outs */ public int getMaxRetriesOnSocketTimeouts() { return maxRetriesOnSocketTimeouts; } @@ -1741,6 +1758,10 @@ boolean getDoPing() { int getPingInterval() { return pingInterval; } + + RetryPolicy getRetryPolicy() { + return connectionRetryPolicy; + } @VisibleForTesting String getSaslQop() { @@ -1805,7 +1826,11 @@ && isEqual(this.protocol, that.protocol) @Override public int hashCode() { int result = connectionRetryPolicy.hashCode(); - result = PRIME * result + ((address == null) ? 0 : address.hashCode()); + // We calculate based on the host name and port without the IP address, since the hashCode + // must be stable even if the IP address is updated. + result = PRIME * result + ((address == null || address.getHostName() == null) ? 0 : + address.getHostName().hashCode()); + result = PRIME * result + ((address == null) ? 0 : address.getPort()); result = PRIME * result + (doPing ? 
1231 : 1237); result = PRIME * result + maxIdleTime; result = PRIME * result + pingInterval; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientCache.java index a0720d421820a..3364cee33c78a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientCache.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientCache.java @@ -29,6 +29,8 @@ import org.apache.hadoop.io.ObjectWritable; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + /* Cache a client using its socket factory as the hash key */ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving @@ -91,6 +93,8 @@ public synchronized Client getClient(Configuration conf, SocketFactory factory) /** * Stop a RPC client connection * A RPC client is closed only when its reference count becomes zero. + * + * @param client input client. 
*/ public void stopClient(Client client) { if (Client.LOG.isDebugEnabled()) { @@ -114,4 +118,10 @@ public void stopClient(Client client) { client.stop(); } } + + @VisibleForTesting + public void clearCache() { + clients.values().forEach(c -> c.stop()); + clients.clear(); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientId.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientId.java index c95dbb9facaac..1873ef47cf4eb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientId.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ClientId.java @@ -22,7 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A class defining a set of static helper methods to provide conversion between @@ -36,7 +36,7 @@ public class ClientId { private static final int shiftWidth = 8; /** - * Return clientId as byte[] + * @return Return clientId as byte[]. */ public static byte[] getClientId() { UUID uuid = UUID.randomUUID(); @@ -46,7 +46,10 @@ public static byte[] getClientId() { return buf.array(); } - /** Convert a clientId byte[] to string */ + /** + * @return Convert a clientId byte[] to string. + * @param clientId input clientId. + */ public static String toString(byte[] clientId) { // clientId can be null or an empty array if (clientId == null || clientId.length == 0) { @@ -75,7 +78,10 @@ public static long getLsb(byte[] clientId) { return lsb; } - /** Convert from clientId string byte[] representation of clientId */ + /** + * @return Convert from clientId string byte[] representation of clientId. + * @param id input id. 
+ */ public static byte[] toBytes(String id) { if (id == null || "".equals(id)) { return new byte[0]; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java index 3e952eb63c3ff..aa643154b9556 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java @@ -25,6 +25,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; @@ -37,12 +38,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.AtomicDoubleArray; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.AtomicDoubleArray; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ipc.metrics.DecayRpcSchedulerDetailedMetrics; +import org.apache.hadoop.ipc.metrics.RpcMetrics; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsSource; @@ -52,7 +55,7 @@ import org.apache.hadoop.metrics2.util.Metrics2Util.NameValuePair; import org.apache.hadoop.metrics2.util.Metrics2Util.TopN; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ 
-175,9 +178,11 @@ public class DecayRpcScheduler implements RpcScheduler, private final String namespace; private final int topUsersCount; // e.g., report top 10 users' metrics private static final double PRECISION = 0.0001; + private final TimeUnit metricsTimeUnit; private MetricsProxy metricsProxy; private final CostProvider costProvider; + private final Map staticPriorities = new HashMap<>(); /** * This TimerTask will call decayCurrentCosts until * the scheduler has been garbage collected. @@ -245,6 +250,8 @@ public DecayRpcScheduler(int numLevels, String ns, Configuration conf) { DecayRpcSchedulerDetailedMetrics.create(ns); decayRpcSchedulerDetailedMetrics.init(numLevels); + metricsTimeUnit = RpcMetrics.getMetricsTimeUnit(conf); + // Setup delay timer Timer timer = new Timer(true); DecayTask task = new DecayTask(this, timer); @@ -485,7 +492,7 @@ private void recomputeScheduleCache() { AtomicLong value = entry.getValue().get(0); long snapshot = value.get(); - int computedLevel = computePriorityLevel(snapshot); + int computedLevel = computePriorityLevel(snapshot, id); nextCache.put(id, computedLevel); } @@ -535,7 +542,11 @@ private void addCost(Object identity, long costDelta) { * @param cost the cost for an identity * @return scheduling decision from 0 to numLevels - 1 */ - private int computePriorityLevel(long cost) { + private int computePriorityLevel(long cost, Object identity) { + Integer staticPriority = staticPriorities.get(identity); + if (staticPriority != null) { + return staticPriority.intValue(); + } long totalCallSnapshot = totalDecayedCallCost.get(); double proportion = 0; @@ -575,11 +586,20 @@ private int cachedOrComputedPriorityLevel(Object identity) { // Cache was no good, compute it List costList = callCosts.get(identity); long currentCost = costList == null ? 
0 : costList.get(0).get(); - int priority = computePriorityLevel(currentCost); + int priority = computePriorityLevel(currentCost, identity); LOG.debug("compute priority for {} priority {}", identity, priority); return priority; } + private String getIdentity(Schedulable obj) { + String identity = this.identityProvider.makeIdentity(obj); + if (identity == null) { + // Identity provider did not handle this + identity = DECAYSCHEDULER_UNKNOWN_IDENTITY; + } + return identity; + } + /** * Compute the appropriate priority for a schedulable based on past requests. * @param obj the schedulable obj to query and remember @@ -588,15 +608,42 @@ private int cachedOrComputedPriorityLevel(Object identity) { @Override public int getPriorityLevel(Schedulable obj) { // First get the identity - String identity = this.identityProvider.makeIdentity(obj); - if (identity == null) { - // Identity provider did not handle this - identity = DECAYSCHEDULER_UNKNOWN_IDENTITY; - } + String identity = getIdentity(obj); + // highest priority users may have a negative priority but their + // calls will be priority 0. + return Math.max(0, cachedOrComputedPriorityLevel(identity)); + } + @VisibleForTesting + int getPriorityLevel(UserGroupInformation ugi) { + String identity = getIdentity(newSchedulable(ugi)); + // returns true priority of the user. return cachedOrComputedPriorityLevel(identity); } + @VisibleForTesting + void setPriorityLevel(UserGroupInformation ugi, int priority) { + String identity = getIdentity(newSchedulable(ugi)); + priority = Math.min(numLevels - 1, priority); + LOG.info("Setting priority for user:" + identity + "=" + priority); + staticPriorities.put(identity, priority); + } + + // dummy instance to conform to identity provider api. 
+ private static Schedulable newSchedulable(UserGroupInformation ugi) { + return new Schedulable() { + @Override + public UserGroupInformation getUserGroupInformation() { + return ugi; + } + + @Override + public int getPriorityLevel() { + return 0; + } + }; + } + @Override public boolean shouldBackOff(Schedulable obj) { Boolean backOff = false; @@ -632,8 +679,9 @@ public void addResponseTime(String callName, Schedulable schedulable, addCost(user, processingCost); int priorityLevel = schedulable.getPriorityLevel(); - long queueTime = details.get(Timing.QUEUE, TimeUnit.MILLISECONDS); - long processingTime = details.get(Timing.PROCESSING, TimeUnit.MILLISECONDS); + long queueTime = details.get(Timing.QUEUE, metricsTimeUnit); + long processingTime = details.get(Timing.PROCESSING, + metricsTimeUnit); this.decayRpcSchedulerDetailedMetrics.addQueueTime( priorityLevel, queueTime); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java index d15a71000bd54..75f3420fb2d65 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java @@ -31,7 +31,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.NotImplementedException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -82,6 +82,7 @@ private void signalNotEmpty() { private boolean serverFailOverEnabled; /** * Create a FairCallQueue. + * @param priorityLevels input priorityLevels. 
* @param capacity the total size of all sub-queues * @param ns the prefix to use for configuration * @param conf the configuration to read from diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/GenericRefreshProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/GenericRefreshProtocol.java index bfa055bcb0997..10e661a3095cd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/GenericRefreshProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/GenericRefreshProtocol.java @@ -41,7 +41,11 @@ public interface GenericRefreshProtocol { /** * Refresh the resource based on identity passed in. - * @throws IOException + * + * @param identifier input identifier. + * @param args input args. + * @throws IOException raised on errors performing I/O. + * @return Collection RefreshResponse. */ @Idempotent Collection refresh(String identifier, String[] args) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufHelper.java index bb86cfc35bf4e..9ed0640c8dcfa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufHelper.java @@ -53,6 +53,23 @@ public static IOException getRemoteException(ServiceException se) { return e instanceof IOException ? (IOException) e : new IOException(se); } + /** + * Kept for backward compatible. + * Return the IOException thrown by the remote server wrapped in + * ServiceException as cause. + * @param se ServiceException that wraps IO exception thrown by the server + * @return Exception wrapped in ServiceException or + * a new IOException that wraps the unexpected ServiceException. 
+ */ + @Deprecated + public static IOException getRemoteException( + com.google.protobuf.ServiceException se) { + Throwable e = se.getCause(); + if (e == null) { + return new IOException(se); + } + return e instanceof IOException ? (IOException) e : new IOException(se); + } /** * Map used to cache fixed strings to ByteStrings. Since there is no @@ -68,7 +85,7 @@ public static IOException getRemoteException(ServiceException se) { /** * Get the ByteString for frequently used fixed and small set strings. * @param key string - * @return + * @return the ByteString for frequently used fixed and small set strings. */ public static ByteString getFixedByteString(Text key) { ByteString value = FIXED_BYTESTRING_CACHE.get(key); @@ -82,7 +99,7 @@ public static ByteString getFixedByteString(Text key) { /** * Get the ByteString for frequently used fixed and small set strings. * @param key string - * @return + * @return ByteString for frequently used fixed and small set strings. */ public static ByteString getFixedByteString(String key) { ByteString value = FIXED_BYTESTRING_CACHE.get(key); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index 14b356f847acf..01fceeb954e0e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -18,9 +18,12 @@ package org.apache.hadoop.ipc; -import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.thirdparty.protobuf.*; -import org.apache.hadoop.thirdparty.protobuf.Descriptors.MethodDescriptor; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.BlockingService; +import com.google.protobuf.Descriptors.MethodDescriptor; +import com.google.protobuf.Message; 
+import com.google.protobuf.ServiceException; +import com.google.protobuf.TextFormat; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Unstable; @@ -28,15 +31,14 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ipc.Client.ConnectionId; -import org.apache.hadoop.ipc.RPC.RpcInvoker; import org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestHeaderProto; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.concurrent.AsyncGet; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,7 +54,10 @@ /** * RPC Engine for for protobuf based RPCs. + * This engine uses Protobuf 2.5.0. Recommended to upgrade to Protobuf 3.x + * from hadoop-thirdparty and use ProtobufRpcEngine2. 
*/ +@Deprecated @InterfaceStability.Evolving public class ProtobufRpcEngine implements RpcEngine { public static final Logger LOG = @@ -61,9 +66,8 @@ public class ProtobufRpcEngine implements RpcEngine { ASYNC_RETURN_MESSAGE = new ThreadLocal<>(); static { // Register the rpcRequest deserializer for ProtobufRpcEngine - org.apache.hadoop.ipc.Server.registerProtocolEngine( - RPC.RpcKind.RPC_PROTOCOL_BUFFER, RpcProtobufRequest.class, - new Server.ProtoBufRpcInvoker()); + //These will be used in server side, which is always ProtobufRpcEngine2 + ProtobufRpcEngine2.registerProtocolEngine(); } private static final ClientCache CLIENTS = new ClientCache(); @@ -73,6 +77,16 @@ public static AsyncGet getAsyncReturnMessage() { return ASYNC_RETURN_MESSAGE.get(); } + @Override + @SuppressWarnings("unchecked") + public ProtocolProxy getProxy(Class protocol, long clientVersion, + ConnectionId connId, Configuration conf, SocketFactory factory, + AlignmentContext alignmentContext) throws IOException { + final Invoker invoker = new Invoker(protocol, connId, conf, factory, alignmentContext); + return new ProtocolProxy(protocol, (T) Proxy.newProxyInstance( + protocol.getClassLoader(), new Class[] {protocol}, invoker), false); + } + public ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout) throws IOException { @@ -112,10 +126,10 @@ public ProtocolProxy getProtocolMetaInfoProxy( return new ProtocolProxy(protocol, (ProtocolMetaInfoPB) Proxy.newProxyInstance(protocol.getClassLoader(), new Class[] { protocol }, new Invoker(protocol, connId, conf, - factory)), false); + factory, null)), false); } - private static class Invoker implements RpcInvocationHandler { + protected static class Invoker implements RpcInvocationHandler { private final Map returnTypes = new ConcurrentHashMap(); private boolean isClosed = false; @@ -126,28 +140,33 @@ private static class Invoker 
implements RpcInvocationHandler { private AtomicBoolean fallbackToSimpleAuth; private AlignmentContext alignmentContext; - private Invoker(Class protocol, InetSocketAddress addr, + protected Invoker(Class protocol, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy, AtomicBoolean fallbackToSimpleAuth, AlignmentContext alignmentContext) throws IOException { this(protocol, Client.ConnectionId.getConnectionId( addr, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf), - conf, factory); + conf, factory, alignmentContext); this.fallbackToSimpleAuth = fallbackToSimpleAuth; - this.alignmentContext = alignmentContext; } /** * This constructor takes a connectionId, instead of creating a new one. + * @param protocol input protocol. + * @param connId input connId. + * @param conf input Configuration. + * @param factory input factory. + * @param alignmentContext Alignment context */ - private Invoker(Class protocol, Client.ConnectionId connId, - Configuration conf, SocketFactory factory) { + protected Invoker(Class protocol, Client.ConnectionId connId, + Configuration conf, SocketFactory factory, AlignmentContext alignmentContext) { this.remoteId = connId; this.client = CLIENTS.getClient(conf, factory, RpcWritable.Buffer.class); this.protocolName = RPC.getProtocolName(protocol); this.clientProtocolVersion = RPC .getProtocolVersion(protocol); + this.alignmentContext = alignmentContext; } private RequestHeaderProto constructRpcRequestHeader(Method method) { @@ -218,8 +237,6 @@ public Message invoke(Object proxy, final Method method, Object[] args) traceScope = tracer.newScope(RpcClientUtil.methodToTraceString(method)); } - RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method); - if (LOG.isTraceEnabled()) { LOG.trace(Thread.currentThread().getId() + ": Call -> " + remoteId + ": " + method.getName() + @@ -231,7 +248,7 @@ public Message invoke(Object proxy, final 
Method method, Object[] args) final RpcWritable.Buffer val; try { val = (RpcWritable.Buffer) client.call(RPC.RpcKind.RPC_PROTOCOL_BUFFER, - new RpcProtobufRequest(rpcRequestHeader, theRequest), remoteId, + constructRpcRequest(method, theRequest), remoteId, fallbackToSimpleAuth, alignmentContext); } catch (Throwable e) { @@ -276,6 +293,11 @@ public boolean isDone() { } } + protected Writable constructRpcRequest(Method method, Message theRequest) { + RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method); + return new RpcProtobufRequest(rpcRequestHeader, theRequest); + } + private Message getReturnMessage(final Method method, final RpcWritable.Buffer buf) throws ServiceException { Message prototype = null; @@ -325,6 +347,14 @@ private Message getReturnProtoType(Method method) throws Exception { public ConnectionId getConnectionId() { return remoteId; } + + protected long getClientProtocolVersion() { + return clientProtocolVersion; + } + + protected String getProtocolName() { + return protocolName; + } } @VisibleForTesting @@ -334,8 +364,6 @@ static Client getClient(Configuration conf) { return CLIENTS.getClient(conf, SocketFactory.getDefault(), RpcWritable.Buffer.class); } - - @Override public RPC.Server getServer(Class protocol, Object protocolImpl, @@ -348,24 +376,17 @@ public RPC.Server getServer(Class protocol, Object protocolImpl, numHandlers, numReaders, queueSizePerHandler, verbose, secretManager, portRangeConfig, alignmentContext); } - - public static class Server extends RPC.Server { + + /** + * Server implementation is always ProtobufRpcEngine2 based implementation, + * supports backward compatibility for protobuf 2.5 based implementations, + * which uses non-shaded protobuf classes. 
+ */ + public static class Server extends ProtobufRpcEngine2.Server { static final ThreadLocal currentCallback = new ThreadLocal<>(); - static final ThreadLocal currentCallInfo = new ThreadLocal<>(); - - static class CallInfo { - private final RPC.Server server; - private final String methodName; - - public CallInfo(RPC.Server server, String methodName) { - this.server = server; - this.methodName = methodName; - } - } - static class ProtobufRpcEngineCallbackImpl implements ProtobufRpcEngineCallback { @@ -375,9 +396,9 @@ static class ProtobufRpcEngineCallbackImpl private final long setupTime; public ProtobufRpcEngineCallbackImpl() { - this.server = currentCallInfo.get().server; + this.server = CURRENT_CALL_INFO.get().getServer(); this.call = Server.getCurCall().get(); - this.methodName = currentCallInfo.get().methodName; + this.methodName = CURRENT_CALL_INFO.get().getMethodName(); this.setupTime = Time.now(); } @@ -417,6 +438,10 @@ public static ProtobufRpcEngineCallback registerForDeferredResponse() { * @param portRangeConfig A config parameter that can be used to restrict * the range of ports used when port is 0 (an ephemeral port) * @param alignmentContext provides server state info on client responses + * @param secretManager input secretManager. + * @param queueSizePerHandler input queueSizePerHandler. + * @param numReaders input numReaders. + * @throws IOException raised on errors performing I/O. 
*/ public Server(Class protocolClass, Object protocolImpl, Configuration conf, String bindAddress, int port, int numHandlers, @@ -424,129 +449,58 @@ public Server(Class protocolClass, Object protocolImpl, SecretManager secretManager, String portRangeConfig, AlignmentContext alignmentContext) throws IOException { - super(bindAddress, port, null, numHandlers, - numReaders, queueSizePerHandler, conf, - serverNameFromClass(protocolImpl.getClass()), secretManager, - portRangeConfig); - setAlignmentContext(alignmentContext); - this.verbose = verbose; - registerProtocolAndImpl(RPC.RpcKind.RPC_PROTOCOL_BUFFER, protocolClass, - protocolImpl); + super(protocolClass, protocolImpl, conf, bindAddress, port, numHandlers, + numReaders, queueSizePerHandler, verbose, secretManager, + portRangeConfig, alignmentContext); } - + /** - * Protobuf invoker for {@link RpcInvoker} + * This implementation is same as + * ProtobufRpcEngine2.Server.ProtobufInvoker#call(..) + * except this implementation uses non-shaded protobuf classes from legacy + * protobuf version (default 2.5.0). 
*/ - static class ProtoBufRpcInvoker implements RpcInvoker { - private static ProtoClassProtoImpl getProtocolImpl(RPC.Server server, - String protoName, long clientVersion) throws RpcServerException { - ProtoNameVer pv = new ProtoNameVer(protoName, clientVersion); - ProtoClassProtoImpl impl = - server.getProtocolImplMap(RPC.RpcKind.RPC_PROTOCOL_BUFFER).get(pv); - if (impl == null) { // no match for Protocol AND Version - VerProtocolImpl highest = - server.getHighestSupportedProtocol(RPC.RpcKind.RPC_PROTOCOL_BUFFER, - protoName); - if (highest == null) { - throw new RpcNoSuchProtocolException( - "Unknown protocol: " + protoName); - } - // protocol supported but not the version that client wants - throw new RPC.VersionMismatch(protoName, clientVersion, - highest.version); - } - return impl; + static RpcWritable processCall(RPC.Server server, + String connectionProtocolName, RpcWritable.Buffer request, + String methodName, ProtoClassProtoImpl protocolImpl) throws Exception { + BlockingService service = (BlockingService) protocolImpl.protocolImpl; + MethodDescriptor methodDescriptor = service.getDescriptorForType() + .findMethodByName(methodName); + if (methodDescriptor == null) { + String msg = "Unknown method " + methodName + " called on " + + connectionProtocolName + " protocol."; + LOG.warn(msg); + throw new RpcNoSuchMethodException(msg); } + Message prototype = service.getRequestPrototype(methodDescriptor); + Message param = request.getValue(prototype); - @Override - /** - * This is a server side method, which is invoked over RPC. On success - * the return response has protobuf response payload. On failure, the - * exception name and the stack trace are returned in the response. - * See {@link HadoopRpcResponseProto} - * - * In this method there three types of exceptions possible and they are - * returned in response as follows. - *
<ol> - * <li>Exceptions encountered in this method that are returned - * as {@link RpcServerException}</li> - * <li>Exceptions thrown by the service is wrapped in ServiceException. - * In that this method returns in response the exception thrown by the - * service.</li> - * <li>Other exceptions thrown by the service. They are returned as - * it is.</li> - * </ol>
    - */ - public Writable call(RPC.Server server, String connectionProtocolName, - Writable writableRequest, long receiveTime) throws Exception { - RpcProtobufRequest request = (RpcProtobufRequest) writableRequest; - RequestHeaderProto rpcRequest = request.getRequestHeader(); - String methodName = rpcRequest.getMethodName(); - - /** - * RPCs for a particular interface (ie protocol) are done using a - * IPC connection that is setup using rpcProxy. - * The rpcProxy's has a declared protocol name that is - * sent form client to server at connection time. - * - * Each Rpc call also sends a protocol name - * (called declaringClassprotocolName). This name is usually the same - * as the connection protocol name except in some cases. - * For example metaProtocols such ProtocolInfoProto which get info - * about the protocol reuse the connection but need to indicate that - * the actual protocol is different (i.e. the protocol is - * ProtocolInfoProto) since they reuse the connection; in this case - * the declaringClassProtocolName field is set to the ProtocolInfoProto. 
- */ - - String declaringClassProtoName = - rpcRequest.getDeclaringClassProtocolName(); - long clientVersion = rpcRequest.getClientProtocolVersion(); - if (server.verbose) - LOG.info("Call: connectionProtocolName=" + connectionProtocolName + - ", method=" + methodName); - - ProtoClassProtoImpl protocolImpl = getProtocolImpl(server, - declaringClassProtoName, clientVersion); - BlockingService service = (BlockingService) protocolImpl.protocolImpl; - MethodDescriptor methodDescriptor = service.getDescriptorForType() - .findMethodByName(methodName); - if (methodDescriptor == null) { - String msg = "Unknown method " + methodName + " called on " - + connectionProtocolName + " protocol."; - LOG.warn(msg); - throw new RpcNoSuchMethodException(msg); - } - Message prototype = service.getRequestPrototype(methodDescriptor); - Message param = request.getValue(prototype); - - Message result; - Call currentCall = Server.getCurCall().get(); - try { - server.rpcDetailedMetrics.init(protocolImpl.protocolClass); - currentCallInfo.set(new CallInfo(server, methodName)); - currentCall.setDetailedMetricsName(methodName); - result = service.callBlockingMethod(methodDescriptor, null, param); - // Check if this needs to be a deferred response, - // by checking the ThreadLocal callback being set - if (currentCallback.get() != null) { - currentCall.deferResponse(); - currentCallback.set(null); - return null; - } - } catch (ServiceException e) { - Exception exception = (Exception) e.getCause(); - currentCall.setDetailedMetricsName( - exception.getClass().getSimpleName()); - throw (Exception) e.getCause(); - } catch (Exception e) { - currentCall.setDetailedMetricsName(e.getClass().getSimpleName()); - throw e; - } finally { - currentCallInfo.set(null); + Message result; + Call currentCall = Server.getCurCall().get(); + try { + server.rpcDetailedMetrics.init(protocolImpl.protocolClass); + CURRENT_CALL_INFO.set(new CallInfo(server, methodName)); + currentCall.setDetailedMetricsName(methodName); + 
result = service.callBlockingMethod(methodDescriptor, null, param); + // Check if this needs to be a deferred response, + // by checking the ThreadLocal callback being set + if (currentCallback.get() != null) { + currentCall.deferResponse(); + currentCallback.set(null); + return null; } - return RpcWritable.wrap(result); + } catch (ServiceException e) { + Exception exception = (Exception) e.getCause(); + currentCall + .setDetailedMetricsName(exception.getClass().getSimpleName()); + throw (Exception) e.getCause(); + } catch (Exception e) { + currentCall.setDetailedMetricsName(e.getClass().getSimpleName()); + throw e; + } finally { + CURRENT_CALL_INFO.set(null); } + return RpcWritable.wrap(result); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java new file mode 100644 index 0000000000000..3594320ce067f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java @@ -0,0 +1,687 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ipc; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.ipc.Client.ConnectionId; +import org.apache.hadoop.ipc.RPC.RpcInvoker; +import org.apache.hadoop.ipc.protobuf.ProtobufRpcEngine2Protos.RequestHeaderProto; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.protobuf.BlockingService; +import org.apache.hadoop.thirdparty.protobuf.Descriptors.MethodDescriptor; +import org.apache.hadoop.thirdparty.protobuf.Message; +import org.apache.hadoop.thirdparty.protobuf.ServiceException; +import org.apache.hadoop.thirdparty.protobuf.TextFormat; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.concurrent.AsyncGet; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.SocketFactory; +import java.io.IOException; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.net.InetSocketAddress; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * RPC Engine for for protobuf based RPCs. 
+ */ +@InterfaceStability.Evolving +public class ProtobufRpcEngine2 implements RpcEngine { + public static final Logger LOG = + LoggerFactory.getLogger(ProtobufRpcEngine2.class); + private static final ThreadLocal> + ASYNC_RETURN_MESSAGE = new ThreadLocal<>(); + + static { // Register the rpcRequest deserializer for ProtobufRpcEngine + registerProtocolEngine(); + } + + static void registerProtocolEngine() { + if (Server.getRpcInvoker(RPC.RpcKind.RPC_PROTOCOL_BUFFER) == null) { + org.apache.hadoop.ipc.Server + .registerProtocolEngine(RPC.RpcKind.RPC_PROTOCOL_BUFFER, + ProtobufRpcEngine2.RpcProtobufRequest.class, + new Server.ProtoBufRpcInvoker()); + } + } + + private static final ClientCache CLIENTS = new ClientCache(); + + @Unstable + public static AsyncGet getAsyncReturnMessage() { + return ASYNC_RETURN_MESSAGE.get(); + } + + public ProtocolProxy getProxy(Class protocol, long clientVersion, + InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, + SocketFactory factory, int rpcTimeout) throws IOException { + return getProxy(protocol, clientVersion, addr, ticket, conf, factory, + rpcTimeout, null); + } + + @Override + public ProtocolProxy getProxy( + Class protocol, long clientVersion, + InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, + SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy) + throws IOException { + return getProxy(protocol, clientVersion, addr, ticket, conf, factory, + rpcTimeout, connectionRetryPolicy, null, null); + } + + @Override + @SuppressWarnings("unchecked") + public ProtocolProxy getProxy(Class protocol, long clientVersion, + ConnectionId connId, Configuration conf, SocketFactory factory, + AlignmentContext alignmentContext) throws IOException { + final Invoker invoker = new Invoker(protocol, connId, conf, factory, alignmentContext); + return new ProtocolProxy(protocol, (T) Proxy.newProxyInstance( + protocol.getClassLoader(), new Class[] {protocol}, invoker), false); + } + + 
@Override + @SuppressWarnings("unchecked") + public ProtocolProxy getProxy(Class protocol, long clientVersion, + InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, + SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth, AlignmentContext alignmentContext) + throws IOException { + + final Invoker invoker = new Invoker(protocol, addr, ticket, conf, factory, + rpcTimeout, connectionRetryPolicy, fallbackToSimpleAuth, + alignmentContext); + return new ProtocolProxy(protocol, (T) Proxy.newProxyInstance( + protocol.getClassLoader(), new Class[]{protocol}, invoker), false); + } + + @Override + public ProtocolProxy getProtocolMetaInfoProxy( + ConnectionId connId, Configuration conf, SocketFactory factory) + throws IOException { + Class protocol = ProtocolMetaInfoPB.class; + return new ProtocolProxy(protocol, + (ProtocolMetaInfoPB) Proxy.newProxyInstance(protocol.getClassLoader(), + new Class[]{protocol}, new Invoker(protocol, connId, conf, + factory, null)), false); + } + + protected static class Invoker implements RpcInvocationHandler { + private final Map returnTypes = + new ConcurrentHashMap(); + private boolean isClosed = false; + private final Client.ConnectionId remoteId; + private final Client client; + private final long clientProtocolVersion; + private final String protocolName; + private AtomicBoolean fallbackToSimpleAuth; + private AlignmentContext alignmentContext; + + protected Invoker(Class protocol, InetSocketAddress addr, + UserGroupInformation ticket, Configuration conf, SocketFactory factory, + int rpcTimeout, RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth, AlignmentContext alignmentContext) + throws IOException { + this(protocol, Client.ConnectionId.getConnectionId( + addr, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf), + conf, factory, alignmentContext); + this.fallbackToSimpleAuth = fallbackToSimpleAuth; + } + + /** + * This constructor 
takes a connectionId, instead of creating a new one. + * + * @param protocol input protocol. + * @param connId input connId. + * @param conf input Configuration. + * @param factory input factory. + * @param alignmentContext Alignment context + */ + protected Invoker(Class protocol, Client.ConnectionId connId, + Configuration conf, SocketFactory factory, AlignmentContext alignmentContext) { + this.remoteId = connId; + this.client = CLIENTS.getClient(conf, factory, RpcWritable.Buffer.class); + this.protocolName = RPC.getProtocolName(protocol); + this.clientProtocolVersion = RPC + .getProtocolVersion(protocol); + this.alignmentContext = alignmentContext; + } + + private RequestHeaderProto constructRpcRequestHeader(Method method) { + RequestHeaderProto.Builder builder = RequestHeaderProto + .newBuilder(); + builder.setMethodName(method.getName()); + + + // For protobuf, {@code protocol} used when creating client side proxy is + // the interface extending BlockingInterface, which has the annotations + // such as ProtocolName etc. + // + // Using Method.getDeclaringClass(), as in WritableEngine to get at + // the protocol interface will return BlockingInterface, from where + // the annotation ProtocolName and Version cannot be + // obtained. + // + // Hence we simply use the protocol class used to create the proxy. + // For PB this may limit the use of mixins on client side. + builder.setDeclaringClassProtocolName(protocolName); + builder.setClientProtocolVersion(clientProtocolVersion); + return builder.build(); + } + + /** + * This is the client side invoker of RPC method. It only throws + * ServiceException, since the invocation proxy expects only + * ServiceException to be thrown by the method in case protobuf service. + * + * ServiceException has the following causes: + *
<ol> + * <li>Exceptions encountered on the client side in this method are + * set as cause in ServiceException as is.</li> + * <li>Exceptions from the server are wrapped in RemoteException and are + * set as cause in ServiceException</li> + * </ol>
    + * + * Note that the client calling protobuf RPC methods, must handle + * ServiceException by getting the cause from the ServiceException. If the + * cause is RemoteException, then unwrap it to get the exception thrown by + * the server. + */ + @Override + public Message invoke(Object proxy, final Method method, Object[] args) + throws ServiceException { + long startTime = 0; + if (LOG.isDebugEnabled()) { + startTime = Time.now(); + } + + if (args.length != 2) { // RpcController + Message + throw new ServiceException( + "Too many or few parameters for request. Method: [" + + method.getName() + "]" + ", Expected: 2, Actual: " + + args.length); + } + if (args[1] == null) { + throw new ServiceException("null param while calling Method: [" + + method.getName() + "]"); + } + + // if Tracing is on then start a new span for this rpc. + // guard it in the if statement to make sure there isn't + // any extra string manipulation. + Tracer tracer = Tracer.curThreadTracer(); + TraceScope traceScope = null; + if (tracer != null) { + traceScope = tracer.newScope(RpcClientUtil.methodToTraceString(method)); + } + + if (LOG.isTraceEnabled()) { + LOG.trace(Thread.currentThread().getId() + ": Call -> " + + remoteId + ": " + method.getName() + + " {" + TextFormat.shortDebugString((Message) args[1]) + "}"); + } + + + final Message theRequest = (Message) args[1]; + final RpcWritable.Buffer val; + try { + val = (RpcWritable.Buffer) client.call(RPC.RpcKind.RPC_PROTOCOL_BUFFER, + constructRpcRequest(method, theRequest), remoteId, + fallbackToSimpleAuth, alignmentContext); + + } catch (Throwable e) { + if (LOG.isTraceEnabled()) { + LOG.trace(Thread.currentThread().getId() + ": Exception <- " + + remoteId + ": " + method.getName() + + " {" + e + "}"); + } + if (traceScope != null) { + traceScope.addTimelineAnnotation("Call got exception: " + + e.toString()); + } + throw new ServiceException(e); + } finally { + if (traceScope != null) { + traceScope.close(); + } + } + + if 
(LOG.isDebugEnabled()) { + long callTime = Time.now() - startTime; + LOG.debug("Call: " + method.getName() + " took " + callTime + "ms"); + } + + if (Client.isAsynchronousMode()) { + final AsyncGet arr + = Client.getAsyncRpcResponse(); + final AsyncGet asyncGet = + new AsyncGet() { + @Override + public Message get(long timeout, TimeUnit unit) throws Exception { + return getReturnMessage(method, arr.get(timeout, unit)); + } + + @Override + public boolean isDone() { + return arr.isDone(); + } + }; + ASYNC_RETURN_MESSAGE.set(asyncGet); + return null; + } else { + return getReturnMessage(method, val); + } + } + + protected Writable constructRpcRequest(Method method, Message theRequest) { + RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method); + return new RpcProtobufRequest(rpcRequestHeader, theRequest); + } + + private Message getReturnMessage(final Method method, + final RpcWritable.Buffer buf) throws ServiceException { + Message prototype = null; + try { + prototype = getReturnProtoType(method); + } catch (Exception e) { + throw new ServiceException(e); + } + Message returnMessage; + try { + returnMessage = buf.getValue(prototype.getDefaultInstanceForType()); + + if (LOG.isTraceEnabled()) { + LOG.trace(Thread.currentThread().getId() + ": Response <- " + + remoteId + ": " + method.getName() + + " {" + TextFormat.shortDebugString(returnMessage) + "}"); + } + + } catch (Throwable e) { + throw new ServiceException(e); + } + return returnMessage; + } + + @Override + public void close() throws IOException { + if (!isClosed) { + isClosed = true; + CLIENTS.stopClient(client); + } + } + + private Message getReturnProtoType(Method method) throws Exception { + if (returnTypes.containsKey(method.getName())) { + return returnTypes.get(method.getName()); + } + + Class returnType = method.getReturnType(); + Method newInstMethod = returnType.getMethod("getDefaultInstance"); + newInstMethod.setAccessible(true); + Message prototype = (Message) 
newInstMethod.invoke(null, (Object[]) null); + returnTypes.put(method.getName(), prototype); + return prototype; + } + + @Override //RpcInvocationHandler + public ConnectionId getConnectionId() { + return remoteId; + } + + protected long getClientProtocolVersion() { + return clientProtocolVersion; + } + + protected String getProtocolName() { + return protocolName; + } + } + + @VisibleForTesting + @InterfaceAudience.Private + @InterfaceStability.Unstable + static Client getClient(Configuration conf) { + return CLIENTS.getClient(conf, SocketFactory.getDefault(), + RpcWritable.Buffer.class); + } + + + + @Override + public RPC.Server getServer(Class protocol, Object protocolImpl, + String bindAddress, int port, int numHandlers, int numReaders, + int queueSizePerHandler, boolean verbose, Configuration conf, + SecretManager secretManager, + String portRangeConfig, AlignmentContext alignmentContext) + throws IOException { + return new Server(protocol, protocolImpl, conf, bindAddress, port, + numHandlers, numReaders, queueSizePerHandler, verbose, secretManager, + portRangeConfig, alignmentContext); + } + + @VisibleForTesting + public static void clearClientCache() { + CLIENTS.clearCache(); + } + + public static class Server extends RPC.Server { + + static final ThreadLocal CURRENT_CALLBACK = + new ThreadLocal<>(); + + static final ThreadLocal CURRENT_CALL_INFO = new ThreadLocal<>(); + + static class CallInfo { + private final RPC.Server server; + private final String methodName; + + CallInfo(RPC.Server server, String methodName) { + this.server = server; + this.methodName = methodName; + } + + public RPC.Server getServer() { + return server; + } + + public String getMethodName() { + return methodName; + } + } + + static class ProtobufRpcEngineCallbackImpl + implements ProtobufRpcEngineCallback2 { + + private final RPC.Server server; + private final Call call; + private final String methodName; + private final long setupTime; + + ProtobufRpcEngineCallbackImpl() { + 
this.server = CURRENT_CALL_INFO.get().getServer(); + this.call = Server.getCurCall().get(); + this.methodName = CURRENT_CALL_INFO.get().getMethodName(); + this.setupTime = Time.now(); + } + + @Override + public void setResponse(Message message) { + long processingTime = Time.now() - setupTime; + call.setDeferredResponse(RpcWritable.wrap(message)); + server.updateDeferredMetrics(methodName, processingTime); + } + + @Override + public void error(Throwable t) { + long processingTime = Time.now() - setupTime; + String detailedMetricsName = t.getClass().getSimpleName(); + server.updateDeferredMetrics(detailedMetricsName, processingTime); + call.setDeferredError(t); + } + } + + @InterfaceStability.Unstable + public static ProtobufRpcEngineCallback2 registerForDeferredResponse2() { + ProtobufRpcEngineCallback2 callback = new ProtobufRpcEngineCallbackImpl(); + CURRENT_CALLBACK.set(callback); + return callback; + } + + /** + * Construct an RPC server. + * + * @param protocolClass the class of protocol + * @param protocolImpl the protocolImpl whose methods will be called + * @param conf the configuration to use + * @param bindAddress the address to bind on to listen for connection + * @param port the port to listen for connections on + * @param numHandlers the number of method handler threads to run + * @param numReaders number of read threads + * @param queueSizePerHandler the size of the queue contained + * in each Handler + * @param verbose whether each call should be logged + * @param secretManager the server-side secret manager for each token type + * @param portRangeConfig A config parameter that can be used to restrict + * the range of ports used when port is 0 (an ephemeral port) + * @param alignmentContext provides server state info on client responses + * @throws IOException raised on errors performing I/O. 
+ */ + public Server(Class protocolClass, Object protocolImpl, + Configuration conf, String bindAddress, int port, int numHandlers, + int numReaders, int queueSizePerHandler, boolean verbose, + SecretManager secretManager, + String portRangeConfig, AlignmentContext alignmentContext) + throws IOException { + super(bindAddress, port, null, numHandlers, + numReaders, queueSizePerHandler, conf, + serverNameFromClass(protocolImpl.getClass()), secretManager, + portRangeConfig); + setAlignmentContext(alignmentContext); + this.verbose = verbose; + registerProtocolAndImpl(RPC.RpcKind.RPC_PROTOCOL_BUFFER, protocolClass, + protocolImpl); + } + + //Use the latest protobuf rpc invoker itself as that is backward compatible. + private static final RpcInvoker RPC_INVOKER = new ProtoBufRpcInvoker(); + + @Override + protected RpcInvoker getServerRpcInvoker(RPC.RpcKind rpcKind) { + if (rpcKind == RPC.RpcKind.RPC_PROTOCOL_BUFFER) { + return RPC_INVOKER; + } + return super.getServerRpcInvoker(rpcKind); + } + + /** + * Protobuf invoker for {@link RpcInvoker}. + */ + static class ProtoBufRpcInvoker implements RpcInvoker { + private static ProtoClassProtoImpl getProtocolImpl(RPC.Server server, + String protoName, long clientVersion) throws RpcServerException { + ProtoNameVer pv = new ProtoNameVer(protoName, clientVersion); + ProtoClassProtoImpl impl = + server.getProtocolImplMap(RPC.RpcKind.RPC_PROTOCOL_BUFFER).get(pv); + if (impl == null) { // no match for Protocol AND Version + VerProtocolImpl highest = server.getHighestSupportedProtocol( + RPC.RpcKind.RPC_PROTOCOL_BUFFER, protoName); + if (highest == null) { + throw new RpcNoSuchProtocolException( + "Unknown protocol: " + protoName); + } + // protocol supported but not the version that client wants + throw new RPC.VersionMismatch(protoName, clientVersion, + highest.version); + } + return impl; + } + + @Override + /** + * This is a server side method, which is invoked over RPC. 
On success + * the return response has protobuf response payload. On failure, the + * exception name and the stack trace are returned in the response. + * See {@link HadoopRpcResponseProto} + * + * In this method there three types of exceptions possible and they are + * returned in response as follows. + *
<ol> + * <li>Exceptions encountered in this method that are returned + * as {@link RpcServerException}</li> + * <li>Exceptions thrown by the service is wrapped in ServiceException. + * In that this method returns in response the exception thrown by the + * service.</li> + * <li>Other exceptions thrown by the service. They are returned as + * it is.</li> + * </ol>
    + */ + public Writable call(RPC.Server server, String connectionProtocolName, + Writable writableRequest, long receiveTime) throws Exception { + RpcProtobufRequest request = (RpcProtobufRequest) writableRequest; + RequestHeaderProto rpcRequest = request.getRequestHeader(); + String methodName = rpcRequest.getMethodName(); + + /** + * RPCs for a particular interface (ie protocol) are done using a + * IPC connection that is setup using rpcProxy. + * The rpcProxy's has a declared protocol name that is + * sent form client to server at connection time. + * + * Each Rpc call also sends a protocol name + * (called declaringClassprotocolName). This name is usually the same + * as the connection protocol name except in some cases. + * For example metaProtocols such ProtocolInfoProto which get info + * about the protocol reuse the connection but need to indicate that + * the actual protocol is different (i.e. the protocol is + * ProtocolInfoProto) since they reuse the connection; in this case + * the declaringClassProtocolName field is set to the ProtocolInfoProto. 
+ */ + + String declaringClassProtoName = + rpcRequest.getDeclaringClassProtocolName(); + long clientVersion = rpcRequest.getClientProtocolVersion(); + return call(server, connectionProtocolName, request, receiveTime, + methodName, declaringClassProtoName, clientVersion); + } + + @SuppressWarnings("deprecation") + protected Writable call(RPC.Server server, String connectionProtocolName, + RpcWritable.Buffer request, long receiveTime, String methodName, + String declaringClassProtoName, long clientVersion) throws Exception { + if (server.verbose) { + LOG.info("Call: connectionProtocolName=" + connectionProtocolName + + ", method=" + methodName); + } + + ProtoClassProtoImpl protocolImpl = getProtocolImpl(server, + declaringClassProtoName, clientVersion); + if (protocolImpl.isShadedPBImpl()) { + return call(server, connectionProtocolName, request, methodName, + protocolImpl); + } + //Legacy protobuf implementation. Handle using legacy (Non-shaded) + // protobuf classes. + return ProtobufRpcEngine.Server + .processCall(server, connectionProtocolName, request, methodName, + protocolImpl); + } + + private RpcWritable call(RPC.Server server, + String connectionProtocolName, RpcWritable.Buffer request, + String methodName, ProtoClassProtoImpl protocolImpl) + throws Exception { + BlockingService service = (BlockingService) protocolImpl.protocolImpl; + MethodDescriptor methodDescriptor = service.getDescriptorForType() + .findMethodByName(methodName); + if (methodDescriptor == null) { + String msg = "Unknown method " + methodName + " called on " + + connectionProtocolName + " protocol."; + LOG.warn(msg); + throw new RpcNoSuchMethodException(msg); + } + Message prototype = service.getRequestPrototype(methodDescriptor); + Message param = request.getValue(prototype); + + Message result; + Call currentCall = Server.getCurCall().get(); + try { + server.rpcDetailedMetrics.init(protocolImpl.protocolClass); + CURRENT_CALL_INFO.set(new CallInfo(server, methodName)); + 
currentCall.setDetailedMetricsName(methodName); + result = service.callBlockingMethod(methodDescriptor, null, param); + // Check if this needs to be a deferred response, + // by checking the ThreadLocal callback being set + if (CURRENT_CALLBACK.get() != null) { + currentCall.deferResponse(); + CURRENT_CALLBACK.set(null); + return null; + } + } catch (ServiceException e) { + Exception exception = (Exception) e.getCause(); + currentCall.setDetailedMetricsName( + exception.getClass().getSimpleName()); + throw (Exception) e.getCause(); + } catch (Exception e) { + currentCall.setDetailedMetricsName(e.getClass().getSimpleName()); + throw e; + } finally { + CURRENT_CALL_INFO.set(null); + } + return RpcWritable.wrap(result); + } + } + } + + // htrace in the ipc layer creates the span name based on toString() + // which uses the rpc header. in the normal case we want to defer decoding + // the rpc header until needed by the rpc engine. + static class RpcProtobufRequest extends RpcWritable.Buffer { + private volatile RequestHeaderProto requestHeader; + private Message payload; + + RpcProtobufRequest() { + } + + RpcProtobufRequest(RequestHeaderProto header, Message payload) { + this.requestHeader = header; + this.payload = payload; + } + + RequestHeaderProto getRequestHeader() throws IOException { + if (getByteBuffer() != null && requestHeader == null) { + requestHeader = getValue(RequestHeaderProto.getDefaultInstance()); + } + return requestHeader; + } + + @Override + public void writeTo(ResponseBuffer out) throws IOException { + requestHeader.writeDelimitedTo(out); + if (payload != null) { + payload.writeDelimitedTo(out); + } + } + + // this is used by htrace to name the span. + @Override + public String toString() { + try { + RequestHeaderProto header = getRequestHeader(); + return header.getDeclaringClassProtocolName() + "." 
+ + header.getMethodName(); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback.java index 50b70ca4bec1a..f85adb17d3f8e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback.java @@ -18,12 +18,17 @@ package org.apache.hadoop.ipc; -import org.apache.hadoop.thirdparty.protobuf.Message; +import com.google.protobuf.Message; +/** + * This engine uses Protobuf 2.5.0. Recommended to upgrade to Protobuf 3.x + * from hadoop-thirdparty and use ProtobufRpcEngineCallback2. + */ +@Deprecated public interface ProtobufRpcEngineCallback { - public void setResponse(Message message); + void setResponse(Message message); - public void error(Throwable t); + void error(Throwable t); } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback2.java new file mode 100644 index 0000000000000..e8c09f56282e6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngineCallback2.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import org.apache.hadoop.thirdparty.protobuf.Message; + +public interface ProtobufRpcEngineCallback2 { + + public void setResponse(Message message); + + public void error(Throwable t); + +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolMetaInterface.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolMetaInterface.java index 29c07ac29cb87..f23c05936a356 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolMetaInterface.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolMetaInterface.java @@ -36,7 +36,7 @@ public interface ProtocolMetaInterface { * It is assumed that all method names are unique for a protocol. * @param methodName The name of the method * @return true if method is supported, otherwise false. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public boolean isMethodSupported(String methodName) throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolProxy.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolProxy.java index cc66958d14e08..49029f97b3d29 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolProxy.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolProxy.java @@ -85,11 +85,12 @@ public T getProxy() { } /** - * Check if a method is supported by the server or not + * Check if a method is supported by the server or not. * * @param methodName a method's name in String format * @param parameterTypes a method's parameter types * @return true if the method is supported by the server + * @throws IOException raised on errors performing I/O. */ public synchronized boolean isMethodSupported(String methodName, Class... parameterTypes) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java index 69d18eacd34d6..e1a1f0eb3e7a1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java @@ -29,7 +29,7 @@ import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; public class ProtocolSignature implements Writable { static { // register a ctor diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProxyCombiner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProxyCombiner.java index 99eb487be495c..843183f4b72d5 
100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProxyCombiner.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProxyCombiner.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.ipc; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import java.io.Closeable; import java.io.IOException; import java.lang.reflect.InvocationHandler; @@ -51,7 +51,7 @@ private ProxyCombiner() { } * to which proxy implements that method. If multiple proxies implement the * same method, the first in the list will be used for delegation. * - *

    This will check that every method on the combined interface is + * This will check that every method on the combined interface is * implemented by at least one of the supplied proxy objects. * * @param combinedProxyInterface The interface of the combined proxy. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java index 4f95863b03db6..c7ca09c60bad7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java @@ -51,6 +51,7 @@ import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SaslRpcServer; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.TokenIdentifier; @@ -149,6 +150,9 @@ static Class[] getProtocolInterfaces(Class protocol) { * Get the protocol name. * If the protocol class has a ProtocolAnnotation, then get the protocol * name from the annotation; otherwise the class name is the protocol name. + * + * @param protocol input protocol. + * @return protocol name. */ static public String getProtocolName(Class protocol) { if (protocol == null) { @@ -163,6 +167,9 @@ static public String getProtocolName(Class protocol) { * If the protocol class has a ProtocolAnnotation, * then get the protocol version from the annotation; * otherwise get it from the versionID field of the protocol class. + * + * @param protocol input protocol. + * @return ProtocolVersion. 
*/ static public long getProtocolVersion(Class protocol) { if (protocol == null) { @@ -195,14 +202,18 @@ private RPC() {} // no public ctor private static final String ENGINE_PROP = "rpc.engine"; /** - * Set a protocol to use a non-default RpcEngine. + * Set a protocol to use a non-default RpcEngine if one + * is not specified in the configuration. * @param conf configuration to use * @param protocol the protocol interface * @param engine the RpcEngine impl */ public static void setProtocolEngine(Configuration conf, Class protocol, Class engine) { - conf.setClass(ENGINE_PROP+"."+protocol.getName(), engine, RpcEngine.class); + if (conf.get(ENGINE_PROP+"."+protocol.getName()) == null) { + conf.setClass(ENGINE_PROP+"."+protocol.getName(), engine, + RpcEngine.class); + } } // return the RpcEngine configured to handle a protocol @@ -253,14 +264,14 @@ public String getInterfaceName() { } /** - * Get the client's preferred version + * @return Get the client's preferred version. */ public long getClientVersion() { return clientVersion; } /** - * Get the server's agreed to version. + * @return Get the server's agreed to version. */ public long getServerVersion() { return serverVersion; @@ -281,8 +292,9 @@ public RpcErrorCodeProto getRpcErrorCodeProto() { } /** - * Get a proxy connection to a remote server - * + * Get a proxy connection to a remote server. + * + * @param Generics Type T. * @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -301,8 +313,9 @@ public static T waitForProxy( /** * Get a protocol proxy that contains a proxy connection to a remote server - * and a set of methods that are supported by the server - * + * and a set of methods that are supported by the server. + * + * @param Generics Type T. 
* @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -319,8 +332,9 @@ public static ProtocolProxy waitForProtocolProxy(Class protocol, } /** - * Get a proxy connection to a remote server - * + * Get a proxy connection to a remote server. + * + * @param Generics Type T. * @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -339,7 +353,8 @@ public static T waitForProxy(Class protocol, long clientVersion, /** * Get a protocol proxy that contains a proxy connection to a remote server * and a set of methods that are supported by the server - * + * + * @param Generics Type T. * @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -357,8 +372,9 @@ public static ProtocolProxy waitForProtocolProxy(Class protocol, } /** - * Get a proxy connection to a remote server - * + * Get a proxy connection to a remote server. + * + * @param Generics Type T. * @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -379,16 +395,18 @@ public static T waitForProxy(Class protocol, /** * Get a protocol proxy that contains a proxy connection to a remote server - * and a set of methods that are supported by the server - * + * and a set of methods that are supported by the server. + * + * @param Generics Type. * @param protocol protocol class * @param clientVersion client version * @param addr remote address * @param conf configuration to use * @param rpcTimeout timeout for each RPC + * @param connectionRetryPolicy input connectionRetryPolicy. * @param timeout time in milliseconds before giving up * @return the proxy - * @throws IOException if the far end through a RemoteException + * @throws IOException if the far end through a RemoteException. 
*/ public static ProtocolProxy waitForProtocolProxy(Class protocol, long clientVersion, @@ -434,9 +452,18 @@ public static ProtocolProxy waitForProtocolProxy(Class protocol, } } - /** Construct a client-side proxy object that implements the named protocol, + /** + * Construct a client-side proxy object that implements the named protocol, * talking to a server at the named address. - * @param */ + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param conf input Configuration. + * @param factory input factory. + * @throws IOException raised on errors performing I/O. + * @return proxy. + */ public static T getProxy(Class protocol, long clientVersion, InetSocketAddress addr, Configuration conf, @@ -447,8 +474,9 @@ public static T getProxy(Class protocol, /** * Get a protocol proxy that contains a proxy connection to a remote server - * and a set of methods that are supported by the server - * + * and a set of methods that are supported by the server. + * + * @param Generics Type T. * @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -465,9 +493,21 @@ public static ProtocolProxy getProtocolProxy(Class protocol, return getProtocolProxy(protocol, clientVersion, addr, ugi, conf, factory); } - /** Construct a client-side proxy object that implements the named protocol, + /** + * Construct a client-side proxy object that implements the named protocol, * talking to a server at the named address. - * @param */ + * + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param ticket input tocket. + * @param conf input conf. + * @param factory input factory. + * @return the protocol proxy. + * @throws IOException raised on errors performing I/O. 
+ * + */ public static T getProxy(Class protocol, long clientVersion, InetSocketAddress addr, @@ -481,7 +521,8 @@ public static T getProxy(Class protocol, /** * Get a protocol proxy that contains a proxy connection to a remote server * and a set of methods that are supported by the server - * + * + * @param Generics Type T. * @param protocol protocol class * @param clientVersion client version * @param addr remote address @@ -500,12 +541,56 @@ public static ProtocolProxy getProtocolProxy(Class protocol, return getProtocolProxy(protocol, clientVersion, addr, ticket, conf, factory, getRpcTimeout(conf), null); } + + /** + * Get a protocol proxy that contains a proxy connection to a remote server + * and a set of methods that are supported by the server. + * + * @param Generics Type T + * @param protocol protocol class + * @param clientVersion client's version + * @param connId client connection identifier + * @param conf configuration + * @param factory socket factory + * @return the protocol proxy + * @throws IOException if the far end through a RemoteException + */ + public static ProtocolProxy getProtocolProxy(Class protocol, + long clientVersion, ConnectionId connId, Configuration conf, + SocketFactory factory) throws IOException { + return getProtocolProxy(protocol, clientVersion, connId, conf, + factory, null); + } + + /** + * Get a protocol proxy that contains a proxy connection to a remote server + * and a set of methods that are supported by the server. 
+ * + * @param Generics Type T + * @param protocol protocol class + * @param clientVersion client's version + * @param connId client connection identifier + * @param conf configuration + * @param factory socket factory + * @param alignmentContext StateID alignment context + * @return the protocol proxy + * @throws IOException if the far end through a RemoteException + */ + public static ProtocolProxy getProtocolProxy(Class protocol, + long clientVersion, ConnectionId connId, Configuration conf, + SocketFactory factory, AlignmentContext alignmentContext) throws IOException { + if (UserGroupInformation.isSecurityEnabled()) { + SaslRpcServer.init(conf); + } + return getProtocolEngine(protocol, conf).getProxy( + protocol, clientVersion, connId, conf, factory, alignmentContext); + } /** * Construct a client-side proxy that implements the named protocol, * talking to a server at the named address. - * @param - * + * + * @param Generics Type T. * @param protocol protocol * @param clientVersion client's version * @param addr server address @@ -529,8 +614,9 @@ public static T getProxy(Class protocol, /** * Get a protocol proxy that contains a proxy connection to a remote server - * and a set of methods that are supported by the server - * + * and a set of methods that are supported by the server. + * + * @param Generics Type T. * @param protocol protocol * @param clientVersion client's version * @param addr server address @@ -556,8 +642,9 @@ public static ProtocolProxy getProtocolProxy(Class protocol, /** * Get a protocol proxy that contains a proxy connection to a remote server - * and a set of methods that are supported by the server + * and a set of methods that are supported by the server. * + * @param Generics Type T. 
* @param protocol protocol * @param clientVersion client's version * @param addr server address @@ -604,6 +691,7 @@ public static ProtocolProxy getProtocolProxy(Class protocol, * @param fallbackToSimpleAuth set to true or false during calls to indicate * if a secure client falls back to simple auth * @param alignmentContext state alignment context + * @param Generics Type T. * @return the proxy * @throws IOException if any error occurs */ @@ -627,15 +715,15 @@ public static ProtocolProxy getProtocolProxy(Class protocol, } /** - * Construct a client-side proxy object with the default SocketFactory - * @param - * - * @param protocol - * @param clientVersion - * @param addr - * @param conf + * Construct a client-side proxy object with the default SocketFactory. + * + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param conf input Configuration. * @return a proxy instance - * @throws IOException + * @throws IOException if the thread is interrupted. */ public static T getProxy(Class protocol, long clientVersion, @@ -646,7 +734,8 @@ public static T getProxy(Class protocol, } /** - * Returns the server address for a given proxy. + * @return Returns the server address for a given proxy. + * @param proxy input proxy. */ public static InetSocketAddress getServerAddress(Object proxy) { return getConnectionIdForProxy(proxy).getAddress(); @@ -673,12 +762,13 @@ public static ConnectionId getConnectionIdForProxy(Object proxy) { * Get a protocol proxy that contains a proxy connection to a remote server * and a set of methods that are supported by the server * - * @param protocol - * @param clientVersion - * @param addr - * @param conf + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param conf input configuration. + * @param Generics Type T. 
* @return a protocol proxy - * @throws IOException + * @throws IOException if the thread is interrupted. */ public static ProtocolProxy getProtocolProxy(Class protocol, long clientVersion, @@ -762,75 +852,123 @@ public Builder(Configuration conf) { this.conf = conf; } - /** Mandatory field */ + /** + * @return Mandatory field. + * @param protocol input protocol. + */ public Builder setProtocol(Class protocol) { this.protocol = protocol; return this; } - /** Mandatory field */ + /** + * @return Mandatory field. + * @param instance input instance. + */ public Builder setInstance(Object instance) { this.instance = instance; return this; } - /** Default: 0.0.0.0 */ + /** + * @return Default: 0.0.0.0. + * @param bindAddress input bindAddress. + */ public Builder setBindAddress(String bindAddress) { this.bindAddress = bindAddress; return this; } - /** Default: 0 */ + /** + * @return Default: 0. + * @param port input port. + */ public Builder setPort(int port) { this.port = port; return this; } - /** Default: 1 */ + /** + * @return Default: 1. + * @param numHandlers input numHandlers. + */ public Builder setNumHandlers(int numHandlers) { this.numHandlers = numHandlers; return this; } - - /** Default: -1 */ + + /** + * @return Default: -1. + * @param numReaders input numReaders. + * @deprecated call {@link #setNumReaders(int value)} instead. + */ + @Deprecated public Builder setnumReaders(int numReaders) { this.numReaders = numReaders; return this; } + + /** + * Set the number of reader threads. + * + * @return this builder. + * @param value input numReaders. + * @since HADOOP-18625. + */ + public Builder setNumReaders(int value) { + this.numReaders = value; + return this; + } - /** Default: -1 */ + /** + * @return Default: -1. + * @param queueSizePerHandler + * input queueSizePerHandler. 
+ */ public Builder setQueueSizePerHandler(int queueSizePerHandler) { this.queueSizePerHandler = queueSizePerHandler; return this; } - /** Default: false */ + /** + * @return Default: false. + * @param verbose input verbose. + */ public Builder setVerbose(boolean verbose) { this.verbose = verbose; return this; } - /** Default: null */ + /** + * @return Default: null. + * @param secretManager input secretManager. + */ public Builder setSecretManager( SecretManager secretManager) { this.secretManager = secretManager; return this; } - /** Default: null */ + /** + * @return Default: null. + * @param portRangeConfig input portRangeConfig. + */ public Builder setPortRangeConfig(String portRangeConfig) { this.portRangeConfig = portRangeConfig; return this; } - /** Default: null */ + /** + * @return Default: null. + * @param alignmentContext input alignmentContext. + */ public Builder setAlignmentContext(AlignmentContext alignmentContext) { this.alignmentContext = alignmentContext; return this; } /** - * Build the RPC Server. + * @return Build the RPC Server. 
* @throws IOException on error * @throws HadoopIllegalArgumentException when mandatory fields are not set */ @@ -932,11 +1070,18 @@ public int hashCode() { */ static class ProtoClassProtoImpl { final Class protocolClass; - final Object protocolImpl; + final Object protocolImpl; + private final boolean shadedPBImpl; + ProtoClassProtoImpl(Class protocolClass, Object protocolImpl) { this.protocolClass = protocolClass; this.protocolImpl = protocolImpl; + this.shadedPBImpl = protocolImpl instanceof BlockingService; } + + public boolean isShadedPBImpl() { + return shadedPBImpl; + } } ArrayList> protocolImplMapArray = @@ -976,7 +1121,18 @@ void registerProtocolAndImpl(RpcKind rpcKind, Class protocolClass, " ProtocolImpl=" + protocolImpl.getClass().getName() + " protocolClass=" + protocolClass.getName()); } - } + String client = SecurityUtil.getClientPrincipal(protocolClass, getConf()); + if (client != null) { + // notify the server's rpc scheduler that the protocol user has + // highest priority. the scheduler should exempt the user from + // priority calculations. + try { + setPriorityLevel(UserGroupInformation.createRemoteUser(client), -1); + } catch (Exception ex) { + LOG.warn("Failed to set scheduling priority for " + client, ex); + } + } + } static class VerProtocolImpl { final long version; @@ -1043,7 +1199,7 @@ protected Server(String bindAddress, int port, private void initProtocolMetaInfo(Configuration conf) { RPC.setProtocolEngine(conf, ProtocolMetaInfoPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); ProtocolMetaInfoServerSideTranslatorPB xlator = new ProtocolMetaInfoServerSideTranslatorPB(this); BlockingService protocolInfoBlockingService = ProtocolInfoService @@ -1054,6 +1210,7 @@ private void initProtocolMetaInfo(Configuration conf) { /** * Add a protocol to the existing server. 
+ * @param rpcKind - input rpcKind * @param protocolClass - the protocol class * @param protocolImpl - the impl of the protocol that will be called * @return the server (for convenience) @@ -1067,7 +1224,7 @@ public Server addProtocol(RpcKind rpcKind, Class protocolClass, @Override public Writable call(RPC.RpcKind rpcKind, String protocol, Writable rpcRequest, long receiveTime) throws Exception { - return getRpcInvoker(rpcKind).call(this, protocol, rpcRequest, + return getServerRpcInvoker(rpcKind).call(this, protocol, rpcRequest, receiveTime); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshCallQueueProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshCallQueueProtocol.java index 553f9a00d4cca..b1aa0197040a2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshCallQueueProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshCallQueueProtocol.java @@ -41,7 +41,7 @@ public interface RefreshCallQueueProtocol { /** * Refresh the callqueue. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Idempotent void refreshCallQueue() throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshRegistry.java index e67e8d9cbeb92..0cc0b8ba3d8b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshRegistry.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RefreshRegistry.java @@ -20,9 +20,9 @@ import java.util.ArrayList; import java.util.Collection; -import com.google.common.base.Joiner; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; import org.apache.hadoop.classification.InterfaceStability; import org.slf4j.Logger; @@ -72,6 +72,7 @@ public synchronized void register(String identifier, RefreshHandler handler) { /** * Remove the registered object for a given identity. * @param identifier the resource to unregister + * @param handler input handler. * @return the true if removed */ public synchronized boolean unregister(String identifier, RefreshHandler handler) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RemoteException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RemoteException.java index f1142d35e72c2..da08c3d152e61 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RemoteException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RemoteException.java @@ -124,8 +124,9 @@ private IOException instantiateException(Class cls) } /** - * Create RemoteException from attributes - * @param attrs may not be null + * Create RemoteException from attributes. 
+ * @param attrs may not be null. + * @return RemoteException. */ public static RemoteException valueOf(Attributes attrs) { return new RemoteException(attrs.getValue("class"), diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java index 4bde261eab1b0..b32319faa8a43 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java @@ -28,8 +28,8 @@ import org.apache.hadoop.util.LightWeightGSet; import org.apache.hadoop.util.LightWeightGSet.LinkedElement; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,11 +49,11 @@ public class RetryCache { private static final int MAX_CAPACITY = 16; /** - * CacheEntry is tracked using unique client ID and callId of the RPC request + * CacheEntry is tracked using unique client ID and callId of the RPC request. */ public static class CacheEntry implements LightWeightCache.Entry { /** - * Processing state of the requests + * Processing state of the requests. */ private static byte INPROGRESS = 0; private static byte SUCCESS = 1; @@ -233,7 +233,7 @@ public RetryCacheMetrics getMetricsForTests() { } /** - * This method returns cache name for metrics. + * @return This method returns cache name for metrics. */ public String getCacheName() { return cacheName; @@ -302,6 +302,9 @@ private CacheEntry waitForCompletion(CacheEntry newEntry) { /** * Add a new cache entry into the retry cache. The cache entry consists of * clientId and callId extracted from editlog. + * + * @param clientId input clientId. 
+ * @param callId input callId. */ public void addCacheEntry(byte[] clientId, int callId) { CacheEntry newEntry = new CacheEntry(clientId, callId, System.nanoTime() @@ -340,7 +343,11 @@ private static CacheEntryWithPayload newEntry(Object payload, payload, System.nanoTime() + expirationTime); } - /** Static method that provides null check for retryCache */ + /** + * Static method that provides null check for retryCache. + * @param cache input Cache. + * @return CacheEntry. + */ public static CacheEntry waitForCompletion(RetryCache cache) { if (skipRetryCache()) { return null; @@ -349,7 +356,12 @@ public static CacheEntry waitForCompletion(RetryCache cache) { .waitForCompletion(newEntry(cache.expirationTime)) : null; } - /** Static method that provides null check for retryCache */ + /** + * Static method that provides null check for retryCache. + * @param cache input cache. + * @param payload input payload. + * @return CacheEntryWithPayload. + */ public static CacheEntryWithPayload waitForCompletion(RetryCache cache, Object payload) { if (skipRetryCache()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientUtil.java index 84ecba1d34e9c..4af35ad9270f1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientUtil.java @@ -103,7 +103,7 @@ private static Map getVersionSignatureMap( * @param version The version at the client. * @param methodName Name of the method. * @return true if the method is supported, false otherwise. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public static boolean isMethodSupported(Object rpcProxy, Class protocol, RPC.RpcKind rpcKind, long version, String methodName) throws IOException { @@ -114,7 +114,7 @@ public static boolean isMethodSupported(Object rpcProxy, Class protocol, if (versionMap == null) { Configuration conf = new Configuration(); RPC.setProtocolEngine(conf, ProtocolMetaInfoPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); ProtocolMetaInfoPB protocolInfoProxy = getProtocolMetaInfoProxy(rpcProxy, conf); GetProtocolSignatureRequestProto.Builder builder = @@ -200,6 +200,8 @@ private static ProtocolMetaInfoPB getProtocolMetaInfoProxy(Object proxy, * * the format we want is: * ClientNamenodeProtocol#getServerDefaults + * @param method input method. + * @return methodToTraceString. */ public static String methodToTraceString(Method method) { Class clazz = method.getDeclaringClass(); @@ -221,6 +223,8 @@ public static String methodToTraceString(Method method) { * * the format we want is: * ClientProtocol#getBlockLocations + * @param fullName input fullName. + * @return toTraceName. */ public static String toTraceName(String fullName) { int lastPeriod = fullName.lastIndexOf('.'); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java index 0f5769e705028..f322f6eb98abb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java @@ -36,15 +36,62 @@ @InterfaceStability.Evolving public interface RpcEngine { - /** Construct a client-side proxy object. - * @param */ + /** + * Construct a client-side proxy object. + * + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param ticket input ticket. + * @param conf input Configuration. 
+ * @param factory input factory. + * @param rpcTimeout input rpcTimeout. + * @param connectionRetryPolicy input connectionRetryPolicy. + * @throws IOException raised on errors performing I/O. + * @return ProtocolProxy. + */ ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy) throws IOException; - /** Construct a client-side proxy object. */ + /** + * Construct a client-side proxy object with a ConnectionId. + * + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param connId input ConnectionId. + * @param conf input Configuration. + * @param factory input factory. + * @param alignmentContext Alignment context + * @throws IOException raised on errors performing I/O. + * @return ProtocolProxy. + */ + ProtocolProxy getProxy(Class protocol, long clientVersion, + Client.ConnectionId connId, Configuration conf, SocketFactory factory, + AlignmentContext alignmentContext) + throws IOException; + + /** + * Construct a client-side proxy object. + * + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param ticket input tocket. + * @param conf input Configuration. + * @param factory input factory. + * @param rpcTimeout input rpcTimeout. + * @param connectionRetryPolicy input connectionRetryPolicy. + * @param fallbackToSimpleAuth input fallbackToSimpleAuth. + * @param alignmentContext input alignmentContext. + * @throws IOException raised on errors performing I/O. + * @return ProtocolProxy. + */ ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, @@ -87,7 +134,7 @@ RPC.Server getServer(Class protocol, Object instance, String bindAddress, * @param conf, Configuration. 
* @param factory, Socket factory. * @return Proxy object. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ ProtocolProxy getProtocolMetaInfoProxy( ConnectionId connId, Configuration conf, SocketFactory factory) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcScheduler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcScheduler.java index 63812f47f2db0..bffe5f2d257fc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcScheduler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcScheduler.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ipc; -import java.util.concurrent.TimeUnit; +import org.apache.hadoop.ipc.metrics.RpcMetrics; /** * Implement this interface to be used for RPC scheduling and backoff. @@ -26,7 +26,8 @@ */ public interface RpcScheduler { /** - * Returns priority level greater than zero as a hint for scheduling. + * @return Returns priority level greater than zero as a hint for scheduling. + * @param obj input obj. */ int getPriorityLevel(Schedulable obj); @@ -37,6 +38,12 @@ public interface RpcScheduler { * implementations. It will not be called by any Hadoop code, and should not * be implemented by new implementations. * + * @param name input name. + * @param priorityLevel input priorityLevel. + * @param queueTime input queueTime. + * @param processingTime input processingTime. + * @throws UnsupportedOperationException + * the requested operation is not supported. * @deprecated Use * {@link #addResponseTime(String, Schedulable, ProcessingDetails)} instead. */ @@ -62,12 +69,12 @@ default void addResponseTime(String callName, Schedulable schedulable, // this interface, a default implementation is supplied which uses the old // method. All new implementations MUST override this interface and should // NOT use the other addResponseTime method. 
- int queueTimeMs = (int) - details.get(ProcessingDetails.Timing.QUEUE, TimeUnit.MILLISECONDS); - int processingTimeMs = (int) - details.get(ProcessingDetails.Timing.PROCESSING, TimeUnit.MILLISECONDS); + int queueTime = (int) details.get(ProcessingDetails.Timing.QUEUE, + RpcMetrics.DEFAULT_METRIC_TIME_UNIT); + int processingTime = (int) details.get(ProcessingDetails.Timing.PROCESSING, + RpcMetrics.DEFAULT_METRIC_TIME_UNIT); addResponseTime(callName, schedulable.getPriorityLevel(), - queueTimeMs, processingTimeMs); + queueTime, processingTime); } void stop(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java index 992997ead25de..ce4aac54b6cd2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java @@ -47,14 +47,14 @@ public RpcServerException(final String message, final Throwable cause) { } /** - * get the rpc status corresponding to this exception + * @return get the rpc status corresponding to this exception. */ public RpcStatusProto getRpcStatusProto() { return RpcStatusProto.ERROR; } /** - * get the detailed rpc status corresponding to this exception + * @return get the detailed rpc status corresponding to this exception. 
*/ public RpcErrorCodeProto getRpcErrorCodeProto() { return RpcErrorCodeProto.ERROR_RPC_SERVER; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcWritable.java index 6604bd0cc1c68..f5f0d071f39ed 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcWritable.java @@ -42,6 +42,8 @@ static RpcWritable wrap(Object o) { return (RpcWritable)o; } else if (o instanceof Message) { return new ProtobufWrapper((Message)o); + } else if (o instanceof com.google.protobuf.Message) { + return new ProtobufWrapperLegacy((com.google.protobuf.Message) o); } else if (o instanceof Writable) { return new WritableWrapper((Writable)o); } @@ -132,6 +134,49 @@ T readFrom(ByteBuffer bb) throws IOException { } } + // adapter for Protobufs. + static class ProtobufWrapperLegacy extends RpcWritable { + private com.google.protobuf.Message message; + + ProtobufWrapperLegacy(com.google.protobuf.Message message) { + this.message = message; + } + + com.google.protobuf.Message getMessage() { + return message; + } + + @Override + void writeTo(ResponseBuffer out) throws IOException { + int length = message.getSerializedSize(); + length += com.google.protobuf.CodedOutputStream. + computeUInt32SizeNoTag(length); + out.ensureCapacity(length); + message.writeDelimitedTo(out); + } + + @SuppressWarnings("unchecked") + @Override + T readFrom(ByteBuffer bb) throws IOException { + // using the parser with a byte[]-backed coded input stream is the + // most efficient way to deserialize a protobuf. it has a direct + // path to the PB ctor that doesn't create multi-layered streams + // that internally buffer. 
+ com.google.protobuf.CodedInputStream cis = + com.google.protobuf.CodedInputStream.newInstance( + bb.array(), bb.position() + bb.arrayOffset(), bb.remaining()); + try { + cis.pushLimit(cis.readRawVarint32()); + message = message.getParserForType().parseFrom(cis); + cis.checkLastTagWas(0); + } finally { + // advance over the bytes read. + bb.position(bb.position() + cis.getTotalBytesRead()); + } + return (T)message; + } + } + /** * adapter to allow decoding of writables and protobufs from a byte buffer. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java index 3b28d85428b3f..00c9994e2a4a6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java @@ -29,5 +29,19 @@ public interface Schedulable { public UserGroupInformation getUserGroupInformation(); + /** + * This is overridden only in {@link Server.Call}. + * The CallerContext field will be used to carry information + * about the user in cases where UGI proves insufficient. + * Any other classes that might try to use this method, + * will get an UnsupportedOperationException. 
+ * + * @return an instance of CallerContext if method + * is overridden else get an UnsupportedOperationException + */ + default CallerContext getCallerContext() { + throw new UnsupportedOperationException("Invalid operation."); + } + int getPriorityLevel(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 4448164f4b137..939180b195cc4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -65,9 +65,13 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; +import java.util.stream.Collectors; import javax.security.sasl.Sasl; import javax.security.sasl.SaslException; @@ -99,6 +103,7 @@ import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcSaslProto; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcSaslProto.SaslAuth; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcSaslProto.SaslState; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RPCTraceInfoProto; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.SaslPropertiesResolver; @@ -118,12 +123,14 @@ import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.SpanId; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; - +import 
org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.SpanContext; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceUtils; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.thirdparty.protobuf.ByteString; import org.apache.hadoop.thirdparty.protobuf.CodedOutputStream; import org.apache.hadoop.thirdparty.protobuf.Message; @@ -182,8 +189,11 @@ public void setAlignmentContext(AlignmentContext alignmentContext) { * e.g., terse exception group for concise logging messages */ static class ExceptionsHandler { - private volatile Set terseExceptions = new HashSet<>(); - private volatile Set suppressedExceptions = new HashSet<>(); + + private final Set terseExceptions = + ConcurrentHashMap.newKeySet(); + private final Set suppressedExceptions = + ConcurrentHashMap.newKeySet(); /** * Add exception classes for which server won't log stack traces. @@ -191,8 +201,10 @@ static class ExceptionsHandler { * @param exceptionClass exception classes */ void addTerseLoggingExceptions(Class... exceptionClass) { - // Thread-safe replacement of terseExceptions. - terseExceptions = addExceptions(terseExceptions, exceptionClass); + terseExceptions.addAll(Arrays + .stream(exceptionClass) + .map(Class::toString) + .collect(Collectors.toSet())); } /** @@ -201,9 +213,10 @@ void addTerseLoggingExceptions(Class... exceptionClass) { * @param exceptionClass exception classes */ void addSuppressedLoggingExceptions(Class... exceptionClass) { - // Thread-safe replacement of suppressedExceptions. 
- suppressedExceptions = addExceptions( - suppressedExceptions, exceptionClass); + suppressedExceptions.addAll(Arrays + .stream(exceptionClass) + .map(Class::toString) + .collect(Collectors.toSet())); } boolean isTerseLog(Class t) { @@ -214,23 +227,6 @@ boolean isSuppressedLog(Class t) { return suppressedExceptions.contains(t.toString()); } - /** - * Return a new set containing all the exceptions in exceptionsSet - * and exceptionClass. - * @return - */ - private static Set addExceptions( - final Set exceptionsSet, Class[] exceptionClass) { - // Make a copy of the exceptionSet for performing modification - final HashSet newSet = new HashSet<>(exceptionsSet); - - // Add all class names into the HashSet - for (Class name : exceptionClass) { - newSet.add(name.toString()); - } - - return Collections.unmodifiableSet(newSet); - } } @@ -274,10 +270,10 @@ static class RpcKindMapValue { * Register a RPC kind and the class to deserialize the rpc request. * * Called by static initializers of rpcKind Engines - * @param rpcKind + * @param rpcKind - input rpcKind. * @param rpcRequestWrapperClass - this class is used to deserialze the * the rpc request. - * @param rpcInvoker - use to process the calls on SS. + * @param rpcInvoker - use to process the calls on SS. */ public static void registerProtocolEngine(RPC.RpcKind rpcKind, @@ -304,7 +300,11 @@ public Class getRpcRequestWrapper( RpcKindMapValue val = rpcKindMap.get(ProtoUtil.convert(rpcKind)); return (val == null) ? null : val.rpcRequestWrapperClass; } - + + protected RpcInvoker getServerRpcInvoker(RPC.RpcKind rpcKind) { + return getRpcInvoker(rpcKind); + } + public static RpcInvoker getRpcInvoker(RPC.RpcKind rpcKind) { RpcKindMapValue val = rpcKindMap.get(rpcKind); return (val == null) ? null : val.rpcInvoker; @@ -332,7 +332,7 @@ static Class getProtocolClass(String protocolName, Configuration conf) return protocol; } - /** Returns the server instance called under or null. 
May be called under + /** @return Returns the server instance called under or null. May be called under * {@link #call(Writable, long)} implementations, and under {@link Writable} * methods of paramters and return values. Permits applications to access * the server context.*/ @@ -345,7 +345,7 @@ public static Server get() { */ private static final ThreadLocal CurCall = new ThreadLocal(); - /** Get the current call */ + /** @return Get the current call. */ @VisibleForTesting public static ThreadLocal getCurCall() { return CurCall; @@ -372,12 +372,22 @@ public static int getCallRetryCount() { return call != null ? call.retryCount : RpcConstants.INVALID_RETRY_COUNT; } - /** Returns the remote side ip address when invoked inside an RPC - * Returns null incase of an error. + /** + * @return Returns the remote side ip address when invoked inside an RPC + * Returns null in case of an error. */ public static InetAddress getRemoteIp() { Call call = CurCall.get(); - return (call != null ) ? call.getHostInetAddress() : null; + return (call != null) ? call.getHostInetAddress() : null; + } + + /** + * @return Returns the remote side port when invoked inside an RPC + * Returns 0 in case of an error. + */ + public static int getRemotePort() { + Call call = CurCall.get(); + return (call != null) ? call.getRemotePort() : 0; } /** @@ -407,14 +417,14 @@ public static String getAuxiliaryPortEstablishedQOP() { } /** - * Returns the clientId from the current RPC request + * @return Returns the clientId from the current RPC request. */ public static byte[] getClientId() { Call call = CurCall.get(); return call != null ? call.clientId : RpcConstants.DUMMY_CLIENT_ID; } - - /** Returns remote address as a string when invoked inside an RPC. + + /** @return Returns remote address as a string when invoked inside an RPC. * Returns null in case of an error. */ public static String getRemoteAddress() { @@ -436,14 +446,14 @@ public static String getProtocol() { return (call != null) ? 
call.getProtocol() : null; } - /** Return true if the invocation was through an RPC. + /** @return Return true if the invocation was through an RPC. */ public static boolean isRpcInvocation() { return CurCall.get() != null; } /** - * Return the priority level assigned by call queue to an RPC + * @return Return the priority level assigned by call queue to an RPC * Returns 0 in case no priority is assigned. */ public static int getPriorityLevel() { @@ -451,7 +461,7 @@ public static int getPriorityLevel() { return call != null? call.getPriorityLevel() : 0; } - private String bindAddress; + private String bindAddress; private int port; // port we listen on private int handlerCount; // number of handler threads private int readThreads; // number of read threads @@ -459,7 +469,7 @@ public static int getPriorityLevel() { private Class rpcRequestClass; // class used for deserializing the rpc request final protected RpcMetrics rpcMetrics; final protected RpcDetailedMetrics rpcDetailedMetrics; - + private Configuration conf; private String portRangeConfig = null; private SecretManager secretManager; @@ -482,6 +492,8 @@ protected ResponseBuffer initialValue() { volatile private boolean running = true; // true while server runs private CallQueueManager callQueue; + private long purgeIntervalNanos; + // maintains the set of client connections and handles idle timeouts private ConnectionManager connectionManager; private Listener listener = null; @@ -491,6 +503,11 @@ protected ResponseBuffer initialValue() { private Map auxiliaryListenerMap; private Responder responder = null; private Handler[] handlers = null; + private final LongAdder totalRequests = new LongAdder(); + private long lastSeenTotalRequests = 0; + private long totalRequestsPerSecond = 0; + private final long metricsUpdaterInterval; + private final ScheduledExecutorService scheduledExecutorService; private boolean logSlowRPC = false; @@ -502,15 +519,37 @@ protected boolean isLogSlowRPC() { return logSlowRPC; } + 
public long getTotalRequests() { + return totalRequests.sum(); + } + + public long getTotalRequestsPerSecond() { + return totalRequestsPerSecond; + } + /** * Sets slow RPC flag. - * @param logSlowRPCFlag + * @param logSlowRPCFlag input logSlowRPCFlag. */ @VisibleForTesting protected void setLogSlowRPC(boolean logSlowRPCFlag) { this.logSlowRPC = logSlowRPCFlag; } + private void setPurgeIntervalNanos(int purgeInterval) { + int tmpPurgeInterval = CommonConfigurationKeysPublic. + IPC_SERVER_PURGE_INTERVAL_MINUTES_DEFAULT; + if (purgeInterval > 0) { + tmpPurgeInterval = purgeInterval; + } + this.purgeIntervalNanos = TimeUnit.NANOSECONDS.convert( + tmpPurgeInterval, TimeUnit.MINUTES); + } + + @VisibleForTesting + public long getPurgeIntervalNanos() { + return this.purgeIntervalNanos; + } /** * Logs a Slow RPC Request. @@ -538,19 +577,20 @@ void logSlowRpcCalls(String methodName, Call call, (rpcMetrics.getProcessingStdDev() * deviation); long processingTime = - details.get(Timing.PROCESSING, RpcMetrics.TIMEUNIT); + details.get(Timing.PROCESSING, rpcMetrics.getMetricsTimeUnit()); if ((rpcMetrics.getProcessingSampleCount() > minSampleSize) && (processingTime > threeSigma)) { LOG.warn( "Slow RPC : {} took {} {} to process from client {}," + " the processing detail is {}", - methodName, processingTime, RpcMetrics.TIMEUNIT, call, + methodName, processingTime, rpcMetrics.getMetricsTimeUnit(), call, details.toString()); rpcMetrics.incrSlowRpc(); } } void updateMetrics(Call call, long startTime, boolean connDropped) { + totalRequests.increment(); // delta = handler + processing + response long deltaNanos = Time.monotonicNowNanos() - startTime; long timestampNanos = call.timestampNanos; @@ -564,7 +604,7 @@ void updateMetrics(Call call, long startTime, boolean connDropped) { deltaNanos -= details.get(Timing.RESPONSE); details.set(Timing.HANDLER, deltaNanos); - long queueTime = details.get(Timing.QUEUE, RpcMetrics.TIMEUNIT); + long queueTime = details.get(Timing.QUEUE, 
rpcMetrics.getMetricsTimeUnit()); rpcMetrics.addRpcQueueTime(queueTime); if (call.isResponseDeferred() || connDropped) { @@ -573,9 +613,9 @@ void updateMetrics(Call call, long startTime, boolean connDropped) { } long processingTime = - details.get(Timing.PROCESSING, RpcMetrics.TIMEUNIT); + details.get(Timing.PROCESSING, rpcMetrics.getMetricsTimeUnit()); long waitTime = - details.get(Timing.LOCKWAIT, RpcMetrics.TIMEUNIT); + details.get(Timing.LOCKWAIT, rpcMetrics.getMetricsTimeUnit()); rpcMetrics.addRpcLockWaitTime(waitTime); rpcMetrics.addRpcProcessingTime(processingTime); // don't include lock wait for detailed metrics. @@ -639,7 +679,22 @@ public static void bind(ServerSocket socket, InetSocketAddress address, address.getPort(), e); } } - + + @VisibleForTesting + int getPriorityLevel(Schedulable e) { + return callQueue.getPriorityLevel(e); + } + + @VisibleForTesting + int getPriorityLevel(UserGroupInformation ugi) { + return callQueue.getPriorityLevel(ugi); + } + + @VisibleForTesting + void setPriorityLevel(UserGroupInformation ugi, int priority) { + callQueue.setPriorityLevel(ugi, priority); + } + /** * Returns a handle to the rpcMetrics (required in tests) * @return rpc metrics @@ -666,6 +721,9 @@ Connection[] getConnections() { /** * Refresh the service authorization ACL for the service handled by this server. + * + * @param conf input Configuration. + * @param provider input PolicyProvider. */ public void refreshServiceAcl(Configuration conf, PolicyProvider provider) { serviceAuthorizationManager.refresh(conf, provider); @@ -674,6 +732,9 @@ public void refreshServiceAcl(Configuration conf, PolicyProvider provider) { /** * Refresh the service authorization ACL for the service handled by this server * using the specified Configuration. + * + * @param conf input Configuration. + * @param provider input provider. 
*/ @Private public void refreshServiceAclWithLoadedConfiguration(Configuration conf, @@ -759,12 +820,12 @@ public static class Call implements Schedulable, private volatile String detailedMetricsName = ""; final int callId; // the client's call id final int retryCount; // the retry count of the call - long timestampNanos; // time the call was received + private final long timestampNanos; // time the call was received long responseTimestampNanos; // time the call was served private AtomicInteger responseWaitCount = new AtomicInteger(1); final RPC.RpcKind rpcKind; final byte[] clientId; - private final TraceScope traceScope; // the HTrace scope on the server side + private final Span span; // the trace span on the server side private final CallerContext callerContext; // the call context private boolean deferredResponse = false; private int priorityLevel; @@ -779,7 +840,7 @@ public static class Call implements Schedulable, Call(Call call) { this(call.callId, call.retryCount, call.rpcKind, call.clientId, - call.traceScope, call.callerContext); + call.span, call.callerContext); } Call(int id, int retryCount, RPC.RpcKind kind, byte[] clientId) { @@ -793,14 +854,14 @@ public Call(int id, int retryCount, Void ignore1, Void ignore2, } Call(int id, int retryCount, RPC.RpcKind kind, byte[] clientId, - TraceScope traceScope, CallerContext callerContext) { + Span span, CallerContext callerContext) { this.callId = id; this.retryCount = retryCount; this.timestampNanos = Time.monotonicNowNanos(); this.responseTimestampNanos = timestampNanos; this.rpcKind = kind; this.clientId = clientId; - this.traceScope = traceScope; + this.span = span; this.callerContext = callerContext; this.clientStateId = Long.MIN_VALUE; this.isCallCoordinated = false; @@ -844,6 +905,9 @@ public UserGroupInformation getRemoteUser() { public InetAddress getHostInetAddress() { return null; } + public int getRemotePort() { + return 0; + } public String getHostAddress() { InetAddress addr = 
getHostInetAddress(); return (addr != null) ? addr.getHostAddress() : null; @@ -898,6 +962,11 @@ public UserGroupInformation getUserGroupInformation() { return getRemoteUser(); } + @Override + public CallerContext getCallerContext() { + return this.callerContext; + } + @Override public int getPriorityLevel() { return this.priorityLevel; @@ -938,6 +1007,10 @@ public void setDeferredResponse(Writable response) { public void setDeferredError(Throwable t) { } + + public long getTimestampNanos() { + return timestampNanos; + } } /** A RPC extended call queued for handling. */ @@ -969,8 +1042,8 @@ private class RpcCall extends Call { RpcCall(Connection connection, int id, int retryCount, Writable param, RPC.RpcKind kind, byte[] clientId, - TraceScope traceScope, CallerContext context) { - super(id, retryCount, kind, clientId, traceScope, context); + Span span, CallerContext context) { + super(id, retryCount, kind, clientId, span, context); this.connection = connection; this.rpcRequest = param; } @@ -1001,6 +1074,11 @@ public InetAddress getHostInetAddress() { return connection.getHostInetAddress(); } + @Override + public int getRemotePort() { + return connection.getRemotePort(); + } + @Override public Void run() throws Exception { if (!connection.channel.isOpen()) { @@ -1014,7 +1092,7 @@ public Void run() throws Exception { try { value = call( - rpcKind, connection.protocolName, rpcRequest, timestampNanos); + rpcKind, connection.protocolName, rpcRequest, getTimestampNanos()); } catch (Throwable e) { populateResponseParamsOnError(e, responseParams); } @@ -1206,8 +1284,7 @@ private class Listener extends Thread { bind(acceptChannel.socket(), address, backlogLength, conf, portRangeConfig); //Could be an ephemeral port this.listenPort = acceptChannel.socket().getLocalPort(); - Thread.currentThread().setName("Listener at " + - bindAddress + "/" + this.listenPort); + LOG.info("Listener at {}:{}", bindAddress, this.listenPort); // create a selector; selector= Selector.open(); 
readers = new Reader[readThreads]; @@ -1468,9 +1545,6 @@ Reader getReader() { } } - private final static long PURGE_INTERVAL_NANOS = TimeUnit.NANOSECONDS.convert( - 15, TimeUnit.MINUTES); - // Sends responses of RPC back to clients. private class Responder extends Thread { private final Selector writeSelector; @@ -1506,7 +1580,7 @@ private void doRunLoop() { try { waitPending(); // If a channel is being registered, wait. writeSelector.select( - TimeUnit.NANOSECONDS.toMillis(PURGE_INTERVAL_NANOS)); + TimeUnit.NANOSECONDS.toMillis(purgeIntervalNanos)); Iterator iter = writeSelector.selectedKeys().iterator(); while (iter.hasNext()) { SelectionKey key = iter.next(); @@ -1529,7 +1603,7 @@ private void doRunLoop() { } } long nowNanos = Time.monotonicNowNanos(); - if (nowNanos < lastPurgeTimeNanos + PURGE_INTERVAL_NANOS) { + if (nowNanos < lastPurgeTimeNanos + purgeIntervalNanos) { continue; } lastPurgeTimeNanos = nowNanos; @@ -1607,7 +1681,7 @@ private void doPurge(RpcCall call, long now) { Iterator iter = responseQueue.listIterator(0); while (iter.hasNext()) { call = iter.next(); - if (now > call.responseTimestampNanos + PURGE_INTERVAL_NANOS) { + if (now > call.responseTimestampNanos + purgeIntervalNanos) { closeConnection(call.connection); break; } @@ -1801,11 +1875,26 @@ public class Connection { private long lastContact; private int dataLength; private Socket socket; + // Cache the remote host & port info so that even if the socket is // disconnected, we can say where it used to connect to. - private String hostAddress; - private int remotePort; - private InetAddress addr; + + /** + * Client Host IP address from where the socket connection is being established to the Server. + */ + private final String hostAddress; + /** + * Client remote port used for the given socket connection. + */ + private final int remotePort; + /** + * Address to which the socket is connected to. 
+ */ + private final InetAddress addr; + /** + * Client Host address from where the socket connection is being established to the Server. + */ + private final String hostName; IpcConnectionContextProto connectionContext; String protocolName; @@ -1849,8 +1938,12 @@ public Connection(SocketChannel channel, long lastContact, this.isOnAuxiliaryPort = isOnAuxiliaryPort; if (addr == null) { this.hostAddress = "*Unknown*"; + this.hostName = this.hostAddress; } else { + // host IP address this.hostAddress = addr.getHostAddress(); + // host name for the IP address + this.hostName = addr.getHostName(); } this.remotePort = socket.getPort(); this.responseQueue = new LinkedList(); @@ -1866,7 +1959,7 @@ public Connection(SocketChannel channel, long lastContact, @Override public String toString() { - return getHostAddress() + ":" + remotePort; + return hostName + ":" + remotePort + " / " + hostAddress + ":" + remotePort; } boolean setShouldClose() { @@ -1885,6 +1978,10 @@ public int getIngressPort() { return ingressPort; } + public int getRemotePort() { + return remotePort; + } + public InetAddress getHostInetAddress() { return addr; } @@ -2039,7 +2136,7 @@ private void saslProcess(RpcSaslProto saslMessage) LOG.debug("SASL server successfully authenticated client: " + user); } rpcMetrics.incrAuthenticationSuccesses(); - AUDITLOG.info(AUTH_SUCCESSFUL_FOR + user); + AUDITLOG.info(AUTH_SUCCESSFUL_FOR + user + " from " + toString()); saslContextEstablished = true; } } catch (RpcServerException rse) { // don't re-wrap @@ -2183,7 +2280,7 @@ private void doSaslReply(Message message) throws IOException { private void doSaslReply(Exception ioe) throws IOException { setupResponse(authFailedCall, RpcStatusProto.FATAL, RpcErrorCodeProto.FATAL_UNAUTHORIZED, - null, ioe.getClass().getName(), ioe.toString()); + null, ioe.getClass().getName(), ioe.getMessage()); sendResponse(authFailedCall); } @@ -2228,7 +2325,7 @@ private void checkDataLength(int dataLength) throws IOException { * @return -1 in 
case of error, else num bytes read so far * @throws IOException - internal error that should not be returned to * client, typically failure to respond to client - * @throws InterruptedException + * @throws InterruptedException - if the thread is interrupted. */ public int readAndProcess() throws IOException, InterruptedException { while (!shouldClose()) { // stop if a fatal response has been sent. @@ -2264,19 +2361,18 @@ public int readAndProcess() throws IOException, InterruptedException { return -1; } - if(!RpcConstants.HEADER.equals(dataLengthBuffer)) { - LOG.warn("Incorrect RPC Header length from {}:{} " - + "expected length: {} got length: {}", - hostAddress, remotePort, RpcConstants.HEADER, dataLengthBuffer); + if (!RpcConstants.HEADER.equals(dataLengthBuffer)) { + LOG.warn("Incorrect RPC Header length from {}:{} / {}:{}. Expected: {}. Actual: {}", + hostName, remotePort, hostAddress, remotePort, RpcConstants.HEADER, + dataLengthBuffer); setupBadVersionResponse(version); return -1; } if (version != CURRENT_VERSION) { //Warning is ok since this is not supposed to happen. - LOG.warn("Version mismatch from " + - hostAddress + ":" + remotePort + - " got version " + version + - " expected version " + CURRENT_VERSION); + LOG.warn("Version mismatch from {}:{} / {}:{}. " + + "Expected version: {}. Actual version: {} ", hostName, + remotePort, hostAddress, remotePort, CURRENT_VERSION, version); setupBadVersionResponse(version); return -1; } @@ -2578,8 +2674,7 @@ private void processOneRpc(ByteBuffer bb) final RpcCall call = new RpcCall(this, callId, retry); setupResponse(call, rse.getRpcStatusProto(), rse.getRpcErrorCodeProto(), null, - t.getClass().getName(), - t.getMessage() != null ? 
t.getMessage() : t.toString()); + t.getClass().getName(), t.getMessage()); sendResponse(call); } } @@ -2654,19 +2749,24 @@ private void processRpcRequest(RpcRequestHeaderProto header, throw new FatalRpcServerException( RpcErrorCodeProto.FATAL_DESERIALIZING_REQUEST, err); } - - TraceScope traceScope = null; + + Span span = null; if (header.hasTraceInfo()) { - if (tracer != null) { - // If the incoming RPC included tracing info, always continue the - // trace - SpanId parentSpanId = new SpanId( - header.getTraceInfo().getTraceId(), - header.getTraceInfo().getParentId()); - traceScope = tracer.newScope( - RpcClientUtil.toTraceName(rpcRequest.toString()), - parentSpanId); - traceScope.detach(); + RPCTraceInfoProto traceInfoProto = header.getTraceInfo(); + if (traceInfoProto.hasSpanContext()) { + if (tracer == null) { + setTracer(Tracer.curThreadTracer()); + } + if (tracer != null) { + // If the incoming RPC included tracing info, always continue the + // trace + SpanContext spanCtx = TraceUtils.byteStringToSpanContext( + traceInfoProto.getSpanContext()); + if (spanCtx != null) { + span = tracer.newSpan( + RpcClientUtil.toTraceName(rpcRequest.toString()), spanCtx); + } + } } } @@ -2682,21 +2782,21 @@ private void processRpcRequest(RpcRequestHeaderProto header, RpcCall call = new RpcCall(this, header.getCallId(), header.getRetryCount(), rpcRequest, ProtoUtil.convert(header.getRpcKind()), - header.getClientId().toByteArray(), traceScope, callerContext); + header.getClientId().toByteArray(), span, callerContext); // Save the priority level assignment by the scheduler call.setPriorityLevel(callQueue.getPriorityLevel(call)); call.markCallCoordinated(false); if(alignmentContext != null && call.rpcRequest != null && - (call.rpcRequest instanceof ProtobufRpcEngine.RpcProtobufRequest)) { + (call.rpcRequest instanceof ProtobufRpcEngine2.RpcProtobufRequest)) { // if call.rpcRequest is not RpcProtobufRequest, will skip the following // step and treat the call as uncoordinated. 
As currently only certain // ClientProtocol methods request made through RPC protobuf needs to be // coordinated. String methodName; String protoName; - ProtobufRpcEngine.RpcProtobufRequest req = - (ProtobufRpcEngine.RpcProtobufRequest) call.rpcRequest; + ProtobufRpcEngine2.RpcProtobufRequest req = + (ProtobufRpcEngine2.RpcProtobufRequest) call.rpcRequest; try { methodName = req.getRequestHeader().getMethodName(); protoName = req.getRequestHeader().getDeclaringClassProtocolName(); @@ -2929,16 +3029,16 @@ public void run() { */ // Re-queue the call and continue requeueCall(call); + call = null; continue; } if (LOG.isDebugEnabled()) { LOG.debug(Thread.currentThread().getName() + ": " + call + " for RpcKind " + call.rpcKind); } CurCall.set(call); - if (call.traceScope != null) { - call.traceScope.reattach(); - traceScope = call.traceScope; - traceScope.getSpan().addTimelineAnnotation("called"); + if (call.span != null) { + traceScope = tracer.activateSpan(call.span); + call.span.addTimelineAnnotation("called"); } // always update the current call context CallerContext.setCurrent(call.callerContext); @@ -2953,14 +3053,14 @@ public void run() { if (running) { // unexpected -- log it LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e); if (traceScope != null) { - traceScope.getSpan().addTimelineAnnotation("unexpectedly interrupted: " + + traceScope.addTimelineAnnotation("unexpectedly interrupted: " + StringUtils.stringifyException(e)); } } } catch (Exception e) { LOG.info(Thread.currentThread().getName() + " caught an exception", e); if (traceScope != null) { - traceScope.getSpan().addTimelineAnnotation("Exception: " + + traceScope.addTimelineAnnotation("Exception: " + StringUtils.stringifyException(e)); } } finally { @@ -3040,6 +3140,18 @@ protected Server(String bindAddress, int port, * Class, RPC.RpcInvoker)} * This parameter has been retained for compatibility with existing tests * and usage. + * + * @param bindAddress input bindAddress. 
+ * @param port input port. + * @param rpcRequestClass input rpcRequestClass. + * @param handlerCount input handlerCount. + * @param numReaders input numReaders. + * @param queueSizePerHandler input queueSizePerHandler. + * @param conf input Configuration. + * @param serverName input serverName. + * @param secretManager input secretManager. + * @param portRangeConfig input portRangeConfig. + * @throws IOException raised on errors performing I/O. */ @SuppressWarnings("unchecked") protected Server(String bindAddress, int port, @@ -3110,6 +3222,10 @@ protected Server(String bindAddress, int port, CommonConfigurationKeysPublic.IPC_SERVER_LOG_SLOW_RPC, CommonConfigurationKeysPublic.IPC_SERVER_LOG_SLOW_RPC_DEFAULT)); + this.setPurgeIntervalNanos(conf.getInt( + CommonConfigurationKeysPublic.IPC_SERVER_PURGE_INTERVAL_MINUTES_KEY, + CommonConfigurationKeysPublic.IPC_SERVER_PURGE_INTERVAL_MINUTES_DEFAULT)); + // Create the responder here responder = new Responder(); @@ -3121,6 +3237,14 @@ protected Server(String bindAddress, int port, this.exceptionsHandler.addTerseLoggingExceptions(StandbyException.class); this.exceptionsHandler.addTerseLoggingExceptions( HealthCheckFailedException.class); + this.metricsUpdaterInterval = + conf.getLong(CommonConfigurationKeysPublic.IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL, + CommonConfigurationKeysPublic.IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL_DEFAULT); + this.scheduledExecutorService = new ScheduledThreadPoolExecutor(1, + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Hadoop-Metrics-Updater-%d") + .build()); + this.scheduledExecutorService.scheduleWithFixedDelay(new MetricsUpdateRunner(), + metricsUpdaterInterval, metricsUpdaterInterval, TimeUnit.MILLISECONDS); } public synchronized void addAuxiliaryListener(int auxiliaryPort) @@ -3366,7 +3490,10 @@ Configuration getConf() { return conf; } - /** Sets the socket buffer size used for responding to RPCs */ + /** + * Sets the socket buffer size used for responding to RPCs. 
+ * @param size input size. + */ public void setSocketSendBufSize(int size) { this.socketSendBufferSize = size; } public void setTracer(Tracer t) { @@ -3412,13 +3539,30 @@ public synchronized void stop() { } responder.interrupt(); notifyAll(); + shutdownMetricsUpdaterExecutor(); this.rpcMetrics.shutdown(); this.rpcDetailedMetrics.shutdown(); } - /** Wait for the server to be stopped. + private void shutdownMetricsUpdaterExecutor() { + this.scheduledExecutorService.shutdown(); + try { + boolean isExecutorShutdown = + this.scheduledExecutorService.awaitTermination(3, TimeUnit.SECONDS); + if (!isExecutorShutdown) { + LOG.info("Hadoop Metrics Updater executor could not be shutdown."); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.info("Hadoop Metrics Updater executor shutdown interrupted.", e); + } + } + + /** + * Wait for the server to be stopped. * Does not wait for all subthreads to finish. * See {@link #stop()}. + * @throws InterruptedException if the thread is interrupted. */ public synchronized void join() throws InterruptedException { while (running) { @@ -3455,13 +3599,25 @@ public synchronized Set getAuxiliaryListenerAddresses() { * Called for each call. * @deprecated Use {@link #call(RPC.RpcKind, String, * Writable, long)} instead + * @param param input param. + * @param receiveTime input receiveTime. + * @throws Exception if any error occurs. + * @return Call */ @Deprecated public Writable call(Writable param, long receiveTime) throws Exception { return call(RPC.RpcKind.RPC_BUILTIN, null, param, receiveTime); } - /** Called for each call. */ + /** + * Called for each call. + * @param rpcKind input rpcKind. + * @param protocol input protocol. + * @param param input param. + * @param receiveTime input receiveTime. + * @return Call. + * @throws Exception raised on errors performing I/O. 
+ */ public abstract Writable call(RPC.RpcKind rpcKind, String protocol, Writable param, long receiveTime) throws Exception; @@ -3509,7 +3665,7 @@ public int getNumOpenConnections() { } /** - * Get the NumOpenConnections/User. + * @return Get the NumOpenConnections/User. */ public String getNumOpenConnectionsPerUser() { ObjectMapper mapper = new ObjectMapper(); @@ -3710,7 +3866,7 @@ void incrUserConnections(String user) { if (count == null) { count = 1; } else { - count++; + count = count + 1; } userToConnectionsMap.put(user, count); } @@ -3722,7 +3878,7 @@ void decrUserConnections(String user) { if (count == null) { return; } else { - count--; + count = count - 1; } if (count == 0) { userToConnectionsMap.remove(user); @@ -3861,4 +4017,32 @@ protected int getMaxIdleTime() { public String getServerName() { return serverName; } + + /** + * Server metrics updater thread, used to update some metrics on a regular basis. + * For instance, requests per second. + */ + private class MetricsUpdateRunner implements Runnable { + + private long lastExecuted = 0; + + @Override + public synchronized void run() { + long currentTime = Time.monotonicNow(); + if (lastExecuted == 0) { + lastExecuted = currentTime - metricsUpdaterInterval; + } + long currentTotalRequests = totalRequests.sum(); + long totalRequestsDiff = currentTotalRequests - lastSeenTotalRequests; + lastSeenTotalRequests = currentTotalRequests; + if ((currentTime - lastExecuted) > 0) { + double totalRequestsPerSecInDouble = + (double) totalRequestsDiff / TimeUnit.MILLISECONDS.toSeconds( + currentTime - lastExecuted); + totalRequestsPerSecond = ((long) totalRequestsPerSecInDouble); + } + lastExecuted = currentTime; + } + } + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UserIdentityProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UserIdentityProvider.java index 763605e6a464f..91ec1a259f134 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UserIdentityProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UserIdentityProvider.java @@ -31,6 +31,6 @@ public String makeIdentity(Schedulable obj) { return null; } - return ugi.getUserName(); + return ugi.getShortUserName(); } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/VersionedProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/VersionedProtocol.java index 4d02027a0e688..98daa84187464 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/VersionedProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/VersionedProtocol.java @@ -46,6 +46,7 @@ public long getProtocolVersion(String protocol, * a list of its supported methods * @see ProtocolSignature#getProtocolSignature(VersionedProtocol, String, * long, int) for a default implementation + * @throws IOException raised on errors performing I/O. */ public ProtocolSignature getProtocolSignature(String protocol, long clientVersion, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedTimeCostProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedTimeCostProvider.java index 4304b24299f29..aa218eb743561 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedTimeCostProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedTimeCostProvider.java @@ -30,7 +30,7 @@ * {@link org.apache.hadoop.fs.CommonConfigurationKeys#IPC_COST_PROVIDER_KEY} * configuration key. * - *

    This allows for configuration of how heavily each of the operations + *

    This allows for configuration of how heavily each of the operations

    * within {@link ProcessingDetails} is weighted. By default, * {@link ProcessingDetails.Timing#LOCKFREE}, * {@link ProcessingDetails.Timing#RESPONSE}, and diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index b303f8494b63c..d92bcea5d2eff 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -39,8 +39,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.*; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -282,9 +282,20 @@ static Client getClient(Configuration conf) { return CLIENTS.getClient(conf); } - /** Construct a client-side proxy object that implements the named protocol, + /** + * Construct a client-side proxy object that implements the named protocol, * talking to a server at the named address. - * @param */ + * @param Generics Type T + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param ticket input ticket. + * @param conf input configuration. + * @param factory input factory. + * @param rpcTimeout input rpcTimeout. + * @param connectionRetryPolicy input connectionRetryPolicy. + * @throws IOException raised on errors performing I/O. 
+ */ @Override public ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, @@ -295,9 +306,45 @@ public ProtocolProxy getProxy(Class protocol, long clientVersion, rpcTimeout, connectionRetryPolicy, null, null); } - /** Construct a client-side proxy object that implements the named protocol, + /** + * Construct a client-side proxy object with a ConnectionId. + * + * @param Generics Type T. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param connId input ConnectionId. + * @param conf input Configuration. + * @param factory input factory. + * @param alignmentContext Alignment context + * @throws IOException raised on errors performing I/O. + * @return ProtocolProxy. + */ + @Override + public ProtocolProxy getProxy(Class protocol, long clientVersion, + Client.ConnectionId connId, Configuration conf, SocketFactory factory, + AlignmentContext alignmentContext) + throws IOException { + return getProxy(protocol, clientVersion, connId.getAddress(), + connId.getTicket(), conf, factory, connId.getRpcTimeout(), + connId.getRetryPolicy(), null, alignmentContext); + } + + /** + * Construct a client-side proxy object that implements the named protocol, * talking to a server at the named address. - * @param */ + * @param Generics Type. + * @param protocol input protocol. + * @param clientVersion input clientVersion. + * @param addr input addr. + * @param ticket input ticket. + * @param conf input configuration. + * @param factory input factory. + * @param rpcTimeout input rpcTimeout. + * @param connectionRetryPolicy input connectionRetryPolicy. + * @param fallbackToSimpleAuth input fallbackToSimpleAuth. + * @param alignmentContext input alignmentContext. + * @return ProtocolProxy. 
+ */ @Override @SuppressWarnings("unchecked") public ProtocolProxy getProxy(Class protocol, long clientVersion, @@ -345,7 +392,8 @@ public static class Server extends RPC.Server { * @param bindAddress the address to bind on to listen for connection * @param port the port to listen for connections on * - * @deprecated Use #Server(Class, Object, Configuration, String, int) + * @deprecated Use #Server(Class, Object, Configuration, String, int) + * @throws IOException raised on errors performing I/O. */ @Deprecated public Server(Object instance, Configuration conf, String bindAddress, @@ -360,6 +408,7 @@ public Server(Object instance, Configuration conf, String bindAddress, * @param conf the configuration to use * @param bindAddress the address to bind on to listen for connection * @param port the port to listen for connections on + * @throws IOException raised on errors performing I/O. */ public Server(Class protocolClass, Object protocolImpl, Configuration conf, String bindAddress, int port) @@ -376,9 +425,13 @@ public Server(Class protocolClass, Object protocolImpl, * @param port the port to listen for connections on * @param numHandlers the number of method handler threads to run * @param verbose whether each call should be logged + * @param numReaders input numberReaders. + * @param queueSizePerHandler input queueSizePerHandler. + * @param secretManager input secretManager. * * @deprecated use Server#Server(Class, Object, * Configuration, String, int, int, int, int, boolean, SecretManager) + * @throws IOException raised on errors performing I/O. */ @Deprecated public Server(Object protocolImpl, Configuration conf, String bindAddress, @@ -401,9 +454,14 @@ public Server(Object protocolImpl, Configuration conf, String bindAddress, * @param port the port to listen for connections on * @param numHandlers the number of method handler threads to run * @param verbose whether each call should be logged + * @param secretManager input secretManager. 
+ * @param queueSizePerHandler input queueSizePerHandler. + * @param portRangeConfig input portRangeConfig. + * @param numReaders input numReaders. * * @deprecated use Server#Server(Class, Object, * Configuration, String, int, int, int, int, boolean, SecretManager) + * @throws IOException raised on errors performing I/O. */ @Deprecated public Server(Class protocolClass, Object protocolImpl, @@ -428,6 +486,11 @@ public Server(Class protocolClass, Object protocolImpl, * @param numHandlers the number of method handler threads to run * @param verbose whether each call should be logged * @param alignmentContext provides server state info on client responses + * @param numReaders input numReaders. + * @param portRangeConfig input portRangeConfig. + * @param queueSizePerHandler input queueSizePerHandler. + * @param secretManager input secretManager. + * @throws IOException raised on errors performing I/O. */ public Server(Class protocolClass, Object protocolImpl, Configuration conf, String bindAddress, int port, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/DecayRpcSchedulerDetailedMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/DecayRpcSchedulerDetailedMetrics.java index 04a6c0eab1c42..046bb2d9719ba 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/DecayRpcSchedulerDetailedMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/DecayRpcSchedulerDetailedMetrics.java @@ -27,7 +27,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class is for maintaining queue (priority) level related @@ -65,6 +65,7 @@ public static DecayRpcSchedulerDetailedMetrics create(String ns) { /** * Initialize the metrics for JMX with priority levels. 
+ * @param numLevels input numLevels. */ public void init(int numLevels) { LOG.info("Initializing RPC stats for {} priority levels", numLevels); @@ -106,14 +107,16 @@ public void shutdown() { } /** - * Returns the rate name inside the metric. + * @return Returns the rate name inside the metric. + * @param priority input priority. */ public String getQueueName(int priority) { return "DecayRPCSchedulerPriority."+priority+".RpcQueueTime"; } /** - * Returns the rate name inside the metric. + * @return Returns the rate name inside the metric. + * @param priority input priority. */ public String getProcessingName(int priority) { return "DecayRPCSchedulerPriority."+priority+".RpcProcessingTime"; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java index bb4bfcfd08be5..07c0b665551d7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java @@ -19,7 +19,8 @@ import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.classification.InterfaceAudience; @@ -48,9 +49,12 @@ public class RpcMetrics { final MetricsRegistry registry; final String name; final boolean rpcQuantileEnable; + + public static final TimeUnit DEFAULT_METRIC_TIME_UNIT = + TimeUnit.MILLISECONDS; /** The time unit used when storing/accessing time durations. 
*/ - public final static TimeUnit TIMEUNIT = TimeUnit.MILLISECONDS; - + private final TimeUnit metricsTimeUnit; + RpcMetrics(Server server, Configuration conf) { String port = String.valueOf(server.getListenerAddress().getPort()); name = "RpcActivityForPort" + port; @@ -63,6 +67,7 @@ public class RpcMetrics { rpcQuantileEnable = (intervals.length > 0) && conf.getBoolean( CommonConfigurationKeys.RPC_METRICS_QUANTILE_ENABLE, CommonConfigurationKeys.RPC_METRICS_QUANTILE_ENABLE_DEFAULT); + metricsTimeUnit = getMetricsTimeUnit(conf); if (rpcQuantileEnable) { rpcQueueTimeQuantiles = new MutableQuantiles[intervals.length]; @@ -75,19 +80,19 @@ public class RpcMetrics { for (int i = 0; i < intervals.length; i++) { int interval = intervals[i]; rpcQueueTimeQuantiles[i] = registry.newQuantiles("rpcQueueTime" - + interval + "s", "rpc queue time in " + TIMEUNIT, "ops", + + interval + "s", "rpc queue time in " + metricsTimeUnit, "ops", "latency", interval); rpcLockWaitTimeQuantiles[i] = registry.newQuantiles( "rpcLockWaitTime" + interval + "s", - "rpc lock wait time in " + TIMEUNIT, "ops", + "rpc lock wait time in " + metricsTimeUnit, "ops", "latency", interval); rpcProcessingTimeQuantiles[i] = registry.newQuantiles( "rpcProcessingTime" + interval + "s", - "rpc processing time in " + TIMEUNIT, "ops", + "rpc processing time in " + metricsTimeUnit, "ops", "latency", interval); deferredRpcProcessingTimeQuantiles[i] = registry.newQuantiles( "deferredRpcProcessingTime" + interval + "s", - "deferred rpc processing time in " + TIMEUNIT, "ops", + "deferred rpc processing time in " + metricsTimeUnit, "ops", "latency", interval); } } @@ -141,6 +146,37 @@ public String numOpenConnectionsPerUser() { return server.getNumDroppedConnections(); } + @Metric("Number of total requests") + public long getTotalRequests() { + return server.getTotalRequests(); + } + + @Metric("Number of total requests per second") + public long getTotalRequestsPerSecond() { + return server.getTotalRequestsPerSecond(); 
+ } + + public TimeUnit getMetricsTimeUnit() { + return metricsTimeUnit; + } + + public static TimeUnit getMetricsTimeUnit(Configuration conf) { + TimeUnit metricsTimeUnit = RpcMetrics.DEFAULT_METRIC_TIME_UNIT; + String timeunit = conf.get(CommonConfigurationKeys.RPC_METRICS_TIME_UNIT); + if (StringUtils.isNotEmpty(timeunit)) { + try { + metricsTimeUnit = TimeUnit.valueOf(timeunit); + } catch (IllegalArgumentException e) { + LOG.info("Config key {} 's value {} does not correspond to enum values" + + " of java.util.concurrent.TimeUnit. Hence default unit" + + " {} will be used", + CommonConfigurationKeys.RPC_METRICS_TIME_UNIT, timeunit, + RpcMetrics.DEFAULT_METRIC_TIME_UNIT); + } + } + return metricsTimeUnit; + } + // Public instrumentation methods that could be extracted to an // abstract class if we decide to do custom instrumentation classes a la // JobTrackerInstrumentation. The methods with //@Override comment are diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java index f20933b5c8668..85f2d2828562d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java @@ -64,27 +64,31 @@ * functionality is provided through the * {@link MBeanServer#queryNames(ObjectName, javax.management.QueryExp)} * method. + *

    *

    * For example http://.../jmx?qry=Hadoop:* will return * all hadoop metrics exposed through JMX. + *

    *

    * The optional get parameter is used to query an specific * attribute of a JMX bean. The format of the URL is * http://.../jmx?get=MXBeanName::AttributeName + *

    *

    * For example * * http://../jmx?get=Hadoop:service=NameNode,name=NameNodeInfo::ClusterId * will return the cluster id of the namenode mxbean. + *

    *

    * If the qry or the get parameter is not formatted - * correctly then a 400 BAD REQUEST http response code will be returned. + * correctly then a 400 BAD REQUEST http response code will be returned. + *

    *

    * If a resouce such as a mbean or attribute can not be found, * a 404 SC_NOT_FOUND http response code will be returned. - *

    + *

    * The return format is JSON and in the form - *

    *

    
      *  {
      *    "beans" : [
    @@ -95,7 +99,6 @@
      *    ]
      *  }
      *  
    - *

    * The servlet attempts to convert the the JMXBeans into JSON. Each * bean's attributes will be converted to a JSON object member. * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java index 79eae12314493..52552ddc543b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java @@ -32,8 +32,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Jdk14Logger; @@ -66,6 +66,8 @@ public class LogLevel { public static final String PROTOCOL_HTTPS = "https"; /** * A command line implementation + * @param args input args. + * @throws Exception exception. 
*/ public static void main(String[] args) throws Exception { CLI cli = new CLI(new Configuration()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogThrottlingHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogThrottlingHelper.java index eb147ca47ee19..78b60e4117616 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogThrottlingHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogThrottlingHelper.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.log; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.util.HashMap; import java.util.Map; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; @@ -65,7 +65,7 @@ *

    This class can also be used to coordinate multiple logging points; see * {@link #record(String, long, double...)} for more details. * - *

    This class is not thread-safe. + *

    This class is thread-safe. */ public class LogThrottlingHelper { @@ -88,21 +88,22 @@ public class LogThrottlingHelper { public interface LogAction { /** - * Return the number of records encapsulated in this action; that is, the + * @return Return the number of records encapsulated in this action; that is, the * number of times {@code record} was called to produce this action, * including the current one. */ int getCount(); /** - * Return summary information for the value that was recorded at index + * @return Return summary information for the value that was recorded at index * {@code idx}. Corresponds to the ordering of values passed to * {@link #record(double...)}. + * @param idx input idx. */ SummaryStatistics getStats(int idx); /** - * If this is true, the caller should write to its log. Otherwise, the + * @return If this is true, the caller should write to its log. Otherwise, the * caller should take no action, and it is an error to call other methods * on this object. */ @@ -139,6 +140,7 @@ public interface LogAction { * Create a log helper without any primary recorder. * * @see #LogThrottlingHelper(long, String) + * @param minLogPeriodMs input minLogPeriodMs. */ public LogThrottlingHelper(long minLogPeriodMs) { this(minLogPeriodMs, null); @@ -190,7 +192,7 @@ public LogThrottlingHelper(long minLogPeriodMs, String primaryRecorderName) { * @return A LogAction indicating whether or not the caller should write to * its log. */ - public LogAction record(double... values) { + public synchronized LogAction record(double... values) { return record(DEFAULT_RECORDER_NAME, timer.monotonicNow(), values); } @@ -242,7 +244,7 @@ public LogAction record(double... values) { * * @see #record(double...) */ - public LogAction record(String recorderName, long currentTimeMs, + public synchronized LogAction record(String recorderName, long currentTimeMs, double... 
values) { if (primaryRecorderName == null) { primaryRecorderName = recorderName; @@ -260,9 +262,15 @@ public LogAction record(String recorderName, long currentTimeMs, if (primaryRecorderName.equals(recorderName) && currentTimeMs - minLogPeriodMs >= lastLogTimestampMs) { lastLogTimestampMs = currentTimeMs; - for (LoggingAction log : currentLogs.values()) { - log.setShouldLog(); - } + currentLogs.replaceAll((key, log) -> { + LoggingAction newLog = log; + if (log.hasLogged()) { + // create a fresh log since the old one has already been logged + newLog = new LoggingAction(log.getValueCount()); + } + newLog.setShouldLog(); + return newLog; + }); } if (currentLog.shouldLog()) { currentLog.setHasLogged(); @@ -279,7 +287,7 @@ public LogAction record(String recorderName, long currentTimeMs, * @param idx The index value. * @return The summary information. */ - public SummaryStatistics getCurrentStats(String recorderName, int idx) { + public synchronized SummaryStatistics getCurrentStats(String recorderName, int idx) { LoggingAction currentLog = currentLogs.get(recorderName); if (currentLog != null) { return currentLog.getStats(idx); @@ -306,6 +314,13 @@ public static String getLogSupressionMessage(LogAction action) { } } + @VisibleForTesting + public synchronized void reset() { + primaryRecorderName = null; + currentLogs.clear(); + lastLogTimestampMs = Long.MIN_VALUE; + } + /** * A standard log action which keeps track of all of the values which have * been logged. This is also used for internal bookkeeping via its private @@ -355,6 +370,10 @@ private void setHasLogged() { hasLogged = true; } + private int getValueCount() { + return stats.length; + } + private void recordValues(double... 
values) { if (values.length != stats.length) { throw new IllegalArgumentException("received " + values.length + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/AbstractMetric.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/AbstractMetric.java index e2574f647e3a2..a9e777bcba952 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/AbstractMetric.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/AbstractMetric.java @@ -18,13 +18,13 @@ package org.apache.hadoop.metrics2; -import com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import java.util.StringJoiner; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** * The immutable metric diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java index a277abd6e1384..fef8c4b7e4ba9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java @@ -50,7 +50,7 @@ public abstract class MetricsSystem implements MetricsSystemMXBean { * the annotations of the source object.) * @param desc the description of the source (or null. See above.) * @return the source object - * @exception MetricsException + * @exception MetricsException Metrics Exception. 
*/ public abstract T register(String name, String desc, T source); @@ -65,7 +65,7 @@ public abstract class MetricsSystem implements MetricsSystemMXBean { * @param the actual type of the source object * @param source object to register * @return the source object - * @exception MetricsException + * @exception MetricsException Metrics Exception. */ public T register(T source) { return register(null, null, source); @@ -85,7 +85,7 @@ public T register(T source) { * @param name of the sink. Must be unique. * @param desc the description of the sink * @return the sink - * @exception MetricsException + * @exception MetricsException Metrics Exception. */ public abstract T register(String name, String desc, T sink); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystemMXBean.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystemMXBean.java index e471ab7498ce4..8656da6f316c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystemMXBean.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystemMXBean.java @@ -29,19 +29,19 @@ public interface MetricsSystemMXBean { /** * Start the metrics system - * @throws MetricsException + * @throws MetricsException Metrics Exception. */ public void start(); /** * Stop the metrics system - * @throws MetricsException + * @throws MetricsException Metrics Exception. */ public void stop(); /** * Start metrics MBeans - * @throws MetricsException + * @throws MetricsException Metrics Exception. */ public void startMetricsMBeans(); @@ -49,7 +49,7 @@ public interface MetricsSystemMXBean { * Stop metrics MBeans. * Note, it doesn't stop the metrics system control MBean, * i.e this interface. - * @throws MetricsException + * @throws MetricsException Metrics Exception. 
*/ public void stopMetricsMBeans(); @@ -57,7 +57,7 @@ public interface MetricsSystemMXBean { * @return the current config * Avoided getConfig, as it'll turn into a "Config" attribute, * which doesn't support multiple line values in jconsole. - * @throws MetricsException + * @throws MetricsException Metrics Exception. */ public String currentConfig(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsTag.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsTag.java index db8a5d9a8578e..26973f8fb9870 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsTag.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsTag.java @@ -18,13 +18,13 @@ package org.apache.hadoop.metrics2; -import com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import java.util.StringJoiner; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** * Immutable tag for metrics (for grouping on host/queue/username etc.) 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/filter/AbstractPatternFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/filter/AbstractPatternFilter.java index ca1d7f97f340c..7e3257d409ebc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/filter/AbstractPatternFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/filter/AbstractPatternFilter.java @@ -20,7 +20,7 @@ import java.util.Map; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.commons.configuration2.SubsetConfiguration; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/AbstractMetricsRecord.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/AbstractMetricsRecord.java index a4632c60c108e..28348c7ae36b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/AbstractMetricsRecord.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/AbstractMetricsRecord.java @@ -18,8 +18,8 @@ package org.apache.hadoop.metrics2.impl; -import com.google.common.base.Objects; -import com.google.common.collect.Iterables; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; import org.apache.hadoop.metrics2.MetricsRecord; import java.util.StringJoiner; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MBeanInfoBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MBeanInfoBuilder.java index cdd0ba4275ce2..a297072d236d4 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MBeanInfoBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MBeanInfoBuilder.java @@ -22,7 +22,7 @@ import javax.management.MBeanAttributeInfo; import javax.management.MBeanInfo; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricsInfo; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsCollectorImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsCollectorImpl.java index 5345c1baf88fd..4b4b70bd8e607 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsCollectorImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsCollectorImpl.java @@ -21,8 +21,8 @@ import java.util.Iterator; import java.util.List; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.metrics2.MetricsInfo; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java index 976f16bedd81b..2d22b75841b33 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsConfig.java @@ -29,14 +29,15 @@ import java.util.regex.Matcher; import 
java.util.regex.Pattern; -import com.google.common.base.Joiner; -import com.google.common.base.Splitter; -import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.commons.configuration2.Configuration; import org.apache.commons.configuration2.PropertiesConfiguration; import org.apache.commons.configuration2.SubsetConfiguration; +import org.apache.commons.configuration2.convert.DefaultListDelimiterHandler; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.configuration2.io.FileHandler; import org.apache.hadoop.metrics2.MetricsFilter; @@ -111,6 +112,7 @@ static MetricsConfig loadFirst(String prefix, String... fileNames) { for (String fname : fileNames) { try { PropertiesConfiguration pcf = new PropertiesConfiguration(); + pcf.setListDelimiterHandler(new DefaultListDelimiterHandler(',')); FileHandler fh = new FileHandler(pcf); fh.setFileName(fname); fh.load(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordBuilderImpl.java index f66ec5cb99ffb..19e4c3b6d4187 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordBuilderImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordBuilderImpl.java @@ -21,7 +21,7 @@ import java.util.Collections; import java.util.List; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.metrics2.AbstractMetric; import 
org.apache.hadoop.metrics2.MetricsInfo; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordFiltered.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordFiltered.java index 5674dfbf68d26..58ebbcffa8e58 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordFiltered.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordFiltered.java @@ -21,7 +21,7 @@ import java.util.Iterator; import java.util.Collection; -import com.google.common.collect.AbstractIterator; +import org.apache.hadoop.thirdparty.com.google.common.collect.AbstractIterator; import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricsFilter; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java index 8eb6cb8f8678a..14b930e830d77 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java @@ -20,7 +20,7 @@ import java.util.List; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.AbstractMetric; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java index f2e607b577619..836d9d5cf816f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java 
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java @@ -22,7 +22,7 @@ import java.util.Random; import java.util.concurrent.*; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSourceAdapter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSourceAdapter.java index f12ec67dd6125..852f31995a27b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSourceAdapter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSourceAdapter.java @@ -29,10 +29,10 @@ import javax.management.ObjectName; import javax.management.ReflectionException; -import static com.google.common.base.Preconditions.*; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricsFilter; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index 624edc96b8ae7..a6edf08e5a717 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -30,10 +30,10 @@ import java.util.TimerTask; import javax.management.ObjectName; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.annotations.VisibleForTesting; -import static com.google.common.base.Preconditions.*; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.commons.configuration2.PropertiesConfiguration; import org.apache.commons.math3.util.ArithmeticUtils; @@ -273,7 +273,11 @@ void registerSource(String name, String desc, MetricsSource source) { T register(final String name, final String description, final T sink) { LOG.debug(name +", "+ description); if (allSinks.containsKey(name)) { - LOG.warn("Sink "+ name +" already exists!"); + if(sinks.get(name) == null) { + registerSink(name, description, sink); + } else { + LOG.warn("Sink "+ name +" already exists!"); + } return sink; } allSinks.put(name, sink); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java index 935f47f3a09ee..83e458f06c68c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java @@ -27,7 +27,7 @@ import org.apache.hadoop.metrics2.MetricsSystem; import 
org.apache.hadoop.metrics2.impl.MetricsSystemImpl; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * The default metrics system singleton. This class is used by all the daemon diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MethodMetric.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MethodMetric.java index 9b54adcb4314f..96eb5026be179 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MethodMetric.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MethodMetric.java @@ -20,7 +20,7 @@ import java.lang.reflect.Method; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.metrics2.MetricsException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsInfoImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsInfoImpl.java index e3adc821de5c4..e86398f544edf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsInfoImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsInfoImpl.java @@ -18,12 +18,12 @@ package org.apache.hadoop.metrics2.lib; -import com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; import org.apache.hadoop.metrics2.MetricsInfo; import java.util.StringJoiner; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; /** * Making implementing metric info a little easier diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java index 6227d0954fb6d..b71f7f8cc5ee0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java @@ -18,7 +18,7 @@ package org.apache.hadoop.metrics2.lib; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.metrics2.MetricsException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsSourceBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsSourceBuilder.java index 1fcede464dacd..f400f02d256f2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsSourceBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsSourceBuilder.java @@ -22,7 +22,7 @@ import java.lang.reflect.Field; import java.lang.reflect.Method; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.metrics2.MetricsCollector; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounter.java index 8ba72343f2249..e616bb6d934dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounter.java 
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounter.java @@ -18,7 +18,7 @@ package org.apache.hadoop.metrics2.lib; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounterLong.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounterLong.java index d3dec2e4d06e2..efaf8a14eaf42 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounterLong.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableCounterLong.java @@ -23,7 +23,7 @@ import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsRecordBuilder; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; /** * A mutable long counter @@ -32,11 +32,11 @@ @InterfaceStability.Evolving public class MutableCounterLong extends MutableCounter { - private AtomicLong value = new AtomicLong(); + private final LongAdder value = new LongAdder(); public MutableCounterLong(MetricsInfo info, long initValue) { super(info); - this.value.set(initValue); + this.value.add(initValue); } @Override @@ -49,12 +49,12 @@ public void incr() { * @param delta of the increment */ public void incr(long delta) { - value.addAndGet(delta); + value.add(delta); setChanged(); } public long value() { - return value.get(); + return value.longValue(); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableGauge.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableGauge.java index 
03384f69147b7..6c77e97353869 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableGauge.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableGauge.java @@ -18,7 +18,7 @@ package org.apache.hadoop.metrics2.lib; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java index c7adaa5d9917f..e4886cb603e4e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java @@ -146,8 +146,10 @@ protected MetricsInfo getInfo(Class cls, Metrics annotation) { } /** - * Remove the prefix "get", if any, from the method name. Return the + * @return Remove the prefix "get", if any, from the method name. Return the * capacitalized method name." + * + * @param method input method. 
*/ protected String getName(Method method) { String methodName = method.getName(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java index 910805e4246d0..0e69c268c94cd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java @@ -35,8 +35,8 @@ import org.apache.hadoop.metrics2.util.QuantileEstimator; import org.apache.hadoop.metrics2.util.SampleQuantiles; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * Watches a stream of long values, maintaining online estimates of specific diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRates.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRates.java index 994eb13e08dae..c31c2e67f8f31 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRates.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRates.java @@ -21,8 +21,8 @@ import java.lang.reflect.Method; import java.util.Set; -import static com.google.common.base.Preconditions.*; -import com.google.common.collect.Sets; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java index 5fe0083aa5dce..7795343de3c20 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java @@ -18,7 +18,7 @@ package org.apache.hadoop.metrics2.lib; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import java.lang.ref.WeakReference; import java.lang.reflect.Method; import java.util.Iterator; @@ -163,6 +163,7 @@ private synchronized MutableRate addMetricIfNotExists(String name) { MutableRate metric = globalMetrics.get(name); if (metric == null) { metric = new MutableRate(name + typePrefix, name + typePrefix, false); + metric.setUpdateTimeStamp(true); globalMetrics.put(name, metric); } return metric; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java index 6803d11d1ca38..74a795deb3a24 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java @@ -31,7 +31,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ 
-39,8 +39,9 @@ import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.util.Time; import javax.annotation.Nullable; @@ -77,13 +78,26 @@ public class MutableRollingAverages extends MutableMetric implements Closeable { private final String avgInfoDescTemplate; private int numWindows; + /** + * This class maintains sub-sum and sub-total of SampleStat. + */ private static class SumAndCount { private final double sum; private final long count; - - SumAndCount(final double sum, final long count) { + private final long snapshotTimeStamp; + + /** + * Constructor for {@link SumAndCount}. + * + * @param sum sub-sum in sliding windows + * @param count sub-total in sliding windows + * @param snapshotTimeStamp when is a new SampleStat snapshot. + */ + SumAndCount(final double sum, final long count, + final long snapshotTimeStamp) { this.sum = sum; this.count = count; + this.snapshotTimeStamp = snapshotTimeStamp; } public double getSum() { @@ -93,6 +107,10 @@ public double getSum() { public long getCount() { return count; } + + public long getSnapshotTimeStamp() { + return snapshotTimeStamp; + } } /** @@ -110,9 +128,19 @@ public long getCount() { private static final long WINDOW_SIZE_MS_DEFAULT = 300_000; private static final int NUM_WINDOWS_DEFAULT = 36; + /** + * Time duration after which a record is considered stale. + * {@link MutableRollingAverages} should be time-sensitive, and it should use + * the time window length(i.e. NUM_WINDOWS_DEFAULT * WINDOW_SIZE_MS_DEFAULT) + * as the valid time to make sure some too old record won't be use to compute + * average. 
+ */ + private long recordValidityMs = + NUM_WINDOWS_DEFAULT * WINDOW_SIZE_MS_DEFAULT; + /** * Constructor for {@link MutableRollingAverages}. - * @param metricValueName + * @param metricValueName input metricValueName. */ public MutableRollingAverages(String metricValueName) { if (metricValueName == null) { @@ -231,7 +259,8 @@ public LinkedBlockingDeque apply(String k) { }); final SumAndCount sumAndCount = new SumAndCount( rate.lastStat().total(), - rate.lastStat().numSamples()); + rate.lastStat().numSamples(), + rate.getSnapshotTimeStamp()); /* put newest sum and count to the end */ if (!deque.offerLast(sumAndCount)) { deque.pollFirst(); @@ -254,6 +283,7 @@ public void close() throws IOException { * Retrieve a map of metric name {@literal ->} (aggregate). * Filter out entries that don't have at least minSamples. * + * @param minSamples input minSamples. * @return a map of peer DataNode Id to the average latency to that * node seen over the measurement period. */ @@ -267,8 +297,11 @@ public synchronized Map getStats(long minSamples) { long totalCount = 0; for (final SumAndCount sumAndCount : entry.getValue()) { - totalCount += sumAndCount.getCount(); - totalSum += sumAndCount.getSum(); + if (Time.monotonicNow() - sumAndCount.getSnapshotTimeStamp() + < recordValidityMs) { + totalCount += sumAndCount.getCount(); + totalSum += sumAndCount.getSum(); + } } if (totalCount > minSamples) { @@ -277,4 +310,13 @@ public synchronized Map getStats(long minSamples) { } return stats; } + + /** + * Use for test only. + * @param value input value. 
+ */ + @VisibleForTesting + public synchronized void setRecordValidityMs(long value) { + this.recordValidityMs = value; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java index 5ef31785a61e8..f2e072545ad28 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java @@ -24,6 +24,8 @@ import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.util.SampleStat; +import org.apache.hadoop.util.Time; + import static org.apache.hadoop.metrics2.lib.Interns.*; /** @@ -47,7 +49,9 @@ public class MutableStat extends MutableMetric { private final SampleStat prevStat = new SampleStat(); private final SampleStat.MinMax minMax = new SampleStat.MinMax(); private long numSamples = 0; + private long snapshotTimeStamp = 0; private boolean extended = false; + private boolean updateTimeStamp = false; /** * Construct a sample statistics metric @@ -100,6 +104,13 @@ public synchronized void setExtended(boolean extended) { this.extended = extended; } + /** + * Set whether to update the snapshot time or not. + * @param updateTimeStamp enable update stats snapshot timestamp + */ + public synchronized void setUpdateTimeStamp(boolean updateTimeStamp) { + this.updateTimeStamp = updateTimeStamp; + } /** * Add a number of samples and their sum to the running stat * @@ -115,7 +126,7 @@ public synchronized void add(long numSamples, long sum) { } /** - * Add a snapshot to the metric + * Add a snapshot to the metric. 
* @param value of the metric */ public synchronized void add(long value) { @@ -142,6 +153,9 @@ public synchronized void snapshot(MetricsRecordBuilder builder, boolean all) { if (numSamples > 0) { intervalStat.copyTo(prevStat); intervalStat.reset(); + if (updateTimeStamp) { + snapshotTimeStamp = Time.monotonicNow(); + } } clearChanged(); } @@ -164,6 +178,12 @@ public void resetMinMax() { minMax.reset(); } + /** + * @return Return the SampleStat snapshot timestamp. + */ + public long getSnapshotTimeStamp() { + return snapshotTimeStamp; + } @Override public String toString() { return lastStat().toString(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/UniqueNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/UniqueNames.java index 0df852d46b371..2508ee27bbee8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/UniqueNames.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/UniqueNames.java @@ -20,8 +20,8 @@ import java.util.Map; -import com.google.common.base.Joiner; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java index 8fd3b33b3a253..196469be9dce2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java @@ -87,7 +87,7 @@ usually does not need to reference any class here.

    Getting started

    Implementing metrics sources

    Memory usage by blocksize
    Memory usage by blocksize
    Blocksize Compression
    * memory usage
    Decompression
    * memory usage
    - + @@ -290,10 +290,10 @@ metrics system decouples the concept for context (for grouping) with the backend that can handle multiple contexts (file, gangalia etc.):

    Implementing metrics sources
    Using annotationsUsing MetricsSource interface
    - + - + - + @@ -416,7 +419,7 @@ {#DecomNodes} - + @@ -445,7 +448,7 @@ {#LiveNodes} - + @@ -489,7 +492,7 @@
    Migration from previous system
    BeforeAfterBeforeAfter
    @@ -312,10 +312,10 @@ backend that can handle multiple contexts (file, gangalia etc.):
         using the context option in the sink options like the following:
       

    - + - + + - + + @@ -90,61 +93,17 @@ {#dn.BPServiceActorInfo} + + {/dn.BPServiceActorInfo}
    Metrics2
    BeforeAfterBeforeAfter
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/PrometheusMetricsSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/PrometheusMetricsSink.java
    index 10df76941caf4..9024203700ee1 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/PrometheusMetricsSink.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/PrometheusMetricsSink.java
    @@ -17,6 +17,9 @@
      */
     package org.apache.hadoop.metrics2.sink;
     
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.regex.Matcher;
     import org.apache.commons.configuration2.SubsetConfiguration;
     import org.apache.hadoop.metrics2.AbstractMetric;
     import org.apache.hadoop.metrics2.MetricType;
    @@ -26,6 +29,7 @@
     
     import java.io.IOException;
     import java.io.Writer;
    +import java.util.Collection;
     import java.util.Map;
     import java.util.concurrent.ConcurrentHashMap;
     import java.util.regex.Pattern;
    @@ -42,53 +46,37 @@ public class PrometheusMetricsSink implements MetricsSink {
       /**
        * Cached output lines for each metrics.
        */
    -  private final Map metricLines = new ConcurrentHashMap<>();
    +  private Map, AbstractMetric>> promMetrics =
    +      new ConcurrentHashMap<>();
    +  private Map, AbstractMetric>> nextPromMetrics =
    +      new ConcurrentHashMap<>();
     
       private static final Pattern SPLIT_PATTERN =
           Pattern.compile("(?\\w+)(.user=(?.*)|)\\.(TotalCount|count)$");
    +
       public PrometheusMetricsSink() {
       }
     
       @Override
       public void putMetrics(MetricsRecord metricsRecord) {
    -    for (AbstractMetric metrics : metricsRecord.metrics()) {
    -      if (metrics.type() == MetricType.COUNTER
    -          || metrics.type() == MetricType.GAUGE) {
    +    for (AbstractMetric metric : metricsRecord.metrics()) {
    +      if (metric.type() == MetricType.COUNTER
    +          || metric.type() == MetricType.GAUGE) {
     
             String key = prometheusName(
    -            metricsRecord.name(), metrics.name());
    -
    -        StringBuilder builder = new StringBuilder();
    -        builder.append("# TYPE ")
    -            .append(key)
    -            .append(" ")
    -            .append(metrics.type().toString().toLowerCase())
    -            .append("\n")
    -            .append(key)
    -            .append("{");
    -        String sep = "";
    -
    -        //add tags
    -        for (MetricsTag tag : metricsRecord.tags()) {
    -          String tagName = tag.name().toLowerCase();
    -
    -          //ignore specific tag which includes sub-hierarchy
    -          if (!tagName.equals("numopenconnectionsperuser")) {
    -            builder.append(sep)
    -                .append(tagName)
    -                .append("=\"")
    -                .append(tag.value())
    -                .append("\"");
    -            sep = ",";
    -          }
    -        }
    -        builder.append("} ");
    -        builder.append(metrics.value());
    -        builder.append("\n");
    -        metricLines.put(key, builder.toString());
    +            metricsRecord.name(), metric.name());
     
    +        nextPromMetrics.computeIfAbsent(key,
    +            any -> new ConcurrentHashMap<>())
    +            .put(metricsRecord.tags(), metric);
           }
         }
       }
    @@ -96,6 +84,10 @@ public void putMetrics(MetricsRecord metricsRecord) {
       /**
        * Convert CamelCase based names to lower-case names where the separator
        * is the underscore, to follow prometheus naming conventions.
    +   *
    +   * @param metricName metricName.
    +   * @param recordName recordName.
    +   * @return prometheusName.
        */
       public String prometheusName(String recordName,
                                    String metricName) {
    @@ -108,17 +100,100 @@ public String prometheusName(String recordName,
     
       @Override
       public void flush() {
    -
    +    promMetrics = nextPromMetrics;
    +    nextPromMetrics = new ConcurrentHashMap<>();
       }
     
       @Override
    -  public void init(SubsetConfiguration subsetConfiguration) {
    -
    +  public void init(SubsetConfiguration conf) {
       }
     
       public void writeMetrics(Writer writer) throws IOException {
    -    for (String line : metricLines.values()) {
    -      writer.write(line);
    +    List extendMetricsTags = new ArrayList<>();
    +    for (Map.Entry, AbstractMetric>> promMetric :
    +        promMetrics.entrySet()) {
    +      AbstractMetric firstMetric = promMetric.getValue().values().iterator().next();
    +      String metricKey = getMetricKey(promMetric.getKey(), firstMetric,
    +          extendMetricsTags);
    +
    +      StringBuilder builder = new StringBuilder();
    +      builder.append("# HELP ")
    +          .append(metricKey)
    +          .append(" ")
    +          .append(firstMetric.description())
    +          .append("\n")
    +          .append("# TYPE ")
    +          .append(metricKey)
    +          .append(" ")
    +          .append(firstMetric.type().toString().toLowerCase())
    +          .append("\n");
    +
    +      for (Map.Entry, AbstractMetric> metric :
    +          promMetric.getValue().entrySet()) {
    +        builder.append(metricKey)
    +            .append("{");
    +
    +        String sep = "";
    +        for (MetricsTag tag : metric.getKey()) {
    +          String tagName = tag.name().toLowerCase();
    +
    +          if (!tagName.equals("numopenconnectionsperuser")) {
    +            builder.append(sep)
    +                .append(tagName)
    +                .append("=\"")
    +                .append(tag.value())
    +                .append("\"");
    +            sep = ",";
    +          }
    +        }
    +        if (!extendMetricsTags.isEmpty()) {
    +          //add extend tags
    +          for (String tagStr : extendMetricsTags) {
    +            builder.append(sep).append(tagStr);
    +          }
    +          extendMetricsTags.clear();
    +        }
    +        builder.append("} ");
    +        builder.append(metric.getValue().value());
    +        builder.append("\n");
    +      }
    +
    +      writer.write(builder.toString());
    +    }
    +  }
    +
    +  private String getMetricKey(String promMetricKey, AbstractMetric metric,
    +      List extendTags) {
    +    Matcher matcher = NN_TOPMETRICS_PATTERN.matcher(promMetricKey);
    +    if (matcher.find() && matcher.groupCount() == 2) {
    +      extendTags.addAll(parseTopMetricsTags(metric.name()));
    +      return String.format("%s_%s",
    +          matcher.group(1), matcher.group(2));
    +    }
    +    return promMetricKey;
    +  }
    +
    +  /**
    +   * Parse Custom tags for TopMetrics.
    +   *
    +   * @param metricName metricName
    +   * @return Tags for TopMetrics
    +   */
    +  private List parseTopMetricsTags(String metricName) {
    +    List topMetricsTags = new ArrayList<>();
    +    Matcher matcher = NN_TOPMETRICS_TAGS_PATTERN.matcher(metricName);
    +    if (matcher.find()) {
    +      String op = matcher.group("op");
    +      String user = matcher.group("user");
    +      // add tag op = "$op"
    +      topMetricsTags.add(String
    +          .format("op=\"%s\"", op));
    +      if (StringUtils.isNoneEmpty(user)) {
    +        // add tag user = "$user"
    +        topMetricsTags.add(String
    +            .format("user=\"%s\"", user));
    +      }
         }
    +    return topMetricsTags;
       }
     }
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/RollingFileSystemSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/RollingFileSystemSink.java
    index 1d330c74ab46a..4dfe9c6854049 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/RollingFileSystemSink.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/RollingFileSystemSink.java
    @@ -18,7 +18,7 @@
     
     package org.apache.hadoop.metrics2.sink;
     
    -import com.google.common.annotations.VisibleForTesting;
    +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
     import java.io.Closeable;
     import java.io.IOException;
     import java.io.PrintStream;
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
    index 804e90330fba3..d3d794fa74a91 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
    @@ -212,7 +212,7 @@ private void loadGangliaConf(GangliaConfType gtype) {
       /**
        * Lookup GangliaConf from cache. If not found, return default values
        *
    -   * @param metricName
    +   * @param metricName metricName.
        * @return looked up GangliaConf
        */
       protected GangliaConf getGangliaConfForMetric(String metricName) {
    @@ -253,6 +253,7 @@ private void pad() {
     
       /**
        * Puts an integer into the buffer as 4 bytes, big-endian.
    +   * @param i i.
        */
       protected void xdr_int(int i) {
         buffer[offset++] = (byte) ((i >> 24) & 0xff);
    @@ -263,7 +264,7 @@ protected void xdr_int(int i) {
     
       /**
        * Sends Ganglia Metrics to the configured hosts
    -   * @throws IOException
    +   * @throws IOException raised on errors performing I/O.
        */
       protected void emitToGangliaHosts() throws IOException {
         try {
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink30.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink30.java
    index 3e8314ee884d8..196824f433c81 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink30.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink30.java
    @@ -216,7 +216,7 @@ private GangliaSlope calculateSlope(GangliaConf gConf,
        * @param value The value of the metric
        * @param gConf The GangliaConf for this metric
        * @param gSlope The slope for this metric
    -   * @throws IOException
    +   * @throws IOException raised on errors performing I/O.
        */
       protected void emitMetric(String groupName, String name, String type,
           String value, GangliaConf gConf, GangliaSlope gSlope) throws IOException {
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink31.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink31.java
    index 5aebff8c031a9..fae0d4e85e1ec 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink31.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/GangliaSink31.java
    @@ -42,7 +42,7 @@ public class GangliaSink31 extends GangliaSink30 {
        * @param value The value of the metric
        * @param gConf The GangliaConf for this metric
        * @param gSlope The slope for this metric
    -   * @throws IOException
    +   * @throws IOException raised on errors performing I/O.
        */
       @Override
       protected void emitMetric(String groupName, String name, String type,
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
    index f19a2be0b4195..816940b109879 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
    @@ -27,8 +27,8 @@
     import java.util.List;
     import java.util.concurrent.ConcurrentHashMap;
     
    -import com.google.common.annotations.VisibleForTesting;
    -import com.google.common.base.Preconditions;
    +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
    +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
     
     import org.apache.hadoop.classification.InterfaceAudience;
     import org.apache.hadoop.conf.Configuration;
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MBeans.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MBeans.java
    index 1b50498bbaf5a..11d4868b8fda1 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MBeans.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MBeans.java
    @@ -28,12 +28,12 @@
     import javax.management.MBeanServer;
     import javax.management.ObjectName;
     
    -import com.google.common.annotations.VisibleForTesting;
    +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
     import org.apache.hadoop.classification.InterfaceAudience;
     import org.apache.hadoop.classification.InterfaceStability;
     import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
     
    -import com.google.common.base.Preconditions;
    +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
     import org.slf4j.Logger;
     import org.slf4j.LoggerFactory;
     
    @@ -63,8 +63,8 @@ private MBeans() {
        * Where the {@literal  and } are the supplied
        * parameters.
        *
    -   * @param serviceName
    -   * @param nameName
    +   * @param serviceName serviceName.
    +   * @param nameName nameName.
        * @param theMbean - the MBean to register
        * @return the named used to register the MBean
        */
    @@ -80,8 +80,8 @@ static public ObjectName register(String serviceName, String nameName,
        * Where the {@literal  and } are the supplied
        * parameters.
        *
    -   * @param serviceName
    -   * @param nameName
    +   * @param serviceName serviceName.
    +   * @param nameName nameName.
        * @param properties - Key value pairs to define additional JMX ObjectName
        *                     properties.
        * @param theMbean    - the MBean to register
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MetricsCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MetricsCache.java
    index 6cfbc39f896f4..bf6e910c3e232 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MetricsCache.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MetricsCache.java
    @@ -18,7 +18,7 @@
     
     package org.apache.hadoop.metrics2.util;
     
    -import com.google.common.collect.Maps;
    +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps;
     import org.apache.hadoop.classification.InterfaceAudience;
     import org.apache.hadoop.classification.InterfaceStability;
     import org.apache.hadoop.metrics2.AbstractMetric;
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Quantile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Quantile.java
    index 40e31bd841ba8..7a100edc228fb 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Quantile.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Quantile.java
    @@ -20,7 +20,7 @@
     
     import org.apache.hadoop.classification.InterfaceAudience;
     
    -import com.google.common.collect.ComparisonChain;
    +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain;
     
     /**
      * Specifies a quantile (with error bounds) to be watched by a
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java
    index 0c5d98f2374ed..cd543283dc8c3 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java
    @@ -26,9 +26,9 @@
     
     import org.apache.hadoop.classification.InterfaceAudience;
     
    -import com.google.common.annotations.VisibleForTesting;
    -import com.google.common.base.Joiner;
    -import com.google.common.base.Preconditions;
    +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
    +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner;
    +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
     
     /**
      * Implementation of the Cormode, Korn, Muthukrishnan, and Srivastava algorithm
    @@ -108,7 +108,7 @@ private double allowableError(int rank) {
       /**
        * Add a new value from the stream.
        * 
    -   * @param v
    +   * @param v v.
        */
       synchronized public void insert(long v) {
         buffer[bufferCount] = v;
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Servers.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Servers.java
    index 19a64a9f9d171..2bd49e9f211ba 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Servers.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/Servers.java
    @@ -22,7 +22,7 @@
     import java.net.InetSocketAddress;
     import java.util.List;
     
    -import com.google.common.collect.Lists;
    +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists;
     
     import org.apache.hadoop.classification.InterfaceAudience;
     import org.apache.hadoop.classification.InterfaceStability;
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java
    index 531ad80f41722..031a2f03c9bce 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java
    @@ -18,7 +18,7 @@
     
     package org.apache.hadoop.net;
     
    -import com.google.common.net.InetAddresses;
    +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses;
     import org.apache.hadoop.classification.InterfaceAudience;
     import org.apache.hadoop.classification.InterfaceStability;
     import org.slf4j.Logger;
    @@ -141,8 +141,12 @@ private static LinkedHashSet getSubinterfaceInetAddrs(
       }
     
       /**
    -   * Like {@link DNS#getIPs(String, boolean)}, but returns all
    +   * @return Like {@link DNS#getIPs(String, boolean)}, but returns all
        * IPs associated with the given interface and its subinterfaces.
    +   *
    +   * @param strInterface input strInterface.
    +   * @throws UnknownHostException
    +   * If no IP address for the local host could be found.
        */
       public static String[] getIPs(String strInterface)
           throws UnknownHostException {
    @@ -345,6 +349,8 @@ public static String[] getHosts(String strInterface)
        *            The name of the network interface to query (e.g. eth0)
        * @param nameserver
        *            The DNS host name
    +   * @param tryfallbackResolution
    +   *            Input tryfallbackResolution.
        * @return The default host names associated with IPs bound to the network
        *         interface
        * @throws UnknownHostException
    @@ -384,7 +390,7 @@ public static String getDefaultHost(@Nullable String strInterface)
       }
     
       /**
    -   * Returns the default (first) host name associated by the provided
    +   * @return the default (first) host name associated by the provided
        * nameserver with the address bound to the specified network interface.
        *
        * @param strInterface
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java
    index 5866e2960fe1b..ce962bf9e8c6a 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java
    @@ -18,6 +18,10 @@
     
     package org.apache.hadoop.net;
     
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import javax.naming.NamingException;
     import java.net.InetAddress;
     import java.net.UnknownHostException;
     
    @@ -27,6 +31,10 @@
      * fully qualified domain names belonging to the IPs from this host name
      */
     public class DNSDomainNameResolver implements DomainNameResolver {
    +
    +  private final static Logger LOG =
    +      LoggerFactory.getLogger(DNSDomainNameResolver.class.getName());
    +
       @Override
       public InetAddress[] getAllByDomainName(String domainName)
           throws UnknownHostException {
    @@ -40,6 +48,16 @@ public String getHostnameByIP(InetAddress address) {
             && host.charAt(host.length()-1) == '.') {
           host = host.substring(0, host.length()-1);
         }
    +    // Protect against the Java behaviour of returning the IP address as a string from a cache
    +    // instead of performing a reverse lookup.
    +    if (host != null && host.equals(address.getHostAddress())) {
    +      LOG.debug("IP address returned for FQDN detected: {}", address.getHostAddress());
    +      try {
    +        return DNS.reverseDns(address, null);
    +      } catch (NamingException lookupFailure) {
    +        LOG.warn("Failed to perform reverse lookup: {}", address);
    +      }
    +    }
         return host;
       }
     
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java
    index 1e6f5f500849f..d29c6e3077df5 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java
    @@ -65,6 +65,8 @@ public interface DNSToSwitchMapping {
        *
        * If there is a cache on these nodes, this method will clear it, so that 
        * future accesses will see updated data.
    +   *
    +   * @param names input names.
        */
       public void reloadCachedMappings(List names);
     }
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolver.java
    index 4c44e9da4c063..debfe2feaa8b7 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolver.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolver.java
    @@ -30,9 +30,10 @@ public interface DomainNameResolver {
        * Takes one domain name and returns its IP addresses based on the actual
        * service discovery methods.
        *
    -   * @param domainName
    +   * @param domainName input domainName.
        * @return all IP addresses
    -   * @throws UnknownHostException
    +   * @throws UnknownHostException indicates that the IP address of a
    +   * host could not be determined.
        */
       InetAddress[] getAllByDomainName(String domainName)
           throws UnknownHostException;
    @@ -40,7 +41,7 @@ InetAddress[] getAllByDomainName(String domainName)
       /**
        * Reverse lookup an IP address and get the fully qualified domain name(fqdn).
        *
    -   * @param address
    +   * @param address input address.
        * @return fully qualified domain name
        */
       String getHostnameByIP(InetAddress address);
    @@ -52,10 +53,12 @@ InetAddress[] getAllByDomainName(String domainName)
        * This function is necessary in secure environment since Kerberos uses fqdn
        * in the service principal instead of IP.
        *
    -   * @param domainName
    +   * @param domainName input domainName.
    +   * @param useFQDN input useFQDN.
        * @return all fully qualified domain names belonging to the IPs resolved from
        * the input domainName
    -   * @throws UnknownHostException
    +   * @throws UnknownHostException indicates that the IP address of a
    +   * host could not be determined.
        */
        String[] getAllResolvedHostnameByDomainName(
            String domainName, boolean useFQDN) throws UnknownHostException;
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolverFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolverFactory.java
    index a0b0380c18963..b4dd36fce707f 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolverFactory.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DomainNameResolverFactory.java
    @@ -47,6 +47,7 @@ private DomainNameResolverFactory() {
        * @param configKey The config key name suffixed with
        *                  the nameservice/yarnservice.
        * @return Domain name resolver.
    +   * @throws IOException raised on errors performing I/O.
        */
       public static DomainNameResolver newInstance(
           Configuration conf, URI uri, String configKey) throws IOException {
    @@ -61,7 +62,6 @@ public static DomainNameResolver newInstance(
        * @param conf Configuration
        * @param configKey config key name.
        * @return Domain name resolver.
    -   * @throws IOException when the class cannot be found or initiated.
        */
       public static DomainNameResolver newInstance(
           Configuration conf, String configKey) {
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNode.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNode.java
    index efd1cc07d44bd..df4a01af27bc9 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNode.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNode.java
    @@ -27,7 +27,10 @@
     @InterfaceStability.Unstable
     public interface InnerNode extends Node {
       interface Factory {
    -    /** Construct an InnerNode from a path-like string */
    +    /**
    +     * @return an InnerNode constructed from a path-like string.
    +     * @param path input path.
    +     */
         N newInnerNode(String path);
       }
     
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNodeImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNodeImpl.java
    index 923515b6efe7e..1dd3105080778 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNodeImpl.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/InnerNodeImpl.java
    @@ -41,13 +41,22 @@ public InnerNodeImpl newInnerNode(String path) {
       protected final Map childrenMap = new HashMap<>();
       protected int numOfLeaves;
     
    -  /** Construct an InnerNode from a path-like string. */
    +  /**
    +   * Construct an InnerNode from a path-like string.
    +   * @param path input path.
    +   */
       protected InnerNodeImpl(String path) {
         super(path);
       }
     
    -  /** Construct an InnerNode
    -   * from its name, its network location, its parent, and its level. */
    +  /**
    +   * Construct an InnerNode
    +   * from its name, its network location, its parent, and its level.
    +   * @param name input name.
    +   * @param location input location.
    +   * @param parent input parent.
    +   * @param level input level.
    +   */
       protected InnerNodeImpl(String name, String location,
           InnerNode parent, int level) {
         super(name, location, parent, level);
    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
    index d98254cb1ca25..6db755743b9cc 100644
    --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
    +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
    @@ -37,13 +37,18 @@
     import java.net.UnknownHostException;
     import java.net.ConnectException;
     import java.nio.channels.SocketChannel;
    +import java.nio.channels.UnresolvedAddressException;
     import java.util.Map.Entry;
    +import java.util.concurrent.TimeUnit;
     import java.util.regex.Pattern;
     import java.util.*;
     import java.util.concurrent.ConcurrentHashMap;
     
     import javax.net.SocketFactory;
     
    +import org.apache.hadoop.security.AccessControlException;
    +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache;
    +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder;
     import org.apache.commons.net.util.SubnetUtils;
     import org.apache.commons.net.util.SubnetUtils.SubnetInfo;
     import org.apache.hadoop.classification.InterfaceAudience;
    @@ -55,7 +60,7 @@
     import org.apache.hadoop.security.SecurityUtil;
     import org.apache.hadoop.util.ReflectionUtils;
     
    -import com.google.common.base.Preconditions;
    +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
     import org.slf4j.Logger;
     import org.slf4j.LoggerFactory;
     
    @@ -127,7 +132,8 @@ public static SocketFactory getDefaultSocketFactory(Configuration conf) {
        * Get the socket factory corresponding to the given proxy URI. If the
        * given proxy URI corresponds to an absence of configuration parameter,
        * returns null. If the URI is malformed raises an exception.
    -   * 
    +   *
    +   * @param conf configuration.
        * @param propValue the property which is the class name of the
        *        SocketFactory to instantiate; assumed non null and non empty.
        * @return a socket factory as defined in the property value.
    @@ -145,19 +151,26 @@ public static SocketFactory getSocketFactoryFromProperty(
       }
     
       /**
    -   * Util method to build socket addr from either:
    +   * Util method to build socket addr from either.
        *   {@literal :}
        *   {@literal ://:/}
    +   *
    +   * @param target target.
    +   * @return socket addr.
        */
       public static InetSocketAddress createSocketAddr(String target) {
         return createSocketAddr(target, -1);
       }
     
       /**
    -   * Util method to build socket addr from either:
    +   * Util method to build socket addr from either.
        *   {@literal }
        *   {@literal :}
        *   {@literal ://:/}
    +   *
    +   * @param target target.
    +   * @param defaultPort default port.
    +   * @return socket addr.
        */
       public static InetSocketAddress createSocketAddr(String target,
                                                        int defaultPort) {
    @@ -176,11 +189,35 @@ public static InetSocketAddress createSocketAddr(String target,
        *                    include a port number
        * @param configName the name of the configuration from which
        *                   target was loaded. This is used in the
    -   *                   exception message in the case that parsing fails. 
    +   *                   exception message in the case that parsing fails.
    +   * @return socket addr.
        */
       public static InetSocketAddress createSocketAddr(String target,
                                                        int defaultPort,
                                                        String configName) {
    +    return createSocketAddr(target, defaultPort, configName, false);
    +  }
    +
    +  /**
    +   * Create an InetSocketAddress from the given target string and
    +   * default port. If the string cannot be parsed correctly, the
    +   * configName parameter is used as part of the
    +   * exception message, allowing the user to better diagnose
    +   * the misconfiguration.
    +   *
    +   * @param target a string of either "host" or "host:port"
    +   * @param defaultPort the default port if target does not
    +   *                    include a port number
    +   * @param configName the name of the configuration from which
    +   *                   target was loaded. This is used in the
    +   *                   exception message in the case that parsing fails.
    +   * @param useCacheIfPresent whether to use the cache when creating the URI.
    +   * @return socket addr.
    +   */
    +  public static InetSocketAddress createSocketAddr(String target,
    +                                                   int defaultPort,
    +                                                   String configName,
    +                                                   boolean useCacheIfPresent) {
         String helpText = "";
         if (configName != null) {
           helpText = " (configuration property '" + configName + "')";
    @@ -190,15 +227,8 @@ public static InetSocketAddress createSocketAddr(String target,
               helpText);
         }
         target = target.trim();
    -    boolean hasScheme = target.contains("://");    
    -    URI uri = null;
    -    try {
    -      uri = hasScheme ? URI.create(target) : URI.create("dummyscheme://"+target);
    -    } catch (IllegalArgumentException e) {
    -      throw new IllegalArgumentException(
    -          "Does not contain a valid host:port authority: " + target + helpText
    -      );
    -    }
    +    boolean hasScheme = target.contains("://");
    +    URI uri = createURI(target, hasScheme, helpText, useCacheIfPresent);
     
         String host = uri.getHost();
         int port = uri.getPort();
    @@ -206,10 +236,9 @@ public static InetSocketAddress createSocketAddr(String target,
           port = defaultPort;
         }
         String path = uri.getPath();
    -    
    +
         if ((host == null) || (port < 0) ||
    -        (!hasScheme && path != null && !path.isEmpty()))
    -    {
    +        (!hasScheme && path != null && !path.isEmpty())) {
           throw new IllegalArgumentException(
               "Does not contain a valid host:port authority: " + target + helpText
           );
    @@ -217,6 +246,40 @@ public static InetSocketAddress createSocketAddr(String target,
         return createSocketAddrForHost(host, port);
       }
     
    +  private static final long URI_CACHE_SIZE_DEFAULT = 1000;
    +  private static final long URI_CACHE_EXPIRE_TIME_DEFAULT = 12;
    +  private static final Cache URI_CACHE = CacheBuilder.newBuilder()
    +      .maximumSize(URI_CACHE_SIZE_DEFAULT)
    +      .expireAfterWrite(URI_CACHE_EXPIRE_TIME_DEFAULT, TimeUnit.HOURS)
    +      .build();
    +
    +  private static URI createURI(String target,
    +                               boolean hasScheme,
    +                               String helpText,
    +                               boolean useCacheIfPresent) {
    +    URI uri;
    +    if (useCacheIfPresent) {
    +      uri = URI_CACHE.getIfPresent(target);
    +      if (uri != null) {
    +        return uri;
    +      }
    +    }
    +
    +    try {
    +      uri = hasScheme ? URI.create(target) :
    +              URI.create("dummyscheme://" + target);
    +    } catch (IllegalArgumentException e) {
    +      throw new IllegalArgumentException(
    +          "Does not contain a valid host:port authority: " + target + helpText
    +      );
    +    }
    +
    +    if (useCacheIfPresent) {
    +      URI_CACHE.put(target, uri);
    +    }
    +    return uri;
    +  }
    +
       /**
        * Create a socket address with the given host and port.  The hostname
        * might be replaced with another host that was set via
    @@ -307,8 +370,8 @@ private static String canonicalizeHost(String host) {
        * daemons, one can set up mappings from those hostnames to "localhost".
        * {@link NetUtils#getStaticResolution(String)} can be used to query for
        * the actual hostname. 
    -   * @param host
    -   * @param resolvedName
    +   * @param host the hostname or IP used to instantiate the object.
    +   * @param resolvedName resolved name.
        */
       public static void addStaticResolution(String host, String resolvedName) {
         synchronized (hostToResolved) {
    @@ -320,7 +383,7 @@ public static void addStaticResolution(String host, String resolvedName) {
        * Retrieves the resolved name for the passed host. The resolved name must
        * have been set earlier using 
        * {@link NetUtils#addStaticResolution(String, String)}
    -   * @param host
    +   * @param host the hostname or IP used to instantiate the object.
        * @return the resolution
        */
       public static String getStaticResolution(String host) {
    @@ -356,7 +419,7 @@ public static List  getAllStaticResolutions() {
        * the server binds to "0.0.0.0". This returns "hostname:port" of the server,
        * or "127.0.0.1:port" when the getListenerAddress() returns "0.0.0.0:port".
        * 
    -   * @param server
    +   * @param server server.
        * @return socket address that a client can use to connect to the server.
        */
       public static InetSocketAddress getConnectAddress(Server server) {
    @@ -384,8 +447,10 @@ public static InetSocketAddress getConnectAddress(InetSocketAddress addr) {
       
       /**
        * Same as getInputStream(socket, socket.getSoTimeout()).
    -   * 

    - * + * + * @param socket socket. + * @throws IOException raised on errors performing I/O. + * @return SocketInputWrapper for reading from the socket. * @see #getInputStream(Socket, long) */ public static SocketInputWrapper getInputStream(Socket socket) @@ -408,11 +473,11 @@ public static SocketInputWrapper getInputStream(Socket socket) * * @see Socket#getChannel() * - * @param socket + * @param socket socket. * @param timeout timeout in milliseconds. zero for waiting as * long as necessary. * @return SocketInputWrapper for reading from the socket. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static SocketInputWrapper getInputStream(Socket socket, long timeout) throws IOException { @@ -440,9 +505,9 @@ public static SocketInputWrapper getInputStream(Socket socket, long timeout) * * @see #getOutputStream(Socket, long) * - * @param socket + * @param socket socket. * @return OutputStream for writing to the socket. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static OutputStream getOutputStream(Socket socket) throws IOException { @@ -462,11 +527,11 @@ public static OutputStream getOutputStream(Socket socket) * * @see Socket#getChannel() * - * @param socket + * @param socket socket. * @param timeout timeout in milliseconds. This may not always apply. zero * for waiting as long as necessary. * @return OutputStream for writing to the socket. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static OutputStream getOutputStream(Socket socket, long timeout) throws IOException { @@ -487,9 +552,10 @@ public static OutputStream getOutputStream(Socket socket, long timeout) * * @see java.net.Socket#connect(java.net.SocketAddress, int) * - * @param socket + * @param socket socket. * @param address the remote address * @param timeout timeout in milliseconds + * @throws IOException raised on errors performing I/O. 
*/ public static void connect(Socket socket, SocketAddress address, @@ -501,10 +567,11 @@ public static void connect(Socket socket, * Like {@link NetUtils#connect(Socket, SocketAddress, int)} but * also takes a local address and port to bind the socket to. * - * @param socket + * @param socket socket. * @param endpoint the remote address * @param localAddr the local address to bind the socket to * @param timeout timeout in milliseconds + * @throws IOException raised on errors performing I/O. */ public static void connect(Socket socket, SocketAddress endpoint, @@ -534,6 +601,8 @@ public static void connect(Socket socket, } } catch (SocketTimeoutException ste) { throw new ConnectTimeoutException(ste.getMessage()); + } catch (UnresolvedAddressException uae) { + throw new UnknownHostException(endpoint.toString()); } // There is a very rare case allowed by the TCP specification, such that @@ -588,7 +657,7 @@ public static List normalizeHostNames(Collection names) { * Performs a sanity check on the list of hostnames/IPs to verify they at least * appear to be valid. * @param names - List of hostnames/IPs - * @throws UnknownHostException + * @throws UnknownHostException Unknown Host Exception. */ public static void verifyHostnames(String[] names) throws UnknownHostException { for (String name: names) { @@ -638,6 +707,22 @@ public static String getHostNameOfIP(String ipPort) { } } + /** + * Attempt to normalize the given string to "host:port" + * if it like "ip:port". + * + * @param ipPort maybe lik ip:port or host:port. + * @return host:port + */ + public static String normalizeIP2HostName(String ipPort) { + if (null == ipPort || !ipPortPattern.matcher(ipPort).matches()) { + return ipPort; + } + + InetSocketAddress address = createSocketAddr(ipPort); + return getHostPortString(address); + } + /** * Return hostname without throwing exception. * The returned hostname String format is "hostname". 
@@ -663,6 +748,9 @@ public static String getHostname() { /** * Compose a "host:port" string from the address. + * + * @param addr address. + * @return hort port string. */ public static String getHostPortString(InetSocketAddress addr) { return addr.getHostName() + ":" + addr.getPort(); @@ -803,6 +891,11 @@ public static IOException wrapException(final String destHost, + " failed on socket exception: " + exception + ";" + see("SocketException")); + } else if (exception instanceof AccessControlException) { + return wrapWithMessage(exception, + "Call From " + + localHost + " to " + destHost + ":" + destPort + + " failed: " + exception.getMessage()); } else { // 1. Return instance of same type with exception msg if Exception has a // String constructor. @@ -875,6 +968,8 @@ private static String quoteHost(final String hostname) { } /** + * isValidSubnet. + * @param subnet subnet. * @return true if the given string is a subnet specified * using CIDR notation, false otherwise */ @@ -910,6 +1005,7 @@ private static void addMatchingAddrs(NetworkInterface nif, * @param returnSubinterfaces * whether to return IPs associated with subinterfaces * @throws IllegalArgumentException if subnet is invalid + * @return ips. */ public static List getIPs(String subnet, boolean returnSubinterfaces) { @@ -959,12 +1055,38 @@ public static int getFreeSocketPort() { return port; } + /** + * Return free ports. There is no guarantee they will remain free, so + * ports should be used immediately. The number of free ports returned by + * this method should match argument {@code numOfPorts}. Num of ports + * provided in the argument should not exceed 25. + * + * @param numOfPorts Number of free ports to acquire. + * @return Free ports for binding a local socket. 
+ */ + public static Set getFreeSocketPorts(int numOfPorts) { + Preconditions.checkArgument(numOfPorts > 0 && numOfPorts <= 25, + "Valid range for num of ports is between 0 and 26"); + final Set freePorts = new HashSet<>(numOfPorts); + for (int i = 0; i < numOfPorts * 5; i++) { + int port = getFreeSocketPort(); + if (port == 0) { + continue; + } + freePorts.add(port); + if (freePorts.size() == numOfPorts) { + return freePorts; + } + } + throw new IllegalStateException(numOfPorts + " free ports could not be acquired."); + } + /** * Return an @{@link InetAddress} to bind to. If bindWildCardAddress is true * than returns null. * - * @param localAddr - * @param bindWildCardAddress + * @param localAddr local addr. + * @param bindWildCardAddress bind wildcard address. * @return InetAddress */ public static InetAddress bindToLocalAddress(InetAddress localAddr, boolean diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java index aae56dd98daa4..851c9e44a01ce 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java @@ -17,9 +17,8 @@ */ package org.apache.hadoop.net; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -29,6 +28,8 @@ import org.slf4j.LoggerFactory; import java.util.*; +import java.util.concurrent.ThreadLocalRandom; +import 
java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Consumer; @@ -52,6 +53,8 @@ public class NetworkTopology { private static final char PATH_SEPARATOR = '/'; private static final String PATH_SEPARATOR_STR = "/"; private static final String ROOT = "/"; + private static final AtomicReference RANDOM_REF = + new AtomicReference<>(); public static class InvalidTopologyException extends RuntimeException { private static final long serialVersionUID = 1L; @@ -98,6 +101,13 @@ protected NetworkTopology init(InnerNode.Factory factory) { private int depthOfAllLeaves = -1; /** rack counter */ protected int numOfRacks = 0; + /** empty rack map, rackname->nodenumber. */ + private HashMap> rackMap = + new HashMap>(); + /** decommission nodes, contained stoped nodes. */ + private HashSet decommissionNodes = new HashSet<>(); + /** empty rack counter. */ + private int numOfEmptyRacks = 0; /** * Whether or not this cluster has ever consisted of more than 1 rack, @@ -147,6 +157,7 @@ public void add(Node node) { if (rack == null) { incrementRacks(); } + interAddNodeWithEmptyRack(node); if (depthOfAllLeaves == -1) { depthOfAllLeaves = node.getLevel(); } @@ -223,6 +234,7 @@ public void remove(Node node) { if (rack == null) { numOfRacks--; } + interRemoveNodeWithEmptyRack(node); } LOG.debug("NetworkTopology became:\n{}", this); } finally { @@ -396,28 +408,25 @@ static public int getDistanceByPath(Node node1, Node node2) { * @exception IllegalArgumentException when either node1 or node2 is null, or * node1 or node2 do not belong to the cluster */ - public boolean isOnSameRack( Node node1, Node node2) { + public boolean isOnSameRack(Node node1, Node node2) { if (node1 == null || node2 == null) { return false; } - - netlock.readLock().lock(); - try { - return isSameParents(node1, node2); - } finally { - netlock.readLock().unlock(); - } + + return isSameParents(node1, 
node2); } /** - * Check if network topology is aware of NodeGroup + * @return Check if network topology is aware of NodeGroup. */ public boolean isNodeGroupAware() { return false; } /** - * Return false directly as not aware of NodeGroup, to be override in sub-class + * @return Return false directly as not aware of NodeGroup, to be override in sub-class. + * @param node1 input node1. + * @param node2 input node2. */ public boolean isOnSameNodeGroup(Node node1, Node node2) { return false; @@ -440,11 +449,14 @@ protected boolean isSameParents(Node node1, Node node2) { return node1.getParent()==node2.getParent(); } - private static final Random r = new Random(); - @VisibleForTesting void setRandomSeed(long seed) { - r.setSeed(seed); + RANDOM_REF.set(new Random(seed)); + } + + Random getRandom() { + Random random = RANDOM_REF.get(); + return (random == null) ? ThreadLocalRandom.current() : random; } /** @@ -563,6 +575,7 @@ private Node chooseRandom(final InnerNode parentNode, totalInScopeNodes, availableNodes); return null; } + Random r = getRandom(); if (excludedNodes == null || excludedNodes.isEmpty()) { // if there are no excludedNodes, randomly choose a node final int index = r.nextInt(totalInScopeNodes); @@ -721,11 +734,10 @@ public String toString() { } /** - * Divide networklocation string into two parts by last separator, and get + * @return Divide networklocation string into two parts by last separator, and get * the first part here. * - * @param networkLocation - * @return + * @param networkLocation input networkLocation. */ public static String getFirstHalf(String networkLocation) { int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR); @@ -733,11 +745,10 @@ public static String getFirstHalf(String networkLocation) { } /** - * Divide networklocation string into two parts by last separator, and get + * @return Divide networklocation string into two parts by last separator, and get * the second part here. 
* - * @param networkLocation - * @return + * @param networkLocation input networkLocation. */ public static String getLastHalf(String networkLocation) { int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR); @@ -879,7 +890,7 @@ public void sortByDistance(Node reader, Node[] nodes, int activeLen) { * This method is called if the reader is a datanode, * so nonDataNodeReader flag is set to false. */ - sortByDistance(reader, nodes, activeLen, list -> Collections.shuffle(list)); + sortByDistance(reader, nodes, activeLen, null); } /** @@ -889,7 +900,7 @@ public void sortByDistance(Node reader, Node[] nodes, int activeLen) { * or on a different rack from the reader. Sorting the nodes based on network * distance from the reader reduces network traffic and improves * performance. - *

    + *

    * As an additional twist, we also randomize the nodes at each network * distance. This helps with load balancing when there is data skew. * @@ -898,6 +909,7 @@ public void sortByDistance(Node reader, Node[] nodes, int activeLen) { * @param activeLen Number of active nodes at the front of the array * @param secondarySort a secondary sorting strategy which can inject into * that point from outside to help sort the same distance. + * @param Generics Type T */ public void sortByDistance(Node reader, T[] nodes, int activeLen, Consumer> secondarySort){ @@ -910,7 +922,7 @@ public void sortByDistance(Node reader, T[] nodes, * is not a datanode. Sorting the nodes based on network distance * from the reader reduces network traffic and improves * performance. - *

    + *

    * * @param reader Node where data will be read * @param nodes Available replicas with the requested data @@ -922,8 +934,7 @@ public void sortByDistanceUsingNetworkLocation(Node reader, Node[] nodes, * This method is called if the reader is not a datanode, * so nonDataNodeReader flag is set to true. */ - sortByDistanceUsingNetworkLocation(reader, nodes, activeLen, - list -> Collections.shuffle(list)); + sortByDistanceUsingNetworkLocation(reader, nodes, activeLen, null); } /** @@ -932,13 +943,14 @@ public void sortByDistanceUsingNetworkLocation(Node reader, Node[] nodes, * is not a datanode. Sorting the nodes based on network distance * from the reader reduces network traffic and improves * performance. - *

    + *

    * * @param reader Node where data will be read * @param nodes Available replicas with the requested data * @param activeLen Number of active nodes at the front of the array * @param secondarySort a secondary sorting strategy which can inject into * that point from outside to help sort the same distance. + * @param Generics Type T. */ public void sortByDistanceUsingNetworkLocation(Node reader, T[] nodes, int activeLen, Consumer> secondarySort) { @@ -950,6 +962,7 @@ public void sortByDistanceUsingNetworkLocation(Node reader, *

    * As an additional twist, we also randomize the nodes at each network * distance. This helps with load balancing when there is data skew. + * And it helps choose node with more fast storage type. * * @param reader Node where data will be read * @param nodes Available replicas with the requested data @@ -960,38 +973,135 @@ private void sortByDistance(Node reader, T[] nodes, int activeLen, Consumer> secondarySort, boolean nonDataNodeReader) { /** Sort weights for the nodes array */ - int[] weights = new int[activeLen]; - for (int i=0; i> weightedNodeTree = + new TreeMap<>(); + int nWeight; + for (int i = 0; i < activeLen; i++) { + if (nonDataNodeReader) { + nWeight = getWeightUsingNetworkLocation(reader, nodes[i]); } else { - weights[i] = getWeight(reader, nodes[i]); - } - } - // Add weight/node pairs to a TreeMap to sort - TreeMap> tree = new TreeMap<>(); - for (int i=0; i list = tree.get(weight); - if (list == null) { - list = Lists.newArrayListWithExpectedSize(1); - tree.put(weight, list); + nWeight = getWeight(reader, nodes[i]); } - list.add(node); + weightedNodeTree.computeIfAbsent( + nWeight, k -> new ArrayList<>(1)).add(nodes[i]); } - // Sort nodes which have the same weight using secondarySort. int idx = 0; - for (List list: tree.values()) { - if (list != null) { - secondarySort.accept(list); - for (T n: list) { - nodes[idx] = n; - idx++; - } + // Sort nodes which have the same weight using secondarySort. + for (List nodesList : weightedNodeTree.values()) { + Collections.shuffle(nodesList, getRandom()); + if (secondarySort != null) { + // a secondary sort breaks the tie between nodes. 
+ secondarySort.accept(nodesList); + } + for (T n : nodesList) { + nodes[idx++] = n; } } Preconditions.checkState(idx == activeLen, "Sorted the wrong number of nodes!"); } -} \ No newline at end of file + + /** @return the number of nonempty racks */ + public int getNumOfNonEmptyRacks() { + return numOfRacks - numOfEmptyRacks; + } + + /** + * Update empty rack number when add a node like recommission. + * @param node node to be added; can be null + */ + public void recommissionNode(Node node) { + if (node == null) { + return; + } + if (node instanceof InnerNode) { + throw new IllegalArgumentException( + "Not allow to remove an inner node: " + NodeBase.getPath(node)); + } + netlock.writeLock().lock(); + try { + decommissionNodes.remove(node.getName()); + interAddNodeWithEmptyRack(node); + } finally { + netlock.writeLock().unlock(); + } + } + + /** + * Update empty rack number when remove a node like decommission. + * @param node node to be added; can be null + */ + public void decommissionNode(Node node) { + if (node == null) { + return; + } + if (node instanceof InnerNode) { + throw new IllegalArgumentException( + "Not allow to remove an inner node: " + NodeBase.getPath(node)); + } + netlock.writeLock().lock(); + try { + decommissionNodes.add(node.getName()); + interRemoveNodeWithEmptyRack(node); + } finally { + netlock.writeLock().unlock(); + } + } + + /** + * Internal function for update empty rack number + * for add or recommission a node. + * @param node node to be added; can be null + */ + private void interAddNodeWithEmptyRack(Node node) { + if (node == null) { + return; + } + String rackname = node.getNetworkLocation(); + Set nodes = rackMap.get(rackname); + if (nodes == null) { + nodes = new HashSet(); + } + if (!decommissionNodes.contains(node.getName())) { + nodes.add(node.getName()); + } + rackMap.put(rackname, nodes); + countEmptyRacks(); + } + + /** + * Internal function for update empty rack number + * for remove or decommission a node. 
+ * @param node node to be removed; can be null + */ + private void interRemoveNodeWithEmptyRack(Node node) { + if (node == null) { + return; + } + String rackname = node.getNetworkLocation(); + Set nodes = rackMap.get(rackname); + if (nodes != null) { + InnerNode rack = (InnerNode) getNode(node.getNetworkLocation()); + if (rack == null) { + // this node and its rack are both removed. + rackMap.remove(rackname); + } else if (nodes.contains(node.getName())) { + // this node is decommissioned or removed. + nodes.remove(node.getName()); + rackMap.put(rackname, nodes); + } + countEmptyRacks(); + } + } + + private void countEmptyRacks() { + int count = 0; + for (Set nodes : rackMap.values()) { + if (nodes != null && nodes.isEmpty()) { + count++; + } + } + numOfEmptyRacks = count; + LOG.debug("Current numOfEmptyRacks is {}", numOfEmptyRacks); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java index 4db8155ffed3b..60ae442b4f602 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java @@ -124,12 +124,13 @@ public String toString() { } /** - * {@inheritDoc} + * {@inheritDoc}. *

    * This will get called in the superclass constructor, so a check is needed * to ensure that the raw mapping is defined before trying to relaying a null * configuration. - * @param conf + *

    + * @param conf input Configuration. */ @Override public void setConf(Configuration conf) { @@ -212,8 +213,9 @@ public List resolve(List names) { /** * Build and execute the resolution command. The command is * executed in the directory specified by the system property - * "user.dir" if set; otherwise the current working directory is used + * "user.dir" if set; otherwise the current working directory is used. * @param args a list of arguments + * @param commandScriptName input commandScriptName. * @return null if the number of arguments is out of range, * or the output of the command. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMappingWithDependency.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMappingWithDependency.java index e05fae6496a15..4c1a547baabe0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMappingWithDependency.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMappingWithDependency.java @@ -74,12 +74,13 @@ public String toString() { } /** - * {@inheritDoc} + * {@inheritDoc}. *

    * This will get called in the superclass constructor, so a check is needed * to ensure that the raw mapping is defined before trying to relaying a null * configuration. - * @param conf + *

    + * @param conf input Configuration. */ @Override public void setConf(Configuration conf) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketIOWithTimeout.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketIOWithTimeout.java index 312a481f25a86..d117bb8a6b701 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketIOWithTimeout.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketIOWithTimeout.java @@ -28,8 +28,9 @@ import java.nio.channels.Selector; import java.nio.channels.SocketChannel; import java.nio.channels.spi.SelectorProvider; -import java.util.Iterator; -import java.util.LinkedList; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.util.Time; import org.slf4j.Logger; @@ -48,8 +49,6 @@ abstract class SocketIOWithTimeout { private long timeout; private boolean closed = false; - private static SelectorPool selector = new SelectorPool(); - /* A timeout value of 0 implies wait for ever. * We should have a value of timeout that implies zero wait.. i.e. * read or write returns immediately. @@ -154,7 +153,7 @@ int doIO(ByteBuffer buf, int ops) throws IOException { //now wait for socket to be ready. int count = 0; try { - count = selector.select(channel, ops, timeout); + count = SelectorPool.select(channel, ops, timeout); } catch (IOException e) { //unexpected IOException. 
closed = true; throw e; @@ -200,7 +199,7 @@ static void connect(SocketChannel channel, // we might have to call finishConnect() more than once // for some channels (with user level protocols) - int ret = selector.select((SelectableChannel)channel, + int ret = SelectorPool.select(channel, SelectionKey.OP_CONNECT, timeoutLeft); if (ret > 0 && channel.finishConnect()) { @@ -242,7 +241,7 @@ static void connect(SocketChannel channel, */ void waitForIO(int ops) throws IOException { - if (selector.select(channel, ops, timeout) == 0) { + if (SelectorPool.select(channel, ops, timeout) == 0) { throw new SocketTimeoutException(timeoutExceptionString(channel, timeout, ops)); } @@ -280,12 +279,17 @@ private static String timeoutExceptionString(SelectableChannel channel, * This maintains a pool of selectors. These selectors are closed * once they are idle (unused) for a few seconds. */ - private static class SelectorPool { + private static final class SelectorPool { - private static class SelectorInfo { - Selector selector; - long lastActivityTime; - LinkedList queue; + private static final class SelectorInfo { + private final SelectorProvider provider; + private final Selector selector; + private long lastActivityTime; + + private SelectorInfo(SelectorProvider provider, Selector selector) { + this.provider = provider; + this.selector = selector; + } void close() { if (selector != null) { @@ -298,16 +302,11 @@ void close() { } } - private static class ProviderInfo { - SelectorProvider provider; - LinkedList queue; // lifo - ProviderInfo next; - } + private static ConcurrentHashMap> providerMap = new ConcurrentHashMap<>(); private static final long IDLE_TIMEOUT = 10 * 1000; // 10 seconds. - private ProviderInfo providerList = null; - /** * Waits on the channel with the given timeout using one of the * cached selectors. 
It also removes any cached selectors that are @@ -319,7 +318,7 @@ private static class ProviderInfo { * @return * @throws IOException */ - int select(SelectableChannel channel, int ops, long timeout) + static int select(SelectableChannel channel, int ops, long timeout) throws IOException { SelectorInfo info = get(channel); @@ -385,35 +384,18 @@ int select(SelectableChannel channel, int ops, long timeout) * @return * @throws IOException */ - private synchronized SelectorInfo get(SelectableChannel channel) + private static SelectorInfo get(SelectableChannel channel) throws IOException { - SelectorInfo selInfo = null; - SelectorProvider provider = channel.provider(); - // pick the list : rarely there is more than one provider in use. - ProviderInfo pList = providerList; - while (pList != null && pList.provider != provider) { - pList = pList.next; - } - if (pList == null) { - //LOG.info("Creating new ProviderInfo : " + provider.toString()); - pList = new ProviderInfo(); - pList.provider = provider; - pList.queue = new LinkedList(); - pList.next = providerList; - providerList = pList; - } - - LinkedList queue = pList.queue; - - if (queue.isEmpty()) { + ConcurrentLinkedDeque infoQ = providerMap.computeIfAbsent( + provider, k -> new ConcurrentLinkedDeque<>()); + + SelectorInfo selInfo = infoQ.pollLast(); // last in first out + if (selInfo == null) { Selector selector = provider.openSelector(); - selInfo = new SelectorInfo(); - selInfo.selector = selector; - selInfo.queue = queue; - } else { - selInfo = queue.removeLast(); + // selInfo will be put into infoQ after `#release()` + selInfo = new SelectorInfo(provider, selector); } trimIdleSelectors(Time.now()); @@ -426,34 +408,39 @@ private synchronized SelectorInfo get(SelectableChannel channel) * * @param info */ - private synchronized void release(SelectorInfo info) { + private static void release(SelectorInfo info) { long now = Time.now(); trimIdleSelectors(now); info.lastActivityTime = now; - info.queue.addLast(info); + 
// SelectorInfos in queue are sorted by lastActivityTime + providerMap.get(info.provider).addLast(info); } + private static AtomicBoolean trimming = new AtomicBoolean(false); + /** * Closes selectors that are idle for IDLE_TIMEOUT (10 sec). It does not * traverse the whole list, just over the one that have crossed * the timeout. */ - private void trimIdleSelectors(long now) { + private static void trimIdleSelectors(long now) { + if (!trimming.compareAndSet(false, true)) { + return; + } + long cutoff = now - IDLE_TIMEOUT; - - for(ProviderInfo pList=providerList; pList != null; pList=pList.next) { - if (pList.queue.isEmpty()) { - continue; - } - for(Iterator it = pList.queue.iterator(); it.hasNext();) { - SelectorInfo info = it.next(); - if (info.lastActivityTime > cutoff) { + for (ConcurrentLinkedDeque infoQ : providerMap.values()) { + SelectorInfo oldest; + while ((oldest = infoQ.peekFirst()) != null) { + if (oldest.lastActivityTime <= cutoff && infoQ.remove(oldest)) { + oldest.close(); + } else { break; } - it.remove(); - info.close(); } } + + trimming.set(false); } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java index cfa7b01e8136a..99e646a975b22 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java @@ -67,7 +67,7 @@ int performIO(ByteBuffer buf) throws IOException { * Channel for reading, should also be a {@link SelectableChannel}. * The channel will be configured to be non-blocking. * @param timeout timeout in milliseconds. must not be negative. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public SocketInputStream(ReadableByteChannel channel, long timeout) throws IOException { @@ -86,7 +86,7 @@ public SocketInputStream(ReadableByteChannel channel, long timeout) * * @param socket should have a channel associated with it. * @param timeout timeout timeout in milliseconds. must not be negative. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public SocketInputStream(Socket socket, long timeout) throws IOException { @@ -103,7 +103,7 @@ public SocketInputStream(Socket socket, long timeout) * @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) * * @param socket should have a channel associated with it. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public SocketInputStream(Socket socket) throws IOException { this(socket.getChannel(), socket.getSoTimeout()); @@ -141,7 +141,7 @@ public synchronized void close() throws IOException { } /** - * Returns underlying channel used by inputstream. + * @return Returns underlying channel used by inputstream. * This is useful in certain cases like channel for * {@link FileChannel#transferFrom(ReadableByteChannel, long, long)}. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputWrapper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputWrapper.java index f5cbe17519d60..45f776e692ac4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputWrapper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputWrapper.java @@ -27,7 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A wrapper stream around a socket which allows setting of its timeout. 
If the diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java index 93f4f56d78d63..3f6ea098a7200 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java @@ -72,7 +72,7 @@ int performIO(ByteBuffer buf) throws IOException { * Channel for writing, should also be a {@link SelectableChannel}. * The channel will be configured to be non-blocking. * @param timeout timeout in milliseconds. must not be negative. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public SocketOutputStream(WritableByteChannel channel, long timeout) throws IOException { @@ -91,7 +91,7 @@ public SocketOutputStream(WritableByteChannel channel, long timeout) * * @param socket should have a channel associated with it. * @param timeout timeout timeout in milliseconds. must not be negative. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public SocketOutputStream(Socket socket, long timeout) throws IOException { @@ -138,7 +138,7 @@ public synchronized void close() throws IOException { } /** - * Returns underlying channel used by this stream. + * @return Returns underlying channel used by this stream. * This is useful in certain cases like channel for * {@link FileChannel#transferTo(long, long, WritableByteChannel)} */ @@ -254,7 +254,12 @@ public void transferToFully(FileChannel fileCh, long position, int count, * Call * {@link #transferToFully(FileChannel, long, int, LongWritable, LongWritable) * } - * with null waitForWritableTime and transferToTime + * with null waitForWritableTime and transferToTime. + * + * @param fileCh input fileCh. + * @param position input position. + * @param count input count. 
+ * @throws IOException raised on errors performing I/O. */ public void transferToFully(FileChannel fileCh, long position, int count) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java index 9693220438dd6..e5a4047d5650e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -32,7 +32,7 @@ import org.apache.hadoop.util.NativeCodeLoader; import org.apache.hadoop.util.CloseableReferenceCount; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,6 +106,8 @@ native static void validateSocketPathSecurity0(String path, /** * Return true only if UNIX domain sockets are available. + * + * @return loadingFailureReason. */ public static String getLoadingFailureReason() { return loadingFailureReason; @@ -184,6 +186,7 @@ private void unreference(boolean checkClosed) throws ClosedChannelException { * * @param path The path to bind and listen on. * @return The new DomainSocket. + * @throws IOException raised on errors performing I/O. */ public static DomainSocket bindAndListen(String path) throws IOException { if (loadingFailureReason != null) { @@ -387,7 +390,7 @@ public void close() throws IOException { /** * Call shutdown(SHUT_RDWR) on the UNIX domain socket. * - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void shutdown() throws IOException { refCount.reference(); @@ -413,6 +416,7 @@ private native static void sendFileDescriptors0(int fd, * one byte. * @param offset The offset in the jbuf array to start at. * @param length Length of the jbuf array to use. 
+ * @throws IOException raised on errors performing I/O. */ public void sendFileDescriptors(FileDescriptor descriptors[], byte jbuf[], int offset, int length) throws IOException { @@ -433,6 +437,13 @@ private static native int receiveFileDescriptors0(int fd, /** * Receive some FileDescriptor objects from the process on the other side of * this socket, and wrap them in FileInputStream objects. + * + * @param streams input stream. + * @param buf input buf. + * @param offset input offset. + * @param length input length. + * @return wrap them in FileInputStream objects. + * @throws IOException raised on errors performing I/O. */ public int recvFileInputStreams(FileInputStream[] streams, byte buf[], int offset, int length) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java index e36399ff96c01..17c7d4b65c401 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java @@ -35,9 +35,9 @@ import org.apache.commons.lang3.SystemUtils; import org.apache.hadoop.util.NativeCodeLoader; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java index e71bf6d40dd21..ef309cb2247fd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java @@ -138,6 +138,8 @@ public void addToken(Text alias, Token t) { /** * Return all the tokens in the in-memory map. + * + * @return all the tokens in the in-memory map. */ public Collection> getAllTokens() { return tokenMap.values(); @@ -145,6 +147,8 @@ public Collection> getAllTokens() { /** * Returns an unmodifiable version of the full map of aliases to Tokens. + * + * @return TokenMap. */ public Map> getTokenMap() { return Collections.unmodifiableMap(tokenMap); @@ -192,6 +196,8 @@ public void removeSecretKey(Text alias) { /** * Return all the secret key entries in the in-memory map. + * + * @return Text List. */ public List getAllSecretKeys() { List list = new java.util.ArrayList(); @@ -202,6 +208,8 @@ public List getAllSecretKeys() { /** * Returns an unmodifiable version of the full map of aliases to secret keys. + * + * @return SecretKeyMap. */ public Map getSecretKeyMap() { return Collections.unmodifiableMap(secretKeysMap); @@ -209,9 +217,10 @@ public Map getSecretKeyMap() { /** * Convenience method for reading a token storage file and loading its Tokens. - * @param filename - * @param conf - * @throws IOException + * @param filename filename. + * @param conf configuration. + * @throws IOException raised on errors performing I/O. + * @return Credentials. */ public static Credentials readTokenStorageFile(Path filename, Configuration conf) @@ -233,9 +242,10 @@ public static Credentials readTokenStorageFile(Path filename, /** * Convenience method for reading a token storage file and loading its Tokens. - * @param filename - * @param conf - * @throws IOException + * @param filename filename. + * @param conf configuration. 
+ * @throws IOException raised on errors performing I/O. + * @return Token. */ public static Credentials readTokenStorageFile(File filename, Configuration conf) @@ -256,6 +266,9 @@ public static Credentials readTokenStorageFile(File filename, /** * Convenience method for reading a token from a DataInputStream. + * + * @param in DataInputStream. + * @throws IOException raised on errors performing I/O. */ public void readTokenStorageStream(DataInputStream in) throws IOException { byte[] magic = new byte[TOKEN_STORAGE_MAGIC.length]; @@ -335,8 +348,8 @@ public void writeTokenStorageFile(Path filename, Configuration conf, /** * Stores all the keys to DataOutput. - * @param out - * @throws IOException + * @param out DataOutput. + * @throws IOException raised on errors performing I/O. */ @Override public void write(DataOutput out) throws IOException { @@ -401,8 +414,8 @@ void readProto(DataInput in) throws IOException { /** * Loads all the keys. - * @param in - * @throws IOException + * @param in DataInput. + * @throws IOException raised on errors performing I/O. */ @Override public void readFields(DataInput in) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java index 8b90f5bc7af9e..b35ac903a7829 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java @@ -38,18 +38,18 @@ public interface GroupMappingServiceProvider { * Returns EMPTY list in case of non-existing user * @param user User's name * @return group memberships of user - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public List getGroups(String user) throws IOException; /** * Refresh the cache of groups and user mapping - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void cacheGroupsRefresh() throws IOException; /** * Caches the group user information * @param groups list of groups to add to cache - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void cacheGroupsAdd(List groups) throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java index b29278bd20751..93e251d9222fc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java @@ -35,20 +35,20 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Ticker; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Ticker; +import 
org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; @@ -438,7 +438,7 @@ public static Groups getUserToGroupsMappingService() { /** * Get the groups being used to map user-to-groups. - * @param conf + * @param conf configuration. * @return the groups being used to map user-to-groups. */ public static synchronized Groups getUserToGroupsMappingService( @@ -455,7 +455,7 @@ public static synchronized Groups getUserToGroupsMappingService( /** * Create new groups used to map user-to-groups with loaded configuration. - * @param conf + * @param conf configuration. * @return the groups being used to map user-to-groups. 
*/ @Private diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java index df96c500cd08b..b66f8444528a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java @@ -45,7 +45,7 @@ public class HadoopKerberosName extends KerberosName { /** * Create a name from the full Kerberos principal name. - * @param name + * @param name name. */ public HadoopKerberosName(String name) { super(name); @@ -58,7 +58,7 @@ public HadoopKerberosName(String name) { * method should be invoked directly. * * @param conf the new configuration - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void setConfiguration(Configuration conf) throws IOException { final String defaultRule; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/IngressPortBasedResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/IngressPortBasedResolver.java index a30e4a84dd86b..1431ed5d0e907 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/IngressPortBasedResolver.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/IngressPortBasedResolver.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.security; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.net.InetAddress; import java.util.Collection; import java.util.HashMap; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KDiag.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KDiag.java index f759dbdb44f75..ee6a127f0e24f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KDiag.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KDiag.java @@ -46,6 +46,7 @@ import java.io.PrintWriter; import java.lang.reflect.InvocationTargetException; import java.net.InetAddress; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -434,7 +435,8 @@ protected boolean isSimpleAuthentication(Configuration conf) { * This is a recurrent problem * (that is: it keeps creeping back with JVM updates); * a fast failure is the best tactic. - * @throws NoSuchAlgorithmException + * @throws NoSuchAlgorithmException when a particular cryptographic algorithm is + * requested but is not available in the environment. */ protected void validateKeyLength() throws NoSuchAlgorithmException { @@ -923,7 +925,7 @@ private void printEnv(String variable) { */ private void dump(File file) throws IOException { try (InputStream in = Files.newInputStream(file.toPath())) { - for (String line : IOUtils.readLines(in)) { + for (String line : IOUtils.readLines(in, StandardCharsets.UTF_8)) { println("%s", line); } } @@ -1045,7 +1047,7 @@ private void failif(boolean condition, * @param conf configuration * @param argv argument list * @return an exception - * @throws Exception + * @throws Exception Exception. */ public static int exec(Configuration conf, String... 
argv) throws Exception { try(KDiag kdiag = new KDiag()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KerberosInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KerberosInfo.java index 062dcff61e1d9..e79492adf94e9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KerberosInfo.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/KerberosInfo.java @@ -31,7 +31,10 @@ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public @interface KerberosInfo { - /** Key for getting server's Kerberos principal name from Configuration */ + /** + * Key for getting server's Kerberos principal name from Configuration. + * @return serverPrincipal. + */ String serverPrincipal(); String clientPrincipal() default ""; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java index 8e71f69c858d1..e751ed6a3730f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java @@ -58,7 +58,8 @@ import javax.net.ssl.TrustManager; import javax.net.ssl.TrustManagerFactory; -import com.google.common.collect.Iterators; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterators; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configurable; @@ -458,7 +459,8 @@ private NamingEnumeration lookupPosixGroup(SearchResult result, * @return a list of strings representing group names of the user. 
* @throws NamingException if unable to find group names */ - private List lookupGroup(SearchResult result, DirContext c, + @VisibleForTesting + List lookupGroup(SearchResult result, DirContext c, int goUpHierarchy) throws NamingException { List groups = new ArrayList<>(); @@ -510,6 +512,7 @@ private List lookupGroup(SearchResult result, DirContext c, List doGetGroups(String user, int goUpHierarchy) throws NamingException { DirContext c = getDirContext(); + List groups = new ArrayList<>(); // Search for the user. We'll only ever need to look at the first result NamingEnumeration results = c.search(userbaseDN, @@ -518,11 +521,10 @@ List doGetGroups(String user, int goUpHierarchy) if (!results.hasMoreElements()) { LOG.debug("doGetGroups({}) returned no groups because the " + "user is not found.", user); - return new ArrayList<>(); + return groups; } SearchResult result = results.nextElement(); - List groups = null; if (useOneQuery) { try { /** @@ -536,7 +538,6 @@ List doGetGroups(String user, int goUpHierarchy) memberOfAttr + "' attribute." + "Returned user object: " + result.toString()); } - groups = new ArrayList<>(); NamingEnumeration groupEnumeration = groupDNAttr.getAll(); while (groupEnumeration.hasMore()) { String groupDN = groupEnumeration.next().toString(); @@ -544,11 +545,13 @@ List doGetGroups(String user, int goUpHierarchy) } } catch (NamingException e) { // If the first lookup failed, fall back to the typical scenario. + // In order to force the fallback, we need to reset groups collection. + groups.clear(); LOG.info("Failed to get groups from the first lookup. 
Initiating " + "the second LDAP query using the user's DN.", e); } } - if (groups == null || groups.isEmpty() || goUpHierarchy > 0) { + if (groups.isEmpty() || goUpHierarchy > 0) { groups = lookupGroup(result, c, goUpHierarchy); } LOG.debug("doGetGroups({}) returned {}", user, groups); @@ -677,7 +680,29 @@ private DirContext getDirContext() throws NamingException { env.put("com.sun.jndi.ldap.read.timeout", conf.get(READ_TIMEOUT, String.valueOf(READ_TIMEOUT_DEFAULT))); - ctx = new InitialDirContext(env); + // See HADOOP-17675 for details TLDR: + // From a native thread the thread's context classloader is null. + // jndi internally in the InitialDirContext specifies the context + // classloader for Class.forName, and as it is null, jndi will use the + // bootstrap classloader in this case to load the socket factory + // implementation. + // BUT + // Bootstrap classloader does not have it in its classpath, so throws a + // ClassNotFoundException. + // This affects Impala for example when it uses LdapGroupsMapping.
+ ClassLoader currentContextLoader = + Thread.currentThread().getContextClassLoader(); + if (currentContextLoader == null) { + try { + Thread.currentThread().setContextClassLoader( + this.getClass().getClassLoader()); + ctx = new InitialDirContext(env); + } finally { + Thread.currentThread().setContextClassLoader(null); + } + } else { + ctx = new InitialDirContext(env); + } } return ctx; } @@ -825,7 +850,7 @@ String getPasswordFromCredentialProviders( password = new String(passchars); } } catch (IOException ioe) { - LOG.warn("Exception while trying to get password for alias {}: {}", + LOG.warn("Exception while trying to get password for alias {}: ", alias, ioe); } return password; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java index d7a68210c7dc2..f9c292b4608cc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java @@ -23,8 +23,9 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.nio.charset.StandardCharsets; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -135,6 +136,7 @@ public static URI nestURIForLocalJavaKeyStoreProvider(final URI localFile) * @param config the existing configuration with provider path * @param fileSystemClass the class which providers must be compatible * @return Configuration clone with new provider path + * @throws IOException raised on errors performing I/O. 
*/ public static Configuration excludeIncompatibleCredentialProviders( Configuration config, Class fileSystemClass) @@ -224,7 +226,7 @@ public static char[] locatePassword(String envWithPass, String fileWithPass) throw new IOException("Password file does not exist"); } try (InputStream is = pwdFile.openStream()) { - pass = IOUtils.toString(is).trim().toCharArray(); + pass = IOUtils.toString(is, StandardCharsets.UTF_8).trim().toCharArray(); } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RefreshUserMappingsProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RefreshUserMappingsProtocol.java index 005b2948ea2a6..c4f636e374519 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RefreshUserMappingsProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RefreshUserMappingsProtocol.java @@ -42,14 +42,14 @@ public interface RefreshUserMappingsProtocol { /** * Refresh user to group mappings. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Idempotent public void refreshUserToGroupsMappings() throws IOException; /** * Refresh superuser proxy group list - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Idempotent public void refreshSuperUserGroupsConfiguration() throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RuleBasedLdapGroupsMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RuleBasedLdapGroupsMapping.java index 6accf2fdced02..6af28f155c466 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RuleBasedLdapGroupsMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/RuleBasedLdapGroupsMapping.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.security; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslInputStream.java index a91a90ac7c901..2a8c3bf30c75f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslInputStream.java @@ -281,7 +281,7 @@ public int read(byte[] b, int off, int len) throws IOException { *

    * Fewer bytes than requested might be skipped. The actual number of bytes * skipped is equal to n or the result of a call to - * {@link #available() available}, whichever is smaller. If + * {@link #available()}, whichever is smaller. If * n is less than zero, no bytes are skipped. * *

    diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslPropertiesResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslPropertiesResolver.java index dd6c42e1491a8..25cc4a8144f05 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslPropertiesResolver.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslPropertiesResolver.java @@ -46,7 +46,7 @@ public class SaslPropertiesResolver implements Configurable{ * Looks up the configuration to see if there is custom class specified. * Constructs the instance by passing the configuration directly to the * constructor to achieve thread safety using final fields. - * @param conf + * @param conf configuration. * @return SaslPropertiesResolver */ public static SaslPropertiesResolver getInstance(Configuration conf) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java index 215f473b9fcc9..cb2870e5de9d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java @@ -71,7 +71,7 @@ import org.apache.hadoop.security.token.TokenSelector; import org.apache.hadoop.util.ProtoUtil; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.ByteString; import com.google.re2j.Pattern; import org.slf4j.Logger; @@ -351,9 +351,9 @@ String getServerPrincipal(SaslAuth authType) throws IOException { /** * Do client side SASL authentication with server via the given IpcStreams. * - * @param ipcStreams + * @param ipcStreams ipcStreams. 
* @return AuthMethod used to negotiate the connection - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public AuthMethod saslConnect(IpcStreams ipcStreams) throws IOException { // redefined if/when a SASL negotiation starts, can be queried if the @@ -521,7 +521,7 @@ private boolean useWrap() { * * @param in - InputStream used to make the connection * @return InputStream that may be using SASL unwrap - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public InputStream getInputStream(InputStream in) throws IOException { if (useWrap()) { @@ -537,7 +537,7 @@ public InputStream getInputStream(InputStream in) throws IOException { * * @param out - OutputStream used to make the connection * @return OutputStream that may be using wrapping - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public OutputStream getOutputStream(OutputStream out) throws IOException { if (useWrap()) { @@ -638,7 +638,11 @@ public void write(byte[] buf, int off, int len) throws IOException { } } - /** Release resources used by wrapped saslClient */ + /** + * Release resources used by wrapped saslClient. 
+ * @throws SaslException if authentication or generating response fails, + * or SASL protocol mixup + */ public void dispose() throws SaslException { if (saslClient != null) { saslClient.dispose(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java index 7c3f14da21cf5..b61b6cc18414d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java @@ -208,7 +208,11 @@ static char[] encodePassword(byte[] password) { StandardCharsets.UTF_8).toCharArray(); } - /** Splitting fully qualified Kerberos name into parts */ + /** + * Splitting fully qualified Kerberos name into parts. + * @param fullName fullName. + * @return splitKerberosName. + */ public static String[] splitKerberosName(String fullName) { return fullName.split("[/@]"); } @@ -240,17 +244,30 @@ private static AuthMethod valueOf(byte code) { return i < 0 || i >= values().length ? null : values()[i]; } - /** Return the SASL mechanism name */ + /** + * Return the SASL mechanism name. + * @return mechanismName. + */ public String getMechanismName() { return mechanismName; } - /** Read from in */ + /** + * Read from in. + * + * @param in DataInput. + * @throws IOException raised on errors performing I/O. + * @return AuthMethod. + */ public static AuthMethod read(DataInput in) throws IOException { return valueOf(in.readByte()); } - /** Write to out */ + /** + * Write to out. + * @param out DataOutput. + * @throws IOException raised on errors performing I/O. 
+ */ public void write(DataOutput out) throws IOException { out.write(code); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java index aa12b93be95b4..3b201384125a5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java @@ -44,6 +44,8 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.io.Text; import org.apache.hadoop.net.DNS; +import org.apache.hadoop.net.DomainNameResolver; +import org.apache.hadoop.net.DomainNameResolverFactory; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; @@ -57,8 +59,8 @@ import org.xbill.DNS.ResolverConfig; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.net.InetAddresses; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; /** * Security Utils. @@ -81,6 +83,8 @@ private SecurityUtil() { @VisibleForTesting static HostResolver hostResolver; + private static DomainNameResolver domainNameResolver; + private static boolean logSlowLookups; private static int slowLookupThresholdMs; @@ -112,10 +116,15 @@ private static void setConfigurationInternal(Configuration conf) { .HADOOP_SECURITY_DNS_LOG_SLOW_LOOKUPS_THRESHOLD_MS_KEY, CommonConfigurationKeys .HADOOP_SECURITY_DNS_LOG_SLOW_LOOKUPS_THRESHOLD_MS_DEFAULT); + + domainNameResolver = DomainNameResolverFactory.newInstance(conf, + CommonConfigurationKeys.HADOOP_SECURITY_RESOLVER_IMPL); } /** - * For use only by tests and initialization + * For use only by tests and initialization. + * + * @param flag flag. 
*/ @InterfaceAudience.Private @VisibleForTesting @@ -210,7 +219,7 @@ public static String getServerPrincipal(String principalConfig, throw new IOException("Can't replace " + HOSTNAME_PATTERN + " pattern since client address is null"); } - return replacePattern(components, addr.getCanonicalHostName()); + return replacePattern(components, domainNameResolver.getHostnameByIP(addr)); } } @@ -380,7 +389,25 @@ public static void setSecurityInfoProviders(SecurityInfo... providers) { } return null; } - + + /** + * Look up the client principal for a given protocol. It searches all known + * SecurityInfo providers. + * @param protocol the protocol class to get the information for + * @param conf configuration object + * @return client principal or null if it has no client principal defined. + */ + public static String getClientPrincipal(Class protocol, + Configuration conf) { + String user = null; + KerberosInfo krbInfo = SecurityUtil.getKerberosInfo(protocol, conf); + if (krbInfo != null) { + String key = krbInfo.clientPrincipal(); + user = (key != null && !key.isEmpty()) ? conf.get(key) : null; + } + return user; + } + /** * Look up the TokenInfo for a given protocol. It searches all known * SecurityInfo providers. @@ -469,6 +496,10 @@ public static Text buildTokenService(URI uri) { * Perform the given action as the daemon's login user. If the login * user cannot be determined, this will log a FATAL error and exit * the whole JVM. + * + * @param action action. + * @param generic type T. + * @return generic type T. */ public static T doAsLoginUserOrFatal(PrivilegedAction action) { if (UserGroupInformation.isSecurityEnabled()) { @@ -491,6 +522,7 @@ public static T doAsLoginUserOrFatal(PrivilegedAction action) { * InterruptedException is thrown, it is converted to an IOException. * * @param action the action to perform + * @param Generics Type T. 
* @return the result of the action * @throws IOException in the event of error */ @@ -504,6 +536,7 @@ public static T doAsLoginUser(PrivilegedExceptionAction action) * InterruptedException is thrown, it is converted to an IOException. * * @param action the action to perform + * @param generic type T. * @return the result of the action * @throws IOException in the event of error */ @@ -730,9 +763,13 @@ public static boolean isPrivilegedPort(final int port) { /** * Utility method to fetch ZK auth info from the configuration. + * + * @param conf configuration. + * @param configKey config key. * @throws java.io.IOException if the Zookeeper ACLs configuration file * cannot be read * @throws ZKUtil.BadAuthFormatException if the auth format is invalid + * @return ZKAuthInfo List. */ public static List getZKAuthInfos(Configuration conf, String configKey) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java index 92ea83d8f1da5..d280f75f6ac30 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedIdMapping.java @@ -21,7 +21,6 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.HashMap; @@ -32,12 +31,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.BiMap; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.HashBiMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.util.Shell.bashQuote; + /** * A simple shell-based implementation of {@link IdMappingServiceProvider} * Map id to user name or group name. It does update every 15 minutes. Only a @@ -210,7 +211,14 @@ private static Integer parseId(final String idStr) { /** * Get the list of users or groups returned by the specified command, * and save them in the corresponding map. - * @throws IOException + * + * @param map map. + * @param mapName mapName. + * @param command command. + * @param staticMapping staticMapping. + * @param regex regex. + * @throws IOException raised on errors performing I/O. + * @return updateMapInternal. */ @VisibleForTesting public static boolean updateMapInternal(BiMap map, @@ -222,8 +230,7 @@ public static boolean updateMapInternal(BiMap map, Process process = Runtime.getRuntime().exec( new String[] { "bash", "-c", command }); br = new BufferedReader( - new InputStreamReader(process.getInputStream(), - Charset.defaultCharset())); + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)); String line = null; while ((line = br.readLine()) != null) { String[] nameId = line.split(regex); @@ -466,26 +473,27 @@ synchronized private void updateMapIncr(final String name, boolean updated = false; updateStaticMapping(); + String name2 = bashQuote(name); if (OS.startsWith("Linux") || OS.equals("SunOS") || OS.contains("BSD")) { if (isGrp) { updated = updateMapInternal(gidNameMap, "group", - getName2IdCmdNIX(name, true), ":", + getName2IdCmdNIX(name2, true), ":", staticMapping.gidMapping); } else { updated = updateMapInternal(uidNameMap, "user", - getName2IdCmdNIX(name, false), ":", + getName2IdCmdNIX(name2, false), ":", staticMapping.uidMapping); } } else { // Mac if (isGrp) { updated = updateMapInternal(gidNameMap, "group", - getName2IdCmdMac(name, true), "\\s+", + getName2IdCmdMac(name2, 
true), "\\s+", staticMapping.gidMapping); } else { updated = updateMapInternal(uidNameMap, "user", - getName2IdCmdMac(name, false), "\\s+", + getName2IdCmdMac(name2, false), "\\s+", staticMapping.uidMapping); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java index 31f43980552f2..96e4402e5b9c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java @@ -23,8 +23,8 @@ import java.util.StringTokenizer; import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsNetgroupMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsNetgroupMapping.java index eff6985471b4c..01d6f299d17d5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsNetgroupMapping.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsNetgroupMapping.java @@ -92,6 +92,7 @@ public void cacheGroupsAdd(List groups) throws IOException { * * @param netgroup return users for this netgroup * @return list of users for a given netgroup + * @throws IOException raised on errors performing I/O. 
*/ protected List getUsersForNetgroup(String netgroup) throws IOException { @@ -128,6 +129,7 @@ protected List getUsersForNetgroup(String netgroup) * * @param netgroup get users for this netgroup * @return string of users for a given netgroup in getent netgroups format + * @throws IOException raised on errors performing I/O. */ protected String execShellGetUserForNetgroup(final String netgroup) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index 8c84a8d31a063..98d8361c9c70a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -28,7 +28,7 @@ import static org.apache.hadoop.util.PlatformName.IBM_JAVA; import static org.apache.hadoop.util.StringUtils.getTrimmedStringCollection; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.File; import java.io.IOException; @@ -530,6 +530,14 @@ private void setLogin(LoginContext login) { user.setLogin(login); } + /** + * Set the last login time for logged in user + * @param loginTime the number of milliseconds since the beginning of time + */ + private void setLastLogin(long loginTime) { + user.setLastLogin(loginTime); + } + /** * Create a UserGroupInformation for the given subject. * This does not change the subject or acquire new credentials. @@ -582,6 +590,7 @@ public static UserGroupInformation getCurrentUser() throws IOException { * @param user The user name, or NULL if none is specified. * * @return The most appropriate UserGroupInformation + * @throws IOException raised on errors performing I/O. 
*/ public static UserGroupInformation getBestUGI( String ticketCachePath, String user) throws IOException { @@ -602,6 +611,7 @@ public static UserGroupInformation getBestUGI( * @param ticketCache the path to the ticket cache file * * @throws IOException if the kerberos login fails + * @return UserGroupInformation. */ @InterfaceAudience.Public @InterfaceStability.Evolving @@ -623,8 +633,9 @@ public static UserGroupInformation getUGIFromTicketCache( * The creator of subject is responsible for * renewing credentials. * - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KerberosAuthException if the kerberos login fails + * @return UserGroupInformation. */ public static UserGroupInformation getUGIFromSubject(Subject subject) throws IOException { @@ -679,7 +690,7 @@ public static UserGroupInformation getLoginUser() throws IOException { * remove the login method that is followed by a space from the username * e.g. "jack (auth:SIMPLE)" {@literal ->} "jack" * - * @param userName + * @param userName input userName. * @return userName without login method */ public static String trimLoginMethod(String userName) { @@ -1099,7 +1110,7 @@ static long getNextTgtRenewalTime(final long tgtEndTime, final long now, * file and logs them in. They become the currently logged-in user. * @param user the principal name to load from the keytab * @param path the path to the keytab file - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KerberosAuthException if it's a kerberos login exception. */ @InterfaceAudience.Public @@ -1118,9 +1129,10 @@ static void loginUserFromKeytab(String user, setLoginUser(u); - LOG.info("Login successful for user {} using keytab file {}. Keytab auto" + - " renewal enabled : {}", - user, path, isKerberosKeyTabLoginRenewalEnabled()); + LOG.info( + "Login successful for user {} using keytab file {}. 
Keytab auto" + + " renewal enabled : {}", + user, new File(path).getName(), isKerberosKeyTabLoginRenewalEnabled()); } /** @@ -1128,7 +1140,7 @@ static void loginUserFromKeytab(String user, * This method assumes that the user logged in by calling * {@link #loginUserFromKeytab(String, String)}. * - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KerberosAuthException if a failure occurred in logout, * or if the user did not log in by invoking loginUserFromKeyTab() before. */ @@ -1168,7 +1180,7 @@ public void logoutUserFromKeytab() throws IOException { /** * Re-login a user from keytab if TGT is expired or is close to expiry. * - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KerberosAuthException if it's a kerberos login exception. */ public void checkTGTAndReloginFromKeytab() throws IOException { @@ -1216,7 +1228,7 @@ void fixKerberosTicketOrder() { * happened already. * The Subject field of this UserGroupInformation object is updated to have * the new credentials. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KerberosAuthException on a failure */ @InterfaceAudience.Public @@ -1225,7 +1237,29 @@ public void reloginFromKeytab() throws IOException { reloginFromKeytab(false); } + /** + * Force re-Login a user in from a keytab file irrespective of the last login + * time. Loads a user identity from a keytab file and logs them in. They + * become the currently logged-in user. This method assumes that + * {@link #loginUserFromKeytab(String, String)} had happened already. The + * Subject field of this UserGroupInformation object is updated to have the + * new credentials. + * + * @throws IOException raised on errors performing I/O. 
+ * @throws KerberosAuthException on a failure + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public void forceReloginFromKeytab() throws IOException { + reloginFromKeytab(false, true); + } + private void reloginFromKeytab(boolean checkTGT) throws IOException { + reloginFromKeytab(checkTGT, false); + } + + private void reloginFromKeytab(boolean checkTGT, boolean ignoreLastLoginTime) + throws IOException { if (!shouldRelogin() || !isFromKeytab()) { return; } @@ -1240,7 +1274,7 @@ private void reloginFromKeytab(boolean checkTGT) throws IOException { return; } } - relogin(login); + relogin(login, ignoreLastLoginTime); } /** @@ -1248,7 +1282,7 @@ private void reloginFromKeytab(boolean checkTGT) throws IOException { * method assumes that login had happened already. * The Subject field of this UserGroupInformation object is updated to have * the new credentials. - * @throws IOException + * @throws IOException raised on errors performing I/O. * @throws KerberosAuthException on a failure */ @InterfaceAudience.Public @@ -1261,25 +1295,27 @@ public void reloginFromTicketCache() throws IOException { if (login == null) { throw new KerberosAuthException(MUST_FIRST_LOGIN); } - relogin(login); + relogin(login, false); } - private void relogin(HadoopLoginContext login) throws IOException { + private void relogin(HadoopLoginContext login, boolean ignoreLastLoginTime) + throws IOException { // ensure the relogin is atomic to avoid leaving credentials in an // inconsistent state. prevents other ugi instances, SASL, and SPNEGO // from accessing or altering credentials during the relogin. synchronized(login.getSubjectLock()) { // another racing thread may have beat us to the relogin. 
if (login == getLogin()) { - unprotectedRelogin(login); + unprotectedRelogin(login, ignoreLastLoginTime); } } } - private void unprotectedRelogin(HadoopLoginContext login) throws IOException { + private void unprotectedRelogin(HadoopLoginContext login, + boolean ignoreLastLoginTime) throws IOException { assert Thread.holdsLock(login.getSubjectLock()); long now = Time.now(); - if (!hasSufficientTimeElapsed(now)) { + if (!hasSufficientTimeElapsed(now) && !ignoreLastLoginTime) { return; } // register most recent relogin attempt @@ -1314,6 +1350,7 @@ private void unprotectedRelogin(HadoopLoginContext login) throws IOException { * @param user the principal name to load from the keytab * @param path the path to the keytab file * @throws IOException if the keytab file can't be read + * @return UserGroupInformation. */ public static UserGroupInformation loginUserFromKeytabAndReturnUGI(String user, @@ -1340,8 +1377,9 @@ private boolean hasSufficientTimeElapsed(long now) { } /** - * Did the login happen via keytab + * Did the login happen via keytab. * @return true or false + * @throws IOException raised on errors performing I/O. */ @InterfaceAudience.Public @InterfaceStability.Evolving @@ -1350,8 +1388,9 @@ public static boolean isLoginKeytabBased() throws IOException { } /** - * Did the login happen via ticket cache + * Did the login happen via ticket cache. * @return true or false + * @throws IOException raised on errors performing I/O. */ public static boolean isLoginTicketBased() throws IOException { return getLoginUser().isFromTicket(); @@ -1372,7 +1411,9 @@ public static UserGroupInformation createRemoteUser(String user) { /** * Create a user from a login name. It is intended to be used for remote * users in RPC, since it won't have any credentials. + * * @param user the full user principal name, must not be empty or null + * @param authMethod input authMethod. * @return the UserGroupInformation for the remote user. 
*/ @InterfaceAudience.Public @@ -1442,8 +1483,9 @@ public static AuthenticationMethod valueOf(AuthMethod authMethod) { /** * Create a proxy user using username of the effective user and the ugi of the * real user. - * @param user - * @param realUser + * + * @param user input user. + * @param realUser input realUser. * @return proxyUser ugi */ @InterfaceAudience.Public @@ -1476,7 +1518,19 @@ public UserGroupInformation getRealUser() { return null; } - + /** + * If this is a proxy user, get the real user. Otherwise, return + * this user. + * @param user the user to check + * @return the real user or self + */ + public static UserGroupInformation getRealUserOrSelf(UserGroupInformation user) { + if (user == null) { + return null; + } + UserGroupInformation real = user.getRealUser(); + return real != null ? real : user; + } /** * This class is used for storing the groups for testing. It stores a local @@ -1719,9 +1773,9 @@ public String toString() { } /** - * Sets the authentication method in the subject + * Sets the authentication method in the subject. * - * @param authMethod + * @param authMethod input authMethod. */ public synchronized void setAuthenticationMethod(AuthenticationMethod authMethod) { @@ -1729,9 +1783,9 @@ void setAuthenticationMethod(AuthenticationMethod authMethod) { } /** - * Sets the authentication method in the subject + * Sets the authentication method in the subject. * - * @param authMethod + * @param authMethod input authMethod. */ public void setAuthenticationMethod(AuthMethod authMethod) { user.setAuthenticationMethod(AuthenticationMethod.valueOf(authMethod)); @@ -1764,7 +1818,7 @@ public synchronized AuthenticationMethod getRealAuthenticationMethod() { * Returns the authentication method of a ugi. If the authentication method is * PROXY, returns the authentication method of the real user. * - * @param ugi + * @param ugi input ugi. 
* @return AuthenticationMethod */ public static AuthenticationMethod getRealAuthenticationMethod( @@ -1865,8 +1919,11 @@ public T doAs(PrivilegedExceptionAction action /** * Log current UGI and token information into specified log. - * @param ugi - UGI - * @throws IOException + * + * @param log input log. + * @param caption input caption. + * @param ugi - UGI. + * @throws IOException raised on errors performing I/O. */ @InterfaceAudience.LimitedPrivate({"HDFS", "KMS"}) @InterfaceStability.Unstable @@ -1882,8 +1939,10 @@ public static void logUserInfo(Logger log, String caption, /** * Log all (current, real, login) UGI and token info into specified log. + * + * @param log input log. * @param ugi - UGI - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @InterfaceAudience.LimitedPrivate({"HDFS", "KMS"}) @InterfaceStability.Unstable @@ -1901,7 +1960,7 @@ public static void logAllUserInfo(Logger log, UserGroupInformation ugi) throws /** * Log all (current, real, login) UGI and token info into UGI debug log. * @param ugi - UGI - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void logAllUserInfo(UserGroupInformation ugi) throws IOException { @@ -1946,6 +2005,7 @@ private static UserGroupInformation doSubjectLogin( if (subject == null) { params.put(LoginParam.PRINCIPAL, ugi.getUserName()); ugi.setLogin(login); + ugi.setLastLogin(Time.now()); } return ugi; } catch (LoginException le) { @@ -2178,7 +2238,7 @@ private static String prependFileAuthority(String keytabPath) { * A test method to print out the current user's UGI. * @param args if there are two arguments, read the user from the keytab * and print it out. - * @throws Exception + * @throws Exception if any error occurs. 
*/ public static void main(String [] args) throws Exception { System.out.println("Getting UGI for current user"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/AbstractJavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/AbstractJavaKeyStoreProvider.java index df783f16edb90..260f1d22496f0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/AbstractJavaKeyStoreProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/AbstractJavaKeyStoreProvider.java @@ -24,7 +24,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.ProviderUtils; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -147,6 +147,10 @@ protected final String getPathAsString() { protected abstract String getSchemeName(); + protected abstract String getKeyStoreType(); + + protected abstract String getAlgorithm(); + protected abstract OutputStream getOutputStreamForKeystore() throws IOException; @@ -264,8 +268,8 @@ CredentialEntry innerSetCredential(String alias, char[] material) writeLock.lock(); try { keyStore.setKeyEntry(alias, - new SecretKeySpec(new String(material).getBytes("UTF-8"), "AES"), - password, null); + new SecretKeySpec(new String(material).getBytes("UTF-8"), + getAlgorithm()), password, null); } catch (KeyStoreException e) { throw new IOException("Can't store credential " + alias + " in " + this, e); @@ -315,7 +319,7 @@ private void locateKeystore() throws IOException { password = CREDENTIAL_PASSWORD_DEFAULT.toCharArray(); } KeyStore ks; - ks = KeyStore.getInstance("jceks"); + ks = KeyStore.getInstance(getKeyStoreType()); if (keystoreExists()) { stashOriginalFilePermissions(); try (InputStream in = getInputStreamForFile()) { diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/BouncyCastleFipsKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/BouncyCastleFipsKeyStoreProvider.java new file mode 100644 index 0000000000000..7c7c2c6cee561 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/BouncyCastleFipsKeyStoreProvider.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security.alias; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.net.URI; + +/** + * CredentialProvider based on Bouncy Castle FIPS KeyStore file format. 
+ * The file may be stored in any Hadoop FileSystem using the following + * name mangling: + * bcfks://hdfs@nn1.example.com/my/creds.bcfks {@literal ->} + * hdfs://nn1.example.com/my/creds.bcfks bcfks://file/home/larry/creds.bcfks + * {@literal ->} file:///home/user1/creds.bcfks + */ +@InterfaceAudience.Private +public final class BouncyCastleFipsKeyStoreProvider extends KeyStoreProvider { + public static final String SCHEME_NAME = "bcfks"; + public static final String KEYSTORE_TYPE = "bcfks"; + public static final String ALGORITHM = "HMACSHA512"; + + private BouncyCastleFipsKeyStoreProvider(URI uri, Configuration conf) + throws IOException { + super(uri, conf); + } + + @Override + protected String getSchemeName() { + return SCHEME_NAME; + } + + @Override + protected String getKeyStoreType() { + return KEYSTORE_TYPE; + } + + @Override + protected String getAlgorithm() { + return ALGORITHM; + } + + /** + * The factory to create JksProviders, which is used by the ServiceLoader. + */ + public static class Factory extends CredentialProviderFactory { + @Override + public CredentialProvider createProvider(URI providerName, + Configuration conf) throws IOException { + if (SCHEME_NAME.equals(providerName.getScheme())) { + return new BouncyCastleFipsKeyStoreProvider(providerName, conf); + } + return null; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProvider.java index 113dcaeb5e644..2779194d85e00 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProvider.java @@ -91,7 +91,7 @@ public boolean isTransient() { /** * Ensures that any changes to the credentials are written to persistent * store. 
- * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract void flush() throws IOException; @@ -99,7 +99,7 @@ public boolean isTransient() { * Get the credential entry for a specific alias. * @param alias the name of a specific credential * @return the credentialEntry - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract CredentialEntry getCredentialEntry(String alias) throws IOException; @@ -107,7 +107,7 @@ public abstract CredentialEntry getCredentialEntry(String alias) /** * Get the aliases for all credentials. * @return the list of alias names - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract List getAliases() throws IOException; @@ -115,7 +115,8 @@ public abstract CredentialEntry getCredentialEntry(String alias) * Create a new credential. The given alias must not already exist. * @param name the alias of the credential * @param credential the credential value for the alias. - * @throws IOException + * @throws IOException raised on errors performing I/O. + * @return CredentialEntry. */ public abstract CredentialEntry createCredentialEntry(String name, char[] credential) throws IOException; @@ -123,7 +124,7 @@ public abstract CredentialEntry createCredentialEntry(String name, /** * Delete the given credential. * @param name the alias of the credential to delete - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract void deleteCredentialEntry(String name) throws IOException; @@ -133,7 +134,7 @@ public abstract CredentialEntry createCredentialEntry(String name, * means. If true, the password should be provided by the caller using * setPassword(). * @return Whether or not the provider requires a password - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public boolean needsPassword() throws IOException { return false; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProviderFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProviderFactory.java index 1b2ac41fa8463..8b39337ed18ac 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProviderFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialProviderFactory.java @@ -25,11 +25,13 @@ import java.util.Iterator; import java.util.List; import java.util.ServiceLoader; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.PathIOException; /** * A factory to create a list of CredentialProvider based on the path given in a @@ -59,9 +61,18 @@ public abstract CredentialProvider createProvider(URI providerName, } } + /** + * Fail fast on any recursive load of credential providers, which can + * happen if the FS itself triggers the load. + * A simple boolean could be used here, as the synchronized block ensures + * that only one thread can be active at a time. An atomic is used + * for rigorousness. + */ + private static final AtomicBoolean SERVICE_LOADER_LOCKED = new AtomicBoolean(false); + public static List getProviders(Configuration conf ) throws IOException { - List result = new ArrayList(); + List result = new ArrayList<>(); for(String path: conf.getStringCollection(CREDENTIAL_PROVIDER_PATH)) { try { URI uri = new URI(path); @@ -69,13 +80,23 @@ public static List getProviders(Configuration conf // Iterate serviceLoader in a synchronized block since // serviceLoader iterator is not thread-safe. 
synchronized (serviceLoader) { - for (CredentialProviderFactory factory : serviceLoader) { - CredentialProvider kp = factory.createProvider(uri, conf); - if (kp != null) { - result.add(kp); - found = true; - break; + try { + if (SERVICE_LOADER_LOCKED.getAndSet(true)) { + throw new PathIOException(path, + "Recursive load of credential provider; " + + "if loading a JCEKS file, this means that the filesystem connector is " + + "trying to load the same file"); + } + for (CredentialProviderFactory factory : serviceLoader) { + CredentialProvider kp = factory.createProvider(uri, conf); + if (kp != null) { + result.add(kp); + found = true; + break; + } } + } finally { + SERVICE_LOADER_LOCKED.set(false); } } if (!found) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java index 603772444bcef..fcd471eb99cce 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java @@ -25,7 +25,7 @@ import java.util.Arrays; import java.util.List; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; @@ -70,9 +70,9 @@ public class CredentialShell extends CommandShell { * % hadoop credential check alias [-provider providerPath] * % hadoop credential delete alias [-provider providerPath] [-f] *

    - * @param args + * @param args args. * @return 0 if the argument(s) were recognized, 1 otherwise - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @Override protected int init(String[] args) throws IOException { @@ -523,7 +523,7 @@ public void format(String message) { * * @param args * Command line arguments - * @throws Exception + * @throws Exception exception. */ public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new CredentialShell(), args); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java index 5028482dfc4aa..f3b721f50b5a7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java @@ -20,14 +20,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.permission.FsPermission; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.net.URI; /** @@ -38,11 +32,10 @@ * {@literal ->} file:///home/larry/creds.jceks */ @InterfaceAudience.Private -public class JavaKeyStoreProvider extends AbstractJavaKeyStoreProvider { +public final class JavaKeyStoreProvider extends KeyStoreProvider { public static final String SCHEME_NAME = "jceks"; - - private FileSystem fs; - private FsPermission permissions; + public static final String KEYSTORE_TYPE = "jceks"; + public static final String ALGORITHM = "AES"; private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException { 
@@ -55,38 +48,13 @@ protected String getSchemeName() { } @Override - protected OutputStream getOutputStreamForKeystore() throws IOException { - FSDataOutputStream out = FileSystem.create(fs, getPath(), permissions); - return out; - } - - @Override - protected boolean keystoreExists() throws IOException { - return fs.exists(getPath()); - } - - @Override - protected InputStream getInputStreamForFile() throws IOException { - return fs.open(getPath()); + protected String getKeyStoreType() { + return KEYSTORE_TYPE; } @Override - protected void createPermissions(String perms) { - permissions = new FsPermission(perms); - } - - @Override - protected void stashOriginalFilePermissions() throws IOException { - // save off permissions in case we need to - // rewrite the keystore in flush() - FileStatus s = fs.getFileStatus(getPath()); - permissions = s.getPermission(); - } - - protected void initFileSystem(URI uri) - throws IOException { - super.initFileSystem(uri); - fs = getPath().getFileSystem(getConf()); + protected String getAlgorithm() { + return ALGORITHM; } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/KeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/KeyStoreProvider.java new file mode 100644 index 0000000000000..6909b07161a07 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/KeyStoreProvider.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security.alias; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.permission.FsPermission; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; + +/** + * CredentialProvider based on Java Key Store API. + * The file may be stored in any Hadoop FileSystem using the following + * name mangling: + * bcfks://hdfs@nn1.example.com/my/creds.bcfks {@literal ->} + * hdfs://nn1.example.com/my/creds.bcfks bcfks://file/home/larry/creds.bcfks + * {@literal ->} file:///home/user1/creds.bcfks + */ +@InterfaceAudience.Private +public abstract class KeyStoreProvider extends AbstractJavaKeyStoreProvider { + + private FileSystem fs; + private FsPermission permissions; + + protected KeyStoreProvider(URI uri, Configuration conf) + throws IOException { + super(uri, conf); + } + + @Override + protected OutputStream getOutputStreamForKeystore() throws IOException { + FSDataOutputStream out = FileSystem.create(fs, getPath(), permissions); + return out; + } + + @Override + protected boolean keystoreExists() throws IOException { + return fs.exists(getPath()); + } + + @Override + protected InputStream getInputStreamForFile() throws IOException { + return fs.open(getPath()); + } + + @Override + protected void createPermissions(String perms) { + permissions = new 
FsPermission(perms); + } + + @Override + protected void stashOriginalFilePermissions() throws IOException { + // save off permissions in case we need to + // rewrite the keystore in flush() + FileStatus s = fs.getFileStatus(getPath()); + permissions = s.getPermission(); + } + + protected void initFileSystem(URI uri) + throws IOException { + super.initFileSystem(uri); + fs = getPath().getFileSystem(getConf()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalBouncyCastleFipsKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalBouncyCastleFipsKeyStoreProvider.java new file mode 100644 index 0000000000000..1aef63a90db8e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalBouncyCastleFipsKeyStoreProvider.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security.alias; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.net.URI; + +/** + * CredentialProvider based on bouncy castle FIPS KeyStore file format. + * The file may be stored only on the local filesystem using the + * following name mangling: + * localbcfks://file/home/larry/creds.bcfks {@literal ->} + * file:///home/larry/creds.bcfks + */ +@InterfaceAudience.Private +public final class LocalBouncyCastleFipsKeyStoreProvider extends + LocalKeyStoreProvider { + public static final String SCHEME_NAME = "localbcfks"; + public static final String KEYSTORE_TYPE = "bcfks"; + public static final String ALGORITHM = "HMACSHA512"; + + private LocalBouncyCastleFipsKeyStoreProvider(URI uri, Configuration conf) + throws IOException { + super(uri, conf); + } + + @Override + protected String getSchemeName() { + return SCHEME_NAME; + } + + @Override + protected String getKeyStoreType() { + return KEYSTORE_TYPE; + } + + @Override + protected String getAlgorithm() { + return ALGORITHM; + } + + /** + * The factory to create KeyStore Providers, which is used by the + * ServiceLoader. 
+ */ + public static class Factory extends CredentialProviderFactory { + @Override + public CredentialProvider createProvider(URI providerName, + Configuration conf) throws IOException { + if (SCHEME_NAME.equals(providerName.getScheme())) { + return new LocalBouncyCastleFipsKeyStoreProvider(providerName, conf); + } + return null; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalJavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalJavaKeyStoreProvider.java index c44e246b9d0cd..dd922412892df 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalJavaKeyStoreProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalJavaKeyStoreProvider.java @@ -20,24 +20,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.util.Shell; -import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.PosixFilePermission; -import java.nio.file.attribute.PosixFilePermissions; -import java.util.Set; -import java.util.StringTokenizer; -import java.util.EnumSet; /** * CredentialProvider based on Java's KeyStore file format. 
The file may be @@ -47,10 +32,10 @@ */ @InterfaceAudience.Private public final class LocalJavaKeyStoreProvider extends - AbstractJavaKeyStoreProvider { + LocalKeyStoreProvider { public static final String SCHEME_NAME = "localjceks"; - private File file; - private Set permissions; + public static final String KEYSTORE_TYPE = "jceks"; + public static final String ALGORITHM = "AES"; private LocalJavaKeyStoreProvider(URI uri, Configuration conf) throws IOException { @@ -63,106 +48,13 @@ protected String getSchemeName() { } @Override - protected OutputStream getOutputStreamForKeystore() throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("using '" + file + "' for output stream."); - } - OutputStream out = Files.newOutputStream(file.toPath()); - return out; - } - - @Override - protected boolean keystoreExists() throws IOException { - /* The keystore loader doesn't handle zero length files. */ - return file.exists() && (file.length() > 0); - } - - @Override - protected InputStream getInputStreamForFile() throws IOException { - InputStream is = Files.newInputStream(file.toPath()); - return is; - } - - @Override - protected void createPermissions(String perms) throws IOException { - int mode = 700; - try { - mode = Integer.parseInt(perms, 8); - } catch (NumberFormatException nfe) { - throw new IOException("Invalid permissions mode provided while " - + "trying to createPermissions", nfe); - } - permissions = modeToPosixFilePermission(mode); - } - - @Override - protected void stashOriginalFilePermissions() throws IOException { - // save off permissions in case we need to - // rewrite the keystore in flush() - if (!Shell.WINDOWS) { - Path path = Paths.get(file.getCanonicalPath()); - permissions = Files.getPosixFilePermissions(path); - } else { - // On Windows, the JDK does not support the POSIX file permission APIs. - // Instead, we can do a winutils call and translate. 
- String[] cmd = Shell.getGetPermissionCommand(); - String[] args = new String[cmd.length + 1]; - System.arraycopy(cmd, 0, args, 0, cmd.length); - args[cmd.length] = file.getCanonicalPath(); - String out = Shell.execCommand(args); - StringTokenizer t = new StringTokenizer(out, Shell.TOKEN_SEPARATOR_REGEX); - // The winutils output consists of 10 characters because of the leading - // directory indicator, i.e. "drwx------". The JDK parsing method expects - // a 9-character string, so remove the leading character. - String permString = t.nextToken().substring(1); - permissions = PosixFilePermissions.fromString(permString); - } - } - - @Override - protected void initFileSystem(URI uri) - throws IOException { - super.initFileSystem(uri); - try { - file = new File(new URI(getPath().toString())); - if (LOG.isDebugEnabled()) { - LOG.debug("initialized local file as '" + file + "'."); - if (file.exists()) { - LOG.debug("the local file exists and is size " + file.length()); - if (LOG.isTraceEnabled()) { - if (file.canRead()) { - LOG.trace("we can read the local file."); - } - if (file.canWrite()) { - LOG.trace("we can write the local file."); - } - } - } else { - LOG.debug("the local file does not exist."); - } - } - } catch (URISyntaxException e) { - throw new IOException(e); - } + protected String getKeyStoreType() { + return KEYSTORE_TYPE; } @Override - public void flush() throws IOException { - super.flush(); - if (LOG.isDebugEnabled()) { - LOG.debug("Resetting permissions to '" + permissions + "'"); - } - if (!Shell.WINDOWS) { - Files.setPosixFilePermissions(Paths.get(file.getCanonicalPath()), - permissions); - } else { - // FsPermission expects a 10-character string because of the leading - // directory indicator, i.e. "drwx------". The JDK toString method returns - // a 9-character string, so prepend a leading character. 
- FsPermission fsPermission = FsPermission.valueOf( - "-" + PosixFilePermissions.toString(permissions)); - FileUtil.setPermission(file, fsPermission); - } + protected String getAlgorithm() { + return ALGORITHM; } /** @@ -178,37 +70,4 @@ public CredentialProvider createProvider(URI providerName, return null; } } - - private static Set modeToPosixFilePermission( - int mode) { - Set perms = EnumSet.noneOf(PosixFilePermission.class); - if ((mode & 0001) != 0) { - perms.add(PosixFilePermission.OTHERS_EXECUTE); - } - if ((mode & 0002) != 0) { - perms.add(PosixFilePermission.OTHERS_WRITE); - } - if ((mode & 0004) != 0) { - perms.add(PosixFilePermission.OTHERS_READ); - } - if ((mode & 0010) != 0) { - perms.add(PosixFilePermission.GROUP_EXECUTE); - } - if ((mode & 0020) != 0) { - perms.add(PosixFilePermission.GROUP_WRITE); - } - if ((mode & 0040) != 0) { - perms.add(PosixFilePermission.GROUP_READ); - } - if ((mode & 0100) != 0) { - perms.add(PosixFilePermission.OWNER_EXECUTE); - } - if ((mode & 0200) != 0) { - perms.add(PosixFilePermission.OWNER_WRITE); - } - if ((mode & 0400) != 0) { - perms.add(PosixFilePermission.OWNER_READ); - } - return perms; - } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalKeyStoreProvider.java new file mode 100644 index 0000000000000..b355bbc9cd62f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/LocalKeyStoreProvider.java @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security.alias; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Shell; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.EnumSet; +import java.util.Set; +import java.util.StringTokenizer; + +/** + * CredentialProvider based on Java's KeyStore file format. 
The file may be + * stored only on the local filesystem using the following name mangling: + * localjceks://file/home/larry/creds.jceks {@literal ->} + * file:///home/larry/creds.jceks + */ +@InterfaceAudience.Private +public abstract class LocalKeyStoreProvider extends + AbstractJavaKeyStoreProvider { + private File file; + private Set permissions; + + protected LocalKeyStoreProvider(URI uri, Configuration conf) + throws IOException { + super(uri, conf); + } + + @Override + protected OutputStream getOutputStreamForKeystore() throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("using '" + file + "' for output stream."); + } + OutputStream out = Files.newOutputStream(file.toPath()); + return out; + } + + @Override + protected boolean keystoreExists() throws IOException { + /* The keystore loader doesn't handle zero length files. */ + return file.exists() && (file.length() > 0); + } + + @Override + protected InputStream getInputStreamForFile() throws IOException { + InputStream is = Files.newInputStream(file.toPath()); + return is; + } + + @Override + protected void createPermissions(String perms) throws IOException { + int mode = 700; + try { + mode = Integer.parseInt(perms, 8); + } catch (NumberFormatException nfe) { + throw new IOException("Invalid permissions mode provided while " + + "trying to createPermissions", nfe); + } + permissions = modeToPosixFilePermission(mode); + } + + @Override + protected void stashOriginalFilePermissions() throws IOException { + // save off permissions in case we need to + // rewrite the keystore in flush() + if (!Shell.WINDOWS) { + Path path = Paths.get(file.getCanonicalPath()); + permissions = Files.getPosixFilePermissions(path); + } else { + // On Windows, the JDK does not support the POSIX file permission APIs. + // Instead, we can do a winutils call and translate. 
+ String[] cmd = Shell.getGetPermissionCommand(); + String[] args = new String[cmd.length + 1]; + System.arraycopy(cmd, 0, args, 0, cmd.length); + args[cmd.length] = file.getCanonicalPath(); + String out = Shell.execCommand(args); + StringTokenizer t = new StringTokenizer(out, Shell.TOKEN_SEPARATOR_REGEX); + // The winutils output consists of 10 characters because of the leading + // directory indicator, i.e. "drwx------". The JDK parsing method expects + // a 9-character string, so remove the leading character. + String permString = t.nextToken().substring(1); + permissions = PosixFilePermissions.fromString(permString); + } + } + + @Override + protected void initFileSystem(URI uri) + throws IOException { + super.initFileSystem(uri); + try { + file = new File(new URI(getPath().toString())); + if (LOG.isDebugEnabled()) { + LOG.debug("initialized local file as '" + file + "'."); + if (file.exists()) { + LOG.debug("the local file exists and is size " + file.length()); + if (LOG.isTraceEnabled()) { + if (file.canRead()) { + LOG.trace("we can read the local file."); + } + if (file.canWrite()) { + LOG.trace("we can write the local file."); + } + } + } else { + LOG.debug("the local file does not exist."); + } + } + } catch (URISyntaxException e) { + throw new IOException(e); + } + } + + @Override + public void flush() throws IOException { + super.flush(); + if (LOG.isDebugEnabled()) { + LOG.debug("Resetting permissions to '" + permissions + "'"); + } + if (!Shell.WINDOWS) { + Files.setPosixFilePermissions(Paths.get(file.getCanonicalPath()), + permissions); + } else { + // FsPermission expects a 10-character string because of the leading + // directory indicator, i.e. "drwx------". The JDK toString method returns + // a 9-character string, so prepend a leading character. 
+ FsPermission fsPermission = FsPermission.valueOf( + "-" + PosixFilePermissions.toString(permissions)); + FileUtil.setPermission(file, fsPermission); + } + } + + private static Set modeToPosixFilePermission( + int mode) { + Set perms = EnumSet.noneOf(PosixFilePermission.class); + if ((mode & 0001) != 0) { + perms.add(PosixFilePermission.OTHERS_EXECUTE); + } + if ((mode & 0002) != 0) { + perms.add(PosixFilePermission.OTHERS_WRITE); + } + if ((mode & 0004) != 0) { + perms.add(PosixFilePermission.OTHERS_READ); + } + if ((mode & 0010) != 0) { + perms.add(PosixFilePermission.GROUP_EXECUTE); + } + if ((mode & 0020) != 0) { + perms.add(PosixFilePermission.GROUP_WRITE); + } + if ((mode & 0040) != 0) { + perms.add(PosixFilePermission.GROUP_READ); + } + if ((mode & 0100) != 0) { + perms.add(PosixFilePermission.OWNER_EXECUTE); + } + if ((mode & 0200) != 0) { + perms.add(PosixFilePermission.OWNER_WRITE); + } + if ((mode & 0400) != 0) { + perms.add(PosixFilePermission.OWNER_READ); + } + return perms; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/package-info.java new file mode 100644 index 0000000000000..d05e3cb9f20f3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Provides the hadoop credential provider API. + */ +package org.apache.hadoop.security.alias; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AccessControlList.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AccessControlList.java index 8af47d6e9d5e9..cc18c67c50ba4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AccessControlList.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AccessControlList.java @@ -55,6 +55,7 @@ public class AccessControlList implements Writable { // Indicates an ACL string that represents access to all users public static final String WILDCARD_ACL_VALUE = "*"; private static final int INITIAL_CAPACITY = 256; + public static final String USE_REAL_ACLS = "~"; // Set of users who are granted access. private Collection users; @@ -223,9 +224,12 @@ public Collection getGroups() { /** * Checks if a user represented by the provided {@link UserGroupInformation} - * is a member of the Access Control List + * is a member of the Access Control List. If user was proxied and + * USE_REAL_ACLS + the real user name is in the control list, then treat this + * case as if user were in the ACL list. 
* @param ugi UserGroupInformation to check if contained in the ACL - * @return true if ugi is member of the list + * @return true if ugi is member of the list or if USE_REAL_ACLS + real user + * is in the list */ public final boolean isUserInList(UserGroupInformation ugi) { if (allAllowed || users.contains(ugi.getShortUserName())) { @@ -237,7 +241,9 @@ public final boolean isUserInList(UserGroupInformation ugi) { } } } - return false; + UserGroupInformation realUgi = ugi.getRealUser(); + return realUgi != null && + users.contains(USE_REAL_ACLS + realUgi.getShortUserName()); } public boolean isUserAllowed(UserGroupInformation ugi) { @@ -288,6 +294,7 @@ else if (!users.isEmpty()) { /** * Returns the access control list as a String that can be used for building a * new instance by sending it to the constructor of {@link AccessControlList}. + * @return acl string. */ public String getAclString() { StringBuilder sb = new StringBuilder(INITIAL_CAPACITY); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java index b766d5c37fa2f..f2589308640c9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java @@ -18,6 +18,7 @@ package org.apache.hadoop.security.authorize; +import java.net.InetAddress; import java.util.Collection; import java.util.HashMap; import java.util.Map; @@ -30,7 +31,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.MachineList; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; @InterfaceStability.Unstable 
@InterfaceAudience.Public @@ -105,8 +106,8 @@ public Configuration getConf() { } @Override - public void authorize(UserGroupInformation user, - String remoteAddress) throws AuthorizationException { + public void authorize(UserGroupInformation user, + InetAddress remoteAddress) throws AuthorizationException { if (user == null) { throw new IllegalArgumentException("user is null."); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ImpersonationProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ImpersonationProvider.java index 8b483f0336f3d..129e1e4dad26a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ImpersonationProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ImpersonationProvider.java @@ -18,6 +18,9 @@ package org.apache.hadoop.security.authorize; +import java.net.InetAddress; +import java.net.UnknownHostException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configurable; @@ -38,12 +41,29 @@ public interface ImpersonationProvider extends Configurable { public void init(String configurationPrefix); /** - * Authorize the superuser which is doing doAs - * + * Authorize the superuser which is doing doAs. + * {@link #authorize(UserGroupInformation, InetAddress)} should + * be preferred to avoid possibly re-resolving the ip address. + * @param user ugi of the effective or proxy user which contains a real user. + * @param remoteAddress the ip address of client. + * @throws AuthorizationException Authorization Exception. 
+ */ + default void authorize(UserGroupInformation user, String remoteAddress) + throws AuthorizationException { + try { + authorize(user, InetAddress.getByName(remoteAddress)); + } catch (UnknownHostException e) { + throw new AuthorizationException(e); + } + } + + /** + * Authorize the superuser which is doing doAs. + * * @param user ugi of the effective or proxy user which contains a real user * @param remoteAddress the ip address of client - * @throws AuthorizationException + * @throws AuthorizationException Authorization Exception. */ - public void authorize(UserGroupInformation user, String remoteAddress) + void authorize(UserGroupInformation user, InetAddress remoteAddress) throws AuthorizationException; } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java index 97a7f080fbdf9..9148170343518 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java @@ -18,7 +18,9 @@ package org.apache.hadoop.security.authorize; -import com.google.common.base.Preconditions; +import java.net.InetAddress; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -26,7 +28,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; @InterfaceStability.Unstable @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce", "HBase", "Hive"}) @@ -86,28 +88,47 @@ 
public static void refreshSuperUserGroupsConfiguration(Configuration conf) { } /** - * Authorize the superuser which is doing doAs - * + * Authorize the superuser which is doing doAs. + * {@link #authorize(UserGroupInformation, InetAddress)} should be preferred + * to avoid possibly re-resolving the ip address. + * * @param user ugi of the effective or proxy user which contains a real user * @param remoteAddress the ip address of client - * @throws AuthorizationException + * @throws AuthorizationException Authorization Exception. */ public static void authorize(UserGroupInformation user, String remoteAddress) throws AuthorizationException { - if (sip==null) { - // In a race situation, It is possible for multiple threads to satisfy this condition. + getSip().authorize(user, remoteAddress); + } + + /** + * Authorize the superuser which is doing doAs. + * + * @param user ugi of the effective or proxy user which contains a real user + * @param remoteAddress the inet address of client + * @throws AuthorizationException Authorization Exception. + */ + public static void authorize(UserGroupInformation user, + InetAddress remoteAddress) throws AuthorizationException { + getSip().authorize(user, remoteAddress); + } + + private static ImpersonationProvider getSip() { + if (sip == null) { + // In a race situation, It is possible for multiple threads to satisfy + // this condition. // The last assignment will prevail. - refreshSuperUserGroupsConfiguration(); + refreshSuperUserGroupsConfiguration(); } - sip.authorize(user, remoteAddress); + return sip; } - + /** * This function is kept to provide backward compatibility. - * @param user - * @param remoteAddress - * @param conf - * @throws AuthorizationException + * @param user user. + * @param remoteAddress remote address. + * @param conf configuration. + * @throws AuthorizationException Authorization Exception. * @deprecated use {@link #authorize(UserGroupInformation, String)} instead. 
*/ @Deprecated @@ -118,7 +139,7 @@ public static void authorize(UserGroupInformation user, @VisibleForTesting public static DefaultImpersonationProvider getDefaultImpersonationProvider() { - return ((DefaultImpersonationProvider)sip); + return ((DefaultImpersonationProvider) getSip()); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java index 0f0b25d8344e2..51a900fa71cb0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java @@ -41,7 +41,7 @@ public interface RefreshAuthorizationPolicyProtocol { /** * Refresh the service-level authorization policy in-effect. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Idempotent void refreshServiceAcl() throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java index a264eb4dcd9fb..c83afc7fe4b92 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java @@ -28,12 +28,11 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.security.KerberosInfo; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.MachineList; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -101,21 +100,19 @@ public void authorize(UserGroupInformation user, String clientPrincipal = null; if (UserGroupInformation.isSecurityEnabled()) { // get client principal key to verify (if available) - KerberosInfo krbInfo = SecurityUtil.getKerberosInfo(protocol, conf); - if (krbInfo != null) { - String clientKey = krbInfo.clientPrincipal(); - if (clientKey != null && !clientKey.isEmpty()) { - try { - clientPrincipal = SecurityUtil.getServerPrincipal( - conf.get(clientKey), addr); - } catch (IOException e) { - throw (AuthorizationException) new AuthorizationException( - "Can't figure out Kerberos principal name for connection from " - + addr + " for user=" + user + " protocol=" + protocol) - .initCause(e); - } + clientPrincipal = SecurityUtil.getClientPrincipal(protocol, conf); + try { + if 
(clientPrincipal != null) { + clientPrincipal = + SecurityUtil.getServerPrincipal(clientPrincipal, addr); } + } catch (IOException e) { + throw (AuthorizationException) new AuthorizationException( + "Can't figure out Kerberos principal name for connection from " + + addr + " for user=" + user + " protocol=" + protocol) + .initCause(e); } + } if((clientPrincipal != null && !clientPrincipal.equals(user.getUserName())) || acls.length != 2 || !acls[0].isUserAllowed(user) || acls[1].isUserAllowed(user)) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java index 60c2864bbe539..6ba651c13da0b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java @@ -36,7 +36,7 @@ import org.apache.commons.lang3.StringUtils; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/RestCsrfPreventionFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/RestCsrfPreventionFilter.java index 59cb0d6599595..b81ed8e90155e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/RestCsrfPreventionFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/RestCsrfPreventionFilter.java @@ -37,6 +37,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.eclipse.jetty.server.Response; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -271,6 +272,10 @@ public void proceed() throws IOException, ServletException { @Override public void sendError(int code, String message) throws IOException { + if (httpResponse instanceof Response) { + ((Response)httpResponse).setStatusWithReason(code, message); + } + httpResponse.sendError(code, message); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/DelegatingSSLSocketFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/DelegatingSSLSocketFactory.java index c961364aa1124..b08a45d1b51de 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/DelegatingSSLSocketFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/DelegatingSSLSocketFactory.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.net.InetAddress; import java.net.Socket; -import java.net.SocketException; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -31,11 +30,9 @@ import javax.net.ssl.SSLSocket; import javax.net.ssl.SSLSocketFactory; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.wildfly.openssl.OpenSSLProvider; -import org.wildfly.openssl.SSL; - /** * A {@link SSLSocketFactory} that can delegate to various SSL implementations. @@ -57,11 +54,10 @@ *
  • Default_JSSE_with_GCM: Delegates to the JSSE implementation of * SSL with no modification to the list of enabled ciphers.
  • * - *

    * * In order to load OpenSSL, applications must ensure the wildfly-openssl - * artifact is on the classpath. Currently, only ABFS and S3A provide - * wildfly-openssl as a runtime dependency. + * artifact is on the classpath. Currently, only ABFS declares + * wildfly-openssl as an explicit dependency. */ public final class DelegatingSSLSocketFactory extends SSLSocketFactory { @@ -110,7 +106,16 @@ public static synchronized void initializeDefaultFactory( } /** - * Singletone instance of the SSLSocketFactory. + * For testing only: reset the socket factory. + */ + @VisibleForTesting + public static synchronized void resetDefaultFactory() { + LOG.info("Resetting default SSL Socket Factory"); + instance = null; + } + + /** + * Singleton instance of the SSLSocketFactory. * * SSLSocketFactory must be initialized with appropriate SSLChannelMode * using initializeDefaultFactory method. @@ -126,9 +131,7 @@ private DelegatingSSLSocketFactory(SSLChannelMode preferredChannelMode) throws IOException { try { initializeSSLContext(preferredChannelMode); - } catch (NoSuchAlgorithmException e) { - throw new IOException(e); - } catch (KeyManagementException e) { + } catch (NoSuchAlgorithmException | KeyManagementException e) { throw new IOException(e); } @@ -146,42 +149,23 @@ private DelegatingSSLSocketFactory(SSLChannelMode preferredChannelMode) } private void initializeSSLContext(SSLChannelMode preferredChannelMode) - throws NoSuchAlgorithmException, KeyManagementException { + throws NoSuchAlgorithmException, KeyManagementException, IOException { + LOG.debug("Initializing SSL Context to channel mode {}", + preferredChannelMode); switch (preferredChannelMode) { case Default: - if (!openSSLProviderRegistered) { - OpenSSLProvider.register(); - openSSLProviderRegistered = true; - } try { - java.util.logging.Logger logger = java.util.logging.Logger.getLogger( - SSL.class.getName()); - logger.setLevel(Level.WARNING); - ctx = SSLContext.getInstance("openssl.TLS"); - ctx.init(null, null, 
null); - // Strong reference needs to be kept to logger until initialization of - // SSLContext finished (see HADOOP-16174): - logger.setLevel(Level.INFO); + bindToOpenSSLProvider(); channelMode = SSLChannelMode.OpenSSL; - } catch (NoSuchAlgorithmException e) { - LOG.debug("Failed to load OpenSSL. Falling back to the JSSE default."); + } catch (LinkageError | NoSuchAlgorithmException | RuntimeException e) { + LOG.debug("Failed to load OpenSSL. Falling back to the JSSE default.", + e); ctx = SSLContext.getDefault(); channelMode = SSLChannelMode.Default_JSSE; } break; case OpenSSL: - if (!openSSLProviderRegistered) { - OpenSSLProvider.register(); - openSSLProviderRegistered = true; - } - java.util.logging.Logger logger = java.util.logging.Logger.getLogger( - SSL.class.getName()); - logger.setLevel(Level.WARNING); - ctx = SSLContext.getInstance("openssl.TLS"); - ctx.init(null, null, null); - // Strong reference needs to be kept to logger until initialization of - // SSLContext finished (see HADOOP-16174): - logger.setLevel(Level.INFO); + bindToOpenSSLProvider(); channelMode = SSLChannelMode.OpenSSL; break; case Default_JSSE: @@ -193,11 +177,38 @@ private void initializeSSLContext(SSLChannelMode preferredChannelMode) channelMode = SSLChannelMode.Default_JSSE_with_GCM; break; default: - throw new NoSuchAlgorithmException("Unknown channel mode: " + throw new IOException("Unknown channel mode: " + preferredChannelMode); } } + /** + * Bind to the OpenSSL provider via wildfly. + * This MUST be the only place where wildfly classes are referenced, + * so ensuring that any linkage problems only surface here where they may + * be caught by the initialization code. 
+ */ + private void bindToOpenSSLProvider() + throws NoSuchAlgorithmException, KeyManagementException { + if (!openSSLProviderRegistered) { + LOG.debug("Attempting to register OpenSSL provider"); + org.wildfly.openssl.OpenSSLProvider.register(); + openSSLProviderRegistered = true; + } + // Strong reference needs to be kept to logger until initialization of + // SSLContext finished (see HADOOP-16174): + java.util.logging.Logger logger = java.util.logging.Logger.getLogger( + "org.wildfly.openssl.SSL"); + Level originalLevel = logger.getLevel(); + try { + logger.setLevel(Level.WARNING); + ctx = SSLContext.getInstance("openssl.TLS"); + ctx.init(null, null, null); + } finally { + logger.setLevel(originalLevel); + } + } + public String getProviderName() { return providerName; } @@ -212,21 +223,26 @@ public String[] getSupportedCipherSuites() { return ciphers.clone(); } + /** + * Get the channel mode of this instance. + * @return a channel mode. + */ + public SSLChannelMode getChannelMode() { + return channelMode; + } + public Socket createSocket() throws IOException { SSLSocketFactory factory = ctx.getSocketFactory(); - SSLSocket ss = (SSLSocket) factory.createSocket(); - configureSocket(ss); - return ss; + return configureSocket(factory.createSocket()); } @Override public Socket createSocket(Socket s, String host, int port, boolean autoClose) throws IOException { SSLSocketFactory factory = ctx.getSocketFactory(); - SSLSocket ss = (SSLSocket) factory.createSocket(s, host, port, autoClose); - configureSocket(ss); - return ss; + return configureSocket( + factory.createSocket(s, host, port, autoClose)); } @Override @@ -234,52 +250,41 @@ public Socket createSocket(InetAddress address, int port, InetAddress localAddress, int localPort) throws IOException { SSLSocketFactory factory = ctx.getSocketFactory(); - SSLSocket ss = (SSLSocket) factory - .createSocket(address, port, localAddress, localPort); - - configureSocket(ss); - return ss; + return configureSocket(factory + 
.createSocket(address, port, localAddress, localPort)); } @Override public Socket createSocket(String host, int port, InetAddress localHost, int localPort) throws IOException { SSLSocketFactory factory = ctx.getSocketFactory(); - SSLSocket ss = (SSLSocket) factory - .createSocket(host, port, localHost, localPort); - configureSocket(ss); - - return ss; + return configureSocket(factory + .createSocket(host, port, localHost, localPort)); } @Override public Socket createSocket(InetAddress host, int port) throws IOException { SSLSocketFactory factory = ctx.getSocketFactory(); - SSLSocket ss = (SSLSocket) factory.createSocket(host, port); - - configureSocket(ss); - return ss; + return configureSocket(factory.createSocket(host, port)); } @Override public Socket createSocket(String host, int port) throws IOException { SSLSocketFactory factory = ctx.getSocketFactory(); - SSLSocket ss = (SSLSocket) factory.createSocket(host, port); - - configureSocket(ss); - return ss; + return configureSocket(factory.createSocket(host, port)); } - private void configureSocket(SSLSocket ss) throws SocketException { - ss.setEnabledCipherSuites(ciphers); + private Socket configureSocket(Socket socket) { + ((SSLSocket) socket).setEnabledCipherSuites(ciphers); + return socket; } private String[] alterCipherList(String[] defaultCiphers) { - ArrayList preferredSuits = new ArrayList<>(); + ArrayList preferredSuites = new ArrayList<>(); // Remove GCM mode based ciphers from the supported list. 
for (int i = 0; i < defaultCiphers.length; i++) { @@ -287,11 +292,11 @@ private String[] alterCipherList(String[] defaultCiphers) { LOG.debug("Removed Cipher - {} from list of enabled SSLSocket ciphers", defaultCiphers[i]); } else { - preferredSuits.add(defaultCiphers[i]); + preferredSuites.add(defaultCiphers[i]); } } - ciphers = preferredSuits.toArray(new String[0]); + ciphers = preferredSuites.toArray(new String[0]); return ciphers; } -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java index 3531173bb72f7..401509a40b172 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.security.ssl; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -29,20 +29,20 @@ import javax.net.ssl.KeyManagerFactory; import javax.net.ssl.TrustManager; import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; import java.nio.file.Paths; import java.security.GeneralSecurityException; import java.security.KeyStore; import java.text.MessageFormat; +import java.util.Timer; /** * {@link KeyStoresFactory} implementation that reads the certificates from * keystore files. *

    - * if the trust certificates keystore file changes, the {@link TrustManager} - * is refreshed with the new trust certificate entries (using a - * {@link ReloadingX509TrustManager} trustmanager). + * If either the truststore or the keystore certificates file changes, it + * would be refreshed under the corresponding wrapper implementation - + * {@link ReloadingX509KeystoreManager} or {@link ReloadingX509TrustManager}. + *

    */ @InterfaceAudience.Private @InterfaceStability.Evolving @@ -51,6 +51,19 @@ public class FileBasedKeyStoresFactory implements KeyStoresFactory { private static final Logger LOG = LoggerFactory.getLogger(FileBasedKeyStoresFactory.class); + + /** + * The name of the timer thread monitoring file changes. + */ + public static final String SSL_MONITORING_THREAD_NAME = "SSL Certificates Store Monitor"; + + /** + * The refresh interval used to check if either of the truststore or keystore + * certificate file has changed. + */ + public static final String SSL_STORES_RELOAD_INTERVAL_TPL_KEY = + "ssl.{0}.stores.reload.interval"; + public static final String SSL_KEYSTORE_LOCATION_TPL_KEY = "ssl.{0}.keystore.location"; public static final String SSL_KEYSTORE_PASSWORD_TPL_KEY = @@ -77,14 +90,119 @@ public class FileBasedKeyStoresFactory implements KeyStoresFactory { public static final String DEFAULT_KEYSTORE_TYPE = "jks"; /** - * Reload interval in milliseconds. + * The default time interval in milliseconds used to check if either + * of the truststore or keystore certificates file has changed and needs reloading. */ - public static final int DEFAULT_SSL_TRUSTSTORE_RELOAD_INTERVAL = 10000; + public static final int DEFAULT_SSL_STORES_RELOAD_INTERVAL = 10000; private Configuration conf; private KeyManager[] keyManagers; private TrustManager[] trustManagers; private ReloadingX509TrustManager trustManager; + private Timer fileMonitoringTimer; + + + private void createTrustManagersFromConfiguration(SSLFactory.Mode mode, + String truststoreType, + String truststoreLocation, + long storesReloadInterval) + throws IOException, GeneralSecurityException { + String passwordProperty = resolvePropertyName(mode, + SSL_TRUSTSTORE_PASSWORD_TPL_KEY); + String truststorePassword = getPassword(conf, passwordProperty, ""); + if (truststorePassword.isEmpty()) { + // An empty trust store password is legal; the trust store password + // is only required when writing to a trust store. 
Otherwise it's + // an optional integrity check. + truststorePassword = null; + } + + // Check if obsolete truststore specific reload interval is present for backward compatible + long truststoreReloadInterval = + conf.getLong( + resolvePropertyName(mode, SSL_TRUSTSTORE_RELOAD_INTERVAL_TPL_KEY), + storesReloadInterval); + + if (LOG.isDebugEnabled()) { + LOG.debug(mode.toString() + " TrustStore: " + truststoreLocation + + ", reloading at " + truststoreReloadInterval + " millis."); + } + + trustManager = new ReloadingX509TrustManager( + truststoreType, + truststoreLocation, + truststorePassword); + + if (truststoreReloadInterval > 0) { + fileMonitoringTimer.schedule( + new FileMonitoringTimerTask( + Paths.get(truststoreLocation), + path -> trustManager.loadFrom(path), + exception -> LOG.error(ReloadingX509TrustManager.RELOAD_ERROR_MESSAGE, exception)), + truststoreReloadInterval, + truststoreReloadInterval); + } + + if (LOG.isDebugEnabled()) { + LOG.debug(mode.toString() + " Loaded TrustStore: " + truststoreLocation); + } + trustManagers = new TrustManager[]{trustManager}; + } + + /** + * Implements logic of initializing the KeyManagers with the options + * to reload keystores. + * @param mode client or server + * @param keystoreType The keystore type. + * @param storesReloadInterval The interval to check if the keystore certificates + * file has changed. 
+ */ + private void createKeyManagersFromConfiguration(SSLFactory.Mode mode, + String keystoreType, long storesReloadInterval) + throws GeneralSecurityException, IOException { + String locationProperty = + resolvePropertyName(mode, SSL_KEYSTORE_LOCATION_TPL_KEY); + String keystoreLocation = conf.get(locationProperty, ""); + if (keystoreLocation.isEmpty()) { + throw new GeneralSecurityException("The property '" + locationProperty + + "' has not been set in the ssl configuration file."); + } + String passwordProperty = + resolvePropertyName(mode, SSL_KEYSTORE_PASSWORD_TPL_KEY); + String keystorePassword = getPassword(conf, passwordProperty, ""); + if (keystorePassword.isEmpty()) { + throw new GeneralSecurityException("The property '" + passwordProperty + + "' has not been set in the ssl configuration file."); + } + String keyPasswordProperty = + resolvePropertyName(mode, SSL_KEYSTORE_KEYPASSWORD_TPL_KEY); + // Key password defaults to the same value as store password for + // compatibility with legacy configurations that did not use a separate + // configuration property for key password. + String keystoreKeyPassword = getPassword( + conf, keyPasswordProperty, keystorePassword); + if (LOG.isDebugEnabled()) { + LOG.debug(mode.toString() + " KeyStore: " + keystoreLocation); + } + + ReloadingX509KeystoreManager keystoreManager = new ReloadingX509KeystoreManager( + keystoreType, + keystoreLocation, + keystorePassword, + keystoreKeyPassword); + + if (storesReloadInterval > 0) { + fileMonitoringTimer.schedule( + new FileMonitoringTimerTask( + Paths.get(keystoreLocation), + path -> keystoreManager.loadFrom(path), + exception -> LOG.error(ReloadingX509KeystoreManager.RELOAD_ERROR_MESSAGE, exception)), + storesReloadInterval, + storesReloadInterval); + } + + keyManagers = new KeyManager[] { keystoreManager }; + } /** * Resolves a property name to its client/server version if applicable. 
@@ -139,56 +257,28 @@ public void init(SSLFactory.Mode mode) conf.getBoolean(SSLFactory.SSL_REQUIRE_CLIENT_CERT_KEY, SSLFactory.SSL_REQUIRE_CLIENT_CERT_DEFAULT); + long storesReloadInterval = conf.getLong( + resolvePropertyName(mode, SSL_STORES_RELOAD_INTERVAL_TPL_KEY), + DEFAULT_SSL_STORES_RELOAD_INTERVAL); + + fileMonitoringTimer = new Timer(SSL_MONITORING_THREAD_NAME, true); + // certificate store String keystoreType = - conf.get(resolvePropertyName(mode, SSL_KEYSTORE_TYPE_TPL_KEY), - DEFAULT_KEYSTORE_TYPE); - KeyStore keystore = KeyStore.getInstance(keystoreType); - String keystoreKeyPassword = null; - if (requireClientCert || mode == SSLFactory.Mode.SERVER) { - String locationProperty = - resolvePropertyName(mode, SSL_KEYSTORE_LOCATION_TPL_KEY); - String keystoreLocation = conf.get(locationProperty, ""); - if (keystoreLocation.isEmpty()) { - throw new GeneralSecurityException("The property '" + locationProperty + - "' has not been set in the ssl configuration file."); - } - String passwordProperty = - resolvePropertyName(mode, SSL_KEYSTORE_PASSWORD_TPL_KEY); - String keystorePassword = getPassword(conf, passwordProperty, ""); - if (keystorePassword.isEmpty()) { - throw new GeneralSecurityException("The property '" + passwordProperty + - "' has not been set in the ssl configuration file."); - } - String keyPasswordProperty = - resolvePropertyName(mode, SSL_KEYSTORE_KEYPASSWORD_TPL_KEY); - // Key password defaults to the same value as store password for - // compatibility with legacy configurations that did not use a separate - // configuration property for key password. 
- keystoreKeyPassword = getPassword( - conf, keyPasswordProperty, keystorePassword); - if (LOG.isDebugEnabled()) { - LOG.debug(mode.toString() + " KeyStore: " + keystoreLocation); - } + conf.get(resolvePropertyName(mode, SSL_KEYSTORE_TYPE_TPL_KEY), + DEFAULT_KEYSTORE_TYPE); - InputStream is = Files.newInputStream(Paths.get(keystoreLocation)); - try { - keystore.load(is, keystorePassword.toCharArray()); - } finally { - is.close(); - } - if (LOG.isDebugEnabled()) { - LOG.debug(mode.toString() + " Loaded KeyStore: " + keystoreLocation); - } + if (requireClientCert || mode == SSLFactory.Mode.SERVER) { + createKeyManagersFromConfiguration(mode, keystoreType, storesReloadInterval); } else { + KeyStore keystore = KeyStore.getInstance(keystoreType); keystore.load(null, null); + KeyManagerFactory keyMgrFactory = KeyManagerFactory + .getInstance(SSLFactory.SSLCERTIFICATE); + + keyMgrFactory.init(keystore, null); + keyManagers = keyMgrFactory.getKeyManagers(); } - KeyManagerFactory keyMgrFactory = KeyManagerFactory - .getInstance(SSLFactory.SSLCERTIFICATE); - - keyMgrFactory.init(keystore, (keystoreKeyPassword != null) ? - keystoreKeyPassword.toCharArray() : null); - keyManagers = keyMgrFactory.getKeyManagers(); //trust store String truststoreType = @@ -199,33 +289,7 @@ public void init(SSLFactory.Mode mode) resolvePropertyName(mode, SSL_TRUSTSTORE_LOCATION_TPL_KEY); String truststoreLocation = conf.get(locationProperty, ""); if (!truststoreLocation.isEmpty()) { - String passwordProperty = resolvePropertyName(mode, - SSL_TRUSTSTORE_PASSWORD_TPL_KEY); - String truststorePassword = getPassword(conf, passwordProperty, ""); - if (truststorePassword.isEmpty()) { - // An empty trust store password is legal; the trust store password - // is only required when writing to a trust store. Otherwise it's - // an optional integrity check. 
- truststorePassword = null; - } - long truststoreReloadInterval = - conf.getLong( - resolvePropertyName(mode, SSL_TRUSTSTORE_RELOAD_INTERVAL_TPL_KEY), - DEFAULT_SSL_TRUSTSTORE_RELOAD_INTERVAL); - - if (LOG.isDebugEnabled()) { - LOG.debug(mode.toString() + " TrustStore: " + truststoreLocation); - } - - trustManager = new ReloadingX509TrustManager(truststoreType, - truststoreLocation, - truststorePassword, - truststoreReloadInterval); - trustManager.init(); - if (LOG.isDebugEnabled()) { - LOG.debug(mode.toString() + " Loaded TrustStore: " + truststoreLocation); - } - trustManagers = new TrustManager[]{trustManager}; + createTrustManagersFromConfiguration(mode, truststoreType, truststoreLocation, storesReloadInterval); } else { if (LOG.isDebugEnabled()) { LOG.debug("The property '" + locationProperty + "' has not been set, " + @@ -255,8 +319,10 @@ String getPassword(Configuration conf, String alias, String defaultPass) { */ @Override public synchronized void destroy() { + if (fileMonitoringTimer != null) { + fileMonitoringTimer.cancel(); + } if (trustManager != null) { - trustManager.destroy(); trustManager = null; keyManagers = null; trustManagers = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileMonitoringTimerTask.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileMonitoringTimerTask.java new file mode 100644 index 0000000000000..d42d3173cb2b4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileMonitoringTimerTask.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.ssl; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.classification.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.TimerTask; +import java.util.function.Consumer; + +/** + * Implements basic logic to track when a file changes on disk and call the action + * passed to the constructor when it does. An exception handler can optionally also be specified + * in the constructor, otherwise any exception occurring during processing will be logged + * using this class' logger. + */ +@InterfaceAudience.Private +public class FileMonitoringTimerTask extends TimerTask { + + static final Logger LOG = LoggerFactory.getLogger(FileMonitoringTimerTask.class); + + @VisibleForTesting + static final String PROCESS_ERROR_MESSAGE = + "Could not process file change : "; + + final private List<Path> filePaths; + final private Consumer<Path> onFileChange; + final Consumer<Throwable> onChangeFailure; + private List<Long> lastProcessed; + + /** + * See {@link #FileMonitoringTimerTask(List, Consumer, Consumer)}. + * + * @param filePath The file to monitor.
+ * @param onFileChange What to do when the file changes. + * @param onChangeFailure What to do when onFileChange + * throws an exception. + */ + public FileMonitoringTimerTask(Path filePath, Consumer<Path> onFileChange, + Consumer<Throwable> onChangeFailure) { + this(Collections.singletonList(filePath), onFileChange, onChangeFailure); + } + + /** + * Create file monitoring task to be scheduled using a standard + * Java {@link java.util.Timer} instance. + * + * @param filePaths The paths of the files to monitor. + * @param onFileChange The function to call when the file has changed. + * @param onChangeFailure The function to call when an exception is + * thrown during the file change processing; may be null (the error is then logged). + */ + public FileMonitoringTimerTask(List<Path> filePaths, + Consumer<Path> onFileChange, + Consumer<Throwable> onChangeFailure) { + Preconditions.checkNotNull(filePaths, + "path to monitor disk file is not set"); + Preconditions.checkNotNull(onFileChange, + "action to monitor disk file is not set"); + + this.filePaths = new ArrayList<>(filePaths); + this.lastProcessed = new ArrayList<>(); + this.filePaths.forEach(path -> + this.lastProcessed.add(path.toFile().lastModified())); + this.onFileChange = onFileChange; + this.onChangeFailure = onChangeFailure; + } + + @Override + public void run() { + int modified = -1; + for (int i = 0; i < filePaths.size() && modified < 0; i++) { + if (lastProcessed.get(i) != filePaths.get(i).toFile().lastModified()) { + modified = i; + } + } + if (modified > -1) { + Path filePath = filePaths.get(modified); + try { + onFileChange.accept(filePath); + } catch (Throwable t) { + if (onChangeFailure != null) { + onChangeFailure.accept(t); + } else { + LOG.error(PROCESS_ERROR_MESSAGE + filePath.toString(), t); + } + } + lastProcessed.set(modified, filePath.toFile().lastModified()); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509KeystoreManager.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509KeystoreManager.java new file mode 100644 index 0000000000000..86af39dd818cc --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509KeystoreManager.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.ssl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.*; +import java.io.IOException; +import java.io.InputStream; +import java.net.Socket; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.GeneralSecurityException; +import java.security.KeyStore; +import java.security.Principal; +import java.security.PrivateKey; +import java.security.cert.X509Certificate; +import java.util.concurrent.atomic.AtomicReference; + +/** + * An implementation of X509KeyManager that exposes a method, + * {@link #loadFrom(Path)} to reload its configuration. 
Note that it is necessary + * to implement the X509ExtendedKeyManager to properly delegate + * the additional methods, otherwise the SSL handshake will fail. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class ReloadingX509KeystoreManager extends X509ExtendedKeyManager { + + private static final Logger LOG = LoggerFactory.getLogger(ReloadingX509KeystoreManager.class); + + static final String RELOAD_ERROR_MESSAGE = + "Could not load keystore (keep using existing one) : "; + + final private String type; + final private String storePassword; + final private String keyPassword; + private AtomicReference<X509ExtendedKeyManager> keyManagerRef; + + /** + * Construct a ReloadingX509KeystoreManager and load the keystore once. + * + * @param type type of keystore file, typically 'jks'. + * @param location local path to the keystore file. + * @param storePassword password of the keystore file. + * @param keyPassword The password of the key. + * @throws IOException raised on errors performing I/O. + * @throws GeneralSecurityException thrown if the keystore or its key manager cannot be initialized.
+ */ + public ReloadingX509KeystoreManager(String type, String location, + String storePassword, String keyPassword) + throws IOException, GeneralSecurityException { + this.type = type; + this.storePassword = storePassword; + this.keyPassword = keyPassword; + keyManagerRef = new AtomicReference<>(); + keyManagerRef.set(loadKeyManager(Paths.get(location))); + } + + @Override + public String chooseEngineClientAlias(String[] strings, Principal[] principals, + SSLEngine sslEngine) { + return keyManagerRef.get().chooseEngineClientAlias(strings, principals, sslEngine); + } + + @Override + public String chooseEngineServerAlias(String s, Principal[] principals, + SSLEngine sslEngine) { + return keyManagerRef.get().chooseEngineServerAlias(s, principals, sslEngine); + } + + @Override + public String[] getClientAliases(String s, Principal[] principals) { + return keyManagerRef.get().getClientAliases(s, principals); + } + + @Override + public String chooseClientAlias(String[] strings, Principal[] principals, + Socket socket) { + return keyManagerRef.get().chooseClientAlias(strings, principals, socket); + } + + @Override + public String[] getServerAliases(String s, Principal[] principals) { + return keyManagerRef.get().getServerAliases(s, principals); + } + + @Override + public String chooseServerAlias(String s, Principal[] principals, + Socket socket) { + return keyManagerRef.get().chooseServerAlias(s, principals, socket); + } + + @Override + public X509Certificate[] getCertificateChain(String s) { + return keyManagerRef.get().getCertificateChain(s); + } + + @Override + public PrivateKey getPrivateKey(String s) { + return keyManagerRef.get().getPrivateKey(s); + } + + public ReloadingX509KeystoreManager loadFrom(Path path) { + try { + this.keyManagerRef.set(loadKeyManager(path)); + } catch (Exception ex) { + // The Consumer.accept interface forces us to convert to unchecked + throw new RuntimeException(ex); + } + return this; + } + + private X509ExtendedKeyManager
loadKeyManager(Path path) + throws IOException, GeneralSecurityException { + + X509ExtendedKeyManager keyManager = null; + KeyStore keystore = KeyStore.getInstance(type); + + try (InputStream is = Files.newInputStream(path)) { + keystore.load(is, (storePassword != null) ? storePassword.toCharArray() : null); + } + + LOG.debug(" Loaded KeyStore: {}", path.toFile().getAbsolutePath()); + + KeyManagerFactory keyMgrFactory = KeyManagerFactory.getInstance( + SSLFactory.SSLCERTIFICATE); + keyMgrFactory.init(keystore, + (keyPassword != null) ? keyPassword.toCharArray() : null); + for (KeyManager candidate: keyMgrFactory.getKeyManagers()) { + if (candidate instanceof X509ExtendedKeyManager) { + keyManager = (X509ExtendedKeyManager)candidate; + break; + } + } + return keyManager; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509TrustManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509TrustManager.java index b2f0118aaf5c6..68fd4c161005c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509TrustManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/ReloadingX509TrustManager.java @@ -21,7 +21,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,6 +32,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.security.GeneralSecurityException; import java.security.KeyStore; import java.security.cert.CertificateException; @@ -39,31 +41,23 @@ import java.util.concurrent.atomic.AtomicReference; /** - * A {@link
TrustManager} implementation that reloads its configuration when - * the truststore file on disk changes. + * A {@link TrustManager} implementation that exposes a method, {@link #loadFrom(Path)} + * to reload its configuration for example when the truststore file on disk changes. */ @InterfaceAudience.Private @InterfaceStability.Evolving -public final class ReloadingX509TrustManager - implements X509TrustManager, Runnable { +public final class ReloadingX509TrustManager implements X509TrustManager { - @VisibleForTesting static final Logger LOG = LoggerFactory.getLogger(ReloadingX509TrustManager.class); - @VisibleForTesting + static final String RELOAD_ERROR_MESSAGE = "Could not load truststore (keep using existing one) : "; private String type; - private File file; private String password; - private long lastLoaded; - private long reloadInterval; private AtomicReference trustManagerRef; - private volatile boolean running; - private Thread reloader; - /** * Creates a reloadable trustmanager. The trustmanager reloads itself * if the underlying trustore file has changed. @@ -71,49 +65,18 @@ public final class ReloadingX509TrustManager * @param type type of truststore file, typically 'jks'. * @param location local path to the truststore file. * @param password password of the truststore file. - * @param reloadInterval interval to check if the truststore file has * changed, in milliseconds. * @throws IOException thrown if the truststore could not be initialized due * to an IO error. * @throws GeneralSecurityException thrown if the truststore could not be * initialized due to a security error. 
*/ - public ReloadingX509TrustManager(String type, String location, - String password, long reloadInterval) + public ReloadingX509TrustManager(String type, String location, String password) throws IOException, GeneralSecurityException { this.type = type; - file = new File(location); this.password = password; trustManagerRef = new AtomicReference(); - trustManagerRef.set(loadTrustManager()); - this.reloadInterval = reloadInterval; - } - - /** - * Starts the reloader thread. - */ - public void init() { - reloader = new Thread(this, "Truststore reloader thread"); - reloader.setDaemon(true); - running = true; - reloader.start(); - } - - /** - * Stops the reloader thread. - */ - public void destroy() { - running = false; - reloader.interrupt(); - } - - /** - * Returns the reload check interval. - * - * @return the reload check interval, in milliseconds. - */ - public long getReloadInterval() { - return reloadInterval; + trustManagerRef.set(loadTrustManager(Paths.get(location))); } @Override @@ -151,27 +114,24 @@ public X509Certificate[] getAcceptedIssuers() { return issuers; } - boolean needsReload() { - boolean reload = true; - if (file.exists()) { - if (file.lastModified() == lastLoaded) { - reload = false; - } - } else { - lastLoaded = 0; + public ReloadingX509TrustManager loadFrom(Path path) { + try { + this.trustManagerRef.set(loadTrustManager(path)); + } catch (Exception ex) { + // The Consumer.accept interface forces us to convert to unchecked + throw new RuntimeException(RELOAD_ERROR_MESSAGE, ex); } - return reload; + return this; } - X509TrustManager loadTrustManager() + X509TrustManager loadTrustManager(Path path) throws IOException, GeneralSecurityException { X509TrustManager trustManager = null; KeyStore ks = KeyStore.getInstance(type); - InputStream in = Files.newInputStream(file.toPath()); + InputStream in = Files.newInputStream(path); try { ks.load(in, (password == null) ? 
null : password.toCharArray()); - lastLoaded = file.lastModified(); - LOG.debug("Loaded truststore '" + file + "'"); + LOG.debug("Loaded truststore '" + path + "'"); } finally { in.close(); } @@ -188,23 +148,4 @@ X509TrustManager loadTrustManager() } return trustManager; } - - @Override - public void run() { - while (running) { - try { - Thread.sleep(reloadInterval); - } catch (InterruptedException e) { - //NOP - } - if (running && needsReload()) { - try { - trustManagerRef.set(loadTrustManager()); - } catch (Exception ex) { - LOG.warn(RELOAD_ERROR_MESSAGE + ex.toString(), ex); - } - } - } - } - } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DelegationTokenIssuer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DelegationTokenIssuer.java index 70a53b7166870..601635b1a2460 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DelegationTokenIssuer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DelegationTokenIssuer.java @@ -37,17 +37,24 @@ public interface DelegationTokenIssuer { * The service name used as the alias for the token in the credential * token map. addDelegationTokens will use this to determine if * a token exists, and if not, add a new token with this alias. + * @return the canonical service name. */ String getCanonicalServiceName(); /** * Unconditionally get a new token with the optional renewer. Returning * null indicates the service does not issue tokens. + * @param renewer renewer. + * @return the token. + * @throws IOException raised on errors performing I/O. */ Token getDelegationToken(String renewer) throws IOException; /** * Issuers may need tokens from additional services. + * + * @return the additional delegation token issuers. + * @throws IOException raised on errors performing I/O.
*/ default DelegationTokenIssuer[] getAdditionalTokenIssuers() throws IOException { @@ -79,6 +86,12 @@ default Token[] addDelegationTokens( /** * NEVER call this method directly. + * + * @param issuer issuer. + * @param renewer renewer. + * @param credentials cache in which to add new delegation tokens. + * @param tokens list of new delegation tokens. + * @throws IOException raised on errors performing I/O. */ @InterfaceAudience.Private static void collectDelegationTokens( diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFetcher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFetcher.java index d74e7bdb10272..4b22df2043e8c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFetcher.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFetcher.java @@ -28,14 +28,29 @@ * compilation units. Resolution of fetcher impl will be done at runtime. */ public interface DtFetcher { - /** Return a key used to identify the object/service implementation. */ + /** + * Return a key used to identify the object/service implementation. + * @return ServiceName. + */ Text getServiceName(); - /** Used to allow the service API to indicate whether a token is required. */ + /** + * Used to allow the service API to indicate whether a token is required. + * @return isTokenRequired. + */ boolean isTokenRequired(); - /** Add any number of delegation tokens to Credentials object and return - * a token instance that is appropriate for aliasing, or null if none. */ + /** + * Add any number of delegation tokens to Credentials object and return + * a token instance that is appropriate for aliasing, or null if none. + * + * @param conf configuration. + * @param creds credentials. + * @param renewer renewer. + * @param url url. + * @throws Exception Exception. + * @return DelegationTokens. 
+ */ Token addDelegationTokens(Configuration conf, Credentials creds, String renewer, String url) throws Exception; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFileOperations.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFileOperations.java index 2160d8b6a82a1..5d80a45f79f22 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFileOperations.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtFileOperations.java @@ -99,7 +99,7 @@ private static Path fileToPath(File f) { * @param format a string equal to FORMAT_PB or FORMAT_JAVA. * @param creds the Credentials object to be written out. * @param conf a Configuration object passed along. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void doFormattedWrite( File f, String format, Credentials creds, Configuration conf) @@ -118,7 +118,7 @@ public static void doFormattedWrite( * @param alias print only tokens matching alias (null matches all). * @param conf Configuration object passed along. * @param out print to this stream. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void printTokenFile( File tokenFile, Text alias, Configuration conf, PrintStream out) @@ -170,7 +170,7 @@ public static void printCredentials( * @param url pass this URL to fetcher after stripping any http/s prefix. * @param renewer pass this renewer to the fetcher. * @param conf Configuration object passed along. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public static void getTokenFile(File tokenFile, String fileFormat, Text alias, Text service, String url, String renewer, Configuration conf) @@ -225,7 +225,7 @@ public static void getTokenFile(File tokenFile, String fileFormat, * @param alias overwrite service field of fetched token with this text. * @param service only apply alias to tokens matching this service text. * @param conf Configuration object passed along. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void aliasTokenFile(File tokenFile, String fileFormat, Text alias, Text service, Configuration conf) throws Exception { @@ -246,7 +246,7 @@ public static void aliasTokenFile(File tokenFile, String fileFormat, * @param tokenFiles list of local File objects. Last file holds the output. * @param fileFormat a string equal to FORMAT_PB or FORMAT_JAVA, for output * @param conf Configuration object passed along. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void appendTokenFiles( ArrayList tokenFiles, String fileFormat, Configuration conf) @@ -269,8 +269,8 @@ public static void appendTokenFiles( * @param fileFormat a string equal to FORMAT_PB or FORMAT_JAVA, for output * @param alias remove only tokens matching alias; null matches all. * @param conf Configuration object passed along. - * @throws IOException - * @throws InterruptedException + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException if the thread is interrupted. */ public static void removeTokenFromFile(boolean cancel, File tokenFile, String fileFormat, Text alias, Configuration conf) @@ -295,8 +295,8 @@ public static void removeTokenFromFile(boolean cancel, * @param fileFormat a string equal to FORMAT_PB or FORMAT_JAVA, for output * @param alias renew only tokens matching alias; null matches all. * @param conf Configuration object passed along. 
- * @throws IOException - * @throws InterruptedException + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException if the thread is interrupted. */ public static void renewTokenFile( File tokenFile, String fileFormat, Text alias, Configuration conf) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtUtilShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtUtilShell.java index bc2d1b6e11a7e..9e34ebf4a2a58 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtUtilShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/DtUtilShell.java @@ -109,9 +109,9 @@ private String[] maybeDoLoginFromKeytabAndPrincipal(String[] args) * Parse the command line arguments and initialize subcommand. * Also will attempt to perform Kerberos login if both -principal and -keytab * flags are passed in args array. - * @param args + * @param args args. * @return 0 if the argument(s) were recognized, 1 otherwise - * @throws Exception + * @throws Exception Exception. 
*/ @Override protected int init(String[] args) throws Exception { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java index 4f0f6fc4d444a..3ea32bc41ed35 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java @@ -18,8 +18,8 @@ package org.apache.hadoop.security.token; -import com.google.common.collect.Maps; -import com.google.common.primitives.Bytes; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Bytes; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -32,7 +32,7 @@ import org.slf4j.LoggerFactory; import java.io.*; -import java.util.Arrays; +import java.security.MessageDigest; import java.util.Iterator; import java.util.Map; import java.util.ServiceConfigurationError; @@ -136,6 +136,7 @@ public byte[] getIdentifier() { while (tokenIdentifiers.hasNext()) { try { TokenIdentifier id = tokenIdentifiers.next(); + LOG.debug("Added {}:{} into tokenKindMap", id.getKind(), id.getClass()); tokenKindMap.put(id.getKind(), id.getClass()); } catch (ServiceConfigurationError | LinkageError e) { // failure to load a token implementation @@ -193,7 +194,7 @@ public synchronized Text getKind() { /** * Set the token kind. This is only intended to be used by services that * wrap another service's token. - * @param newKind + * @param newKind newKind. */ @InterfaceAudience.Private public synchronized void setKind(Text newKind) { @@ -367,7 +368,7 @@ private static void decodeWritable(Writable obj, /** * Encode this token as a url safe string. 
* @return the encoded string - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public String encodeToUrlString() throws IOException { return encodeWritable(this); @@ -376,7 +377,7 @@ public String encodeToUrlString() throws IOException { /** * Decode the given url safe string into this token. * @param newValue the encoded string - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public void decodeFromUrlString(String newValue) throws IOException { decodeWritable(this, newValue); @@ -391,8 +392,8 @@ public boolean equals(Object right) { return false; } else { Token r = (Token) right; - return Arrays.equals(identifier, r.identifier) && - Arrays.equals(password, r.password) && + return MessageDigest.isEqual(identifier, r.identifier) && + MessageDigest.isEqual(password, r.password) && kind.equals(r.kind) && service.equals(r.service); } @@ -481,6 +482,7 @@ private synchronized TokenRenewer getRenewer() throws IOException { /** * Is this token managed so that it can be renewed or cancelled? * @return true, if it can be renewed and cancelled. + * @throws IOException raised on errors performing I/O. */ public boolean isManaged() throws IOException { return getRenewer().isManaged(this); @@ -488,9 +490,10 @@ public boolean isManaged() throws IOException { /** * Renew this delegation token. + * @param conf configuration. * @return the new expiration time - * @throws IOException - * @throws InterruptedException + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException if the thread is interrupted. */ public long renew(Configuration conf ) throws IOException, InterruptedException { @@ -499,8 +502,10 @@ public long renew(Configuration conf /** * Cancel this delegation token. - * @throws IOException - * @throws InterruptedException + * + * @param conf configuration. + * @throws IOException raised on errors performing I/O. 
+ * @throws InterruptedException if the thread is interrupted. */ public void cancel(Configuration conf ) throws IOException, InterruptedException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenInfo.java index cc76824eb0e13..9234b23202eca 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenInfo.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenInfo.java @@ -31,6 +31,10 @@ @InterfaceAudience.Public @InterfaceStability.Evolving public @interface TokenInfo { - /** The type of TokenSelector to be used */ + /** + * The type of TokenSelector to be used. + * + * @return TokenSelector + */ Class> value(); } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenRenewer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenRenewer.java index 11e275f3213d2..eba4bf6daa42f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenRenewer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/TokenRenewer.java @@ -44,25 +44,37 @@ public abstract class TokenRenewer { * cancelled. * @param token the token being checked * @return true if the token may be renewed or cancelled - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public abstract boolean isManaged(Token token) throws IOException; - - /** - * Renew the given token. - * @return the new expiration time - * @throws IOException - * @throws InterruptedException - */ + + /** + * Renew the given token. + * + * @param token the token being checked. + * @param conf configuration. + * + * @return the new expiration time. 
+ * @throws IOException raised on errors performing I/O. + * @throws InterruptedException thrown when a thread is waiting, sleeping, + * or otherwise occupied, and the thread is interrupted, + * either before or during the activity. + */ public abstract long renew(Token token, Configuration conf ) throws IOException, InterruptedException; - - /** - * Cancel the given token - * @throws IOException - * @throws InterruptedException - */ + + /** + * Cancel the given token. + * + * @param token the token being checked. + * @param conf configuration. + * + * @throws IOException raised on errors performing I/O. + * @throws InterruptedException thrown when a thread is waiting, sleeping, + * or otherwise occupied, and the thread is interrupted, + * either before or during the activity. + */ public abstract void cancel(Token token, Configuration conf ) throws IOException, InterruptedException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenIdentifier.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenIdentifier.java index 6dfe52a83bf68..3f27e45af8191 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenIdentifier.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenIdentifier.java @@ -32,7 +32,7 @@ import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.TokenIdentifier; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; @InterfaceAudience.Public @InterfaceStability.Evolving @@ -49,7 +49,7 @@ public abstract class AbstractDelegationTokenIdentifier private int masterKeyId = 0; public AbstractDelegationTokenIdentifier() { - 
this(new Text(), new Text(), new Text()); + this(null, null, null); } public AbstractDelegationTokenIdentifier(Text owner, Text renewer, Text realUser) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index f329accec7553..61c3312c1078a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -19,21 +19,35 @@ package org.apache.hadoop.security.token.delegation; import java.io.ByteArrayInputStream; +import java.io.DataInput; import java.io.DataInputStream; +import java.io.DataOutput; import java.io.IOException; import java.security.MessageDigest; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import javax.crypto.SecretKey; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import 
org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.HadoopKerberosName; import org.apache.hadoop.security.token.SecretManager; @@ -41,19 +55,25 @@ import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.Time; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.util.functional.InvocationRaisingIOE; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @InterfaceAudience.Public @InterfaceStability.Evolving -public abstract -class AbstractDelegationTokenSecretManager - extends SecretManager { +public abstract class AbstractDelegationTokenSecretManager + extends SecretManager { private static final Logger LOG = LoggerFactory .getLogger(AbstractDelegationTokenSecretManager.class); + /** + * Metrics to track token management operations. + */ + private static final DelegationTokenSecretManagerMetrics METRICS + = DelegationTokenSecretManagerMetrics.create(); + private String formatTokenId(TokenIdent id) { return "(" + id + ")"; } @@ -63,7 +83,7 @@ private String formatTokenId(TokenIdent id) { * to DelegationTokenInformation. Protected by this object lock. */ protected final Map currentTokens - = new HashMap(); + = new ConcurrentHashMap<>(); /** * Sequence number to create DelegationTokenIdentifier. @@ -72,17 +92,17 @@ private String formatTokenId(TokenIdent id) { protected int delegationTokenSequenceNumber = 0; /** - * Access to allKeys is protected by this object lock + * Access to allKeys is protected by this object lock. */ protected final Map allKeys - = new HashMap(); + = new ConcurrentHashMap<>(); /** * Access to currentId is protected by this object lock. */ protected int currentId = 0; /** - * Access to currentKey is protected by this object lock + * Access to currentKey is protected by this object lock. 
*/ private DelegationKey currentKey; @@ -105,7 +125,7 @@ private String formatTokenId(TokenIdent id) { protected Object noInterruptsLock = new Object(); /** - * Create a secret manager + * Create a secret manager. * @param delegationKeyUpdateInterval the number of milliseconds for rolling * new secret keys. * @param delegationTokenMaxLifetime the maximum lifetime of the delegation @@ -125,7 +145,10 @@ public AbstractDelegationTokenSecretManager(long delegationKeyUpdateInterval, this.storeTokenTrackingId = false; } - /** should be called before this object is used */ + /** + * should be called before this object is used. + * @throws IOException raised on errors performing I/O. + */ public void startThreads() throws IOException { Preconditions.checkState(!running); updateCurrentKey(); @@ -148,6 +171,8 @@ public synchronized void reset() { /** * Total count of active delegation tokens. + * + * @return currentTokens.size. */ public long getCurrentTokensSize() { return currentTokens.size(); @@ -155,11 +180,15 @@ public long getCurrentTokensSize() { /** * Add a previously used master key to cache (when NN restarts), - * should be called before activate(). - * */ + * should be called before activate(). + * + * @param key delegation key. + * @throws IOException raised on errors performing I/O. + */ public synchronized void addKey(DelegationKey key) throws IOException { - if (running) // a safety check + if (running) { // a safety check throw new IOException("Can't add delegation key to a running SecretManager."); + } if (key.getKeyId() > getCurrentKeyId()) { setCurrentKeyId(key.getKeyId()); } @@ -206,7 +235,9 @@ protected void updateStoredToken(TokenIdent ident, long renewDate) throws IOExce /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @return currentId. 
*/ protected synchronized int getCurrentKeyId() { return currentId; @@ -214,7 +245,9 @@ protected synchronized int getCurrentKeyId() { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @return currentId. */ protected synchronized int incrementCurrentKeyId() { return ++currentId; @@ -222,7 +255,9 @@ protected synchronized int incrementCurrentKeyId() { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param keyId keyId. */ protected synchronized void setCurrentKeyId(int keyId) { currentId = keyId; @@ -230,7 +265,9 @@ protected synchronized void setCurrentKeyId(int keyId) { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @return delegationTokenSequenceNumber. */ protected synchronized int getDelegationTokenSeqNum() { return delegationTokenSequenceNumber; @@ -238,7 +275,9 @@ protected synchronized int getDelegationTokenSeqNum() { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @return delegationTokenSequenceNumber. */ protected synchronized int incrementDelegationTokenSeqNum() { return ++delegationTokenSequenceNumber; @@ -246,7 +285,9 @@ protected synchronized int incrementDelegationTokenSeqNum() { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param seqNum seqNum. */ protected synchronized void setDelegationTokenSeqNum(int seqNum) { delegationTokenSequenceNumber = seqNum; @@ -254,7 +295,10 @@ protected synchronized void setDelegationTokenSeqNum(int seqNum) { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param keyId keyId. + * @return DelegationKey. 
*/ protected DelegationKey getDelegationKey(int keyId) { return allKeys.get(keyId); @@ -262,7 +306,10 @@ protected DelegationKey getDelegationKey(int keyId) { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param key DelegationKey. + * @throws IOException raised on errors performing I/O. */ protected void storeDelegationKey(DelegationKey key) throws IOException { allKeys.put(key.getKeyId(), key); @@ -271,7 +318,10 @@ protected void storeDelegationKey(DelegationKey key) throws IOException { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param key DelegationKey. + * @throws IOException raised on errors performing I/O. */ protected void updateDelegationKey(DelegationKey key) throws IOException { allKeys.put(key.getKeyId(), key); @@ -280,6 +330,9 @@ protected void updateDelegationKey(DelegationKey key) throws IOException { /** * For subclasses externalizing the storage, for example Zookeeper * based implementations + * + * @param ident ident. + * @return DelegationTokenInformation. */ protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { return currentTokens.get(ident); @@ -287,7 +340,11 @@ protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param ident ident. + * @param tokenInfo tokenInfo. + * @throws IOException raised on errors performing I/O. */ protected void storeToken(TokenIdent ident, DelegationTokenInformation tokenInfo) throws IOException { @@ -297,7 +354,11 @@ protected void storeToken(TokenIdent ident, /** * For subclasses externalizing the storage, for example Zookeeper - * based implementations + * based implementations. + * + * @param ident ident. + * @param tokenInfo tokenInfo. 
+ * @throws IOException raised on errors performing I/O. */ protected void updateToken(TokenIdent ident, DelegationTokenInformation tokenInfo) throws IOException { @@ -313,7 +374,7 @@ protected void updateToken(TokenIdent ident, * startThreads() is called) * @param identifier identifier read from persistent storage * @param renewDate token renew time - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public synchronized void addPersistedDelegationToken( TokenIdent identifier, long renewDate) throws IOException { @@ -371,8 +432,9 @@ private void updateCurrentKey() throws IOException { /** * Update the current master key for generating delegation tokens * It should be called only by tokenRemoverThread. + * @throws IOException raised on errors performing I/O. */ - void rollMasterKey() throws IOException { + protected void rollMasterKey() throws IOException { synchronized (this) { removeExpiredKeys(); /* set final expiry date for retiring currentKey */ @@ -396,8 +458,9 @@ private synchronized void removeExpiredKeys() { it.remove(); // ensure the tokens generated by this current key can be recovered // with this current key after this current key is rolled - if(!e.getValue().equals(currentKey)) + if(!e.getValue().equals(currentKey)) { removeStoredMasterKey(e.getValue()); + } } } } @@ -417,7 +480,7 @@ protected synchronized byte[] createPassword(TokenIdent identifier) { DelegationTokenInformation tokenInfo = new DelegationTokenInformation(now + tokenRenewInterval, password, getTrackingIdIfEnabled(identifier)); try { - storeToken(identifier, tokenInfo); + METRICS.trackStoreToken(() -> storeToken(identifier, tokenInfo)); } catch (IOException ioe) { LOG.error("Could not store token " + formatTokenId(identifier) + "!!", ioe); @@ -431,20 +494,28 @@ protected synchronized byte[] createPassword(TokenIdent identifier) { * Find the DelegationTokenInformation for the given token id, and verify that * if the token is expired. 
Note that this method should be called with * acquiring the secret manager's monitor. + * + * @param identifier identifier. + * @throws InvalidToken invalid token exception. + * @return DelegationTokenInformation. */ protected DelegationTokenInformation checkToken(TokenIdent identifier) throws InvalidToken { assert Thread.holdsLock(this); DelegationTokenInformation info = getTokenInfo(identifier); + String err; if (info == null) { - throw new InvalidToken("token " + formatTokenId(identifier) - + " can't be found in cache"); + err = "Token for real user: " + identifier.getRealUser() + ", can't be found in cache"; + LOG.warn("{}, Token={}", err, formatTokenId(identifier)); + throw new InvalidToken(err); } long now = Time.now(); if (info.getRenewDate() < now) { - throw new InvalidToken("token " + formatTokenId(identifier) + " is " + - "expired, current time: " + Time.formatTime(now) + - " expected renewal time: " + Time.formatTime(info.getRenewDate())); + err = "Token " + identifier.getRealUser() + " has expired, current time: " + + Time.formatTime(now) + " expected renewal time: " + Time + .formatTime(info.getRenewDate()); + LOG.info("{}, Token={}", err, formatTokenId(identifier)); + throw new InvalidToken(err); } return info; } @@ -474,7 +545,7 @@ public synchronized String getTokenTrackingId(TokenIdent identifier) { * Verifies that the given identifier and password are valid and match. * @param identifier Token identifier. * @param password Password in the token. - * @throws InvalidToken + * @throws InvalidToken InvalidToken. */ public synchronized void verifyToken(TokenIdent identifier, byte[] password) throws InvalidToken { @@ -542,12 +613,15 @@ public synchronized long renewToken(Token token, throw new InvalidToken("Renewal request for unknown token " + formatTokenId(id)); } - updateToken(id, info); + METRICS.trackUpdateToken(() -> updateToken(id, info)); return renewTime; } /** * Cancel a token by removing it from cache. + * + * @param token token. 
+ * @param canceller canceller. * @return Identifier of the canceled token * @throws InvalidToken for invalid token * @throws AccessControlException if the user isn't allowed to cancel @@ -578,7 +652,9 @@ public synchronized TokenIdent cancelToken(Token token, if (info == null) { throw new InvalidToken("Token not found " + formatTokenId(id)); } - removeStoredToken(id); + METRICS.trackRemoveToken(() -> { + removeStoredToken(id); + }); return id; } @@ -593,11 +669,15 @@ public static SecretKey createSecretKey(byte[] key) { /** Class to encapsulate a token's renew date and password. */ @InterfaceStability.Evolving - public static class DelegationTokenInformation { + public static class DelegationTokenInformation implements Writable { long renewDate; byte[] password; String trackingId; + public DelegationTokenInformation() { + this(0, null); + } + public DelegationTokenInformation(long renewDate, byte[] password) { this(renewDate, password, null); } @@ -608,18 +688,48 @@ public DelegationTokenInformation(long renewDate, byte[] password, this.password = password; this.trackingId = trackingId; } - /** returns renew date */ + /** + * @return returns renew date. + */ public long getRenewDate() { return renewDate; } - /** returns password */ + /** + * @return returns password. + */ byte[] getPassword() { return password; } - /** returns tracking id */ + + /** + * @return returns tracking id. 
+ */ public String getTrackingId() { return trackingId; } + + @Override + public void write(DataOutput out) throws IOException { + WritableUtils.writeVLong(out, renewDate); + if (password == null) { + WritableUtils.writeVInt(out, -1); + } else { + WritableUtils.writeVInt(out, password.length); + out.write(password); + } + WritableUtils.writeString(out, trackingId); + } + + @Override + public void readFields(DataInput in) throws IOException { + renewDate = WritableUtils.readVLong(in); + int len = WritableUtils.readVInt(in); + if (len > -1) { + password = new byte[len]; + in.readFully(password); + } + trackingId = WritableUtils.readString(in); + } } /** Remove expired delegation tokens from cache */ @@ -652,8 +762,9 @@ protected void logExpireTokens( } public void stopThreads() { - if (LOG.isDebugEnabled()) + if (LOG.isDebugEnabled()) { LOG.debug("Stopping expired delegation token remover thread"); + } running = false; if (tokenRemoverThread != null) { @@ -720,10 +831,102 @@ public void run() { * * @param token the token where to extract the identifier * @return the delegation token identifier - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public TokenIdent decodeTokenIdentifier(Token token) throws IOException { return token.decodeIdentifier(); } + protected DelegationTokenSecretManagerMetrics getMetrics() { + return METRICS; + } + + /** + * DelegationTokenSecretManagerMetrics tracks token management operations + * and publishes them through the metrics interfaces. 
+ */ + @Metrics(about="Delegation token secret manager metrics", context="token") + static class DelegationTokenSecretManagerMetrics implements DurationTrackerFactory { + private static final Logger LOG = LoggerFactory.getLogger( + DelegationTokenSecretManagerMetrics.class); + + final static String STORE_TOKEN_STAT = "storeToken"; + final static String UPDATE_TOKEN_STAT = "updateToken"; + final static String REMOVE_TOKEN_STAT = "removeToken"; + final static String TOKEN_FAILURE_STAT = "tokenFailure"; + + private final MetricsRegistry registry; + private final IOStatisticsStore ioStatistics; + + @Metric("Rate of storage of delegation tokens and latency (milliseconds)") + private MutableRate storeToken; + @Metric("Rate of update of delegation tokens and latency (milliseconds)") + private MutableRate updateToken; + @Metric("Rate of removal of delegation tokens and latency (milliseconds)") + private MutableRate removeToken; + @Metric("Counter of delegation tokens operation failures") + private MutableCounterLong tokenFailure; + + static DelegationTokenSecretManagerMetrics create() { + return DefaultMetricsSystem.instance().register(new DelegationTokenSecretManagerMetrics()); + } + + DelegationTokenSecretManagerMetrics() { + ioStatistics = IOStatisticsBinding.iostatisticsStore() + .withDurationTracking(STORE_TOKEN_STAT, UPDATE_TOKEN_STAT, REMOVE_TOKEN_STAT) + .withCounters(TOKEN_FAILURE_STAT) + .build(); + registry = new MetricsRegistry("DelegationTokenSecretManagerMetrics"); + LOG.debug("Initialized {}", registry); + } + + public void trackStoreToken(InvocationRaisingIOE invocation) throws IOException { + trackInvocation(invocation, STORE_TOKEN_STAT, storeToken); + } + + public void trackUpdateToken(InvocationRaisingIOE invocation) throws IOException { + trackInvocation(invocation, UPDATE_TOKEN_STAT, updateToken); + } + + public void trackRemoveToken(InvocationRaisingIOE invocation) throws IOException { + trackInvocation(invocation, REMOVE_TOKEN_STAT, removeToken); + } 
+ + public void trackInvocation(InvocationRaisingIOE invocation, String statistic, + MutableRate metric) throws IOException { + try { + long start = Time.monotonicNow(); + IOStatisticsBinding.trackDurationOfInvocation(this, statistic, invocation); + metric.add(Time.monotonicNow() - start); + } catch (Exception ex) { + tokenFailure.incr(); + throw ex; + } + } + + @Override + public DurationTracker trackDuration(String key, long count) { + return ioStatistics.trackDuration(key, count); + } + + protected MutableRate getStoreToken() { + return storeToken; + } + + protected MutableRate getUpdateToken() { + return updateToken; + } + + protected MutableRate getRemoveToken() { + return removeToken; + } + + protected MutableCounterLong getTokenFailure() { + return tokenFailure; + } + + protected IOStatisticsStore getIoStatistics() { + return ioStatistics; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/SQLDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/SQLDelegationTokenSecretManager.java new file mode 100644 index 0000000000000..4b6ae21d7a95b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/SQLDelegationTokenSecretManager.java @@ -0,0 +1,400 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security.token.delegation; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.sql.SQLException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * An implementation of {@link AbstractDelegationTokenSecretManager} that + * persists TokenIdentifiers and DelegationKeys in an existing SQL database. + */ +public abstract class SQLDelegationTokenSecretManager + extends AbstractDelegationTokenSecretManager { + + private static final Logger LOG = + LoggerFactory.getLogger(SQLDelegationTokenSecretManager.class); + + public static final String SQL_DTSM_CONF_PREFIX = "sql-dt-secret-manager."; + private static final String SQL_DTSM_TOKEN_SEQNUM_BATCH_SIZE = SQL_DTSM_CONF_PREFIX + + "token.seqnum.batch.size"; + public static final int DEFAULT_SEQ_NUM_BATCH_SIZE = 10; + + // Batch of sequence numbers that will be requested by the sequenceNumCounter. + // A new batch is requested once the sequenceNums available to a secret manager are + // exhausted, including during initialization. + private final int seqNumBatchSize; + + // Last sequenceNum in the current batch that has been allocated to a token. + private int currentSeqNum; + + // Max sequenceNum in the current batch that can be allocated to a token. 
+ // Unused sequenceNums in the current batch cannot be reused by other routers. + private int currentMaxSeqNum; + + public SQLDelegationTokenSecretManager(Configuration conf) { + super(conf.getLong(DelegationTokenManager.UPDATE_INTERVAL, + DelegationTokenManager.UPDATE_INTERVAL_DEFAULT) * 1000, + conf.getLong(DelegationTokenManager.MAX_LIFETIME, + DelegationTokenManager.MAX_LIFETIME_DEFAULT) * 1000, + conf.getLong(DelegationTokenManager.RENEW_INTERVAL, + DelegationTokenManager.RENEW_INTERVAL_DEFAULT) * 1000, + conf.getLong(DelegationTokenManager.REMOVAL_SCAN_INTERVAL, + DelegationTokenManager.REMOVAL_SCAN_INTERVAL_DEFAULT) * 1000); + + this.seqNumBatchSize = conf.getInt(SQL_DTSM_TOKEN_SEQNUM_BATCH_SIZE, + DEFAULT_SEQ_NUM_BATCH_SIZE); + } + + /** + * Persists a TokenIdentifier and its corresponding TokenInformation into + * the SQL database. The TokenIdentifier is expected to be unique and any + * duplicate token attempts will result in an IOException. + * @param ident TokenIdentifier to persist. + * @param tokenInfo DelegationTokenInformation associated with the TokenIdentifier. + */ + @Override + protected void storeToken(TokenIdent ident, + DelegationTokenInformation tokenInfo) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos)) { + tokenInfo.write(dos); + // Add token to SQL database + insertToken(ident.getSequenceNumber(), ident.getBytes(), bos.toByteArray()); + // Add token to local cache + super.storeToken(ident, tokenInfo); + } catch (SQLException e) { + throw new IOException("Failed to store token in SQL secret manager", e); + } + } + + /** + * Updates the TokenInformation of an existing TokenIdentifier in + * the SQL database. + * @param ident Existing TokenIdentifier in the SQL database. + * @param tokenInfo Updated DelegationTokenInformation associated with the TokenIdentifier. 
+ */ + @Override + protected void updateToken(TokenIdent ident, + DelegationTokenInformation tokenInfo) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { + try (DataOutputStream dos = new DataOutputStream(bos)) { + tokenInfo.write(dos); + // Update token in SQL database + updateToken(ident.getSequenceNumber(), ident.getBytes(), bos.toByteArray()); + // Update token in local cache + super.updateToken(ident, tokenInfo); + } + } catch (SQLException e) { + throw new IOException("Failed to update token in SQL secret manager", e); + } + } + + /** + * Cancels a token by removing it from the SQL database. This will + * call the corresponding method in {@link AbstractDelegationTokenSecretManager} + * to perform validation and remove the token from the cache. + * @return Identifier of the canceled token + */ + @Override + public synchronized TokenIdent cancelToken(Token token, + String canceller) throws IOException { + try (ByteArrayInputStream bis = new ByteArrayInputStream(token.getIdentifier()); + DataInputStream din = new DataInputStream(bis)) { + TokenIdent id = createIdentifier(); + id.readFields(din); + + // Calling getTokenInfo to load token into local cache if not present. + // super.cancelToken() requires token to be present in local cache. + getTokenInfo(id); + } + + return super.cancelToken(token, canceller); + } + + /** + * Removes the existing TokenInformation from the SQL database to + * invalidate it. + * @param ident TokenInformation to remove from the SQL database. + */ + @Override + protected void removeStoredToken(TokenIdent ident) throws IOException { + try { + deleteToken(ident.getSequenceNumber(), ident.getBytes()); + } catch (SQLException e) { + LOG.warn("Failed to remove token in SQL secret manager", e); + } + } + + /** + * Obtains the DelegationTokenInformation associated with the given + * TokenIdentifier in the SQL database. + * @param ident Existing TokenIdentifier in the SQL database. 
+ * @return DelegationTokenInformation that matches the given TokenIdentifier or + * null if it doesn't exist in the database. + */ + @Override + protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { + // Look for token in local cache + DelegationTokenInformation tokenInfo = super.getTokenInfo(ident); + + if (tokenInfo == null) { + try { + // Look for token in SQL database + byte[] tokenInfoBytes = selectTokenInfo(ident.getSequenceNumber(), ident.getBytes()); + + if (tokenInfoBytes != null) { + tokenInfo = new DelegationTokenInformation(); + try (ByteArrayInputStream bis = new ByteArrayInputStream(tokenInfoBytes)) { + try (DataInputStream dis = new DataInputStream(bis)) { + tokenInfo.readFields(dis); + } + } + + // Update token in local cache + currentTokens.put(ident, tokenInfo); + } + } catch (IOException | SQLException e) { + LOG.error("Failed to get token in SQL secret manager", e); + } + } + + return tokenInfo; + } + + /** + * Obtains the value of the last reserved sequence number. + * @return Last reserved sequence number. + */ + @Override + public int getDelegationTokenSeqNum() { + try { + return selectSequenceNum(); + } catch (SQLException e) { + throw new RuntimeException( + "Failed to get token sequence number in SQL secret manager", e); + } + } + + /** + * Updates the value of the last reserved sequence number. + * @param seqNum Value to update the sequence number to. + */ + @Override + public void setDelegationTokenSeqNum(int seqNum) { + try { + updateSequenceNum(seqNum); + } catch (SQLException e) { + throw new RuntimeException( + "Failed to update token sequence number in SQL secret manager", e); + } + } + + /** + * Obtains the next available sequence number that can be allocated to a Token. + * Sequence numbers need to be reserved using the shared sequenceNumberCounter once + * the local batch has been exhausted, which handles sequenceNumber allocation + * concurrently with other secret managers. 
+ * This method ensures that sequence numbers are incremental in a single secret manager, + * but not across secret managers. + * @return Next available sequence number. + */ + @Override + public synchronized int incrementDelegationTokenSeqNum() { + if (currentSeqNum >= currentMaxSeqNum) { + try { + // Request a new batch of sequence numbers and use the + // lowest one available. + currentSeqNum = incrementSequenceNum(seqNumBatchSize); + currentMaxSeqNum = currentSeqNum + seqNumBatchSize; + } catch (SQLException e) { + throw new RuntimeException( + "Failed to increment token sequence number in SQL secret manager", e); + } + } + + return ++currentSeqNum; + } + + /** + * Persists a DelegationKey into the SQL database. The delegation keyId + * is expected to be unique and any duplicate key attempts will result + * in an IOException. + * @param key DelegationKey to persist into the SQL database. + */ + @Override + protected void storeDelegationKey(DelegationKey key) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos)) { + key.write(dos); + // Add delegation key to SQL database + insertDelegationKey(key.getKeyId(), bos.toByteArray()); + // Add delegation key to local cache + super.storeDelegationKey(key); + } catch (SQLException e) { + throw new IOException("Failed to store delegation key in SQL secret manager", e); + } + } + + /** + * Updates an existing DelegationKey in the SQL database. + * @param key Updated DelegationKey. 
+ */ + @Override + protected void updateDelegationKey(DelegationKey key) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos)) { + key.write(dos); + // Update delegation key in SQL database + updateDelegationKey(key.getKeyId(), bos.toByteArray()); + // Update delegation key in local cache + super.updateDelegationKey(key); + } catch (SQLException e) { + throw new IOException("Failed to update delegation key in SQL secret manager", e); + } + } + + /** + * Removes the existing DelegationKey from the SQL database to + * invalidate it. + * @param key DelegationKey to remove from the SQL database. + */ + @Override + protected void removeStoredMasterKey(DelegationKey key) { + try { + deleteDelegationKey(key.getKeyId()); + } catch (SQLException e) { + LOG.warn("Failed to remove delegation key in SQL secret manager", e); + } + } + + /** + * Obtains the DelegationKey from the SQL database. + * @param keyId KeyId of the DelegationKey to obtain. + * @return DelegationKey that matches the given keyId or null + * if it doesn't exist in the database. 
+ */ + @Override + protected DelegationKey getDelegationKey(int keyId) { + // Look for delegation key in local cache + DelegationKey delegationKey = super.getDelegationKey(keyId); + + if (delegationKey == null) { + try { + // Look for delegation key in SQL database + byte[] delegationKeyBytes = selectDelegationKey(keyId); + + if (delegationKeyBytes != null) { + delegationKey = new DelegationKey(); + try (ByteArrayInputStream bis = new ByteArrayInputStream(delegationKeyBytes)) { + try (DataInputStream dis = new DataInputStream(bis)) { + delegationKey.readFields(dis); + } + } + + // Update delegation key in local cache + allKeys.put(keyId, delegationKey); + } + } catch (IOException | SQLException e) { + LOG.error("Failed to get delegation key in SQL secret manager", e); + } + } + + return delegationKey; + } + + /** + * Obtains the value of the last delegation key id. + * @return Last delegation key id. + */ + @Override + public int getCurrentKeyId() { + try { + return selectKeyId(); + } catch (SQLException e) { + throw new RuntimeException( + "Failed to get delegation key id in SQL secret manager", e); + } + } + + /** + * Updates the value of the last delegation key id. + * @param keyId Value to update the delegation key id to. + */ + @Override + public void setCurrentKeyId(int keyId) { + try { + updateKeyId(keyId); + } catch (SQLException e) { + throw new RuntimeException( + "Failed to set delegation key id in SQL secret manager", e); + } + } + + /** + * Obtains the next available delegation key id that can be allocated to a DelegationKey. + * Delegation key id need to be reserved using the shared delegationKeyIdCounter, + * which handles keyId allocation concurrently with other secret managers. + * @return Next available delegation key id. 
+ */ + @Override + public int incrementCurrentKeyId() { + try { + return incrementKeyId(1) + 1; + } catch (SQLException e) { + throw new RuntimeException( + "Failed to increment delegation key id in SQL secret manager", e); + } + } + + // Token operations in SQL database + protected abstract byte[] selectTokenInfo(int sequenceNum, byte[] tokenIdentifier) + throws SQLException; + protected abstract void insertToken(int sequenceNum, byte[] tokenIdentifier, byte[] tokenInfo) + throws SQLException; + protected abstract void updateToken(int sequenceNum, byte[] tokenIdentifier, byte[] tokenInfo) + throws SQLException; + protected abstract void deleteToken(int sequenceNum, byte[] tokenIdentifier) + throws SQLException; + // Delegation key operations in SQL database + protected abstract byte[] selectDelegationKey(int keyId) throws SQLException; + protected abstract void insertDelegationKey(int keyId, byte[] delegationKey) + throws SQLException; + protected abstract void updateDelegationKey(int keyId, byte[] delegationKey) + throws SQLException; + protected abstract void deleteDelegationKey(int keyId) throws SQLException; + // Counter operations in SQL database + protected abstract int selectSequenceNum() throws SQLException; + protected abstract void updateSequenceNum(int value) throws SQLException; + protected abstract int incrementSequenceNum(int amount) throws SQLException; + protected abstract int selectKeyId() throws SQLException; + protected abstract void updateKeyId(int value) throws SQLException; + protected abstract int incrementKeyId(int amount) throws SQLException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java index f61590c28ebce..a7c637b8ddd03 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java @@ -23,15 +23,11 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -import javax.security.auth.login.AppConfigurationEntry; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; import org.apache.curator.framework.CuratorFramework; @@ -40,21 +36,22 @@ import org.apache.curator.framework.api.ACLProvider; import org.apache.curator.framework.imps.DefaultACLProvider; import org.apache.curator.framework.recipes.cache.ChildData; -import org.apache.curator.framework.recipes.cache.PathChildrenCache; -import org.apache.curator.framework.recipes.cache.PathChildrenCache.StartMode; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; -import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; +import org.apache.curator.framework.recipes.cache.CuratorCache; +import org.apache.curator.framework.recipes.cache.CuratorCacheBridge; +import org.apache.curator.framework.recipes.cache.CuratorCacheListener; import org.apache.curator.framework.recipes.shared.SharedCount; import org.apache.curator.framework.recipes.shared.VersionedValue; import org.apache.curator.retry.RetryNTimes; -import org.apache.curator.utils.EnsurePath; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; import 
org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.authentication.util.JaasConfiguration; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.apache.hadoop.util.curator.ZKCuratorManager; +import static org.apache.hadoop.util.Time.now; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; @@ -62,11 +59,12 @@ import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Id; +import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * An implementation of {@link AbstractDelegationTokenSecretManager} that @@ -79,7 +77,7 @@ public abstract class ZKDelegationTokenSecretManager extends AbstractDelegationTokenSecretManager { - private static final String ZK_CONF_PREFIX = "zk-dt-secret-manager."; + public static final String ZK_CONF_PREFIX = "zk-dt-secret-manager."; public static final String ZK_DTSM_ZK_NUM_RETRIES = ZK_CONF_PREFIX + "zkNumRetries"; public static final String ZK_DTSM_ZK_SESSION_TIMEOUT = ZK_CONF_PREFIX @@ -98,14 +96,23 @@ public abstract class ZKDelegationTokenSecretManager options = new HashMap(); - options.put("keyTab", keytab); - options.put("principal", principal); - options.put("useKeyTab", "true"); - options.put("storeKey", "true"); - options.put("useTicketCache", "false"); - options.put("refreshKrb5Config", "true"); - String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); - if 
(jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) { - options.put("debug", "true"); - } - entry = new AppConfigurationEntry[] { - new AppConfigurationEntry(getKrb5LoginModuleName(), - AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, - options) }; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String name) { - return (entryName.equals(name)) ? entry : ((baseConfig != null) - ? baseConfig.getAppConfigurationEntry(name) : null); - } - - private String getKrb5LoginModuleName() { - String krb5LoginModuleName; - if (System.getProperty("java.vendor").contains("IBM")) { - krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule"; - } else { - krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule"; - } - return krb5LoginModuleName; - } - } - @Override public void startThreads() throws IOException { if (!isExternalClient) { @@ -308,20 +265,27 @@ public void startThreads() throws IOException { // If namespace parents are implicitly created, they won't have ACLs. // So, let's explicitly create them. 
CuratorFramework nullNsFw = zkClient.usingNamespace(null); - EnsurePath ensureNs = - nullNsFw.newNamespaceAwareEnsurePath("/" + zkClient.getNamespace()); try { - ensureNs.ensure(nullNsFw.getZookeeperClient()); + String nameSpace = "/" + zkClient.getNamespace(); + Stat stat = nullNsFw.checkExists().forPath(nameSpace); + if (stat == null) { + nullNsFw.create().creatingParentContainersIfNeeded().forPath(nameSpace); + } } catch (Exception e) { throw new IOException("Could not create namespace", e); } } - listenerThreadPool = Executors.newSingleThreadExecutor(); try { delTokSeqCounter = new SharedCount(zkClient, ZK_DTSM_SEQNUM_ROOT, 0); if (delTokSeqCounter != null) { delTokSeqCounter.start(); } + // the first batch range should be allocated during this starting window + // by calling the incrSharedCount + currentSeqNum = incrSharedCount(delTokSeqCounter, seqNumBatchSize); + currentMaxSeqNum = currentSeqNum + seqNumBatchSize; + LOG.info("Fetched initial range of seq num, from {} to {} ", + currentSeqNum+1, currentMaxSeqNum); } catch (Exception e) { throw new IOException("Could not start Sequence Counter", e); } @@ -340,68 +304,65 @@ public void startThreads() throws IOException { throw new RuntimeException("Could not create ZK paths"); } try { - keyCache = new PathChildrenCache(zkClient, ZK_DTSM_MASTER_KEY_ROOT, true); - if (keyCache != null) { - keyCache.start(StartMode.BUILD_INITIAL_CACHE); - keyCache.getListenable().addListener(new PathChildrenCacheListener() { - @Override - public void childEvent(CuratorFramework client, - PathChildrenCacheEvent event) - throws Exception { - switch (event.getType()) { - case CHILD_ADDED: - processKeyAddOrUpdate(event.getData().getData()); - break; - case CHILD_UPDATED: - processKeyAddOrUpdate(event.getData().getData()); - break; - case CHILD_REMOVED: - processKeyRemoved(event.getData().getPath()); - break; - default: - break; + keyCache = CuratorCache.bridgeBuilder(zkClient, ZK_DTSM_MASTER_KEY_ROOT) + .build(); + 
CuratorCacheListener keyCacheListener = CuratorCacheListener.builder() + .forCreatesAndChanges((oldNode, node) -> { + try { + processKeyAddOrUpdate(node.getData()); + } catch (IOException e) { + LOG.error("Error while processing Curator keyCacheListener " + + "NODE_CREATED / NODE_CHANGED event"); + throw new UncheckedIOException(e); } - } - }, listenerThreadPool); - loadFromZKCache(false); - } + }) + .forDeletes(childData -> processKeyRemoved(childData.getPath())) + .build(); + keyCache.listenable().addListener(keyCacheListener); + keyCache.start(); + loadFromZKCache(false); } catch (Exception e) { - throw new IOException("Could not start PathChildrenCache for keys", e); + throw new IOException("Could not start Curator keyCacheListener for keys", + e); } - try { - tokenCache = new PathChildrenCache(zkClient, ZK_DTSM_TOKENS_ROOT, true); - if (tokenCache != null) { - tokenCache.start(StartMode.BUILD_INITIAL_CACHE); - tokenCache.getListenable().addListener(new PathChildrenCacheListener() { - - @Override - public void childEvent(CuratorFramework client, - PathChildrenCacheEvent event) throws Exception { - switch (event.getType()) { - case CHILD_ADDED: - processTokenAddOrUpdate(event.getData()); - break; - case CHILD_UPDATED: - processTokenAddOrUpdate(event.getData()); - break; - case CHILD_REMOVED: - processTokenRemoved(event.getData()); - break; - default: - break; - } - } - }, listenerThreadPool); + if (isTokenWatcherEnabled) { + LOG.info("TokenCache is enabled"); + try { + tokenCache = CuratorCache.bridgeBuilder(zkClient, ZK_DTSM_TOKENS_ROOT) + .build(); + CuratorCacheListener tokenCacheListener = CuratorCacheListener.builder() + .forCreatesAndChanges((oldNode, node) -> { + try { + processTokenAddOrUpdate(node.getData()); + } catch (IOException e) { + LOG.error("Error while processing Curator tokenCacheListener " + + "NODE_CREATED / NODE_CHANGED event"); + throw new UncheckedIOException(e); + } + }) + .forDeletes(childData -> { + try { + 
processTokenRemoved(childData); + } catch (IOException e) { + LOG.error("Error while processing Curator tokenCacheListener " + + "NODE_DELETED event"); + throw new UncheckedIOException(e); + } + }) + .build(); + tokenCache.listenable().addListener(tokenCacheListener); + tokenCache.start(); loadFromZKCache(true); + } catch (Exception e) { + throw new IOException( + "Could not start Curator tokenCacheListener for tokens", e); } - } catch (Exception e) { - throw new IOException("Could not start PathChildrenCache for tokens", e); } super.startThreads(); } /** - * Load the PathChildrenCache into the in-memory map. Possible caches to be + * Load the CuratorCache into the in-memory map. Possible caches to be * loaded are keyCache and tokenCache. * * @param isTokenCache true if loading tokenCache, false if loading keyCache. @@ -409,30 +370,31 @@ public void childEvent(CuratorFramework client, private void loadFromZKCache(final boolean isTokenCache) { final String cacheName = isTokenCache ? "token" : "key"; LOG.info("Starting to load {} cache.", cacheName); - final List children; + final Stream children; if (isTokenCache) { - children = tokenCache.getCurrentData(); + children = tokenCache.stream(); } else { - children = keyCache.getCurrentData(); + children = keyCache.stream(); } - int count = 0; - for (ChildData child : children) { + final AtomicInteger count = new AtomicInteger(0); + children.forEach(childData -> { try { if (isTokenCache) { - processTokenAddOrUpdate(child); + processTokenAddOrUpdate(childData.getData()); } else { - processKeyAddOrUpdate(child.getData()); + processKeyAddOrUpdate(childData.getData()); } } catch (Exception e) { LOG.info("Ignoring node {} because it failed to load.", - child.getPath()); + childData.getPath()); LOG.debug("Failure exception:", e); - ++count; + count.getAndIncrement(); } - } - if (count > 0) { - LOG.warn("Ignored {} nodes while loading {} cache.", count, cacheName); + }); + if (count.get() > 0) { + LOG.warn("Ignored {} nodes 
while loading {} cache.", count.get(), + cacheName); } LOG.info("Loaded {} cache.", cacheName); } @@ -442,9 +404,7 @@ private void processKeyAddOrUpdate(byte[] data) throws IOException { DataInputStream din = new DataInputStream(bin); DelegationKey key = new DelegationKey(); key.readFields(din); - synchronized (this) { - allKeys.put(key.getKeyId(), key); - } + allKeys.put(key.getKeyId(), key); } private void processKeyRemoved(String path) { @@ -454,15 +414,13 @@ private void processKeyRemoved(String path) { int j = tokSeg.indexOf('_'); if (j > 0) { int keyId = Integer.parseInt(tokSeg.substring(j + 1)); - synchronized (this) { - allKeys.remove(keyId); - } + allKeys.remove(keyId); } } } - private void processTokenAddOrUpdate(ChildData data) throws IOException { - ByteArrayInputStream bin = new ByteArrayInputStream(data.getData()); + protected TokenIdent processTokenAddOrUpdate(byte[] data) throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(data); DataInputStream din = new DataInputStream(bin); TokenIdent ident = createIdentifier(); ident.readFields(din); @@ -473,12 +431,10 @@ private void processTokenAddOrUpdate(ChildData data) throws IOException { if (numRead > -1) { DelegationTokenInformation tokenInfo = new DelegationTokenInformation(renewDate, password); - synchronized (this) { - currentTokens.put(ident, tokenInfo); - // The cancel task might be waiting - notifyAll(); - } + currentTokens.put(ident, tokenInfo); + return ident; } + return null; } private void processTokenRemoved(ChildData data) throws IOException { @@ -486,11 +442,7 @@ private void processTokenRemoved(ChildData data) throws IOException { DataInputStream din = new DataInputStream(bin); TokenIdent ident = createIdentifier(); ident.readFields(din); - synchronized (this) { - currentTokens.remove(ident); - // The cancel task might be waiting - notifyAll(); - } + currentTokens.remove(ident); } @Override @@ -531,20 +483,6 @@ public void stopThreads() { } catch (Exception e) { 
LOG.error("Could not stop Curator Framework", e); } - if (listenerThreadPool != null) { - listenerThreadPool.shutdown(); - try { - // wait for existing tasks to terminate - if (!listenerThreadPool.awaitTermination(shutdownTimeout, - TimeUnit.MILLISECONDS)) { - LOG.error("Forcing Listener threadPool to shutdown !!"); - listenerThreadPool.shutdownNow(); - } - } catch (InterruptedException ie) { - listenerThreadPool.shutdownNow(); - Thread.currentThread().interrupt(); - } - } } private void createPersistentNode(String nodePath) throws Exception { @@ -562,28 +500,41 @@ protected int getDelegationTokenSeqNum() { return delTokSeqCounter.getCount(); } - private void incrSharedCount(SharedCount sharedCount) throws Exception { + private int incrSharedCount(SharedCount sharedCount, int batchSize) + throws Exception { while (true) { // Loop until we successfully increment the counter VersionedValue versionedValue = sharedCount.getVersionedValue(); - if (sharedCount.trySetCount(versionedValue, versionedValue.getValue() + 1)) { - break; + if (sharedCount.trySetCount( + versionedValue, versionedValue.getValue() + batchSize)) { + return versionedValue.getValue(); } } } @Override protected int incrementDelegationTokenSeqNum() { - try { - incrSharedCount(delTokSeqCounter); - } catch (InterruptedException e) { - // The ExpirationThread is just finishing.. so dont do anything.. 
- LOG.debug("Thread interrupted while performing token counter increment", e); - Thread.currentThread().interrupt(); - } catch (Exception e) { - throw new RuntimeException("Could not increment shared counter !!", e); + // The secret manager will keep a local range of seq num which won't be + // seen by peers, so only when the range is exhausted it will ask zk for + // another range again + if (currentSeqNum >= currentMaxSeqNum) { + try { + // after a successful batch request, we can get the range starting point + currentSeqNum = incrSharedCount(delTokSeqCounter, seqNumBatchSize); + currentMaxSeqNum = currentSeqNum + seqNumBatchSize; + LOG.info("Fetched new range of seq num, from {} to {} ", + currentSeqNum+1, currentMaxSeqNum); + } catch (InterruptedException e) { + // The ExpirationThread is just finishing.. so dont do anything.. + LOG.debug( + "Thread interrupted while performing token counter increment", e); + Thread.currentThread().interrupt(); + } catch (Exception e) { + throw new RuntimeException("Could not increment shared counter !!", e); + } } - return delTokSeqCounter.getCount(); + + return ++currentSeqNum; } @Override @@ -603,7 +554,7 @@ protected int getCurrentKeyId() { @Override protected int incrementCurrentKeyId() { try { - incrSharedCount(keyIdSeqCounter); + incrSharedCount(keyIdSeqCounter, 1); } catch (InterruptedException e) { // The ExpirationThread is just finishing.. so dont do anything.. 
LOG.debug("Thread interrupted while performing keyId increment", e); @@ -678,7 +629,7 @@ protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { * * @param ident Identifier of the token */ - private synchronized void syncLocalCacheWithZk(TokenIdent ident) { + protected void syncLocalCacheWithZk(TokenIdent ident) { try { DelegationTokenInformation tokenInfo = getTokenInfoFromZK(ident); if (tokenInfo != null && !currentTokens.containsKey(ident)) { @@ -692,16 +643,21 @@ private synchronized void syncLocalCacheWithZk(TokenIdent ident) { } } - private DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident) + protected DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident) throws IOException { return getTokenInfoFromZK(ident, false); } - private DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident, + protected DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident, boolean quiet) throws IOException { String nodePath = getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + ident.getSequenceNumber()); + return getTokenInfoFromZK(nodePath, quiet); + } + + protected DelegationTokenInformation getTokenInfoFromZK(String nodePath, + boolean quiet) throws IOException { try { byte[] data = zkClient.getData().forPath(nodePath); if ((data == null) || (data.length == 0)) { @@ -836,15 +792,30 @@ protected void updateToken(TokenIdent ident, @Override protected void removeStoredToken(TokenIdent ident) throws IOException { + removeStoredToken(ident, false); + } + + protected void removeStoredToken(TokenIdent ident, + boolean checkAgainstZkBeforeDeletion) throws IOException { String nodeRemovePath = getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + ident.getSequenceNumber()); - if (LOG.isDebugEnabled()) { - LOG.debug("Removing ZKDTSMDelegationToken_" - + ident.getSequenceNumber()); - } try { - if (zkClient.checkExists().forPath(nodeRemovePath) != null) { + DelegationTokenInformation dtInfo = getTokenInfoFromZK(ident, true); + 
if (dtInfo != null) { + // For the case there is no sync or watch miss, it is possible that the + // local storage has expired tokens which have been renewed by peer + // so double check again to avoid accidental delete + if (checkAgainstZkBeforeDeletion + && dtInfo.getRenewDate() > now()) { + LOG.info("Node already renewed by peer " + nodeRemovePath + + " so this token should not be deleted"); + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Removing ZKDTSMDelegationToken_" + + ident.getSequenceNumber()); + } while(zkClient.checkExists().forPath(nodeRemovePath) != null){ try { zkClient.delete().guaranteed().forPath(nodeRemovePath); @@ -867,7 +838,7 @@ protected void removeStoredToken(TokenIdent ident) } @Override - public synchronized TokenIdent cancelToken(Token token, + public TokenIdent cancelToken(Token token, String canceller) throws IOException { ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); DataInputStream in = new DataInputStream(buf); @@ -878,7 +849,7 @@ public synchronized TokenIdent cancelToken(Token token, return super.cancelToken(token, canceller); } - private void addOrUpdateToken(TokenIdent ident, + protected void addOrUpdateToken(TokenIdent ident, DelegationTokenInformation info, boolean isUpdate) throws Exception { String nodeCreatePath = getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX @@ -905,6 +876,10 @@ private void addOrUpdateToken(TokenIdent ident, } } + public boolean isTokenWatcherEnabled() { + return isTokenWatcherEnabled; + } + /** * Simple implementation of an {@link ACLProvider} that simply returns an ACL * that gives all permissions only to a single principal. 
@@ -936,11 +911,6 @@ static String getNodePath(String root, String nodeName) { return (root + "/" + nodeName); } - @VisibleForTesting - public ExecutorService getListenerThreadPool() { - return listenerThreadPool; - } - @VisibleForTesting DelegationTokenInformation getTokenInfoFromMemory(TokenIdent ident) { return currentTokens.get(ident); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticatedURL.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticatedURL.java index 4e9881bc34369..7797bfe496f50 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticatedURL.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticatedURL.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.security.token.delegation.web; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.io.Text; @@ -295,10 +295,8 @@ public HttpURLConnection openConnection(URL url, Token token, String doAs) // delegation token Credentials creds = UserGroupInformation.getCurrentUser(). getCredentials(); - if (LOG.isDebugEnabled()) { - LOG.debug("Token not set, looking for delegation token. Creds:{}," - + " size:{}", creds.getAllTokens(), creds.numberOfTokens()); - } + LOG.debug("Token not set, looking for delegation token. 
Creds:{}," + + " size:{}", creds.getAllTokens(), creds.numberOfTokens()); if (!creds.getAllTokens().isEmpty()) { dToken = selectDelegationToken(url, creds); if (dToken != null) { @@ -338,6 +336,10 @@ public HttpURLConnection openConnection(URL url, Token token, String doAs) /** * Select a delegation token from all tokens in credentials, based on url. + * + * @param url url. + * @param creds credentials. + * @return token. */ @InterfaceAudience.Private public org.apache.hadoop.security.token.Token @@ -409,6 +411,7 @@ public HttpURLConnection openConnection(URL url, Token token, String doAs) * @param token the authentication token with the Delegation Token to renew. * @throws IOException if an IO error occurred. * @throws AuthenticationException if an authentication exception occurred. + * @return delegation token long value. */ public long renewDelegationToken(URL url, Token token) throws IOException, AuthenticationException { @@ -425,6 +428,7 @@ public long renewDelegationToken(URL url, Token token) * @param doAsUser the user to do as, which will be the token owner. * @throws IOException if an IO error occurred. * @throws AuthenticationException if an authentication exception occurred. + * @return delegation token long value. 
*/ public long renewDelegationToken(URL url, Token token, String doAsUser) throws IOException, AuthenticationException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java index 5275526202f2b..a8058c0911570 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.security.token.delegation.web; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.curator.framework.CuratorFramework; import org.apache.hadoop.classification.InterfaceAudience; @@ -125,6 +125,7 @@ protected Properties getConfiguration(String configPrefix, * Set AUTH_TYPE property to the name of the corresponding authentication * handler class based on the input properties. * @param props input properties. + * @throws ServletException servlet exception. 
*/ protected void setAuthHandlerClass(Properties props) throws ServletException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java index 284044fd938a8..479517f2a73ab 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java @@ -54,7 +54,7 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * An {@link AuthenticationHandler} that implements Kerberos SPNEGO mechanism diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticator.java index 4e2ee4fdbea95..05fbc56ac7910 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticator.java @@ -138,8 +138,8 @@ public void authenticate(URL url, AuthenticatedURL.Token token) try { // check and renew TGT to handle potential expiration UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab(); - LOG.debug("No delegation token found for url={}, token={}, " - + "authenticating with 
{}", url, token, authenticator.getClass()); + LOG.debug("No delegation token found for url={}, " + + "authenticating with {}", url, authenticator.getClass()); authenticator.authenticate(url, token); } catch (IOException ex) { throw NetUtils.wrapException(url.getHost(), url.getPort(), @@ -163,6 +163,7 @@ public void authenticate(URL url, AuthenticatedURL.Token token) * @param renewer the renewer user. * @throws IOException if an IO error occurred. * @throws AuthenticationException if an authentication exception occurred. + * @return abstract delegation token identifier. */ public Token getDelegationToken(URL url, AuthenticatedURL.Token token, String renewer) @@ -182,6 +183,7 @@ public Token getDelegationToken(URL url, * @param doAsUser the user to do as, which will be the token owner. * @throws IOException if an IO error occurred. * @throws AuthenticationException if an authentication exception occurred. + * @return abstract delegation token identifier. */ public Token getDelegationToken(URL url, AuthenticatedURL.Token token, String renewer, String doAsUser) @@ -207,8 +209,10 @@ public Token getDelegationToken(URL url, * @param url the URL to renew the delegation token from. Only HTTP/S URLs are * supported. * @param token the authentication token with the Delegation Token to renew. + * @param dToken abstract delegation token identifier. * @throws IOException if an IO error occurred. * @throws AuthenticationException if an authentication exception occurred. + * @return delegation token long value. */ public long renewDelegationToken(URL url, AuthenticatedURL.Token token, @@ -225,8 +229,10 @@ public long renewDelegationToken(URL url, * supported. * @param token the authentication token with the Delegation Token to renew. * @param doAsUser the user to do as, which will be the token owner. + * @param dToken abstract delegation token identifier. * @throws IOException if an IO error occurred. * @throws AuthenticationException if an authentication exception occurred. 
+ * @return delegation token long value. */ public long renewDelegationToken(URL url, AuthenticatedURL.Token token, @@ -245,6 +251,7 @@ public long renewDelegationToken(URL url, * @param url the URL to cancel the delegation token from. Only HTTP/S URLs * are supported. * @param token the authentication token with the Delegation Token to cancel. + * @param dToken abstract delegation token identifier. * @throws IOException if an IO error occurred. */ public void cancelDelegationToken(URL url, @@ -261,6 +268,7 @@ public void cancelDelegationToken(URL url, * @param url the URL to cancel the delegation token from. Only HTTP/S URLs * are supported. * @param token the authentication token with the Delegation Token to cancel. + * @param dToken abstract delegation token identifier. * @param doAsUser the user to do as, which will be the token owner. * @throws IOException if an IO error occurred. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java index e1445fb5ca05b..7e7e794d9de47 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java @@ -33,7 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Delegation Token Manager used by the diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/MultiSchemeDelegationTokenAuthenticationHandler.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/MultiSchemeDelegationTokenAuthenticationHandler.java index 0661fb2b5a2a7..865977e67d07a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/MultiSchemeDelegationTokenAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/MultiSchemeDelegationTokenAuthenticationHandler.java @@ -36,8 +36,8 @@ import org.apache.hadoop.security.authentication.server.HttpConstants; import org.apache.hadoop.security.authentication.server.MultiSchemeAuthenticationHandler; -import com.google.common.base.Preconditions; -import com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; /** * A {@link CompositeAuthenticationHandler} that supports multiple HTTP diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java index c9fec435bfa24..69000a03cf78a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -240,7 +240,7 @@ public void stop() { /** * Relay to {@link #stop()} - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Override public final void close() throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java index 4aa2f23fad730..236c6031cade5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java @@ -81,7 +81,7 @@ protected void addService(Service service) { /** * If the passed object is an instance of {@link Service}, * add it to the list of services managed by this {@link CompositeService} - * @param object + * @param object object. * @return true if a service is added, false otherwise. */ protected boolean addIfService(Object object) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/ServiceStateModel.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/ServiceStateModel.java index 9f282b9f93483..c075cbb89a8a6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/ServiceStateModel.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/ServiceStateModel.java @@ -54,6 +54,8 @@ public class ServiceStateModel { /** * Create the service state model in the {@link Service.STATE#NOTINITED} * state. + * + * @param name input name. */ public ServiceStateModel(String name) { this(name, Service.STATE.NOTINITED); @@ -62,6 +64,7 @@ public ServiceStateModel(String name) { /** * Create a service state model instance in the chosen state * @param state the starting state + * @param name input name. 
*/ public ServiceStateModel(String name, Service.STATE state) { this.state = state; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/AbstractLaunchableService.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/AbstractLaunchableService.java index be28c5be2d017..66f8ee430f6e1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/AbstractLaunchableService.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/AbstractLaunchableService.java @@ -42,6 +42,8 @@ public abstract class AbstractLaunchableService extends AbstractService /** * Construct an instance with the given name. + * + * @param name input name. */ protected AbstractLaunchableService(String name) { super(name); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/InterruptEscalator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/InterruptEscalator.java index 594fc5bfe8d14..ad92d4c6d7a24 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/InterruptEscalator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/InterruptEscalator.java @@ -23,7 +23,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/IrqHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/IrqHandler.java index 17aa9639c31d9..bcb589f24885f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/IrqHandler.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/IrqHandler.java @@ -20,7 +20,7 @@ import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import sun.misc.Signal; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/ServiceLauncher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/ServiceLauncher.java index 5e8a1f4eb21fb..13563833e5939 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/ServiceLauncher.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/ServiceLauncher.java @@ -25,8 +25,8 @@ import java.util.Arrays; import java.util.List; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,6 +35,7 @@ import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.audit.CommonAuditContext; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.service.Service; import org.apache.hadoop.util.ExitCodeProvider; @@ -411,7 +412,7 @@ protected List getConfigurationsToCreate() { } /** - * This creates all the configurations defined by + * @return This creates all the configurations defined by * {@link #getConfigurationsToCreate()} , ensuring that * the resources have been pushed in. 
* If one cannot be loaded it is logged and the operation continues @@ -565,6 +566,7 @@ public ExitUtil.ExitException launchService(Configuration conf, * @throws Exception any other failure -if it implements * {@link ExitCodeProvider} then it defines the exit code for any * containing exception + * @return status code. */ protected int coreServiceLaunch(Configuration conf, @@ -590,6 +592,7 @@ protected int coreServiceLaunch(Configuration conf, } String name = getServiceName(); LOG.debug("Launched service {}", name); + CommonAuditContext.noteEntryPoint(service); LaunchableService launchableService = null; if (service instanceof LaunchableService) { @@ -645,7 +648,7 @@ protected int coreServiceLaunch(Configuration conf, } /** - * Instantiate the service defined in {@code serviceClassName}. + * @return Instantiate the service defined in {@code serviceClassName}. * * Sets the {@code configuration} field * to the the value of {@code conf}, @@ -849,6 +852,7 @@ protected void error(String message, Throwable thrown) { * The service launcher code assumes that after this method is invoked, * no other code in the same method is called. * @param exitCode code to exit + * @param message input message. */ protected void exit(int exitCode, String message) { ExitUtil.terminate(exitCode, message); @@ -1000,7 +1004,7 @@ protected void verifyConfigurationFilesExist(String[] filenames) { } /** - * Build a log message for starting up and shutting down. + * @return Build a log message for starting up and shutting down. 
* @param classname the class of the server * @param args arguments */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/package-info.java index f582fa2d97875..ae7acfc1b17f6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/package-info.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/launcher/package-info.java @@ -192,9 +192,8 @@ At this point, rather than block waiting for the service to terminate (as during the {@code execute()} method takes priority over any exit codes returned by the method. This allows services to signal failures simply by raising exceptions with exit codes. -

    +

    -

    To view the workflow in sequence, it is:

    1. (prepare configuration files —covered later)
    2. @@ -221,7 +220,7 @@ At this point, rather than block waiting for the service to terminate (as

      For a basic service, the return code is 0 unless an exception was raised. -

      +

      For a {@link org.apache.hadoop.service.launcher.LaunchableService}, the return code is the number returned from the {@link org.apache.hadoop.service.launcher.LaunchableService#execute()} @@ -235,7 +234,7 @@ At this point, rather than block waiting for the service to terminate (as of returning error codes to signal failures and for normal Services to return any error code at all. -

      +

      Any exception which implements the {@link org.apache.hadoop.util.ExitCodeProvider} interface is considered be a provider of the exit code: the method @@ -269,7 +268,7 @@ interface listing common exception codes. These are exception codes Note that {@link org.apache.hadoop.util.ExitUtil.ExitException} itself implements {@link org.apache.hadoop.util.ExitCodeProvider#getExitCode()} -

      +

      If an exception does not implement {@link org.apache.hadoop.util.ExitCodeProvider#getExitCode()}, it will be wrapped in an {@link org.apache.hadoop.util.ExitUtil.ExitException} @@ -324,7 +323,7 @@ interface listing common exception codes. These are exception codes when received, attempts to stop the service in a limited period of time. It then triggers a JVM shutdown by way of {@link org.apache.hadoop.util.ExitUtil#terminate(int, String)} -

      + If a second signal is received, the {@link org.apache.hadoop.service.launcher.InterruptEscalator} reacts by triggering an immediate JVM halt, invoking @@ -342,7 +341,7 @@ interface listing common exception codes. These are exception codes stop the service if a shutdown request is received, so ensuring that if the JVM is exited by any thread, an attempt to shut down the service will be made. - +

      Configuration class creation

      @@ -355,12 +354,12 @@ interface listing common exception codes. These are exception codes What the launcher does do is use reflection to try and create instances of these classes simply to force in the common resources. If the classes are not on the classpath this fact will be logged. -

      +

      Applications may consider it essential to either force load in the relevant configuration, or pass it down to the service being created. In which case further measures may be needed. -

      1: Creation in an extended {@code ServiceLauncher} + 1: Creation in an extended {@code ServiceLauncher}

      Subclass the Service launcher and override its @@ -371,9 +370,9 @@ interface listing common exception codes. These are exception codes HDFS or YARN. It does imply a dedicated script to invoke the custom {@code main()} method. -

      2: Creation in {@code bindArgs()} + 2: Creation in {@code bindArgs()} -

      +

      In {@link org.apache.hadoop.service.launcher.LaunchableService#bindArgs(Configuration, List)}, a new configuration is created: @@ -390,7 +389,7 @@ interface listing common exception codes. These are exception codes instances created via the service launcher. It does imply that this is expected to be only way that services will be launched. -

      3: Creation in {@code serviceInit()} +

      3: Creation in {@code serviceInit()}

        protected void serviceInit(Configuration conf) throws Exception {
      @@ -406,7 +405,7 @@ protected void serviceInit(Configuration conf) throws Exception {
        propagating information between peer services in a
        {@link org.apache.hadoop.service.CompositeService}.
        While a dangerous practice, it does happen.
      -
      + 

      Summary: the ServiceLauncher makes a best-effort attempt to load the standard Configuration subclasses, but does not fail if they are not present. @@ -429,7 +428,7 @@ class (the one created by the If this argument is repeated multiple times, all configuration files are merged with the latest file on the command line being the last one to be applied. -

      +

      All the {@code --conf <file>} argument pairs are stripped off the argument list provided to the instantiated service; they get the merged configuration, but not the commands used to create it. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java index a53e2259e0e25..25f79a63a233c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java @@ -36,6 +36,7 @@ public abstract class CommandShell extends Configured implements Tool { /** * Return usage string for the command including any summary of subcommands. + * @return command usage. */ public abstract String getCommandUsage(); @@ -84,8 +85,9 @@ public int run(String[] args) throws Exception { /** * Parse the command line arguments and initialize subcommand instance. - * @param args + * @param args arguments. * @return 0 if the argument(s) were recognized, 1 otherwise + * @throws Exception init exception. */ protected abstract int init(String[] args) throws Exception; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetGroupsBase.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetGroupsBase.java index 92cdb5835e77d..548409ea58e09 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetGroupsBase.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetGroupsBase.java @@ -38,7 +38,7 @@ public abstract class GetGroupsBase extends Configured implements Tool { /** * Create an instance of this tool using the given configuration. - * @param conf + * @param conf configuration. 
*/ protected GetGroupsBase(Configuration conf) { this(conf, System.out); @@ -84,7 +84,7 @@ public int run(String[] args) throws Exception { * * @param conf The configuration to use. * @return The address where the service is listening. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected abstract InetSocketAddress getProtocolAddress(Configuration conf) throws IOException; @@ -92,7 +92,7 @@ protected abstract InetSocketAddress getProtocolAddress(Configuration conf) /** * Get a client of the {@link GetUserMappingsProtocol}. * @return A {@link GetUserMappingsProtocol} client proxy. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ protected GetUserMappingsProtocol getUgmProtocol() throws IOException { GetUserMappingsProtocol userGroupMappingProtocol = diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetUserMappingsProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetUserMappingsProtocol.java index 3e80ac030d3ef..8b1922372ba3c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetUserMappingsProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/GetUserMappingsProtocol.java @@ -40,7 +40,7 @@ public interface GetUserMappingsProtocol { * Get the groups which are mapped to the given user. * @param user The user to get the groups for. * @return The set of groups the user belongs to. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ @Idempotent public String[] getGroupsForUser(String user) throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/TableListing.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/TableListing.java index 348f86fe1368b..d1d933c737d2b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/TableListing.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/TableListing.java @@ -155,7 +155,9 @@ public Builder addField(String title, Justification justification, } /** - * Whether to hide column headers in table output + * Whether to hide column headers in table output. + * + * @return Builder. */ public Builder hideHeaders() { this.showHeader = false; @@ -164,6 +166,8 @@ public Builder hideHeaders() { /** * Whether to show column headers in table output. This is the default. + * + * @return Builder. */ public Builder showHeaders() { this.showHeader = true; @@ -173,6 +177,9 @@ public Builder showHeaders() { /** * Set the maximum width of a row in the TableListing. Must have one or * more wrappable fields for this to take effect. + * + * @param width width. + * @return Builder. */ public Builder wrapWidth(int width) { this.wrapWidth = width; @@ -181,6 +188,8 @@ public Builder wrapWidth(int width) { /** * Create a new TableListing. + * + * @return TableListing. 
*/ public TableListing build() { return new TableListing(columns.toArray(new Column[0]), showHeader, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/NullTraceScope.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/NullTraceScope.java new file mode 100644 index 0000000000000..13788e3dd56ad --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/NullTraceScope.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.tracing; + +public class NullTraceScope extends TraceScope { + public static final NullTraceScope INSTANCE = new NullTraceScope(); + + public NullTraceScope() { + super(null); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/Span.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/Span.java new file mode 100644 index 0000000000000..197b29fa3dfe4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/Span.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.tracing; + +import java.io.Closeable; + +public class Span implements Closeable { + + public Span() { + } + + public Span addKVAnnotation(String key, String value) { + return this; + } + + public Span addTimelineAnnotation(String msg) { + return this; + } + + public SpanContext getContext() { + return null; + } + + public void finish() { + } + + public void close() { + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanContext.java new file mode 100644 index 0000000000000..363e94dc85dba --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanContext.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tracing; + +import java.io.Closeable; + +/** + * Wrapper class for SpanContext to avoid using OpenTracing/OpenTelemetry + * SpanContext class directly for better separation. 
+ */ +public class SpanContext implements Closeable { + public SpanContext() { + } + + public void close() { + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverInfo.java deleted file mode 100644 index 546af26b9589a..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverInfo.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.tracing; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -@InterfaceAudience.Public -@InterfaceStability.Stable -public class SpanReceiverInfo { - private final long id; - private final String className; - final List configPairs = - new ArrayList(); - - static class ConfigurationPair { - private final String key; - private final String value; - - ConfigurationPair(String key, String value) { - this.key = key; - this.value = value; - } - - public String getKey() { - return key; - } - - public String getValue() { - return value; - } - } - - SpanReceiverInfo(long id, String className) { - this.id = id; - this.className = className; - } - - public long getId() { - return id; - } - - public String getClassName() { - return className; - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverInfoBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverInfoBuilder.java deleted file mode 100644 index 07ee380c1a5ed..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverInfoBuilder.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.tracing; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; - -@InterfaceAudience.Public -@InterfaceStability.Stable -public class SpanReceiverInfoBuilder { - private SpanReceiverInfo info; - - public SpanReceiverInfoBuilder(String className) { - info = new SpanReceiverInfo(0, className); - } - - public void addConfigurationPair(String key, String value) { - info.configPairs.add(new ConfigurationPair(key, value)); - } - - public SpanReceiverInfo build() { - SpanReceiverInfo ret = info; - info = null; - return ret; - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java deleted file mode 100644 index 766fb0a6557eb..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdmin.java +++ /dev/null @@ -1,212 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.tracing; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.net.InetSocketAddress; -import java.util.LinkedList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.ipc.ProtobufRpcEngine; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.tools.TableListing; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.Tool; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A command-line tool for viewing and modifying tracing settings. - */ -@InterfaceAudience.Private -public class TraceAdmin extends Configured implements Tool { - private TraceAdminProtocolPB proxy; - private TraceAdminProtocolTranslatorPB remote; - private static final Logger LOG = LoggerFactory.getLogger(TraceAdmin.class); - - private void usage() { - PrintStream err = System.err; - err.print( - "Hadoop tracing configuration commands:\n" + - " -add [-class classname] [-Ckey=value] [-Ckey2=value2] ...\n" + - " Add a span receiver with the provided class name. 
Configuration\n" + - " keys for the span receiver can be specified with the -C options.\n" + - " The span receiver will also inherit whatever configuration keys\n" + - " exist in the daemon's configuration.\n" + - " -help: Print this help message.\n" + - " -host [hostname:port]\n" + - " Specify the hostname and port of the daemon to examine.\n" + - " Required for all commands.\n" + - " -list: List the current span receivers.\n" + - " -remove [id]\n" + - " Remove the span receiver with the specified id. Use -list to\n" + - " find the id of each receiver.\n" + - " -principal: If the daemon is Kerberized, specify the service\n" + - " principal name." - ); - } - - private int listSpanReceivers(List args) throws IOException { - SpanReceiverInfo infos[] = remote.listSpanReceivers(); - if (infos.length == 0) { - System.out.println("[no span receivers found]"); - return 0; - } - TableListing listing = new TableListing.Builder(). - addField("ID"). - addField("CLASS"). - showHeaders(). - build(); - for (SpanReceiverInfo info : infos) { - listing.addRow("" + info.getId(), info.getClassName()); - } - System.out.println(listing.toString()); - return 0; - } - - private final static String CONFIG_PREFIX = "-C"; - - private int addSpanReceiver(List args) throws IOException { - String className = StringUtils.popOptionWithArgument("-class", args); - if (className == null) { - System.err.println("You must specify the classname with -class."); - return 1; - } - ByteArrayOutputStream configStream = new ByteArrayOutputStream(); - PrintStream configsOut = new PrintStream(configStream, false, "UTF-8"); - SpanReceiverInfoBuilder factory = new SpanReceiverInfoBuilder(className); - String prefix = ""; - for (int i = 0; i < args.size(); ++i) { - String str = args.get(i); - if (!str.startsWith(CONFIG_PREFIX)) { - System.err.println("Can't understand argument: " + str); - return 1; - } - str = str.substring(CONFIG_PREFIX.length()); - int equalsIndex = str.indexOf("="); - if (equalsIndex < 0) { 
- System.err.println("Can't parse configuration argument " + str); - System.err.println("Arguments must be in the form key=value"); - return 1; - } - String key = str.substring(0, equalsIndex); - String value = str.substring(equalsIndex + 1); - factory.addConfigurationPair(key, value); - configsOut.print(prefix + key + " = " + value); - prefix = ", "; - } - - String configStreamStr = configStream.toString("UTF-8"); - try { - long id = remote.addSpanReceiver(factory.build()); - System.out.println("Added trace span receiver " + id + - " with configuration " + configStreamStr); - } catch (IOException e) { - System.out.println("addSpanReceiver error with configuration " + - configStreamStr); - throw e; - } - return 0; - } - - private int removeSpanReceiver(List args) throws IOException { - String indexStr = StringUtils.popFirstNonOption(args); - long id = -1; - try { - id = Long.parseLong(indexStr); - } catch (NumberFormatException e) { - System.err.println("Failed to parse ID string " + - indexStr + ": " + e.getMessage()); - return 1; - } - remote.removeSpanReceiver(id); - System.err.println("Removed trace span receiver " + id); - return 0; - } - - @Override - public int run(String argv[]) throws Exception { - LinkedList args = new LinkedList(); - for (String arg : argv) { - args.add(arg); - } - if (StringUtils.popOption("-h", args) || - StringUtils.popOption("-help", args)) { - usage(); - return 0; - } else if (args.size() == 0) { - usage(); - return 0; - } - String hostPort = StringUtils.popOptionWithArgument("-host", args); - if (hostPort == null) { - System.err.println("You must specify a host with -host."); - return 1; - } - if (args.isEmpty()) { - System.err.println("You must specify an operation."); - return 1; - } - String servicePrincipal = StringUtils.popOptionWithArgument("-principal", - args); - if (servicePrincipal != null) { - LOG.debug("Set service principal: {}", servicePrincipal); - getConf().set( - 
CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY, - servicePrincipal); - } - RPC.setProtocolEngine(getConf(), TraceAdminProtocolPB.class, - ProtobufRpcEngine.class); - InetSocketAddress address = NetUtils.createSocketAddr(hostPort); - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - Class xface = TraceAdminProtocolPB.class; - proxy = (TraceAdminProtocolPB)RPC.getProxy(xface, - RPC.getProtocolVersion(xface), address, - ugi, getConf(), NetUtils.getDefaultSocketFactory(getConf()), 0); - remote = new TraceAdminProtocolTranslatorPB(proxy); - try { - if (args.get(0).equals("-list")) { - return listSpanReceivers(args.subList(1, args.size())); - } else if (args.get(0).equals("-add")) { - return addSpanReceiver(args.subList(1, args.size())); - } else if (args.get(0).equals("-remove")) { - return removeSpanReceiver(args.subList(1, args.size())); - } else { - System.err.println("Unrecognized tracing command: " + args.get(0)); - System.err.println("Use -help for help."); - return 1; - } - } finally { - remote.close(); - } - } - - public static void main(String[] argv) throws Exception { - TraceAdmin admin = new TraceAdmin(); - admin.setConf(new Configuration()); - System.exit(admin.run(argv)); - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocol.java deleted file mode 100644 index a0fcf580cbe40..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocol.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.tracing; - -import java.io.IOException; -import java.util.LinkedList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.io.retry.AtMostOnce; -import org.apache.hadoop.io.retry.Idempotent; -import org.apache.hadoop.ipc.ProtocolInfo; -import org.apache.hadoop.security.KerberosInfo; - -/** - * Protocol interface that provides tracing. - */ -@KerberosInfo( - serverPrincipal=CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY) -@InterfaceAudience.Public -@InterfaceStability.Evolving -public interface TraceAdminProtocol { - public static final long versionID = 1L; - - /** - * List the currently active trace span receivers. - * - * @throws IOException On error. - */ - @Idempotent - public SpanReceiverInfo[] listSpanReceivers() throws IOException; - - /** - * Add a new trace span receiver. - * - * @param desc The span receiver description. - * @return The ID of the new trace span receiver. - * - * @throws IOException On error. - */ - @AtMostOnce - public long addSpanReceiver(SpanReceiverInfo desc) throws IOException; - - /** - * Remove a trace span receiver. - * - * @param spanReceiverId The id of the span receiver to remove. - * @throws IOException On error. 
- */ - @AtMostOnce - public void removeSpanReceiver(long spanReceiverId) throws IOException; -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolPB.java deleted file mode 100644 index e43780e6a60da..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolPB.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.tracing; - -import org.apache.hadoop.ipc.VersionedProtocol; -import org.apache.hadoop.tracing.TraceAdminPB.TraceAdminService; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.ipc.ProtocolInfo; -import org.apache.hadoop.security.KerberosInfo; - -@KerberosInfo( - serverPrincipal=CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY) -@ProtocolInfo( - protocolName = "org.apache.hadoop.tracing.TraceAdminPB.TraceAdminService", - protocolVersion = 1) -@InterfaceAudience.Public -@InterfaceStability.Evolving -public interface TraceAdminProtocolPB extends - TraceAdminService.BlockingInterface, VersionedProtocol { -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolServerSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolServerSideTranslatorPB.java deleted file mode 100644 index 5b49e2e0d5ef0..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolServerSideTranslatorPB.java +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.tracing; - -import java.io.Closeable; -import java.io.IOException; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.io.retry.AtMostOnce; -import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtocolSignature; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.tracing.TraceAdminPB.AddSpanReceiverRequestProto; -import org.apache.hadoop.tracing.TraceAdminPB.AddSpanReceiverResponseProto; -import org.apache.hadoop.tracing.TraceAdminPB.ListSpanReceiversRequestProto; -import org.apache.hadoop.tracing.TraceAdminPB.ListSpanReceiversResponseProto; -import org.apache.hadoop.tracing.TraceAdminPB.ConfigPair; -import org.apache.hadoop.tracing.TraceAdminPB.RemoveSpanReceiverRequestProto; -import org.apache.hadoop.tracing.TraceAdminPB.RemoveSpanReceiverResponseProto; -import org.apache.hadoop.thirdparty.protobuf.RpcController; -import org.apache.hadoop.thirdparty.protobuf.ServiceException; - -@InterfaceAudience.Private -public class TraceAdminProtocolServerSideTranslatorPB - implements TraceAdminProtocolPB, Closeable { - private final TraceAdminProtocol server; - - public TraceAdminProtocolServerSideTranslatorPB(TraceAdminProtocol server) { - this.server = server; - } - - @Override - public void close() throws IOException { - RPC.stopProxy(server); - } - - @Override - public ListSpanReceiversResponseProto listSpanReceivers( - RpcController controller, ListSpanReceiversRequestProto req) - throws ServiceException { - try { - SpanReceiverInfo[] descs = server.listSpanReceivers(); - ListSpanReceiversResponseProto.Builder bld = - ListSpanReceiversResponseProto.newBuilder(); - for (int i = 0; i < descs.length; ++i) { - bld.addDescriptions(TraceAdminPB.SpanReceiverListInfo.newBuilder(). - setId(descs[i].getId()). 
- setClassName(descs[i].getClassName()).build()); - } - return bld.build(); - } catch (IOException e) { - throw new ServiceException(e); - } - } - - @Override - public AddSpanReceiverResponseProto addSpanReceiver( - RpcController controller, AddSpanReceiverRequestProto req) - throws ServiceException { - try { - SpanReceiverInfoBuilder factory = - new SpanReceiverInfoBuilder(req.getClassName()); - for (ConfigPair config : req.getConfigList()) { - factory.addConfigurationPair(config.getKey(), config.getValue()); - } - long id = server.addSpanReceiver(factory.build()); - return AddSpanReceiverResponseProto.newBuilder().setId(id).build(); - } catch (IOException e) { - throw new ServiceException(e); - } - } - - @Override - public RemoveSpanReceiverResponseProto removeSpanReceiver( - RpcController controller, RemoveSpanReceiverRequestProto req) - throws ServiceException { - try { - server.removeSpanReceiver(req.getId()); - return RemoveSpanReceiverResponseProto.getDefaultInstance(); - } catch (IOException e) { - throw new ServiceException(e); - } - } - - @Override - public long getProtocolVersion(String protocol, long clientVersion) - throws IOException { - return TraceAdminProtocol.versionID; - } - - @Override - public ProtocolSignature getProtocolSignature(String protocol, - long clientVersion, int clientMethodsHash) throws IOException { - if (!protocol.equals(RPC.getProtocolName(TraceAdminProtocolPB.class))) { - throw new IOException("Serverside implements " + - RPC.getProtocolName(TraceAdminProtocolPB.class) + - ". 
The following requested protocol is unknown: " + protocol); - } - return ProtocolSignature.getProtocolSignature(clientMethodsHash, - RPC.getProtocolVersion(TraceAdminProtocolPB.class), - TraceAdminProtocolPB.class); - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolTranslatorPB.java deleted file mode 100644 index a5cba39844c83..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceAdminProtocolTranslatorPB.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.tracing; - -import java.io.Closeable; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtocolTranslator; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.tracing.TraceAdminPB.AddSpanReceiverRequestProto; -import org.apache.hadoop.tracing.TraceAdminPB.AddSpanReceiverResponseProto; -import org.apache.hadoop.tracing.TraceAdminPB.ListSpanReceiversRequestProto; -import org.apache.hadoop.tracing.TraceAdminPB.ListSpanReceiversResponseProto; -import org.apache.hadoop.tracing.TraceAdminPB.ConfigPair; -import org.apache.hadoop.tracing.TraceAdminPB.RemoveSpanReceiverRequestProto; -import org.apache.hadoop.tracing.TraceAdminPB.SpanReceiverListInfo; -import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.hadoop.thirdparty.protobuf.ServiceException; - -@InterfaceAudience.Private -public class TraceAdminProtocolTranslatorPB implements - TraceAdminProtocol, ProtocolTranslator, Closeable { - private final TraceAdminProtocolPB rpcProxy; - - public TraceAdminProtocolTranslatorPB(TraceAdminProtocolPB rpcProxy) { - this.rpcProxy = rpcProxy; - } - - @Override - public void close() throws IOException { - RPC.stopProxy(rpcProxy); - } - - @Override - public SpanReceiverInfo[] listSpanReceivers() throws IOException { - ArrayList infos = new ArrayList(1); - try { - ListSpanReceiversRequestProto req = - ListSpanReceiversRequestProto.newBuilder().build(); - ListSpanReceiversResponseProto resp = - rpcProxy.listSpanReceivers(null, req); - for (SpanReceiverListInfo info : resp.getDescriptionsList()) { - infos.add(new SpanReceiverInfo(info.getId(), info.getClassName())); - } - } catch (ServiceException e) { - throw ProtobufHelper.getRemoteException(e); - } - return infos.toArray(new SpanReceiverInfo[infos.size()]); - } - - @Override - public long 
addSpanReceiver(SpanReceiverInfo info) throws IOException { - try { - AddSpanReceiverRequestProto.Builder bld = - AddSpanReceiverRequestProto.newBuilder(); - bld.setClassName(info.getClassName()); - for (ConfigurationPair configPair : info.configPairs) { - ConfigPair tuple = ConfigPair.newBuilder(). - setKey(configPair.getKey()). - setValue(configPair.getValue()).build(); - bld.addConfig(tuple); - } - AddSpanReceiverResponseProto resp = - rpcProxy.addSpanReceiver(null, bld.build()); - return resp.getId(); - } catch (ServiceException e) { - throw ProtobufHelper.getRemoteException(e); - } - } - - @Override - public void removeSpanReceiver(long spanReceiverId) throws IOException { - try { - RemoveSpanReceiverRequestProto req = - RemoveSpanReceiverRequestProto.newBuilder() - .setId(spanReceiverId).build(); - rpcProxy.removeSpanReceiver(null, req); - } catch (ServiceException e) { - throw ProtobufHelper.getRemoteException(e); - } - } - - @Override - public Object getUnderlyingProxyObject() { - return rpcProxy; - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceConfiguration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceConfiguration.java new file mode 100644 index 0000000000000..2c9a9b2d0cae3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceConfiguration.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tracing; + +public class TraceConfiguration { + public TraceConfiguration() { + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceScope.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceScope.java new file mode 100644 index 0000000000000..2abf9cb7ec2bf --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceScope.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.tracing; + +import java.io.Closeable; + +public class TraceScope implements Closeable { + Span span; + + public TraceScope(Span span) { + this.span = span; + } + + // Add tag to the span + public void addKVAnnotation(String key, String value) { + } + + public void addKVAnnotation(String key, Number value) { + } + + public void addTimelineAnnotation(String msg) { + } + + public Span span() { + return span; + } + + public Span getSpan() { + return span; + } + + public void reattach() { + } + + public void detach() { + } + + public void close() { + if (span != null) { + span.close(); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java index 0ae6d03933f09..b218493780ee1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java @@ -17,59 +17,31 @@ */ package org.apache.hadoop.tracing; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.htrace.core.HTraceConfiguration; +import org.apache.hadoop.thirdparty.protobuf.ByteString; /** * This class provides utility functions for tracing. 
*/ @InterfaceAudience.Private public class TraceUtils { - private static List EMPTY = Collections.emptyList(); static final String DEFAULT_HADOOP_TRACE_PREFIX = "hadoop.htrace."; - public static HTraceConfiguration wrapHadoopConf(final String prefix, - final Configuration conf) { - return wrapHadoopConf(prefix, conf, EMPTY); + public static TraceConfiguration wrapHadoopConf(final String prefix, + final Configuration conf) { + return null; } - public static HTraceConfiguration wrapHadoopConf(final String prefix, - final Configuration conf, List extraConfig) { - final HashMap extraMap = new HashMap(); - for (ConfigurationPair pair : extraConfig) { - extraMap.put(pair.getKey(), pair.getValue()); - } - return new HTraceConfiguration() { - @Override - public String get(String key) { - String ret = getInternal(prefix + key); - if (ret != null) { - return ret; - } - return getInternal(DEFAULT_HADOOP_TRACE_PREFIX + key); - } + public static Tracer createAndRegisterTracer(String name) { + return null; + } - @Override - public String get(String key, String defaultValue) { - String ret = get(key); - if (ret != null) { - return ret; - } - return defaultValue; - } + public static SpanContext byteStringToSpanContext(ByteString byteString) { + return null; + } - private String getInternal(String key) { - if (extraMap.containsKey(key)) { - return extraMap.get(key); - } - return conf.get(key); - } - }; + public static ByteString spanContextToByteString(SpanContext context) { + return null; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/Tracer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/Tracer.java new file mode 100644 index 0000000000000..a99b004b542f4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/Tracer.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tracing; + +/** + * No-Op Tracer (for now) to remove HTrace without changing too many files. + */ +public class Tracer { + // Singleton + private static final Tracer globalTracer = null; + private final NullTraceScope nullTraceScope; + private final String name; + + public final static String SPAN_RECEIVER_CLASSES_KEY = + "span.receiver.classes"; + + public Tracer(String name) { + this.name = name; + nullTraceScope = NullTraceScope.INSTANCE; + } + + // Keeping this function at the moment for HTrace compatibility, + // in fact all threads share a single global tracer for OpenTracing. + public static Tracer curThreadTracer() { + return globalTracer; + } + + /*** + * Return active span. 
+ * @return org.apache.hadoop.tracing.Span + */ + public static Span getCurrentSpan() { + return null; + } + + public TraceScope newScope(String description) { + return nullTraceScope; + } + + public Span newSpan(String description, SpanContext spanCtx) { + return new Span(); + } + + public TraceScope newScope(String description, SpanContext spanCtx) { + return nullTraceScope; + } + + public TraceScope newScope(String description, SpanContext spanCtx, + boolean finishSpanOnClose) { + return nullTraceScope; + } + + public TraceScope activateSpan(Span span) { + return nullTraceScope; + } + + public void close() { + } + + public String getName() { + return name; + } + + public static class Builder { + static Tracer globalTracer; + private String name; + + public Builder(final String name) { + this.name = name; + } + + public Builder conf(TraceConfiguration conf) { + return this; + } + + public Tracer build() { + if (globalTracer == null) { + globalTracer = new Tracer(name); + } + return globalTracer; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java deleted file mode 100644 index 658e4d326b1e9..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.tracing; - -import java.io.IOException; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.htrace.core.SpanReceiver; -import org.apache.htrace.core.TracerPool; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class provides functions for managing the tracer configuration at - * runtime via an RPC protocol. - */ -@InterfaceAudience.Private -public class TracerConfigurationManager implements TraceAdminProtocol { - private static final Logger LOG = - LoggerFactory.getLogger(TracerConfigurationManager.class); - - private final String confPrefix; - private final Configuration conf; - - public TracerConfigurationManager(String confPrefix, Configuration conf) { - this.confPrefix = confPrefix; - this.conf = conf; - } - - public synchronized SpanReceiverInfo[] listSpanReceivers() - throws IOException { - TracerPool pool = TracerPool.getGlobalTracerPool(); - SpanReceiver[] receivers = pool.getReceivers(); - SpanReceiverInfo[] info = new SpanReceiverInfo[receivers.length]; - for (int i = 0; i < receivers.length; i++) { - SpanReceiver receiver = receivers[i]; - info[i] = new SpanReceiverInfo(receiver.getId(), - receiver.getClass().getName()); - } - return info; - } - - public synchronized long addSpanReceiver(SpanReceiverInfo info) - throws IOException { - StringBuilder configStringBuilder = new StringBuilder(); - String prefix = ""; - for (ConfigurationPair pair 
: info.configPairs) { - configStringBuilder.append(prefix).append(pair.getKey()). - append(" = ").append(pair.getValue()); - prefix = ", "; - } - SpanReceiver rcvr = null; - try { - rcvr = new SpanReceiver.Builder(TraceUtils.wrapHadoopConf( - confPrefix, conf, info.configPairs)). - className(info.getClassName().trim()). - build(); - } catch (RuntimeException e) { - LOG.info("Failed to add SpanReceiver " + info.getClassName() + - " with configuration " + configStringBuilder.toString(), e); - throw e; - } - TracerPool.getGlobalTracerPool().addReceiver(rcvr); - LOG.info("Successfully added SpanReceiver " + info.getClassName() + - " with configuration " + configStringBuilder.toString()); - return rcvr.getId(); - } - - public synchronized void removeSpanReceiver(long spanReceiverId) - throws IOException { - SpanReceiver[] receivers = - TracerPool.getGlobalTracerPool().getReceivers(); - for (SpanReceiver receiver : receivers) { - if (receiver.getId() == spanReceiverId) { - TracerPool.getGlobalTracerPool().removeAndCloseReceiver(receiver); - LOG.info("Successfully removed SpanReceiver " + spanReceiverId + - " with class " + receiver.getClass().getName()); - return; - } - } - throw new IOException("There is no span receiver with id " + spanReceiverId); - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AsyncDiskService.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AsyncDiskService.java index 8e48cb955a3a7..a3bf4faf0a980 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AsyncDiskService.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AsyncDiskService.java @@ -94,6 +94,9 @@ public Thread newThread(Runnable r) { /** * Execute the task sometime in the future, using ThreadPools. + * + * @param root root. + * @param task task. 
*/ public synchronized void execute(String root, Runnable task) { ThreadPoolExecutor executor = executors.get(root); @@ -123,7 +126,7 @@ public synchronized void shutdown() { * * @param milliseconds The number of milliseconds to wait * @return true if all thread pools are terminated without time limit - * @throws InterruptedException + * @throws InterruptedException if the thread is interrupted. */ public synchronized boolean awaitTermination(long milliseconds) throws InterruptedException { @@ -145,6 +148,8 @@ public synchronized boolean awaitTermination(long milliseconds) /** * Shut down all ThreadPools immediately. + * + * @return Runnable List. */ public synchronized List shutdownNow() { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java index d7fe93d73cf02..e761858e3c170 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java @@ -21,7 +21,7 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This is a wrap class of a ReentrantLock. 
Extending AutoCloseable diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/BlockingThreadPoolExecutorService.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/BlockingThreadPoolExecutorService.java index d49013ec14d1f..5c90e4bd2d601 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/BlockingThreadPoolExecutorService.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/BlockingThreadPoolExecutorService.java @@ -28,8 +28,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.util.concurrent.MoreExecutors; - import org.apache.hadoop.classification.InterfaceAudience; /** @@ -105,8 +103,7 @@ public Thread newThread(Runnable r) { private BlockingThreadPoolExecutorService(int permitCount, ThreadPoolExecutor eventProcessingExecutor) { - super(MoreExecutors.listeningDecorator(eventProcessingExecutor), - permitCount, false); + super(eventProcessingExecutor, permitCount, false); this.eventProcessingExecutor = eventProcessingExecutor; } @@ -120,6 +117,7 @@ private BlockingThreadPoolExecutorService(int permitCount, * @param keepAliveTime time until threads are cleaned up in {@code unit} * @param unit time unit * @param prefixName prefix of name for threads + * @return BlockingThreadPoolExecutorService. 
*/ public static BlockingThreadPoolExecutorService newInstance( int activeTasks, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java index 84ddc32f88c1f..ff7197ce52e4d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ChunkedArrayList.java @@ -23,10 +23,10 @@ import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Simplified List implementation which stores elements as a list diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java index 388a087bbd346..f81a429b5d422 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java @@ -21,7 +21,7 @@ import java.nio.channels.ClosedChannelException; import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A closeable object that maintains a reference count. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java index 4023995941f5c..5bf773cef3836 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java @@ -51,6 +51,11 @@ public class CrcComposer { /** * Returns a CrcComposer which will collapse all ingested CRCs into a single * value. + * + * @param type type. + * @param bytesPerCrcHint bytesPerCrcHint. + * @throws IOException raised on errors performing I/O. + * @return a CrcComposer which will collapse all ingested CRCs into a single value. */ public static CrcComposer newCrcComposer( DataChecksum.Type type, long bytesPerCrcHint) @@ -67,6 +72,13 @@ public static CrcComposer newCrcComposer( * final digest, each corresponding to 10 underlying data bytes. Using * a stripeLength greater than the total underlying data size is equivalent * to using a non-striped CrcComposer. + * + * @param type type. + * @param bytesPerCrcHint bytesPerCrcHint. + * @param stripeLength stripeLength. + * @return a CrcComposer which will collapse CRCs for every combined. + * underlying data size which aligns with the specified stripe boundary. + * @throws IOException raised on errors performing I/O. */ public static CrcComposer newStripedCrcComposer( DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) @@ -102,7 +114,11 @@ public static CrcComposer newStripedCrcComposer( * each CRC expected to correspond to exactly {@code bytesPerCrc} underlying * data bytes. * + * @param crcBuffer crcBuffer. + * @param offset offset. * @param length must be a multiple of the expected byte-size of a CRC. + * @param bytesPerCrc bytesPerCrc. + * @throws IOException raised on errors performing I/O. 
*/ public void update( byte[] crcBuffer, int offset, int length, long bytesPerCrc) @@ -125,6 +141,11 @@ public void update( * Composes {@code numChecksumsToRead} additional CRCs into the current digest * out of {@code checksumIn}, with each CRC expected to correspond to exactly * {@code bytesPerCrc} underlying data bytes. + * + * @param checksumIn checksumIn. + * @param numChecksumsToRead numChecksumsToRead. + * @param bytesPerCrc bytesPerCrc. + * @throws IOException raised on errors performing I/O. */ public void update( DataInputStream checksumIn, long numChecksumsToRead, long bytesPerCrc) @@ -138,6 +159,10 @@ public void update( /** * Updates with a single additional CRC which corresponds to an underlying * data size of {@code bytesPerCrc}. + * + * @param crcB crcB. + * @param bytesPerCrc bytesPerCrc. + * @throws IOException raised on errors performing I/O. */ public void update(int crcB, long bytesPerCrc) throws IOException { if (curCompositeCrc == 0) { @@ -173,6 +198,8 @@ public void update(int crcB, long bytesPerCrc) throws IOException { * total sum bytesPerCrc divided by stripeLength. If the sum of bytesPerCrc * is not a multiple of stripeLength, then the last CRC in the array * corresponds to totalLength % stripeLength underlying data bytes. + * + * @return byte representation of composed CRCs. 
*/ public byte[] digest() { if (curPositionInStripe > 0) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java index 42eaf148d64c3..c8183b042fb1a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java @@ -44,6 +44,10 @@ private CrcUtil() { * Compute x^({@code lengthBytes} * 8) mod {@code mod}, where {@code mod} is * in "reversed" (little-endian) format such that {@code mod & 1} represents * x^31 and has an implicit term x^32. + * + * @param lengthBytes lengthBytes. + * @param mod mod. + * @return monomial. */ public static int getMonomial(long lengthBytes, int mod) { if (lengthBytes == 0) { @@ -73,7 +77,13 @@ public static int getMonomial(long lengthBytes, int mod) { } /** + * composeWithMonomial. + * + * @param crcA crcA. + * @param crcB crcB. * @param monomial Precomputed x^(lengthBInBytes * 8) mod {@code mod} + * @param mod mod. + * @return compose with monomial. */ public static int composeWithMonomial( int crcA, int crcB, int monomial, int mod) { @@ -81,7 +91,13 @@ public static int composeWithMonomial( } /** + * compose. + * + * @param crcA crcA. + * @param crcB crcB. * @param lengthB length of content corresponding to {@code crcB}, in bytes. + * @param mod mod. + * @return compose result. */ public static int compose(int crcA, int crcB, long lengthB, int mod) { int monomial = getMonomial(lengthB, mod); @@ -91,6 +107,8 @@ public static int compose(int crcA, int crcB, long lengthB, int mod) { /** * @return 4-byte array holding the big-endian representation of * {@code value}. + * + * @param value value. 
*/ public static byte[] intToBytes(int value) { byte[] buf = new byte[4]; @@ -110,6 +128,11 @@ public static byte[] intToBytes(int value) { * Writes big-endian representation of {@code value} into {@code buf} * starting at {@code offset}. buf.length must be greater than or * equal to offset + 4. + * + * @param buf destination buffer. + * @param offset offset. + * @param value value. + * @throws IOException raised on errors performing I/O. */ public static void writeInt(byte[] buf, int offset, int value) throws IOException { @@ -127,6 +150,11 @@ public static void writeInt(byte[] buf, int offset, int value) /** * Reads 4-byte big-endian int value from {@code buf} starting at * {@code offset}. buf.length must be greater than or equal to offset + 4. + * + * @param offset offset. + * @param buf buf. + * @return int. + * @throws IOException raised on errors performing I/O. */ public static int readInt(byte[] buf, int offset) throws IOException { @@ -146,6 +174,10 @@ public static int readInt(byte[] buf, int offset) * For use with debug statements; verifies bytes.length on creation, * expecting it to represent exactly one CRC, and returns a hex * formatted value. + * + * @param bytes bytes. + * @throws IOException raised on errors performing I/O. + * @return a hex formatted value. */ public static String toSingleCrcString(final byte[] bytes) throws IOException { @@ -161,6 +193,10 @@ public static String toSingleCrcString(final byte[] bytes) * For use with debug statements; verifies bytes.length on creation, * expecting it to be divisible by CRC byte size, and returns a list of * hex formatted values. + * + * @param bytes bytes. + * @throws IOException raised on errors performing I/O. + * @return a list of hex formatted values. 
*/ public static String toMultiCrcString(final byte[] bytes) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Daemon.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Daemon.java index 3b95db6693e03..f735b82e4289b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Daemon.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Daemon.java @@ -52,14 +52,21 @@ public Daemon() { super(); } - /** Construct a daemon thread. */ + /** + * Construct a daemon thread. + * @param runnable runnable. + */ public Daemon(Runnable runnable) { super(runnable); this.runnable = runnable; this.setName(((Object)runnable).toString()); } - /** Construct a daemon thread to be part of a specified thread group. */ + /** + * Construct a daemon thread to be part of a specified thread group. + * @param group thread group. + * @param runnable runnable. + */ public Daemon(ThreadGroup group, Runnable runnable) { super(group, runnable); this.runnable = runnable; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java index 32a0adca1979a..1c37d5944c6f2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java @@ -69,7 +69,12 @@ public enum Type { this.size = size; } - /** @return the type corresponding to the id. */ + /** + * the type corresponding to the id. + * + * @return the type corresponding to the id. + * @param id id. + */ public static Type valueOf(int id) { if (id < 0 || id >= values().length) { throw new IllegalArgumentException("id=" + id @@ -82,6 +87,8 @@ public static Type valueOf(int id) { /** * Create a Crc32 Checksum object. 
The implementation of the Crc32 algorithm * is chosen depending on the platform. + * + * @return Checksum. */ public static Checksum newCrc32() { return new CRC32(); @@ -105,6 +112,9 @@ static Checksum newCrc32C() { } /** + * getCrcPolynomialForType. + * + * @param type type. * @return the int representation of the polynomial associated with the * CRC {@code type}, suitable for use with further CRC arithmetic. * @throws IOException if there is no CRC polynomial applicable @@ -141,7 +151,11 @@ public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { /** * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. + * + * @param bytes bytes. + * @param offset offset. * @return DataChecksum of the type in the array or null in case of an error. + * @throws IOException raised on errors performing I/O. */ public static DataChecksum newDataChecksum(byte[] bytes, int offset) throws IOException { @@ -168,7 +182,12 @@ public static DataChecksum newDataChecksum(byte[] bytes, int offset) /** * This constructs a DataChecksum by reading HEADER_LEN bytes from input - * stream in + * stream in. + * + * @param in data input stream. + * @throws IOException raised on errors performing I/O. + * @return DataChecksum by reading HEADER_LEN + * bytes from input stream. */ public static DataChecksum newDataChecksum( DataInputStream in ) throws IOException { @@ -194,6 +213,9 @@ private static Type mapByteToChecksumType(int type) /** * Writes the checksum header to the output stream out. + * + * @param out output stream. + * @throws IOException raised on errors performing I/O. */ public void writeHeader( DataOutputStream out ) throws IOException { @@ -215,7 +237,11 @@ public byte[] getHeader() { /** * Writes the current checksum to the stream. * If reset is true, then resets the checksum. + * + * @param out out. + * @param reset reset. * @return number of bytes written. Will be equal to getChecksumSize(); + * @throws IOException raised on errors performing I/O. 
*/ public int writeValue( DataOutputStream out, boolean reset ) throws IOException { @@ -239,7 +265,12 @@ public int writeValue( DataOutputStream out, boolean reset ) /** * Writes the current checksum to a buffer. * If reset is true, then resets the checksum. + * + * @param buf buf. + * @param offset offset. + * @param reset reset. * @return number of bytes written. Will be equal to getChecksumSize(); + * @throws IOException raised on errors performing I/O. */ public int writeValue( byte[] buf, int offset, boolean reset ) throws IOException { @@ -266,6 +297,9 @@ public int writeValue( byte[] buf, int offset, boolean reset ) /** * Compares the checksum located at buf[offset] with the current checksum. + * + * @param buf buf. + * @param offset offset. * @return true if the checksum matches and false otherwise. */ public boolean compare( byte buf[], int offset ) { @@ -295,12 +329,19 @@ public Type getChecksumType() { return type; } - /** @return the size for a checksum. */ + /** + * the size for a checksum. + * @return the size for a checksum. + */ public int getChecksumSize() { return type.size; } - /** @return the required checksum size given the data length. */ + /** + * the required checksum size given the data length. + * @param dataSize data size. + * @return the required checksum size given the data length. + */ public int getChecksumSize(int dataSize) { return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize(); } @@ -525,6 +566,12 @@ public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) { /** * Implementation of chunked calculation specifically on byte arrays. This * is to avoid the copy when dealing with ByteBuffers that have array backing. + * + * @param data data. + * @param dataOffset dataOffset. + * @param dataLength dataLength. + * @param sums sums. + * @param sumsOffset sumsOffset. 
*/ public void calculateChunkedSums( byte[] data, int dataOffset, int dataLength, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DirectBufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DirectBufferPool.java index 510938b7fff95..2b9b3653216ed 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DirectBufferPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DirectBufferPool.java @@ -26,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceStability; /** @@ -53,6 +53,9 @@ public class DirectBufferPool { * Allocate a direct buffer of the specified size, in bytes. * If a pooled buffer is available, returns that. Otherwise * allocates a new one. + * + * @param size size. + * @return ByteBuffer. 
*/ public ByteBuffer getBuffer(int size) { Queue> list = buffersBySize.get(size); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java index 2ee53dc595f99..0a7c8f01cdf7c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java @@ -25,7 +25,7 @@ import java.util.UUID; import java.util.concurrent.atomic.AtomicReference; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -70,8 +70,8 @@ public DiskOutOfSpaceException(String msg) { * Create the directory if it doesn't exist and check that dir is readable, * writable and executable * - * @param dir - * @throws DiskErrorException + * @param dir dir. + * @throws DiskErrorException disk problem. */ public static void checkDir(File dir) throws DiskErrorException { checkDirInternal(dir); @@ -82,8 +82,8 @@ public static void checkDir(File dir) throws DiskErrorException { * readable, writable and executable. Perform some disk IO to * ensure that the disk is usable for writes. * - * @param dir - * @throws DiskErrorException + * @param dir dir. + * @throws DiskErrorException disk problem. */ public static void checkDirWithDiskIo(File dir) throws DiskErrorException { @@ -107,8 +107,8 @@ private static void checkDirInternal(File dir) * @param localFS local filesystem * @param dir directory * @param expected permission - * @throws DiskErrorException - * @throws IOException + * @throws DiskErrorException disk problem. + * @throws IOException raised on errors performing I/O. 
*/ public static void checkDir(LocalFileSystem localFS, Path dir, FsPermission expected) @@ -125,8 +125,8 @@ public static void checkDir(LocalFileSystem localFS, Path dir, * @param localFS local filesystem * @param dir directory * @param expected permission - * @throws DiskErrorException - * @throws IOException + * @throws DiskErrorException disk problem. + * @throws IOException raised on errors performing I/O. */ public static void checkDirWithDiskIo(LocalFileSystem localFS, Path dir, FsPermission expected) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskValidatorFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskValidatorFactory.java index 7d04db23ca76c..18504640f151b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskValidatorFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskValidatorFactory.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.DiskChecker.DiskErrorException; @@ -40,6 +40,7 @@ private DiskValidatorFactory() { /** * Returns a {@link DiskValidator} instance corresponding to the passed clazz. * @param clazz a class extends {@link DiskValidator} + * @return disk validator. */ public static DiskValidator getInstance(Class clazz) { @@ -66,6 +67,7 @@ private DiskValidatorFactory() { * or "read-write" for {@link ReadWriteDiskValidator}. * @param diskValidator canonical class name, for example, "basic" * @throws DiskErrorException if the class cannot be located + * @return disk validator. 
*/ @SuppressWarnings("unchecked") public static DiskValidator getInstance(String diskValidator) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java index dbe6663b2e715..dd47aeeefac2c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.util; +import java.util.concurrent.atomic.AtomicReference; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.slf4j.Logger; @@ -36,8 +38,10 @@ public final class ExitUtil { LOG = LoggerFactory.getLogger(ExitUtil.class.getName()); private static volatile boolean systemExitDisabled = false; private static volatile boolean systemHaltDisabled = false; - private static volatile ExitException firstExitException; - private static volatile HaltException firstHaltException; + private static final AtomicReference FIRST_EXIT_EXCEPTION = + new AtomicReference<>(); + private static final AtomicReference FIRST_HALT_EXCEPTION = + new AtomicReference<>(); /** Message raised from an exit exception if none were provided: {@value}. */ public static final String EXIT_EXCEPTION_MESSAGE = "ExitException"; /** Message raised from a halt exception if none were provided: {@value}. */ @@ -159,28 +163,29 @@ public static void disableSystemHalt() { */ public static boolean terminateCalled() { // Either we set this member or we actually called System#exit - return firstExitException != null; + return FIRST_EXIT_EXCEPTION.get() != null; } /** * @return true if halt has been called. 
*/ public static boolean haltCalled() { - return firstHaltException != null; + // Either we set this member or we actually called Runtime#halt + return FIRST_HALT_EXCEPTION.get() != null; } /** - * @return the first ExitException thrown, null if none thrown yet. + * @return the first {@code ExitException} thrown, null if none thrown yet. */ public static ExitException getFirstExitException() { - return firstExitException; + return FIRST_EXIT_EXCEPTION.get(); } /** * @return the first {@code HaltException} thrown, null if none thrown yet. */ public static HaltException getFirstHaltException() { - return firstHaltException; + return FIRST_HALT_EXCEPTION.get(); } /** @@ -188,64 +193,136 @@ public static HaltException getFirstHaltException() { * where one test in the suite expects an exit but others do not. */ public static void resetFirstExitException() { - firstExitException = null; + FIRST_EXIT_EXCEPTION.set(null); } + /** + * Reset the tracking of process termination. This is for use in unit tests + * where one test in the suite expects a halt but others do not. + */ public static void resetFirstHaltException() { - firstHaltException = null; + FIRST_HALT_EXCEPTION.set(null); } /** + * Suppresses if legit and returns the first non-null of the two. Legit means + * suppressor if neither null nor suppressed. + * @param suppressor Throwable that suppresses suppressed + * @param suppressed Throwable that is suppressed by suppressor + * @return suppressor if not null, suppressed otherwise + */ + private static T addSuppressed(T suppressor, T suppressed) { + if (suppressor == null) { + return suppressed; + } + if (suppressor != suppressed) { + suppressor.addSuppressed(suppressed); + } + return suppressor; + } + + /** + * Exits the JVM if exit is enabled, rethrow provided exception or any raised error otherwise. * Inner termination: either exit with the exception's exit code, * or, if system exits are disabled, rethrow the exception. 
* @param ee exit exception + * @throws ExitException if {@link System#exit(int)} is disabled and not suppressed by an Error + * @throws Error if {@link System#exit(int)} is disabled and one Error arises, suppressing + * anything else, even ee */ - public static synchronized void terminate(ExitException ee) - throws ExitException { - int status = ee.getExitCode(); - String msg = ee.getMessage(); + public static void terminate(final ExitException ee) throws ExitException { + final int status = ee.getExitCode(); + Error caught = null; if (status != 0) { - //exit indicates a problem, log it - LOG.debug("Exiting with status {}: {}", status, msg, ee); - LOG.info("Exiting with status {}: {}", status, msg); + try { + // exit indicates a problem, log it + String msg = ee.getMessage(); + LOG.debug("Exiting with status {}: {}", status, msg, ee); + LOG.info("Exiting with status {}: {}", status, msg); + } catch (Error e) { + // errors have higher priority than ExitException, it may be re-thrown. + // OOM and ThreadDeath are 2 examples of Errors to re-throw + caught = e; + } catch (Throwable t) { + // all other kind of throwables are suppressed + addSuppressed(ee, t); + } } if (systemExitDisabled) { - LOG.error("Terminate called", ee); - if (!terminateCalled()) { - firstExitException = ee; + try { + LOG.error("Terminate called", ee); + } catch (Error e) { + // errors have higher priority again, if it's a 2nd error, the 1st one suppresses it + caught = addSuppressed(caught, e); + } catch (Throwable t) { + // all other kind of throwables are suppressed + addSuppressed(ee, t); } + FIRST_EXIT_EXCEPTION.compareAndSet(null, ee); + if (caught != null) { + caught.addSuppressed(ee); + throw caught; + } + // not suppressed by a higher priority error throw ee; + } else { + // when exit is enabled, whatever Throwable happened, we exit the VM + System.exit(status); } - System.exit(status); } /** - * Forcibly terminates the currently running Java virtual machine. 
- * The exception argument is rethrown if JVM halting is disabled. - * @param ee the exception containing the status code, message and any stack + * Halts the JVM if halt is enabled, rethrow provided exception or any raised error otherwise. + * If halt is disabled, this method throws either the exception argument if no + * error arises, the first error if at least one arises, suppressing he. + * If halt is enabled, all throwables are caught, even errors. + * + * @param he the exception containing the status code, message and any stack * trace. - * @throws HaltException if {@link Runtime#halt(int)} is disabled. + * @throws HaltException if {@link Runtime#halt(int)} is disabled and not suppressed by an Error + * @throws Error if {@link Runtime#halt(int)} is disabled and an Error arises, suppressing + * anything else, even he */ - public static synchronized void halt(HaltException ee) throws HaltException { - int status = ee.getExitCode(); - String msg = ee.getMessage(); - try { - if (status != 0) { - //exit indicates a problem, log it - LOG.debug("Halt with status {}: {}", status, msg, ee); - LOG.info("Halt with status {}: {}", status, msg, msg); + public static void halt(final HaltException he) throws HaltException { + final int status = he.getExitCode(); + Error caught = null; + if (status != 0) { + try { + // exit indicates a problem, log it + String msg = he.getMessage(); + LOG.info("Halt with status {}: {}", status, msg, he); + } catch (Error e) { + // errors have higher priority than HaltException, it may be re-thrown. 
+ // OOM and ThreadDeath are 2 examples of Errors to re-throw + caught = e; + } catch (Throwable t) { + // all other kinds of throwables are suppressed + addSuppressed(he, t); } - } catch (Exception ignored) { - // ignore exceptions here, as it may be due to an out of memory situation } + // systemHaltDisabled is volatile and not used in scenarios needing atomicity, + // thus it does not need a synchronized access nor an atomic access if (systemHaltDisabled) { - LOG.error("Halt called", ee); - if (!haltCalled()) { - firstHaltException = ee; + try { + LOG.error("Halt called", he); + } catch (Error e) { + // errors have higher priority again, if it's a 2nd error, the 1st one suppresses it + caught = addSuppressed(caught, e); + } catch (Throwable t) { + // all other kinds of throwables are suppressed + addSuppressed(he, t); } - throw ee; + FIRST_HALT_EXCEPTION.compareAndSet(null, he); + if (caught != null) { + caught.addSuppressed(he); + throw caught; + } + // not suppressed by a higher priority error + throw he; + } else { + // when halt is enabled, whatever Throwable happened, we halt the VM + Runtime.getRuntime().halt(status); } - Runtime.getRuntime().halt(status); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FindClass.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FindClass.java index 690d09755171f..846af7fb74372 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FindClass.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/FindClass.java @@ -17,7 +17,7 @@ package org.apache.hadoop.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.util.StringUtils; diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java index 4247eb7050b5a..042f95b2c081f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java @@ -18,7 +18,7 @@ package org.apache.hadoop.util; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.lang.management.GarbageCollectorMXBean; import java.lang.management.ManagementFactory; @@ -56,6 +56,8 @@ public static class Builder { /** * Set observation window size in milliseconds. + * @param value value. + * @return window size in milliseconds. */ public Builder observationWindowMs(long value) { this.observationWindowMs = value; @@ -64,6 +66,8 @@ public Builder observationWindowMs(long value) { /** * Set sleep interval in milliseconds. + * @param value value. + * @return IntervalMs. */ public Builder sleepIntervalMs(long value) { this.sleepIntervalMs = value; @@ -72,6 +76,8 @@ public Builder sleepIntervalMs(long value) { /** * Set the max GC time percentage that triggers the alert handler. + * @param value value. + * @return max GC time percentage. */ public Builder maxGcTimePercentage(int value) { this.maxGcTimePercentage = value; @@ -80,6 +86,8 @@ public Builder maxGcTimePercentage(int value) { /** * Set the GC alert handler. + * @param value value. + * @return GC alert handler. */ public Builder gcTimeAlertHandler(GcTimeAlertHandler value) { this.handler = value; @@ -169,7 +177,10 @@ public void shutdown() { shouldRun = false; } - /** Returns a copy of the most recent data measured by this monitor. */ + /** + * Returns a copy of the most recent data measured by this monitor. 
+ * @return a copy of the most recent data measured by this monitor + */ public GcData getLatestGcData() { return curData.clone(); } @@ -229,22 +240,34 @@ public static class GcData implements Cloneable { private long gcMonitorRunTime, totalGcTime, totalGcCount; private int gcTimePercentage; - /** Returns the absolute timestamp when this measurement was taken. */ + /** + * Returns the absolute timestamp when this measurement was taken. + * @return timestamp. + */ public long getTimestamp() { return timestamp; } - /** Returns the time since the start of the associated GcTimeMonitor. */ + /** + * Returns the time since the start of the associated GcTimeMonitor. + * @return GcMonitorRunTime. + */ public long getGcMonitorRunTime() { return gcMonitorRunTime; } - /** Returns accumulated GC time since this JVM started. */ + /** + * Returns accumulated GC time since this JVM started. + * @return AccumulatedGcTime. + */ public long getAccumulatedGcTime() { return totalGcTime; } - /** Returns the accumulated number of GC pauses since this JVM started. */ + /** + * Returns the accumulated number of GC pauses since this JVM started. + * @return AccumulatedGcCount. + */ public long getAccumulatedGcCount() { return totalGcCount; } @@ -252,6 +275,8 @@ public long getAccumulatedGcCount() { /** * Returns the percentage (0..100) of time that the JVM spent in GC pauses * within the observation window of the associated GcTimeMonitor. + * + * @return GcTimePercentage. 
*/ public int getGcTimePercentage() { return gcTimePercentage; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java index 8ca7a904fdc84..81e1fb5d21234 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java @@ -57,8 +57,9 @@ * *

      Generic Options

      * - *

      The supported generic options are: - *

      + * 

      The supported generic options are:

      + *
      + *
        *     -conf <configuration file>     specify a configuration file
        *     -D <property=value>            use value for given property
        *     -fs <local|namenode:port>      specify a namenode
      @@ -69,13 +70,15 @@
        *                            jar files to include in the classpath.
        *     -archives <comma separated list of archives>    specify comma
        *             separated archives to be unarchived on the compute machines.
      -
      - * 

      + *

      + *
      * *

      The general command line syntax is:

      - *

      
      + * 
      + * 
        * bin/hadoop command [genericOptions] [commandOptions]
      - * 

      + * + *

      * *

      Generic command line arguments might modify * Configuration objects, given to constructors.

      @@ -83,7 +86,9 @@ *

      The functionality is implemented using Commons CLI.

      * *

      Examples:

      - *

      + *
      + * 
      + *
        * $ bin/hadoop dfs -fs darwin:8020 -ls /data
        * list /data directory in dfs with namenode darwin:8020
        * 
      @@ -105,7 +110,9 @@
        * $ bin/hadoop jar -libjars testlib.jar 
        * -archives test.tgz -files file.txt inputjar args
        * job submission with libjars, files and archives
      - * 

      + *

      + *
      + * * * @see Tool * @see ToolRunner @@ -124,7 +131,7 @@ public class GenericOptionsParser { * Create an options parser with the given options to parse the args. * @param opts the options * @param args the command line arguments - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public GenericOptionsParser(Options opts, String[] args) throws IOException { @@ -134,7 +141,7 @@ public GenericOptionsParser(Options opts, String[] args) /** * Create an options parser to parse the args. * @param args the command line arguments - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public GenericOptionsParser(String[] args) throws IOException { @@ -150,7 +157,7 @@ public GenericOptionsParser(String[] args) * * @param conf the Configuration to modify. * @param args command-line arguments. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public GenericOptionsParser(Configuration conf, String[] args) throws IOException { @@ -167,7 +174,7 @@ public GenericOptionsParser(Configuration conf, String[] args) * @param conf the configuration to modify * @param options options built by the caller * @param args User-specified arguments - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public GenericOptionsParser(Configuration conf, Options options, String[] args) throws IOException { @@ -217,9 +224,10 @@ public boolean isParseSuccessful() { } /** - * Specify properties of each generic option. + * @return Specify properties of each generic option. * Important: as {@link OptionBuilder} is not thread safe, subclasses * must synchronize use on {@code OptionBuilder.class} + * @param opts input opts. */ @SuppressWarnings("static-access") protected Options buildGeneralOptions(Options opts) { @@ -359,9 +367,9 @@ private void processGeneralOptions(CommandLine line) throws IOException { /** * If libjars are set in the conf, parse the libjars. 
- * @param conf + * @param conf input Configuration. * @return libjar urls - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static URL[] getLibJars(Configuration conf) throws IOException { String jars = conf.get("tmpjars"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java index 0aba34845a676..df81bf1209360 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java @@ -51,6 +51,8 @@ public static Class getClass(T t) { * T[]. * @param c the Class object of the items in the list * @param list the list to convert + * @param Generics Type T. + * @return T Array. */ public static T[] toArray(Class c, List list) { @@ -67,8 +69,10 @@ public static T[] toArray(Class c, List list) * Converts the given List<T> to a an array of * T[]. * @param list the list to convert + * @param Generics Type T. * @throws ArrayIndexOutOfBoundsException if the list is empty. * Use {@link #toArray(Class, List)} if the list may be empty. + * @return T Array. 
*/ public static T[] toArray(List list) { return toArray(getClass(list.get(0)), list); @@ -85,7 +89,7 @@ public static boolean isLog4jLogger(Class clazz) { } Logger log = LoggerFactory.getLogger(clazz); try { - Class log4jClass = Class.forName("org.slf4j.impl.Log4jLoggerAdapter"); + Class log4jClass = Class.forName("org.slf4j.impl.Reload4jLoggerAdapter"); return log4jClass.isInstance(log); } catch (ClassNotFoundException e) { return false; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java index 5141740a3d23e..054f6235fe925 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java @@ -147,8 +147,8 @@ public static void readXmlFileToMapWithFileInputStream(String type, String filename, InputStream fileInputStream, Map map) throws IOException { Document dom; - DocumentBuilderFactory builder = DocumentBuilderFactory.newInstance(); try { + DocumentBuilderFactory builder = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder db = builder.newDocumentBuilder(); dom = db.parse(fileInputStream); // Examples: diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IPList.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IPList.java index 3a2616376fbac..71cdcf11656ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IPList.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IPList.java @@ -26,7 +26,7 @@ public interface IPList { /** * returns true if the ipAddress is in the IPList. - * @param ipAddress + * @param ipAddress ipAddress. 
* @return boolean value indicating whether the ipAddress is in the IPList */ public abstract boolean isIn(String ipAddress); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdGenerator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdGenerator.java index c14727a3771da..49ae8a8605e98 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdGenerator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdGenerator.java @@ -26,6 +26,9 @@ @InterfaceAudience.Private public interface IdGenerator { - /** Increment and then return the next value. */ + /** + * Increment and then return the next value. + * @return long value. + */ public long nextValue(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdentityHashStore.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdentityHashStore.java index 3ae4bbac6591d..5c4bfb15697a8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdentityHashStore.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IdentityHashStore.java @@ -21,7 +21,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The IdentityHashStore stores (key, value) mappings in an array. @@ -113,6 +113,9 @@ private void putInternal(Object k, Object v) { * Inserting a new (key, value) never overwrites a previous one. * In other words, you can insert the same key multiple times and it will * lead to multiple entries. + * + * @param k Generics Type k. + * @param v Generics Type v. 
*/ public void put(K k, V v) { Preconditions.checkNotNull(k); @@ -144,6 +147,9 @@ private int getElementIndex(K k) { /** * Retrieve a value associated with a given key. + * + * @param k Generics Type k. + * @return Generics Type V. */ public V get(K k) { int index = getElementIndex(k); @@ -156,6 +162,9 @@ public V get(K k) { /** * Retrieve a value associated with a given key, and delete the * relevant entry. + * + * @param k Generics Type k. + * @return Generics Type V. */ public V remove(K k) { int index = getElementIndex(k); @@ -187,6 +196,8 @@ public interface Visitor { /** * Visit all key, value pairs in the IdentityHashStore. + * + * @param visitor visitor. */ public void visitAll(Visitor visitor) { int length = buffer == null ? 0 : buffer.length; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSortable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSortable.java index 1aa036e95b735..99472e18f1daf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSortable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSortable.java @@ -31,11 +31,18 @@ public interface IndexedSortable { /** * Compare items at the given addresses consistent with the semantics of * {@link java.util.Comparator#compare(Object, Object)}. + * + * @param i i(int). + * @param j j(int). + * @return compare result. */ int compare(int i, int j); /** * Swap items at the given addresses. + * + * @param i i(int). + * @param j j(int). 
*/ void swap(int i, int j); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSorter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSorter.java index bdd024302c99f..252efe3e4de44 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSorter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IndexedSorter.java @@ -38,6 +38,10 @@ public interface IndexedSorter { * entry. * @see IndexedSortable#compare * @see IndexedSortable#swap + * + * @param r r. + * @param l l. + * @param s s. */ void sort(IndexedSortable s, int l, int r); @@ -45,6 +49,10 @@ public interface IndexedSorter { * Same as {@link #sort(IndexedSortable,int,int)}, but indicate progress * periodically. * @see #sort(IndexedSortable,int,int) + * @param s s. + * @param l l. + * @param r r. + * @param rep rep. */ void sort(IndexedSortable s, int l, int r, Progressable rep); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedLock.java index cc0ebdf8b3e39..e7c7ad14fd09d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedLock.java @@ -26,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; /** @@ -150,23 +150,23 @@ public Condition newCondition() { @VisibleForTesting void logWarning(long lockHeldTime, SuppressedSnapshot stats) { - logger.warn(String.format("Lock held time above threshold: " + + logger.warn(String.format("Lock held 
time above threshold(%d ms): " + "lock identifier: %s " + "lockHeldTimeMs=%d ms. Suppressed %d lock warnings. " + "Longest suppressed LockHeldTimeMs=%d. " + "The stack trace is: %s" , - name, lockHeldTime, stats.getSuppressedCount(), + lockWarningThreshold, name, lockHeldTime, stats.getSuppressedCount(), stats.getMaxSuppressedWait(), StringUtils.getStackTrace(Thread.currentThread()))); } @VisibleForTesting void logWaitWarning(long lockWaitTime, SuppressedSnapshot stats) { - logger.warn(String.format("Waited above threshold to acquire lock: " + + logger.warn(String.format("Waited above threshold(%d ms) to acquire lock: " + "lock identifier: %s " + "waitTimeMs=%d ms. Suppressed %d lock wait warnings. " + "Longest suppressed WaitTimeMs=%d. " + - "The stack trace is: %s", name, lockWaitTime, + "The stack trace is: %s", lockWarningThreshold, name, lockWaitTime, stats.getSuppressedCount(), stats.getMaxSuppressedWait(), StringUtils.getStackTrace(Thread.currentThread()))); } @@ -185,6 +185,7 @@ protected void startLockTiming() { * * @param acquireTime - timestamp just after acquiring the lock. * @param releaseTime - timestamp just before releasing the lock. + * @param checkLockHeld checkLockHeld. 
*/ protected void check(long acquireTime, long releaseTime, boolean checkLockHeld) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java index 8ab392ed041d0..8417246f0467c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java @@ -22,7 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java index 6842166930d5f..710861c761ae3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java @@ -22,7 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IntrusiveCollection.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IntrusiveCollection.java index 1ffb7db3febff..ff478484f9a7a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IntrusiveCollection.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/IntrusiveCollection.java @@ -23,7 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,38 +50,59 @@ public interface Element { /** * Insert this element into the list. This is the first thing that will * be called on the element. + * + * @param list list. + * @param prev prev. + * @param next next. */ void insertInternal(IntrusiveCollection list, Element prev, Element next); /** * Set the prev pointer of an element already in the list. + * + * @param list list. + * @param prev prev. */ void setPrev(IntrusiveCollection list, Element prev); /** * Set the next pointer of an element already in the list. + * + * @param list list. + * @param next next. */ void setNext(IntrusiveCollection list, Element next); /** * Remove an element from the list. This is the last thing that will be * called on an element. + * + * @param list list. */ void removeInternal(IntrusiveCollection list); /** * Get the prev pointer of an element. + * + * @param list list. + * @return Element. */ Element getPrev(IntrusiveCollection list); /** * Get the next pointer of an element. + * + * @param list list. + * @return Element. */ Element getNext(IntrusiveCollection list); /** * Returns true if this element is in the provided list. + * + * @param list list. + * @return if this element is in the provided list true, not false. */ boolean isInList(IntrusiveCollection list); } @@ -261,6 +282,7 @@ public T[] toArray(T[] array) { * Add an element to the end of the list. * * @param elem The new element to add. + * @return add result. */ @Override public boolean add(E elem) { @@ -282,6 +304,7 @@ public boolean add(E elem) { * Add an element to the front of the list. * * @param elem The new element to add. 
+ * @return if addFirst success true, not false. */ public boolean addFirst(Element elem) { if (elem == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java index e043b1dc382c0..d634bef644c35 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java @@ -18,6 +18,7 @@ package org.apache.hadoop.util; +import javax.annotation.Nullable; import java.io.EOFException; import java.io.File; import java.io.FileNotFoundException; @@ -35,7 +36,7 @@ import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.SerializationFeature; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,8 +44,15 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FutureDataInputStreamBuilder; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; + +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; /** * Support for marshalling objects to and from JSON. 
@@ -167,6 +175,9 @@ public synchronized T fromJsonStream(InputStream stream) throws IOException { @SuppressWarnings("unchecked") public synchronized T load(File jsonFile) throws IOException, JsonParseException, JsonMappingException { + if (!jsonFile.exists()) { + throw new FileNotFoundException("No such file: " + jsonFile); + } if (!jsonFile.isFile()) { throw new FileNotFoundException("Not a file: " + jsonFile); } @@ -176,7 +187,7 @@ public synchronized T load(File jsonFile) try { return mapper.readValue(jsonFile, classType); } catch (IOException e) { - LOG.error("Exception while parsing json file {}", jsonFile, e); + LOG.warn("Exception while parsing json file {}", jsonFile, e); throw e; } } @@ -229,30 +240,46 @@ public T fromInstance(T instance) throws IOException { /** * Load from a Hadoop filesystem. - * There's a check for data availability after the file is open, by - * raising an EOFException if stream.available == 0. - * This allows for a meaningful exception without the round trip overhead - * of a getFileStatus call before opening the file. It may be brittle - * against an FS stream which doesn't return a value here, but the - * standard filesystems all do. - * JSON parsing and mapping problems - * are converted to IOEs. * @param fs filesystem * @param path path * @return a loaded object - * @throws IOException IO or JSON parse problems + * @throws PathIOException JSON parse problem + * @throws IOException IO problems */ public T load(FileSystem fs, Path path) throws IOException { - try (FSDataInputStream dataInputStream = fs.open(path)) { - // throw an EOF exception if there is no data available. - if (dataInputStream.available() == 0) { - throw new EOFException("No data in " + path); - } + return load(fs, path, null); + } + + /** + * Load from a Hadoop filesystem. + * If a file status is supplied, it's passed in to the openFile() + * call so that FS implementations can optimize their opening. 
+ * @param fs filesystem + * @param path path + * @param status status of the file to open. + * @return a loaded object + * @throws PathIOException JSON parse problem + * @throws EOFException file status references an empty file + * @throws IOException IO problems + */ + public T load(FileSystem fs, Path path, @Nullable FileStatus status) + throws IOException { + + if (status != null && status.getLen() == 0) { + throw new EOFException("No data in " + path); + } + FutureDataInputStreamBuilder builder = fs.openFile(path) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE); + if (status != null) { + builder.withFileStatus(status); + } + try (FSDataInputStream dataInputStream = + awaitFuture(builder.build())) { return fromJsonStream(dataInputStream); } catch (JsonProcessingException e) { - throw new IOException( - String.format("Failed to read JSON file \"%s\": %s", path, e), - e); + throw new PathIOException(path.toString(), + "Failed to read JSON file " + e, e); } } @@ -261,7 +288,8 @@ public T load(FileSystem fs, Path path) throws IOException { * @param fs filesystem * @param path path * @param overwrite should any existing file be overwritten - * @throws IOException IO exception + * @param instance instance + * @throws IOException IO exception. */ public void save(FileSystem fs, Path path, T instance, boolean overwrite) throws @@ -270,11 +298,12 @@ public void save(FileSystem fs, Path path, T instance, } /** - * Write the JSON as bytes, then close the file. + * Write the JSON as bytes, then close the stream. 
+ * @param instance instance to write * @param dataOutputStream an output stream that will always be closed * @throws IOException on any failure */ - private void writeJsonAsBytes(T instance, + public void writeJsonAsBytes(T instance, OutputStream dataOutputStream) throws IOException { try { dataOutputStream.write(toBytes(instance)); @@ -298,6 +327,7 @@ public byte[] toBytes(T instance) throws IOException { * @param bytes byte array * @throws IOException IO problems * @throws EOFException not enough data + * @return the instance parsed from the byte array. */ public T fromBytes(byte[] bytes) throws IOException { return fromJson(new String(bytes, 0, bytes.length, UTF_8)); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java index 420ac8bc1851e..bbe8c64cc1ccd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java @@ -28,11 +28,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -217,6 +217,9 @@ public void run() { * This main function just leaks memory into a list.
Running this class * with a 1GB heap will very quickly go into "GC hell" and result in * log messages about the GC pauses. + * + * @param args args. + * @throws Exception Exception. */ @SuppressWarnings("resource") public static void main(String []args) throws Exception { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightCache.java index 79de1ac554476..57363bef3401c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightCache.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightCache.java @@ -24,8 +24,8 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A low memory footprint Cache which extends {@link LightWeightGSet}. @@ -59,10 +59,13 @@ public class LightWeightCache extends LightWeightGSet { * Entries of {@link LightWeightCache}. */ public static interface Entry extends LinkedElement { - /** Set the expiration time. */ + /** + * Set the expiration time. + * @param timeNano input timeNano. + */ public void setExpirationTime(long timeNano); - /** Get the expiration time. */ + /** @return Get the expiration time. 
*/ public long getExpirationTime(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java index 7c7878a71bdf2..eb543a7187eba 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightGSet.java @@ -27,7 +27,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A low memory footprint {@link GSet} implementation, @@ -52,10 +52,13 @@ public class LightWeightGSet implements GSet { * Elements of {@link LightWeightGSet}. */ public interface LinkedElement { - /** Set the next element. */ + /** + * Set the next element. + * @param next inputNext. + */ void setNext(LinkedElement next); - /** Get the next element. */ + /** @return Get the next element. */ LinkedElement getNext(); } @@ -177,6 +180,8 @@ public E put(final E element) { * Remove the element corresponding to the key, * given key.hashCode() == index. * + * @param key key. + * @param index index. * @return If such element exists, return it. * Otherwise, return null. */ @@ -270,7 +275,11 @@ public String toString() { return b.toString(); } - /** Print detailed information of this object. */ + /** + * Print detailed information of this object. + * + * @param out out. + */ public void printDetails(final PrintStream out) { out.print(this + ", entries = ["); for(int i = 0; i < entries.length; i++) { @@ -357,6 +366,10 @@ public void setTrackModification(boolean trackModification) { * Let e = round(log_2 t). * Then, we choose capacity = 2^e/(size of reference), * unless it is outside the close interval [1, 2^30]. 
+ * + * @param mapName mapName. + * @param percentage percentage. + * @return compute capacity. */ public static int computeCapacity(double percentage, String mapName) { return computeCapacity(Runtime.getRuntime().maxMemory(), percentage, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java index 0abcf989d1500..495a757c6175f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LightWeightResizableGSet.java @@ -94,6 +94,8 @@ public E put(final E element) { /** * Resize the internal table to given capacity. + * + * @param cap capacity. */ @SuppressWarnings("unchecked") protected void resize(int cap) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LimitInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LimitInputStream.java index bd646e0bcb608..de95e98e1f4dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LimitInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LimitInputStream.java @@ -19,8 +19,8 @@ package org.apache.hadoop.util; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; import java.io.FilterInputStream; import java.io.IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java 
index e2cd3048d5843..08bd8102b1388 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java @@ -25,6 +25,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.statistics.IOStatisticsSupport; import org.apache.hadoop.io.Text; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; @@ -42,7 +45,7 @@ */ @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable -public class LineReader implements Closeable { +public class LineReader implements Closeable, IOStatisticsSource { private static final int DEFAULT_BUFFER_SIZE = 64 * 1024; private int bufferSize = DEFAULT_BUFFER_SIZE; private InputStream in; @@ -86,7 +89,7 @@ public LineReader(InputStream in, int bufferSize) { * Configuration. * @param in input stream * @param conf configuration - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public LineReader(InputStream in, Configuration conf) throws IOException { this(in, conf.getInt(IO_FILE_BUFFER_SIZE_KEY, DEFAULT_BUFFER_SIZE)); @@ -130,7 +133,7 @@ public LineReader(InputStream in, int bufferSize, * @param in input stream * @param conf configuration * @param recordDelimiterBytes The delimiter - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public LineReader(InputStream in, Configuration conf, byte[] recordDelimiterBytes) throws IOException { @@ -143,12 +146,21 @@ public LineReader(InputStream in, Configuration conf, /** * Close the underlying stream. - * @throws IOException + * @throws IOException raised on errors performing I/O. 
*/ public void close() throws IOException { in.close(); } - + + /** + * Return any IOStatistics provided by the source. + * @return IO stats from the input stream. + */ + @Override + public IOStatistics getIOStatistics() { + return IOStatisticsSupport.retrieveIOStatistics(in); + } + /** * Read one line from the InputStream into the given Text. * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Lists.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Lists.java new file mode 100644 index 0000000000000..a9d0756e8e381 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Lists.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +/** + * Static utility methods pertaining to {@link List} instances. + * This class is Hadoop's internal use alternative to Guava's Lists + * utility class. 
+ * Javadocs for majority of APIs in this class are taken from Guava's Lists + * class from Guava release version 27.0-jre. + */ +@InterfaceAudience.Private +public final class Lists { + + private Lists() { + // empty + } + + /** + * Creates a mutable, empty {@code ArrayList} instance. + * + * @param Generics Type E. + * @return ArrayList Generics Type E. + */ + public static ArrayList newArrayList() { + return new ArrayList<>(); + } + + /** + * Creates a mutable {@code ArrayList} instance containing the given + * elements. + * + *

      Note that even when you do need the ability to add or remove, + * this method provides only a tiny bit of syntactic sugar for + * {@code newArrayList(} + * {@link Arrays#asList asList} + * {@code (...))}, or for creating an empty list then calling + * {@link Collections#addAll}. + * + * @param Generics Type E. + * @param elements elements. + * @return ArrayList Generics Type E. + */ + @SafeVarargs + public static ArrayList newArrayList(E... elements) { + if (elements == null) { + throw new NullPointerException(); + } + // Avoid integer overflow when a large array is passed in + int capacity = computeArrayListCapacity(elements.length); + ArrayList list = new ArrayList<>(capacity); + Collections.addAll(list, elements); + return list; + } + + /** + * Creates a mutable {@code ArrayList} instance containing the + * given elements; a very thin shortcut for creating an empty list then + * calling Iterables#addAll. + * + * @param Generics Type E. + * @param elements elements. + * @return ArrayList Generics Type E. + */ + public static ArrayList newArrayList(Iterable elements) { + if (elements == null) { + throw new NullPointerException(); + } + return (elements instanceof Collection) + ? new ArrayList<>(cast(elements)) + : newArrayList(elements.iterator()); + } + + /** + * Creates a mutable {@code ArrayList} instance containing the + * given elements; a very thin shortcut for creating an empty list + * and then calling Iterators#addAll. + * + * @param Generics Type E. + * @param elements elements. + * @return ArrayList Generics Type E. + */ + public static ArrayList newArrayList(Iterator elements) { + ArrayList list = newArrayList(); + addAll(list, elements); + return list; + } + + /** + * Creates an {@code ArrayList} instance backed by an array with the + * specified initial size; + * simply delegates to {@link ArrayList#ArrayList(int)}. + * + * @param Generics Type E. 
+ * @param initialArraySize the exact size of the initial backing array for + * the returned array list + * ({@code ArrayList} documentation calls this value the "capacity"). + * @return a new, empty {@code ArrayList} which is guaranteed not to + * resize itself unless its size reaches {@code initialArraySize + 1}. + * @throws IllegalArgumentException if {@code initialArraySize} is negative. + */ + public static ArrayList newArrayListWithCapacity( + int initialArraySize) { + checkNonnegative(initialArraySize, "initialArraySize"); + return new ArrayList<>(initialArraySize); + } + + /** + * Creates an {@code ArrayList} instance to hold {@code estimatedSize} + * elements, plus an unspecified amount of padding; + * you almost certainly mean to call {@link + * #newArrayListWithCapacity} (see that method for further advice on usage). + * + * @param estimatedSize an estimate of the eventual {@link List#size()} + * of the new list. + * @return a new, empty {@code ArrayList}, sized appropriately to hold the + * estimated number of elements. + * @throws IllegalArgumentException if {@code estimatedSize} is negative. + * + * @param Generics Type E. + */ + public static ArrayList newArrayListWithExpectedSize( + int estimatedSize) { + return new ArrayList<>(computeArrayListCapacity(estimatedSize)); + } + + /** + * Creates a mutable, empty {@code LinkedList} instance. + * + *

      Performance note: {@link ArrayList} and + * {@link java.util.ArrayDeque} consistently + * outperform {@code LinkedList} except in certain rare and specific + * situations. Unless you have + * spent a lot of time benchmarking your specific needs, use one of those + * instead.

      + * + * @param Generics Type E. + * @return Generics Type E List. + */ + public static LinkedList newLinkedList() { + return new LinkedList<>(); + } + + /** + * Creates a mutable {@code LinkedList} instance containing the given + * elements; a very thin shortcut for creating an empty list then calling + * Iterables#addAll. + * + *

      Performance note: {@link ArrayList} and + * {@link java.util.ArrayDeque} consistently + * outperform {@code LinkedList} except in certain rare and specific + * situations. Unless you have spent a lot of time benchmarking your + * specific needs, use one of those instead.

      + * + * @param elements elements. + * @param Generics Type E. + * @return Generics Type E List. + */ + public static LinkedList newLinkedList( + Iterable elements) { + LinkedList list = newLinkedList(); + addAll(list, elements); + return list; + } + + private static int computeArrayListCapacity(int arraySize) { + checkNonnegative(arraySize, "arraySize"); + return saturatedCast(5L + arraySize + (arraySize / 10)); + } + + private static int checkNonnegative(int value, String name) { + if (value < 0) { + throw new IllegalArgumentException(name + " cannot be negative but was: " + + value); + } + return value; + } + + /** + * Returns the {@code int} nearest in value to {@code value}. + * + * @param value any {@code long} value. + * @return the same value cast to {@code int} if it is in the range of the + * {@code int} type, {@link Integer#MAX_VALUE} if it is too large, + * or {@link Integer#MIN_VALUE} if it is too small. + */ + private static int saturatedCast(long value) { + if (value > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + if (value < Integer.MIN_VALUE) { + return Integer.MIN_VALUE; + } + return (int) value; + } + + private static boolean addAll(Collection addTo, + Iterator iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + private static Collection cast(Iterable iterable) { + return (Collection) iterable; + } + + /** + * Adds all elements in {@code iterable} to {@code collection}. + * + * @return {@code true} if {@code collection} was modified as a result of + * this operation. 
+ */ + private static boolean addAll(Collection addTo, + Iterable elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Returns consecutive sub-lists of a list, each of the same size + * (the final list may be smaller). + * @param originalList original big list. + * @param pageSize desired size of each sublist ( last one + * may be smaller) + * @param Generics Type. + * @return a list of sub lists. + */ + public static List> partition(List originalList, int pageSize) { + + Preconditions.checkArgument(originalList != null && originalList.size() > 0, + "Invalid original list"); + Preconditions.checkArgument(pageSize > 0, "Page size should " + + "be greater than 0 for performing partition"); + + List> result = new ArrayList<>(); + int i=0; + while (i < originalList.size()) { + result.add(originalList.subList(i, + Math.min(i + pageSize, originalList.size()))); + i = i + pageSize; + } + return result; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/MachineList.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/MachineList.java index 20931deb0e9bd..c7fbb47b877b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/MachineList.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/MachineList.java @@ -21,6 +21,7 @@ import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; @@ -28,8 +29,7 @@ import org.apache.commons.net.util.SubnetUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.net.InetAddresses; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,9 +61,9 @@ public InetAddress getByName (String host) throws UnknownHostException { } private final boolean all; - private final Set ipAddresses; + private final Set inetAddresses; + private final Collection entries; private final List cidrAddresses; - private final Set hostNames; private final InetAddressFactory addressFactory; /** @@ -71,7 +71,11 @@ public InetAddress getByName (String host) throws UnknownHostException { * @param hostEntries comma separated ip/cidr/host addresses */ public MachineList(String hostEntries) { - this(StringUtils.getTrimmedStringCollection(hostEntries)); + this(hostEntries, InetAddressFactory.S_INSTANCE); + } + + public MachineList(String hostEntries, InetAddressFactory addressFactory) { + this(StringUtils.getTrimmedStringCollection(hostEntries), addressFactory); } /** @@ -85,22 +89,22 @@ public MachineList(Collection hostEntries) { /** * Accepts a collection of ip/cidr/host addresses * - * @param hostEntries + * @param hostEntries hostEntries. 
* @param addressFactory addressFactory to convert host to InetAddress */ - public MachineList(Collection hostEntries, InetAddressFactory addressFactory) { + public MachineList(Collection hostEntries, + InetAddressFactory addressFactory) { this.addressFactory = addressFactory; if (hostEntries != null) { + entries = new ArrayList<>(hostEntries); if ((hostEntries.size() == 1) && (hostEntries.contains(WILDCARD_VALUE))) { - all = true; - ipAddresses = null; - hostNames = null; + all = true; + inetAddresses = null; cidrAddresses = null; } else { all = false; - Set ips = new HashSet(); + Set addrs = new HashSet<>(); List cidrs = new LinkedList(); - Set hosts = new HashSet(); for (String hostEntry : hostEntries) { //ip address range if (hostEntry.indexOf("/") > -1) { @@ -112,26 +116,30 @@ public MachineList(Collection hostEntries, InetAddressFactory addressFac LOG.warn("Invalid CIDR syntax : " + hostEntry); throw e; } - } else if (InetAddresses.isInetAddress(hostEntry)) { //ip address - ips.add(hostEntry); - } else { //hostname - hosts.add(hostEntry); + } else { + try { + addrs.add(addressFactory.getByName(hostEntry)); + } catch (UnknownHostException e) { + LOG.warn(e.toString()); + } } } - ipAddresses = (ips.size() > 0) ? ips : null; + inetAddresses = (addrs.size() > 0) ? addrs : null; cidrAddresses = (cidrs.size() > 0) ? cidrs : null; - hostNames = (hosts.size() > 0) ? hosts : null; } } else { - all = false; - ipAddresses = null; - hostNames = null; - cidrAddresses = null; + all = false; + inetAddresses = null; + cidrAddresses = null; + entries = Collections.emptyList(); } } /** - * Accepts an ip address and return true if ipAddress is in the list - * @param ipAddress + * Accepts an ip address and return true if ipAddress is in the list. + * {@link #includes(InetAddress)} should be preferred + * to avoid possibly re-resolving the ip address. + * + * @param ipAddress ipAddress. 
* @return true if ipAddress is part of the list */ public boolean includes(String ipAddress) { @@ -144,71 +152,47 @@ public boolean includes(String ipAddress) { throw new IllegalArgumentException("ipAddress is null."); } - //check in the set of ipAddresses - if ((ipAddresses != null) && ipAddresses.contains(ipAddress)) { + try { + return includes(addressFactory.getByName(ipAddress)); + } catch (UnknownHostException e) { + return false; + } + } + + /** + * Accepts an inet address and return true if address is in the list. + * @param address address. + * @return true if address is part of the list + */ + public boolean includes(InetAddress address) { + if (all) { return true; } - - //iterate through the ip ranges for inclusion + if (address == null) { + throw new IllegalArgumentException("address is null."); + } + if (inetAddresses != null && inetAddresses.contains(address)) { + return true; + } + // iterate through the ip ranges for inclusion if (cidrAddresses != null) { + String ipAddress = address.getHostAddress(); for(SubnetUtils.SubnetInfo cidrAddress : cidrAddresses) { if(cidrAddress.isInRange(ipAddress)) { return true; } } } - - //check if the ipAddress matches one of hostnames - if (hostNames != null) { - //convert given ipAddress to hostname and look for a match - InetAddress hostAddr; - try { - hostAddr = addressFactory.getByName(ipAddress); - if ((hostAddr != null) && hostNames.contains(hostAddr.getCanonicalHostName())) { - return true; - } - } catch (UnknownHostException e) { - //ignore the exception and proceed to resolve the list of hosts - } - - //loop through host addresses and convert them to ip and look for a match - for (String host : hostNames) { - try { - hostAddr = addressFactory.getByName(host); - } catch (UnknownHostException e) { - continue; - } - if (hostAddr.getHostAddress().equals(ipAddress)) { - return true; - } - } - } return false; } - /** - * returns the contents of the MachineList as a Collection<String> - * This can be used for 
testing - * @return contents of the MachineList + * Returns the contents of the MachineList as a Collection<String>. + * This can be used for testing. + * + * @return contents of the MachineList. */ @VisibleForTesting public Collection getCollection() { - Collection list = new ArrayList(); - if (all) { - list.add("*"); - } else { - if (ipAddresses != null) { - list.addAll(ipAddresses); - } - if (hostNames != null) { - list.addAll(hostNames); - } - if (cidrAddresses != null) { - for(SubnetUtils.SubnetInfo cidrAddress : cidrAddresses) { - list.add(cidrAddress.getCidrSignature()); - } - } - } - return list; + return entries; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java index a8a380ed070d1..b5550f58ae218 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java @@ -74,23 +74,24 @@ public static boolean isNativeCodeLoaded() { return nativeCodeLoaded; } - /** - * Returns true only if this build was compiled with support for snappy. - */ - public static native boolean buildSupportsSnappy(); - /** * Returns true only if this build was compiled with support for ISA-L. + * + * @return true if this build was compiled with support for ISA-L, false otherwise. */ public static native boolean buildSupportsIsal(); /** - * Returns true only if this build was compiled with support for ZStandard. + * Returns true only if this build was compiled with support for ZStandard. + * + * @return true if this build was compiled with support for ZStandard, false otherwise. */ public static native boolean buildSupportsZstd(); /** * Returns true only if this build was compiled with support for openssl. + * + * @return true if this build was compiled with support for openssl, false otherwise.
*/ public static native boolean buildSupportsOpenssl(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java index 3142df2da36e0..cc41f02d87e48 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.ChecksumException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Wrapper around JNI support code to do checksum computation diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java index 23388248575ac..9843a9d4057dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -22,8 +22,6 @@ import org.apache.hadoop.io.erasurecode.ErasureCodeNative; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.OpensslCipher; -import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.bzip2.Bzip2Factory; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -41,7 +39,8 @@ public class NativeLibraryChecker { LoggerFactory.getLogger(NativeLibraryChecker.class); /** - * A tool to test native library availability, + * A tool to test native library availability. + * @param args args. 
*/ public static void main(String[] args) { String usage = "NativeLibraryChecker [-a|-h]\n" @@ -67,12 +66,9 @@ public static void main(String[] args) { Configuration conf = new Configuration(); boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded(); boolean zlibLoaded = false; - boolean snappyLoaded = false; boolean isalLoaded = false; boolean zStdLoaded = false; boolean pmdkLoaded = false; - // lz4 is linked within libhadoop - boolean lz4Loaded = nativeHadoopLoaded; boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf); boolean openSslLoaded = false; boolean winutilsExists = false; @@ -80,11 +76,9 @@ public static void main(String[] args) { String openSslDetail = ""; String hadoopLibraryName = ""; String zlibLibraryName = ""; - String snappyLibraryName = ""; String isalDetail = ""; String pmdkDetail = ""; String zstdLibraryName = ""; - String lz4LibraryName = ""; String bzip2LibraryName = ""; String winutilsPath = null; @@ -99,11 +93,6 @@ public static void main(String[] args) { if (zStdLoaded && NativeCodeLoader.buildSupportsZstd()) { zstdLibraryName = ZStandardCodec.getLibraryName(); } - snappyLoaded = NativeCodeLoader.buildSupportsSnappy() && - SnappyCodec.isNativeCodeLoaded(); - if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) { - snappyLibraryName = SnappyCodec.getLibraryName(); - } isalDetail = ErasureCodeNative.getLoadingFailureReason(); if (isalDetail != null) { @@ -127,9 +116,6 @@ public static void main(String[] args) { openSslLoaded = true; } - if (lz4Loaded) { - lz4LibraryName = Lz4Codec.getLibraryName(); - } if (bzip2Loaded) { bzip2LibraryName = Bzip2Factory.getLibraryName(conf); } @@ -152,8 +138,6 @@ public static void main(String[] args) { System.out.printf("hadoop: %b %s%n", nativeHadoopLoaded, hadoopLibraryName); System.out.printf("zlib: %b %s%n", zlibLoaded, zlibLibraryName); System.out.printf("zstd : %b %s%n", zStdLoaded, zstdLibraryName); - System.out.printf("snappy: %b %s%n", snappyLoaded, snappyLibraryName); - 
System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); System.out.printf("ISA-L: %b %s%n", isalLoaded, isalDetail); @@ -164,8 +148,8 @@ public static void main(String[] args) { } if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) || - (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded - && bzip2Loaded && isalLoaded && zStdLoaded))) { + (checkAll && !(zlibLoaded && bzip2Loaded + && isalLoaded && zStdLoaded))) { // return 1 to indicated check failed ExitUtil.terminate(1); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/OperationDuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/OperationDuration.java index 3276d2138bbfc..1fb920e99f08e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/OperationDuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/OperationDuration.java @@ -18,48 +18,100 @@ package org.apache.hadoop.util; +import java.time.Duration; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; /** * Little duration counter. */ -@InterfaceAudience.Private +@InterfaceAudience.Public @InterfaceStability.Unstable public class OperationDuration { + /** + * Time in millis when the operation started. + */ private final long started; + + /** + * Time when the operation finished. + */ private long finished; + /** + * Instantiate. + * The start time and finished time are both set + * to the current clock time. + */ public OperationDuration() { started = time(); finished = started; } + /** + * Evaluate the system time. + * @return the current clock time. + */ protected long time() { return System.currentTimeMillis(); } + /** + * Update the finished time with the current system time. 
+ */ public void finished() { finished = time(); } + /** + * Return the duration as {@link #humanTime(long)}. + * @return a printable duration. + */ public String getDurationString() { return humanTime(value()); } + /** + * Convert to a human time of minutes:seconds.millis. + * @param time time to humanize. + * @return a printable value. + */ public static String humanTime(long time) { long seconds = (time / 1000); long minutes = (seconds / 60); return String.format("%d:%02d.%03ds", minutes, seconds % 60, time % 1000); } + /** + * Return the duration as {@link #humanTime(long)}. + * @return a printable duration. + */ @Override public String toString() { return getDurationString(); } + /** + * Get the duration in milliseconds. + * + *

      + * This will be 0 until a call + * to {@link #finished()} has been made. + *

      + * @return the currently recorded duration. + */ public long value() { return finished -started; } + + /** + * Get the duration of an operation as a java Duration + * instance. + * @return a duration. + */ + public Duration asDuration() { + return Duration.ofMillis(value()); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Options.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Options.java index 23169e3af3533..ccd494e5e40d9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Options.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Options.java @@ -126,7 +126,7 @@ public Progressable getValue() { * @param cls the dynamic class to find * @param opts the list of options to look through * @return the first option that matches - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @SuppressWarnings("unchecked") public static T getOption(Class cls, base [] opts diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Preconditions.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Preconditions.java new file mode 100644 index 0000000000000..4b98797df3ac8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Preconditions.java @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.function.Supplier; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + *

      This class replaces {@code guava.Preconditions} which provides helpers + * to validate the following conditions: + *

        + *
      • An invalid {@code null} obj causes a {@link NullPointerException}.
      • + *
      • An invalid argument causes an {@link IllegalArgumentException}.
      • + *
      • An invalid state causes an {@link IllegalStateException}.
      • + *
      • An invalid index causes an {@link IndexOutOfBoundsException}.
      • + *
      + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class Preconditions { + private static final Logger LOG = + LoggerFactory.getLogger(Preconditions.class); + + private static final String VALIDATE_IS_NOT_NULL_EX_MESSAGE = + "The argument object is NULL"; + private static final String CHECK_ARGUMENT_EX_MESSAGE = + "The argument expression is false"; + private static final String CHECK_STATE_EX_MESSAGE = + "The state expression is false"; + + private Preconditions() { + } + + /** + *

      Preconditions that the specified argument is not {@code null}, + * throwing a NPE exception otherwise. + * + *

      The message of the exception is + * "The argument object is NULL".

      + * + * @param the object type + * @param obj the object to check + * @return the validated object + * @throws NullPointerException if the object is {@code null} + * @see #checkNotNull(Object, Object) + */ + public static T checkNotNull(final T obj) { + return checkNotNull(obj, VALIDATE_IS_NOT_NULL_EX_MESSAGE); + } + + /** + *

      Preconditions that the specified argument is not {@code null}, + * throwing a NPE exception otherwise. + * + *

      The message of the exception is {@code errorMessage}.

      + * + * @param the object type + * @param obj the object to check + * @param errorMessage the message associated with the NPE + * @return the validated object + * @throws NullPointerException if the object is {@code null} + * @see #checkNotNull(Object, String, Object...) + */ + public static T checkNotNull(final T obj, + final Object errorMessage) { + if (obj == null) { + throw new NullPointerException(String.valueOf(errorMessage)); + } + return obj; + } + + /** + *

      Preconditions that the specified argument is not {@code null}, + * throwing a NPE exception otherwise. + * + *

      The message of the exception is {@code String.format(message, values)}.

      + * + * @param the object type + * @param obj the object to check + * @param message the {@link String#format(String, Object...)} + * exception message if valid. Otherwise, + * the message is {@link #VALIDATE_IS_NOT_NULL_EX_MESSAGE} + * @param values the optional values for the formatted exception message + * @return the validated object + * @throws NullPointerException if the object is {@code null} + * @see #checkNotNull(Object, Supplier) + */ + public static T checkNotNull(final T obj, final String message, + final Object... values) { + // Deferring the evaluation of the message is a tradeoff between the cost + // of constructing lambda Vs constructing a string object. + // Using lambda would allocate an object on every call: + // return checkNotNull(obj, () -> String.format(message, values)); + if (obj == null) { + String msg; + try { + msg = String.format(message, values); + } catch (Exception e) { + LOG.debug("Error formatting message", e); + msg = VALIDATE_IS_NOT_NULL_EX_MESSAGE; + } + throw new NullPointerException(msg); + } + return obj; + } + + /** + * Preconditions that the specified argument is not {@code null}, + * throwing a NPE exception otherwise. + * + *

      The message of the exception is {@code msgSupplier.get()}.

      + * + * @param the object type + * @param obj the object to check + * @param msgSupplier the {@link Supplier#get()} set the + * exception message if valid. Otherwise, + * the message is {@link #VALIDATE_IS_NOT_NULL_EX_MESSAGE} + * @return the validated object (never {@code null} for method chaining) + * @throws NullPointerException if the object is {@code null} + */ + public static T checkNotNull(final T obj, + final Supplier msgSupplier) { + if (obj == null) { + String msg; + try { + // note that we can get NPE evaluating the message itself; + // but we do not want this to override the actual NPE. + msg = msgSupplier.get(); + } catch (Exception e) { + // ideally we want to log the error to capture. This may cause log files + // to bloat. On the other hand, swallowing the exception may hide a bug + // in the caller. Debug level is a good compromise between the two + // concerns. + LOG.debug("Error formatting message", e); + msg = VALIDATE_IS_NOT_NULL_EX_MESSAGE; + } + throw new NullPointerException(msg); + } + return obj; + } + + /** + * Ensures the truth of an expression involving one or more parameters to the calling method. + * + * @param expression a boolean expression + * @throws IllegalArgumentException if {@code expression} is false + */ + public static void checkArgument(final boolean expression) { + if (!expression) { + throw new IllegalArgumentException(); + } + } + + /** + * Ensures the truth of an expression involving one or more parameters to the calling method. 
+ * + * @param expression a boolean expression + * @param errorMessage the exception message to use if the check fails; will be converted to a + * string using {@link String#valueOf(Object)} + * @throws IllegalArgumentException if {@code expression} is false + */ + public static void checkArgument(final boolean expression, final Object errorMessage) { + if (!expression) { + throw new IllegalArgumentException(String.valueOf(errorMessage)); + } + } + + /** + * Ensures the truth of an expression involving one or more parameters to the calling method. + * + *

      The message of the exception is {@code String.format(errorMsg, errorMsgArgs)}.

      + * + * @param expression a boolean expression + * @param errorMsg the {@link String#format(String, Object...)} + * exception message if valid. Otherwise, + * the message is {@link #CHECK_ARGUMENT_EX_MESSAGE} + * @param errorMsgArgs the optional values for the formatted exception message. + * @throws IllegalArgumentException if {@code expression} is false + */ + public static void checkArgument( + final boolean expression, + final String errorMsg, + Object... errorMsgArgs) { + if (!expression) { + String msg; + try { + msg = String.format(errorMsg, errorMsgArgs); + } catch (Exception e) { + LOG.debug("Error formatting message", e); + msg = CHECK_ARGUMENT_EX_MESSAGE; + } + throw new IllegalArgumentException(msg); + } + } + + /** + * Preconditions that the expression involving one or more parameters to the calling method. + * + *

      The message of the exception is {@code msgSupplier.get()}.

      + * + * @param expression a boolean expression + * @param msgSupplier the {@link Supplier#get()} set the + * exception message if valid. Otherwise, + * the message is {@link #CHECK_ARGUMENT_EX_MESSAGE} + * @throws IllegalArgumentException if {@code expression} is false + */ + public static void checkArgument( + final boolean expression, + final Supplier msgSupplier) { + if (!expression) { + String msg; + try { + // note that we can get NPE evaluating the message itself; + // but we do not want this to override the actual NPE. + msg = msgSupplier.get(); + } catch (Exception e) { + LOG.debug("Error formatting message", e); + msg = CHECK_ARGUMENT_EX_MESSAGE; + } + throw new IllegalArgumentException(msg); + } + } + + /** + * Ensures the truth of an expression involving the state of the calling instance + * without involving any parameters to the calling method. + * + * @param expression a boolean expression + * @throws IllegalStateException if {@code expression} is false + */ + public static void checkState(final boolean expression) { + if (!expression) { + throw new IllegalStateException(); + } + } + + /** + * Ensures the truth of an expression involving the state of the calling instance + * without involving any parameters to the calling method. + * + * @param expression a boolean expression + * @param errorMessage the exception message to use if the check fails; will be converted to a + * string using {@link String#valueOf(Object)} + * @throws IllegalStateException if {@code expression} is false + */ + public static void checkState(final boolean expression, final Object errorMessage) { + if (!expression) { + throw new IllegalStateException(String.valueOf(errorMessage)); + } + } + + /** + * Ensures the truth of an expression involving the state of the calling instance + * without involving any parameters to the calling method. + * + *

      The message of the exception is {@code String.format(errorMsg, errorMsgArgs)}.

      + * + * @param expression a boolean expression + * @param errorMsg the {@link String#format(String, Object...)} + * exception message if valid. Otherwise, + * the message is {@link #CHECK_STATE_EX_MESSAGE} + * @param errorMsgArgs the optional values for the formatted exception message. + * @throws IllegalStateException if {@code expression} is false + */ + public static void checkState( + final boolean expression, + final String errorMsg, + Object... errorMsgArgs) { + if (!expression) { + String msg; + try { + msg = String.format(errorMsg, errorMsgArgs); + } catch (Exception e) { + LOG.debug("Error formatting message", e); + msg = CHECK_STATE_EX_MESSAGE; + } + throw new IllegalStateException(msg); + } + } + + /** + * Preconditions that the expression involving one or more parameters to the calling method. + * + *

      The message of the exception is {@code msgSupplier.get()}.

      + * + * @param expression a boolean expression + * @param msgSupplier the {@link Supplier#get()} set the + * exception message if valid. Otherwise, + * the message is {@link #CHECK_STATE_EX_MESSAGE} + * @throws IllegalStateException if {@code expression} is false + */ + public static void checkState( + final boolean expression, + final Supplier msgSupplier) { + if (!expression) { + String msg; + try { + // note that we can get NPE evaluating the message itself; + // but we do not want this to override the actual NPE. + msg = msgSupplier.get(); + } catch (Exception e) { + LOG.debug("Error formatting message", e); + msg = CHECK_STATE_EX_MESSAGE; + } + throw new IllegalStateException(msg); + } + } + + /* @VisibleForTesting */ + static String getDefaultNullMSG() { + return VALIDATE_IS_NOT_NULL_EX_MESSAGE; + } + + /* @VisibleForTesting */ + static String getDefaultCheckArgumentMSG() { + return CHECK_ARGUMENT_EX_MESSAGE; + } + + /* @VisibleForTesting */ + static String getDefaultCheckStateMSG() { + return CHECK_STATE_EX_MESSAGE; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PrintJarMainClass.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PrintJarMainClass.java index df571f35e2fcd..f7822e3f788d0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PrintJarMainClass.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PrintJarMainClass.java @@ -31,7 +31,7 @@ public class PrintJarMainClass { /** - * @param args + * @param args args. 
*/ public static void main(String[] args) { try (JarFile jar_file = new JarFile(args[0])) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PriorityQueue.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PriorityQueue.java index ebb943bcb6285..d149d5d811914 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PriorityQueue.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PriorityQueue.java @@ -31,11 +31,19 @@ public abstract class PriorityQueue { private int size; private int maxSize; - /** Determines the ordering of objects in this priority queue. Subclasses - must define this one method. */ + /** + * Determines the ordering of objects in this priority queue. Subclasses + must define this one method. + * @param a object a. + * @param b object b. + * @return if a less than b true, not false + */ protected abstract boolean lessThan(Object a, Object b); - /** Subclass constructors must call this. */ + /** + * Subclass constructors must call this. + * @param maxSize max size. + */ @SuppressWarnings("unchecked") protected final void initialize(int maxSize) { size = 0; @@ -48,6 +56,7 @@ protected final void initialize(int maxSize) { * Adds an Object to a PriorityQueue in log(size) time. * If one tries to add more objects than maxSize from initialize * a RuntimeException (ArrayIndexOutOfBound) is thrown. + * @param element element. */ public final void put(T element) { size++; @@ -58,7 +67,7 @@ public final void put(T element) { /** * Adds element to the PriorityQueue in log(size) time if either * the PriorityQueue is not full, or not lessThan(element, top()). - * @param element + * @param element element. * @return true if element is added, false otherwise. 
*/ public boolean insert(T element){ @@ -75,7 +84,11 @@ else if (size > 0 && !lessThan(element, top())){ return false; } - /** Returns the least element of the PriorityQueue in constant time. */ + /** + * Returns the least element of the PriorityQueue in constant time. + * + * @return T Generics Type T. + */ public final T top() { if (size > 0) return heap[1]; @@ -83,8 +96,11 @@ public final T top() { return null; } - /** Removes and returns the least element of the PriorityQueue in log(size) - time. */ + /** + * Removes and returns the least element of the PriorityQueue in log(size) + time. + * @return T Generics Type T. + */ public final T pop() { if (size > 0) { T result = heap[1]; // save first value @@ -109,7 +125,11 @@ public final void adjustTop() { } - /** Returns the number of elements currently stored in the PriorityQueue. */ + /** + * Returns the number of elements currently stored in the PriorityQueue. + * + * @return size. + */ public final int size() { return size; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProgramDriver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProgramDriver.java index 347e5087eaa37..e49cf57705344 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProgramDriver.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProgramDriver.java @@ -91,12 +91,12 @@ private static void printUsage(Map programs) { } /** - * This is the method that adds the classed to the repository + * This is the method that adds the classed to the repository. * @param name The name of the string you want the class instance to be called with * @param mainClass The class that you want to add to the repository * @param description The description of the class - * @throws NoSuchMethodException - * @throws SecurityException + * @throws NoSuchMethodException when a particular method cannot be found. 
+ * @throws SecurityException security manager to indicate a security violation. */ public void addClass(String name, Class mainClass, String description) throws Throwable { @@ -111,10 +111,10 @@ public void addClass(String name, Class mainClass, String description) * of the command line arguments. * @param args The argument from the user. args[0] is the command to run. * @return -1 on error, 0 on success - * @throws NoSuchMethodException - * @throws SecurityException - * @throws IllegalAccessException - * @throws IllegalArgumentException + * @throws NoSuchMethodException when a particular method cannot be found. + * @throws SecurityException security manager to indicate a security violation. + * @throws IllegalAccessException for backward compatibility. + * @throws IllegalArgumentException if the arg is invalid. * @throws Throwable Anything thrown by the example program's main */ public int run(String[] args) @@ -146,7 +146,11 @@ public int run(String[] args) } /** - * API compatible with Hadoop 1.x + * API compatible with Hadoop 1.x. + * + * @param argv argv. + * @throws Throwable Anything thrown + * by the example program's main */ public void driver(String[] argv) throws Throwable { if (run(argv) == -1) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Progress.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Progress.java index bd1c0f4a62a78..a839c04e99e63 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Progress.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Progress.java @@ -53,14 +53,21 @@ public class Progress { /** Creates a new root node. */ public Progress() {} - /** Adds a named node to the tree. */ + /** + * Adds a named node to the tree. + * @param status status. + * @return Progress. 
+ */ public Progress addPhase(String status) { Progress phase = addPhase(); phase.setStatus(status); return phase; } - /** Adds a node to the tree. Gives equal weightage to all phases */ + /** + * Adds a node to the tree. Gives equal weightage to all phases. + * @return Progress. + */ public synchronized Progress addPhase() { Progress phase = addNewPhase(); // set equal weightage for all phases @@ -77,7 +84,13 @@ private synchronized Progress addNewPhase() { return phase; } - /** Adds a named node with a specified progress weightage to the tree. */ + /** + * Adds a named node with a specified progress weightage to the tree. + * + * @param status status. + * @param weightage weightage. + * @return Progress. + */ public Progress addPhase(String status, float weightage) { Progress phase = addPhase(weightage); phase.setStatus(status); @@ -85,7 +98,12 @@ public Progress addPhase(String status, float weightage) { return phase; } - /** Adds a node with a specified progress weightage to the tree. */ + /** + * Adds a node with a specified progress weightage to the tree. + * + * @param weightage weightage. + * @return Progress. + */ public synchronized Progress addPhase(float weightage) { Progress phase = new Progress(); progressWeightagesForPhases.add(weightage); @@ -104,7 +122,11 @@ public synchronized Progress addPhase(float weightage) { return phase; } - /** Adds n nodes to the tree. Gives equal weightage to all phases */ + /** + * Adds n nodes to the tree. Gives equal weightage to all phases. + * + * @param n n. + */ public synchronized void addPhases(int n) { for (int i = 0; i < n; i++) { addNewPhase(); @@ -136,7 +158,10 @@ public synchronized void startNextPhase() { currentPhase++; } - /** Returns the current sub-node executing. */ + /** + * Returns the current sub-node executing. + * @return Progress. 
+ */ public synchronized Progress phase() { return phases.get(currentPhase); } @@ -158,7 +183,10 @@ public void complete() { } } - /** Called during execution on a leaf node to set its progress. */ + /** + * Called during execution on a leaf node to set its progress. + * @param progress progress. + */ public synchronized void set(float progress) { if (Float.isNaN(progress)) { progress = 0; @@ -188,7 +216,10 @@ else if (progress == Float.POSITIVE_INFINITY) { this.progress = progress; } - /** Returns the overall progress of the root. */ + /** + * Returns the overall progress of the root. + * @return progress. + */ // this method probably does not need to be synchronized as getInternal() is // synchronized and the node's parent never changes. Still, it doesn't hurt. public synchronized float get() { @@ -202,6 +233,8 @@ public synchronized float get() { /** * Returns progress in this node. get() would give overall progress of the * root node(not just given current node). + * + * @return progress. 
*/ public synchronized float getProgress() { return getInternal(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java index 2bb19460b3686..883c19c5e7750 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java @@ -29,8 +29,9 @@ import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.htrace.core.Span; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.thirdparty.protobuf.ByteString; @@ -82,6 +83,10 @@ public static int readRawVarint32(DataInput in) throws IOException { * as the old connection context as was done for writable where * the effective and real users are set based on the auth method. * + * @param protocol protocol. + * @param ugi ugi. + * @param authMethod authMethod. + * @return IpcConnectionContextProto. */ public static IpcConnectionContextProto makeIpcConnectionContext( final String protocol, @@ -180,10 +185,10 @@ public static RpcRequestHeaderProto makeRpcRequestHeader(RPC.RpcKind rpcKind, // Add tracing info if we are currently tracing. 
Span span = Tracer.getCurrentSpan(); if (span != null) { - result.setTraceInfo(RPCTraceInfoProto.newBuilder() - .setTraceId(span.getSpanId().getHigh()) - .setParentId(span.getSpanId().getLow()) - .build()); + RPCTraceInfoProto.Builder traceInfoProtoBuilder = + RPCTraceInfoProto.newBuilder().setSpanContext( + TraceUtils.spanContextToByteString(span.getContext())); + result.setTraceInfo(traceInfoProtoBuilder); } // Add caller context if it is not null diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/QuickSort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/QuickSort.java index 73d8d90d42507..f3f8b839a0500 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/QuickSort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/QuickSort.java @@ -40,6 +40,9 @@ private static void fix(IndexedSortable s, int p, int r) { /** * Deepest recursion before giving up and doing a heapsort. * Returns 2 * ceil(log(n)). + * + * @param x x. + * @return MaxDepth. */ protected static int getMaxDepth(int x) { if (x <= 0) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RateLimiting.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RateLimiting.java new file mode 100644 index 0000000000000..ae119c0e630f4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RateLimiting.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.time.Duration; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Minimal subset of google rate limiter class. + * Can be used to throttle use of object stores where excess load + * will trigger cluster-wide throttling, backoff etc. and so collapse + * performance. + * The time waited is returned as a Duration type. + * The google rate limiter implements this by allowing a caller to ask for + * more capacity than is available. This will be granted + * but the subsequent request will be blocked if the bucket of + * capacity hasn't let refilled to the point where there is + * capacity again. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface RateLimiting { + + /** + * Acquire rate limiter capacity. + * If there is not enough space, the permits will be acquired, + * but the subsequent call will block until the capacity has been + * refilled. + * @param requestedCapacity capacity to acquire. + * @return time spent waiting for output. 
+ */ + Duration acquire(int requestedCapacity); + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RateLimitingFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RateLimitingFactory.java new file mode 100644 index 0000000000000..621415456e125 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RateLimitingFactory.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.time.Duration; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.RateLimiter; + +/** + * Factory for Rate Limiting. + * This should be only place in the code where the guava RateLimiter is imported. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class RateLimitingFactory { + + private static final RateLimiting UNLIMITED = new NoRateLimiting(); + + /** + * No waiting took place. 
+ */ + private static final Duration INSTANTLY = Duration.ofMillis(0); + + private RateLimitingFactory() { + } + + /** + * No Rate Limiting. + */ + private static class NoRateLimiting implements RateLimiting { + + + @Override + public Duration acquire(int requestedCapacity) { + return INSTANTLY; + } + } + + /** + * Rate limiting restricted to that of a google rate limiter. + */ + private static final class RestrictedRateLimiting implements RateLimiting { + private final RateLimiter limiter; + + /** + * Constructor. + * @param capacityPerSecond capacity in permits/second. + */ + private RestrictedRateLimiting(int capacityPerSecond) { + this.limiter = RateLimiter.create(capacityPerSecond); + } + + @Override + public Duration acquire(int requestedCapacity) { + final double delayMillis = limiter.acquire(requestedCapacity); + return delayMillis == 0 + ? INSTANTLY + : Duration.ofMillis((long) (delayMillis * 1000)); + } + + } + + /** + * Get the unlimited rate. + * @return a rate limiter which always has capacity. + */ + public static RateLimiting unlimitedRate() { + return UNLIMITED; + } + + /** + * Create an instance. + * If the rate is 0; return the unlimited rate. + * @param capacity capacity in permits/second. + * @return limiter restricted to the given capacity. + */ + public static RateLimiting create(int capacity) { + + return capacity == 0 + ? 
unlimitedRate() + : new RestrictedRateLimiting(capacity); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReadWriteDiskValidatorMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReadWriteDiskValidatorMetrics.java index 620186298de66..83cc6dcd9b576 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReadWriteDiskValidatorMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReadWriteDiskValidatorMetrics.java @@ -18,7 +18,7 @@ package org.apache.hadoop.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsSystem; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java index 1ae71d187d3ba..c32d5ca5ada19 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java @@ -29,7 +29,7 @@ import java.lang.reflect.Constructor; import java.lang.reflect.Field; import java.lang.reflect.Method; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -116,8 +116,9 @@ private static void setJobConf(Object theObject, Configuration conf) { } } - /** Create an object for the given class and initialize it from conf - * + /** Create an object for the given class and initialize it from conf. + * + * @param Generics Type. 
* @param theClass class of which an object is created * @param conf Configuration * @return a new object @@ -224,7 +225,7 @@ public static void logThreadInfo(Log log, try { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); printThreadInfo(new PrintStream(buffer, false, "UTF-8"), title); - log.info(buffer.toString(Charset.defaultCharset().name())); + log.info(buffer.toString(StandardCharsets.UTF_8.name())); } catch (UnsupportedEncodingException ignored) { } } @@ -253,7 +254,7 @@ public static void logThreadInfo(Logger log, try { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); printThreadInfo(new PrintStream(buffer, false, "UTF-8"), title); - log.info(buffer.toString(Charset.defaultCharset().name())); + log.info(buffer.toString(StandardCharsets.UTF_8.name())); } catch (UnsupportedEncodingException ignored) { } } @@ -262,7 +263,8 @@ public static void logThreadInfo(Logger log, /** * Return the correctly-typed {@link Class} of the given object. - * + * + * @param Generics Type T * @param o object whose correctly-typed Class is to be obtained * @return the correctly typed Class of the given object. */ @@ -312,11 +314,14 @@ private static SerializationFactory getFactory(Configuration conf) { } /** - * Make a copy of the writable object using serialization to a buffer + * Make a copy of the writable object using serialization to a buffer. + * + * @param Generics Type. + * @param conf input Configuration. * @param src the object to copy from * @param dst the object to copy into, which is destroyed * @return dst param (the copy) - * @throws IOException + * @throws IOException raised on errors performing I/O. */ @SuppressWarnings("unchecked") public static T copy(Configuration conf, @@ -346,8 +351,9 @@ public static void cloneWritableInto(Writable dst, } /** - * Gets all the declared fields of a class including fields declared in + * @return Gets all the declared fields of a class including fields declared in * superclasses. 
+ * @param clazz input clazz. */ public static List getDeclaredFieldsIncludingInherited(Class clazz) { List fields = new ArrayList(); @@ -368,8 +374,9 @@ public int compare(Field a, Field b) { } /** - * Gets all the declared methods of a class including methods declared in + * @return Gets all the declared methods of a class including methods declared in * superclasses. + * @param clazz input clazz. */ public static List getDeclaredMethodsIncludingInherited(Class clazz) { List methods = new ArrayList(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java index 50126002b7be7..c28e69f54611e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java @@ -156,6 +156,7 @@ public static void unJar(InputStream inputStream, File toDir, * @param inputStream the jar stream to unpack * @param toDir the destination directory into which to unpack the jar * @param unpackRegex the pattern to match jar entries against + * @param name name. * * @throws IOException if an I/O error has occurred or toDir * cannot be created and does not already exist @@ -231,7 +232,11 @@ private static void ensureDirectory(File dir) throws IOException { } /** Run a Hadoop job jar. If the main class is not in the jar's manifest, - * then it must be provided on the command line. */ + * then it must be provided on the command line. + * + * @param args args. + * @throws Throwable error. 
+ */ public static void main(String[] args) throws Throwable { new RunJar().run(args); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SemaphoredDelegatingExecutor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SemaphoredDelegatingExecutor.java index 4ec77e75ba520..c4c11e57b3720 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SemaphoredDelegatingExecutor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SemaphoredDelegatingExecutor.java @@ -18,22 +18,27 @@ package org.apache.hadoop.util; -import com.google.common.util.concurrent.ForwardingListeningExecutorService; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ForwardingExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTrackerFactory; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_EXECUTOR_ACQUIRED; + /** * This ExecutorService blocks the submission of new tasks when its queue is * already full by using a semaphore. 
Task submissions require permits, task @@ -49,29 +54,48 @@ @SuppressWarnings("NullableProblems") @InterfaceAudience.Private public class SemaphoredDelegatingExecutor extends - ForwardingListeningExecutorService { + ForwardingExecutorService { private final Semaphore queueingPermits; - private final ListeningExecutorService executorDelegatee; + private final ExecutorService executorDelegatee; private final int permitCount; + private final DurationTrackerFactory trackerFactory; /** * Instantiate. * @param executorDelegatee Executor to delegate to * @param permitCount number of permits into the queue permitted * @param fair should the semaphore be "fair" + * @param trackerFactory duration tracker factory. */ public SemaphoredDelegatingExecutor( - ListeningExecutorService executorDelegatee, + ExecutorService executorDelegatee, int permitCount, - boolean fair) { + boolean fair, + DurationTrackerFactory trackerFactory) { this.permitCount = permitCount; queueingPermits = new Semaphore(permitCount, fair); - this.executorDelegatee = executorDelegatee; + this.executorDelegatee = requireNonNull(executorDelegatee); + this.trackerFactory = trackerFactory != null + ? trackerFactory + : stubDurationTrackerFactory(); + } + + /** + * Instantiate without collecting executor acquisition duration information. 
+ * @param executorDelegatee Executor to delegate to + * @param permitCount number of permits into the queue permitted + * @param fair should the semaphore be "fair" + */ + public SemaphoredDelegatingExecutor( + ExecutorService executorDelegatee, + int permitCount, + boolean fair) { + this(executorDelegatee, permitCount, fair, null); } @Override - protected ListeningExecutorService delegate() { + protected ExecutorService delegate() { return executorDelegatee; } @@ -102,8 +126,9 @@ public T invokeAny(Collection> tasks, long timeout, } @Override - public ListenableFuture submit(Callable task) { - try { + public Future submit(Callable task) { + try (DurationTracker ignored = + trackerFactory.trackDuration(ACTION_EXECUTOR_ACQUIRED)) { queueingPermits.acquire(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -113,8 +138,9 @@ public ListenableFuture submit(Callable task) { } @Override - public ListenableFuture submit(Runnable task, T result) { - try { + public Future submit(Runnable task, T result) { + try (DurationTracker ignored = + trackerFactory.trackDuration(ACTION_EXECUTOR_ACQUIRED)) { queueingPermits.acquire(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -124,8 +150,9 @@ public ListenableFuture submit(Runnable task, T result) { } @Override - public ListenableFuture submit(Runnable task) { - try { + public Future submit(Runnable task) { + try (DurationTracker ignored = + trackerFactory.trackDuration(ACTION_EXECUTOR_ACQUIRED)) { queueingPermits.acquire(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -136,7 +163,8 @@ public ListenableFuture submit(Runnable task) { @Override public void execute(Runnable command) { - try { + try (DurationTracker ignored = + trackerFactory.trackDuration(ACTION_EXECUTOR_ACQUIRED)) { queueingPermits.acquire(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SequentialNumber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SequentialNumber.java index 685e92d628136..c3c04493d8be5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SequentialNumber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SequentialNumber.java @@ -30,7 +30,10 @@ public abstract class SequentialNumber implements IdGenerator { private final AtomicLong currentValue; - /** Create a new instance with the given initial value. */ + /** + * Create a new instance with the given initial value. + * @param initialValue initialValue. + */ protected SequentialNumber(final long initialValue) { currentValue = new AtomicLong(initialValue); } @@ -40,7 +43,10 @@ public long getCurrentValue() { return currentValue.get(); } - /** Set current value. */ + /** + * Set current value. + * @param value value. + */ public void setCurrentValue(long value) { currentValue.set(value); } @@ -63,7 +69,12 @@ public long nextValue() { return currentValue.incrementAndGet(); } - /** Skip to the new value. */ + /** + * Skip to the new value. + * @param newValue newValue. + * @throws IllegalStateException + * Cannot skip to less than the current value. 
+ */ public void skipTo(long newValue) throws IllegalStateException { for(;;) { final long c = getCurrentValue(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java index 9ba9e94dff5aa..455de4cb73994 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java @@ -26,13 +26,18 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; @InterfaceAudience.Private @InterfaceStability.Unstable public class ServletUtil { /** - * Initial HTML header + * Initial HTML header. + * + * @param response response. + * @param title title. + * @throws IOException raised on errors performing I/O. + * @return PrintWriter. */ public static PrintWriter initHTML(ServletResponse response, String title ) throws IOException { @@ -49,6 +54,10 @@ public static PrintWriter initHTML(ServletResponse response, String title /** * Get a parameter from a ServletRequest. * Return null if the parameter contains only white spaces. + * + * @param request request. + * @param name name. + * @return get a parameter from a ServletRequest. */ public static String getParameter(ServletRequest request, String name) { String s = request.getParameter(name); @@ -60,8 +69,13 @@ public static String getParameter(ServletRequest request, String name) { } /** + * parseLongParam. + * + * @param request request. + * @param param param. * @return a long value as passed in the given parameter, throwing * an exception if it is not present or if it is not a valid number. + * @throws IOException raised on errors performing I/O. 
*/ public static long parseLongParam(ServletRequest request, String param) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java index e66c81b4b8df6..cd36fdbbcce5b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java @@ -23,7 +23,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.InterruptedIOException; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -34,10 +34,9 @@ import java.util.WeakHashMap; import java.util.concurrent.atomic.AtomicBoolean; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.security.alias.AbstractJavaKeyStoreProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,6 +121,7 @@ public static boolean isJavaVersionAtLeast(int version) { * delimiters, no extra count will be added for delimiters. * * @param commands command parts, including any space delimiters + * @throws IOException raised on errors performing I/O. 
*/ public static void checkWindowsCommandLineLength(String...commands) throws IOException { @@ -145,7 +145,8 @@ public static void checkWindowsCommandLineLength(String...commands) * @param arg the argument to quote * @return the quoted string */ - static String bashQuote(String arg) { + @InterfaceAudience.Private + public static String bashQuote(String arg) { StringBuilder buffer = new StringBuilder(arg.length() + 2); buffer.append('\'') .append(arg.replace("'", "'\\''")) @@ -205,7 +206,11 @@ private static OSType getOSType() { public static final boolean PPC_64 = System.getProperties().getProperty("os.arch").contains("ppc64"); - /** a Unix command to get the current user's groups list. */ + /** + * a Unix command to get the current user's groups list. + * + * @return group command array. + */ public static String[] getGroupsCommand() { return (WINDOWS)? new String[]{"cmd", "/c", "groups"} : new String[]{"groups"}; @@ -216,6 +221,9 @@ public static String[] getGroupsCommand() { * If the OS is not WINDOWS, the command will get the user's primary group * first and finally get the groups list which includes the primary group. * i.e. the user's primary group will be included twice. + * + * @param user user. + * @return groups for user command. */ public static String[] getGroupsForUserCommand(final String user) { //'groups username' command return is inconsistent across different unixes @@ -235,6 +243,9 @@ public static String[] getGroupsForUserCommand(final String user) { * first and finally get the groups list which includes the primary group. * i.e. the user's primary group will be included twice. * This command does not support Windows and will only return group names. + * + * @param user user. + * @return groups id for user command. 
*/ public static String[] getGroupsIDForUserCommand(final String user) { //'groups username' command return is inconsistent across different unixes @@ -248,19 +259,34 @@ public static String[] getGroupsIDForUserCommand(final String user) { } } - /** A command to get a given netgroup's user list. */ + /** + * A command to get a given netgroup's user list. + * + * @param netgroup net group. + * @return users for net group command. + */ public static String[] getUsersForNetgroupCommand(final String netgroup) { //'groups username' command return is non-consistent across different unixes return new String[] {"getent", "netgroup", netgroup}; } - /** Return a command to get permission information. */ + /** + * Return a command to get permission information. + * + * @return permission command. + */ public static String[] getGetPermissionCommand() { return (WINDOWS) ? new String[] { getWinUtilsPath(), "ls", "-F" } : new String[] { "ls", "-ld" }; } - /** Return a command to set permission. */ + /** + * Return a command to set permission. + * + * @param perm permission. + * @param recursive recursive. + * @return set permission command. + */ public static String[] getSetPermissionCommand(String perm, boolean recursive) { if (recursive) { return (WINDOWS) ? @@ -290,21 +316,37 @@ public static String[] getSetPermissionCommand(String perm, return cmdWithFile; } - /** Return a command to set owner. */ + /** + * Return a command to set owner. + * + * @param owner owner. + * @return set owner command. + */ public static String[] getSetOwnerCommand(String owner) { return (WINDOWS) ? new String[] { getWinUtilsPath(), "chown", "\"" + owner + "\"" } : new String[] { "chown", owner }; } - /** Return a command to create symbolic links. */ + /** + * Return a command to create symbolic links. + * + * @param target target. + * @param link link. + * @return symlink command. + */ public static String[] getSymlinkCommand(String target, String link) { return WINDOWS ? 
new String[] { getWinUtilsPath(), "symlink", link, target } : new String[] { "ln", "-s", target, link }; } - /** Return a command to read the target of the a symbolic link. */ + /** + * Return a command to read the target of the a symbolic link. + * + * @param link link. + * @return read link command. + */ public static String[] getReadlinkCommand(String link) { return WINDOWS ? new String[] { getWinUtilsPath(), "readlink", link } @@ -320,7 +362,13 @@ public static String[] getCheckProcessIsAliveCommand(String pid) { return getSignalKillCommand(0, pid); } - /** Return a command to send a signal to a given pid. */ + /** + * Return a command to send a signal to a given pid. + * + * @param code code. + * @param pid pid. + * @return signal kill command. + */ public static String[] getSignalKillCommand(int code, String pid) { // Code == 0 means check alive if (Shell.WINDOWS) { @@ -347,7 +395,11 @@ public static String[] getSignalKillCommand(int code, String pid) { /** Regular expression for environment variables: {@value}. */ public static final String ENV_NAME_REGEX = "[A-Za-z_][A-Za-z0-9_]*"; - /** Return a regular expression string that match environment variables. */ + /** + * Return a regular expression string that match environment variables. + * + * @return environment variable regex. + */ public static String getEnvironmentVariableRegex() { return (WINDOWS) ? "%(" + ENV_NAME_REGEX + "?)%" @@ -889,7 +941,11 @@ protected void setWorkingDirectory(File dir) { this.dir = dir; } - /** Check to see if a command needs to be executed and execute if needed. */ + /** + * Check to see if a command needs to be executed and execute if needed. + * + * @throws IOException raised on errors performing I/O. + */ protected void run() throws IOException { if (lastTime + interval > Time.monotonicNow()) { return; @@ -901,7 +957,11 @@ protected void run() throws IOException { runCommand(); } - /** Run the command. */ + /** + * Run the command. 
+ * + * @throws IOException raised on errors performing I/O. + */ private void runCommand() throws IOException { ProcessBuilder builder = new ProcessBuilder(getExecString()); Timer timeOutTimer = null; @@ -949,11 +1009,11 @@ private void runCommand() throws IOException { timeOutTimer.schedule(timeoutTimerTask, timeOutInterval); } final BufferedReader errReader = - new BufferedReader(new InputStreamReader( - process.getErrorStream(), Charset.defaultCharset())); + new BufferedReader(new InputStreamReader(process.getErrorStream(), + StandardCharsets.UTF_8)); BufferedReader inReader = - new BufferedReader(new InputStreamReader( - process.getInputStream(), Charset.defaultCharset())); + new BufferedReader(new InputStreamReader(process.getInputStream(), + StandardCharsets.UTF_8)); final StringBuffer errMsg = new StringBuffer(); // read error and input streams as this would free up the buffers @@ -1050,10 +1110,19 @@ private static void joinThread(Thread t) { } } - /** return an array containing the command name and its parameters. */ + /** + * return an array containing the command name and its parameters. + * + * @return exec string array. + */ protected abstract String[] getExecString(); - /** Parse the execution result */ + /** + * Parse the execution result. + * + * @param lines lines. + * @throws IOException raised on errors performing I/O. + * */ protected abstract void parseExecResult(BufferedReader lines) throws IOException; @@ -1284,6 +1353,7 @@ private void setTimedOut() { * the Shell interface. * @param cmd shell command to execute. * @return the output of the executed command. + * @throws IOException raised on errors performing I/O. */ public static String execCommand(String ... cmd) throws IOException { return execCommand(null, cmd, 0L); @@ -1368,6 +1438,8 @@ public static void destroyAllShellProcesses() { /** * Static method to return a Set of all Shell objects. + * + * @return all shells set. 
*/ public static Set getAllShells() { synchronized (CHILD_SHELLS) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java index 76d90063609b2..f044295a8068d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.util; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -147,14 +147,14 @@ private static void shutdownExecutor(final Configuration conf) { shutdownTimeout, TIME_UNIT_DEFAULT)) { // timeout waiting for the - LOG.error("ShutdownHookManger shutdown forcefully after" + LOG.error("ShutdownHookManager shutdown forcefully after" + " {} seconds.", shutdownTimeout); EXECUTOR.shutdownNow(); } - LOG.debug("ShutdownHookManger completed shutdown."); + LOG.debug("ShutdownHookManager completed shutdown."); } catch (InterruptedException ex) { // interrupted. 
- LOG.error("ShutdownHookManger interrupted while waiting for " + + LOG.error("ShutdownHookManager interrupted while waiting for " + "termination.", ex); EXECUTOR.shutdownNow(); Thread.currentThread().interrupt(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java index 50a728e568a4e..4dc761e9d780c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java @@ -18,7 +18,7 @@ package org.apache.hadoop.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -68,10 +68,12 @@ public static boolean shutdownThread(Thread thread, } /** + * shutdownExecutorService. + * * @param service {@link ExecutorService to be shutdown} * @return true if the service is terminated, * false otherwise - * @throws InterruptedException + * @throws InterruptedException if the thread is interrupted. */ public static boolean shutdownExecutorService(ExecutorService service) throws InterruptedException { @@ -79,13 +81,15 @@ public static boolean shutdownExecutorService(ExecutorService service) } /** + * shutdownExecutorService. + * * @param service {@link ExecutorService to be shutdown} * @param timeoutInMs time to wait for {@link * ExecutorService#awaitTermination(long, java.util.concurrent.TimeUnit)} * calls in milli seconds. * @return true if the service is terminated, * false otherwise - * @throws InterruptedException + * @throws InterruptedException if the thread is interrupted. 
*/ public static boolean shutdownExecutorService(ExecutorService service, long timeoutInMs) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StopWatch.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StopWatch.java index c0eedf6110d7f..7ccaebedb5662 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StopWatch.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StopWatch.java @@ -89,6 +89,9 @@ public StopWatch reset() { } /** + * now. + * + * @param timeUnit timeUnit. * @return current elapsed time in specified timeunit. */ public long now(TimeUnit timeUnit) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java index 028e49acbe59a..2cbaa2ac1f1a6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java @@ -21,8 +21,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.collect.Interner; -import com.google.common.collect.Interners; +import org.apache.hadoop.thirdparty.com.google.common.collect.Interner; +import org.apache.hadoop.thirdparty.com.google.common.collect.Interners; /** * Provides string interning utility methods. For weak interning, @@ -74,6 +74,9 @@ public static String weakIntern(String sample) { /** * Interns all the strings in the given array in place, * returning the same array. + * + * @param strings strings. + * @return internStringsInArray. 
*/ public static String[] internStringsInArray(String[] strings) { for (int i = 0; i < strings.length; i++) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java index cf7b04ab61a7e..b88ca62d78413 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java @@ -41,9 +41,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Path; import org.apache.hadoop.net.NetUtils; +import org.apache.log4j.LogManager; -import com.google.common.base.Preconditions; -import com.google.common.net.InetAddresses; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; /** * General string utils @@ -120,7 +121,12 @@ public static String humanReadableInt(long number) { return TraditionalBinaryPrefix.long2String(number, "", 1); } - /** The same as String.format(Locale.ENGLISH, format, objects). */ + /** + * The same as String.format(Locale.ENGLISH, format, objects). + * @param format format. + * @param objects objects. + * @return format string. + */ public static String format(final String format, final Object... objects) { return String.format(Locale.ENGLISH, format, objects); } @@ -156,7 +162,7 @@ public static String arrayToString(String[] strs) { /** * Given an array of bytes it will convert the bytes to a hex string * representation of the bytes - * @param bytes + * @param bytes bytes. 
* @param start start index, inclusively * @param end end index, exclusively * @return hex string representation of the byte array @@ -172,7 +178,11 @@ public static String byteToHexString(byte[] bytes, int start, int end) { return s.toString(); } - /** Same as byteToHexString(bytes, 0, bytes.length). */ + /** + * Same as byteToHexString(bytes, 0, bytes.length). + * @param bytes bytes. + * @return byteToHexString. + */ public static String byteToHexString(byte bytes[]) { return byteToHexString(bytes, 0, bytes.length); } @@ -203,8 +213,9 @@ public static byte[] hexStringToByte(String hex) { return bts; } /** - * - * @param uris + * uriToString. + * @param uris uris. + * @return uriToString. */ public static String uriToString(URI[] uris){ if (uris == null) { @@ -242,8 +253,9 @@ public static URI[] stringToURI(String[] str){ } /** - * - * @param str + * stringToPath. + * @param str str. + * @return path array. */ public static Path[] stringToPath(String[] str){ if (str == null) { @@ -263,6 +275,8 @@ public static Path[] stringToPath(String[] str){ * * @param finishTime finish time * @param startTime start time + * @return a String in the format Xhrs, Ymins, Z sec, + * for the time difference between two times. */ public static String formatTimeDiff(long finishTime, long startTime){ long timeDiff = finishTime - startTime; @@ -275,6 +289,7 @@ public static String formatTimeDiff(long finishTime, long startTime){ * String in the format Xhrs, Ymins, Z sec. * * @param timeDiff The time difference to format + * @return formatTime String. */ public static String formatTime(long timeDiff){ StringBuilder buf = new StringBuilder(); @@ -305,6 +320,7 @@ public static String formatTime(long timeDiff){ * more than 100 hours ,it is displayed as 99hrs, 59mins, 59sec. * * @param timeDiff The time difference to format + * @return format time sortable. 
*/ public static String formatTimeSortable(long timeDiff) { StringBuilder buf = new StringBuilder(); @@ -563,6 +579,7 @@ public static String[] split( * @param escapeChar character used to escape * @param start from where to search * @param split used to pass back the extracted string + * @return index. */ public static int findNext(String str, char separator, char escapeChar, int start, StringBuilder split) { @@ -615,7 +632,12 @@ private static boolean hasChar(char[] chars, char character) { } /** + * escapeString. + * + * @param str str. + * @param escapeChar escapeChar. * @param charsToEscape array of characters to be escaped + * @return escapeString. */ public static String escapeString(String str, char escapeChar, char[] charsToEscape) { @@ -658,7 +680,11 @@ public static String unEscapeString( } /** + * unEscapeString. + * @param str str. + * @param escapeChar escapeChar. * @param charsToEscape array of characters to unescape + * @return escape string. */ public static String unEscapeString(String str, char escapeChar, char[] charsToEscape) { @@ -752,6 +778,7 @@ static void startupShutdownMessage(Class clazz, String[] args, public void run() { LOG.info(toStartupShutdownString("SHUTDOWN_MSG: ", new String[]{ "Shutting down " + classname + " at " + hostname})); + LogManager.shutdown(); } }, SHUTDOWN_HOOK_PRIORITY); @@ -806,7 +833,10 @@ private TraditionalBinaryPrefix(int bitShift) { } /** - * @return The TraditionalBinaryPrefix object corresponding to the symbol. + * The TraditionalBinaryPrefix object corresponding to the symbol. + * + * @param symbol symbol. + * @return traditional binary prefix object. */ public static TraditionalBinaryPrefix valueOf(char symbol) { symbol = Character.toUpperCase(symbol); @@ -906,7 +936,7 @@ public static String long2String(long n, String unit, int decimalPlaces) { /** * Escapes HTML Special characters present in the string. - * @param string + * @param string param string. 
* @return HTML Escaped String representation */ public static String escapeHTML(String string) { @@ -941,13 +971,22 @@ public static String escapeHTML(String string) { } /** + * a byte description of the given long interger value. + * + * @param len len. * @return a byte description of the given long interger value. */ public static String byteDesc(long len) { return TraditionalBinaryPrefix.long2String(len, "B", 2); } - /** @deprecated use StringUtils.format("%.2f", d). */ + /** + * limitDecimalTo2. + * + * @param d double param. + * @return string value ("%.2f"). + * @deprecated use StringUtils.format("%.2f", d). + */ @Deprecated public static String limitDecimalTo2(double d) { return format("%.2f", d); @@ -958,6 +997,7 @@ public static String limitDecimalTo2(double d) { * * @param separator Separator to join with. * @param strings Strings to join. + * @return join string. */ public static String join(CharSequence separator, Iterable strings) { Iterator i = strings.iterator(); @@ -1053,6 +1093,8 @@ public static String replaceTokens(String template, Pattern pattern, /** * Get stack trace for a given thread. + * @param t thread. + * @return stack trace string. 
*/ public static String getStackTrace(Thread t) { final StackTraceElement[] stackTrace = t.getStackTrace(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java index 3591bb81e121c..6f2f585c87f22 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java @@ -30,7 +30,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java index e8940141b43c5..4d86153345bae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Time.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Time.java index 42005f0b09b3e..f0ce85bbac873 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Time.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Time.java @@ -81,6 +81,8 @@ public static long monotonicNowNanos() { /** * Convert time in millisecond to human readable format. + * + * @param millis millisecond. * @return a human readable string for the input time */ public static String formatTime(long millis) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Tool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Tool.java index a4fbce4ace86f..63c275a1b06bd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Tool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Tool.java @@ -32,7 +32,7 @@ * and only handle its custom arguments.

      * *

      Here is how a typical Tool is implemented:

      - * <p><blockquote><pre>

      + * <blockquote><pre>
        *     public class MyApp extends Configured implements Tool {
        *     
        *       public int run(String[] args) throws Exception {
      @@ -69,7 +69,7 @@
        *         System.exit(res);
        *       }
        *     }
      - * </pre></blockquote></p>

      + * </pre></blockquote>

      * * @see GenericOptionsParser * @see ToolRunner @@ -82,7 +82,7 @@ public interface Tool extends Configurable { * * @param args command specific arguments. * @return exit code. - * @throws Exception + * @throws Exception command exception. */ int run(String [] args) throws Exception; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ToolRunner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ToolRunner.java index 8740be49d97bc..b2b57f233a7b2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ToolRunner.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ToolRunner.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.audit.CommonAuditContext; import org.apache.hadoop.ipc.CallerContext; /** @@ -56,6 +57,7 @@ public class ToolRunner { * @param tool Tool to run. * @param args command-line arguments to the tool. * @return exit code of the {@link Tool#run(String[])} method. + * @throws Exception Exception. */ public static int run(Configuration conf, Tool tool, String[] args) throws Exception{ @@ -63,6 +65,10 @@ public static int run(Configuration conf, Tool tool, String[] args) CallerContext ctx = new CallerContext.Builder("CLI").build(); CallerContext.setCurrent(ctx); } + // Note the entry point in the audit context; this + // may be used in audit events set to cloud store logs + // or elsewhere. + CommonAuditContext.noteEntryPoint(tool); if(conf == null) { conf = new Configuration(); @@ -84,6 +90,7 @@ public static int run(Configuration conf, Tool tool, String[] args) * @param tool Tool to run. * @param args command-line arguments to the tool. * @return exit code of the {@link Tool#run(String[])} method. + * @throws Exception exception. 
*/ public static int run(Tool tool, String[] args) throws Exception{ @@ -102,7 +109,13 @@ public static void printGenericCommandUsage(PrintStream out) { /** * Print out a prompt to the user, and return true if the user - * responds with "y" or "yes". (case insensitive) + * responds with "y" or "yes". (case insensitive). + * + * @param prompt prompt. + * @throws IOException raised on errors performing I/O. + * @return if the user + * responds with "y" or "yes". (case insensitive) true, + * not false. */ public static boolean confirmPrompt(String prompt) throws IOException { while (true) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java index ea835023e8658..31fe3c6377b94 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java @@ -93,6 +93,10 @@ protected String _getProtocVersion() { return info.getProperty("protocVersion", "Unknown"); } + protected String _getCompilePlatform() { + return info.getProperty("compilePlatform", "Unknown"); + } + private static VersionInfo COMMON_VERSION_INFO = new VersionInfo("common"); /** * Get the Hadoop version. @@ -167,12 +171,21 @@ public static String getProtocVersion(){ return COMMON_VERSION_INFO._getProtocVersion(); } + /** + * Returns the OS platform used for the build. 
+ * @return the OS platform + */ + public static String getCompilePlatform() { + return COMMON_VERSION_INFO._getCompilePlatform(); + } + public static void main(String[] args) { LOG.debug("version: "+ getVersion()); System.out.println("Hadoop " + getVersion()); System.out.println("Source code repository " + getUrl() + " -r " + getRevision()); System.out.println("Compiled by " + getUser() + " on " + getDate()); + System.out.println("Compiled on platform " + getCompilePlatform()); System.out.println("Compiled with protoc " + getProtocVersion()); System.out.println("From source with checksum " + getSrcChecksum()); System.out.println("This command was run using " + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/WeakReferenceMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/WeakReferenceMap.java new file mode 100644 index 0000000000000..18d180ee47024 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/WeakReferenceMap.java @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util; + +import java.lang.ref.WeakReference; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; +import java.util.function.Function; + +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.store.LogExactlyOnce; + +import static java.util.Objects.requireNonNull; + +/** + * A map of keys of type K to objects of type V which uses weak references, + * so does not leak memory through long-lived references + * at the expense of losing references when GC takes place. + * + * This class is intended to be used instead of ThreadLocal storage when + * references are to be cleaned up when the instance holding. + * In this use case, the key is the Long key. + * + * Concurrency. + * The class assumes that map entries are rarely contended for when writing, + * and that not blocking other threads is more important than atomicity. + * - a ConcurrentHashMap is used to map keys to weak references, with + * all its guarantees. + * - there is no automatic pruning. + * - see {@link #create(Object)} for the concurrency semantics on entry creation. + */ +@InterfaceAudience.Private +public class WeakReferenceMap { + + private static final Logger LOG = + LoggerFactory.getLogger(WeakReferenceMap.class); + + /** + * The reference map. + */ + private final Map> map = new ConcurrentHashMap<>(); + + /** + * Supplier of new instances. + */ + private final Function factory; + + /** + * Nullable callback when a get on a key got a weak reference back. + * The assumption is that this is for logging/stats, which is why + * no attempt is made to use the call as a supplier of a new value. + */ + private final Consumer referenceLost; + + /** + * Counter of references lost.
+ */ + private final AtomicLong referenceLostCount = new AtomicLong(); + + /** + * Counter of entries created. + */ + private final AtomicLong entriesCreatedCount = new AtomicLong(); + + /** + * Log to report loss of a reference during the create phase, which + * is believed to be a cause of HADOOP-18456. + */ + private final LogExactlyOnce referenceLostDuringCreation = new LogExactlyOnce(LOG); + + /** + * instantiate. + * @param factory supplier of new instances + * @param referenceLost optional callback on lost references. + */ + public WeakReferenceMap( + Function factory, + @Nullable final Consumer referenceLost) { + + this.factory = requireNonNull(factory); + this.referenceLost = referenceLost; + } + + @Override + public String toString() { + return "WeakReferenceMap{" + + "size=" + size() + + ", referenceLostCount=" + referenceLostCount + + ", entriesCreatedCount=" + entriesCreatedCount + + '}'; + } + + /** + * Map size. + * @return the current map size. + */ + public int size() { + return map.size(); + } + + /** + * Clear all entries. + */ + public void clear() { + map.clear(); + } + + /** + * look up the value, returning the possibly empty weak reference + * to a value, or null if no value was found. + * @param key key to look up + * @return null if there is no entry, a weak reference if found + */ + public WeakReference lookup(K key) { + return map.get(key); + } + + /** + * Get the value, creating if needed. + * @param key key. + * @return an instance. + */ + public V get(K key) { + final WeakReference currentWeakRef = lookup(key); + // resolve it, after which if not null, we have a strong reference + V strongVal = resolve(currentWeakRef); + if (strongVal != null) { + // all good. + return strongVal; + } + + // here, either currentWeakRef was null, or its reference was GC'd. + if (currentWeakRef != null) { + // garbage collection removed the reference. 
+ + // explicitly remove the weak ref from the map if it has not + // been updated by this point + // this is here just for completeness. + map.remove(key, currentWeakRef); + + // log/report the loss. + noteLost(key); + } + + // create a new value and add it to the map + return create(key); + } + + /** + * Create a new instance under a key. + *

      <p> + * The instance is created, added to the map and then the + * map value retrieved. + * This ensures that the reference returned is that in the map, + * even if there is more than one entry being created at the same time. + * If that race does occur, it will be logged the first time it happens + * for this specific map instance. + * <p>

      + * HADOOP-18456 highlighted the risk of a concurrent GC resulting in a null + * value being retrieved and so returned. + * To prevent this: + * <ol>

      + * <li>A strong reference is retained to the newly created instance + * in a local variable.</li>
      + * <li>That variable is used after the resolution process, to ensure + * the JVM doesn't consider it "unreachable" and so eligible for GC.</li>
      + * <li>A check is made for the resolved reference being null, and if so, + * the put() is repeated.</li>
      + * </ol>
      + * @param key key + * @return the created value + */ + public V create(K key) { + entriesCreatedCount.incrementAndGet(); + /* + Get a strong ref so even if a GC happens in this method the reference is not lost. + It is NOT enough to have a reference in a field, it MUST be used + so as to ensure the reference isn't optimized away prematurely. + "A reachable object is any object that can be accessed in any potential continuing + computation from any live thread." + */ + + final V strongRef = requireNonNull(factory.apply(key), + "factory returned a null instance"); + V resolvedStrongRef; + do { + WeakReference newWeakRef = new WeakReference<>(strongRef); + + // put it in the map + map.put(key, newWeakRef); + + // get it back from the map + WeakReference retrievedWeakRef = map.get(key); + // resolve that reference, handling the situation where somehow it was removed from the map + // between the put() and the get() + resolvedStrongRef = resolve(retrievedWeakRef); + if (resolvedStrongRef == null) { + referenceLostDuringCreation.warn("reference to %s lost during creation", key); + noteLost(key); + } + } while (resolvedStrongRef == null); + + // note if there was any change in the reference. + // as this forces strongRef to be kept in scope + if (strongRef != resolvedStrongRef) { + LOG.debug("Created instance for key {}: {} overwritten by {}", + key, strongRef, resolvedStrongRef); + } + + return resolvedStrongRef; + } + + /** + * Put a value under the key. + * A null value can be put, though on a get() call + * a new entry is generated + * + * @param key key + * @param value value + * @return any old non-null reference. + */ + public V put(K key, V value) { + return resolve(map.put(key, new WeakReference<>(value))); + } + + /** + * Remove any value under the key. + * @param key key + * @return any old non-null reference. + */ + public V remove(K key) { + return resolve(map.remove(key)); + } + + /** + * Does the map have a valid reference for this object? 
+ * no-side effects: there's no attempt to notify or cleanup + * if the reference is null. + * @param key key to look up + * @return true if there is a valid reference. + */ + public boolean containsKey(K key) { + final WeakReference current = lookup(key); + return resolve(current) != null; + } + + /** + * Given a possibly null weak reference, resolve + * its value. + * @param r reference to resolve + * @return the value or null + */ + protected V resolve(WeakReference r) { + return r == null ? null : r.get(); + } + + /** + * Prune all null weak references, calling the referenceLost + * callback for each one. + * + * non-atomic and non-blocking. + * @return the number of entries pruned. + */ + public int prune() { + int count = 0; + final Iterator>> it = map.entrySet().iterator(); + while (it.hasNext()) { + final Map.Entry> next = it.next(); + if (next.getValue().get() == null) { + it.remove(); + count++; + noteLost(next.getKey()); + } + } + return count; + } + + /** + * Notify the reference lost callback. + * @param key key of lost reference + */ + private void noteLost(final K key) { + // increment local counter + referenceLostCount.incrementAndGet(); + + // and call any notification function supplied in the constructor + if (referenceLost != null) { + referenceLost.accept(key); + } + } + + /** + * Get count of references lost as detected + * during prune() or get() calls. + * @return count of references lost + */ + public final long getReferenceLostCount() { + return referenceLostCount.get(); + } + + /** + * Get count of entries created on demand. 
+ * @return count of entries created + */ + public final long getEntriesCreatedCount() { + return entriesCreatedCount.get(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java index 6d3894f5b4ca5..8a5d2f3661500 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java @@ -18,13 +18,23 @@ package org.apache.hadoop.util; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; import javax.xml.transform.*; +import javax.xml.transform.sax.SAXTransformerFactory; import javax.xml.transform.stream.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; + import java.io.*; +import java.util.concurrent.atomic.AtomicBoolean; /** * General xml utilities. 
@@ -33,21 +43,45 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class XMLUtils { + + private static final Logger LOG = + LoggerFactory.getLogger(XMLUtils.class); + + public static final String DISALLOW_DOCTYPE_DECL = + "http://apache.org/xml/features/disallow-doctype-decl"; + public static final String LOAD_EXTERNAL_DECL = + "http://apache.org/xml/features/nonvalidating/load-external-dtd"; + public static final String EXTERNAL_GENERAL_ENTITIES = + "http://xml.org/sax/features/external-general-entities"; + public static final String EXTERNAL_PARAMETER_ENTITIES = + "http://xml.org/sax/features/external-parameter-entities"; + public static final String CREATE_ENTITY_REF_NODES = + "http://apache.org/xml/features/dom/create-entity-ref-nodes"; + public static final String VALIDATION = + "http://xml.org/sax/features/validation"; + + private static final AtomicBoolean CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_DTD = + new AtomicBoolean(true); + private static final AtomicBoolean CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_STYLESHEET = + new AtomicBoolean(true); + /** * Transform input xml given a stylesheet. * * @param styleSheet the style-sheet * @param xml input xml data * @param out output - * @throws TransformerConfigurationException - * @throws TransformerException + * @throws TransformerConfigurationException if there is a problem + * creating the transformer object. + * @throws TransformerException if an unrecoverable error occurs + * during the transformation. 
*/ public static void transform( InputStream styleSheet, InputStream xml, Writer out ) throws TransformerConfigurationException, TransformerException { // Instantiate a TransformerFactory - TransformerFactory tFactory = TransformerFactory.newInstance(); + TransformerFactory tFactory = newSecureTransformerFactory(); // Use the TransformerFactory to process the // stylesheet and generate a Transformer @@ -59,4 +93,118 @@ public static void transform( // and send the output to a Result object. transformer.transform(new StreamSource(xml), new StreamResult(out)); } + + /** + * This method should be used if you need a {@link DocumentBuilderFactory}. Use this method + * instead of {@link DocumentBuilderFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. + * + * @return a {@link DocumentBuilderFactory} with secure configuration enabled + * @throws ParserConfigurationException if the {@code JAXP} parser does not support the + * secure configuration + */ + public static DocumentBuilderFactory newSecureDocumentBuilderFactory() + throws ParserConfigurationException { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); + dbf.setFeature(LOAD_EXTERNAL_DECL, false); + dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); + dbf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false); + dbf.setFeature(CREATE_ENTITY_REF_NODES, false); + return dbf; + } + + /** + * This method should be used if you need a {@link SAXParserFactory}. Use this method + * instead of {@link SAXParserFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. 
+ * + * @return a {@link SAXParserFactory} with secure configuration enabled + * @throws ParserConfigurationException if the {@code JAXP} parser does not support the + * secure configuration + * @throws SAXException if there are any other issues when creating the factory + */ + public static SAXParserFactory newSecureSAXParserFactory() + throws SAXException, ParserConfigurationException { + SAXParserFactory spf = SAXParserFactory.newInstance(); + spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + spf.setFeature(DISALLOW_DOCTYPE_DECL, true); + spf.setFeature(LOAD_EXTERNAL_DECL, false); + spf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); + spf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false); + return spf; + } + + /** + * This method should be used if you need a {@link TransformerFactory}. Use this method + * instead of {@link TransformerFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. + * + * @return a {@link TransformerFactory} with secure configuration enabled + * @throws TransformerConfigurationException if the {@code JAXP} transformer does not + * support the secure configuration + */ + public static TransformerFactory newSecureTransformerFactory() + throws TransformerConfigurationException { + TransformerFactory trfactory = TransformerFactory.newInstance(); + trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + setOptionalSecureTransformerAttributes(trfactory); + return trfactory; + } + + /** + * This method should be used if you need a {@link SAXTransformerFactory}. Use this method + * instead of {@link SAXTransformerFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. 
+ * + * @return a {@link SAXTransformerFactory} with secure configuration enabled + * @throws TransformerConfigurationException if the {@code JAXP} transformer does not + * support the secure configuration + */ + public static SAXTransformerFactory newSecureSAXTransformerFactory() + throws TransformerConfigurationException { + SAXTransformerFactory trfactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance(); + trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + setOptionalSecureTransformerAttributes(trfactory); + return trfactory; + } + + /** + * These attributes are recommended for maximum security but some JAXP transformers do + * not support them. If at any stage, we fail to set these attributes, then we won't try again + * for subsequent transformers. + * + * @param transformerFactory to update + */ + private static void setOptionalSecureTransformerAttributes( + TransformerFactory transformerFactory) { + bestEffortSetAttribute(transformerFactory, CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_DTD, + XMLConstants.ACCESS_EXTERNAL_DTD, ""); + bestEffortSetAttribute(transformerFactory, CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_STYLESHEET, + XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + } + + /** + * Set an attribute value on a {@link TransformerFactory}. If the TransformerFactory + * does not support the attribute, the method sets the supplied flag to false and + * logs the issue at debug level. 
+ * + * @param transformerFactory to update + * @param flag that indicates whether to do the update and the flag can be set to + * false if an update fails + * @param name of the attribute to set + * @param value to set on the attribute + */ + static void bestEffortSetAttribute(TransformerFactory transformerFactory, AtomicBoolean flag, + String name, Object value) { + if (flag.get()) { + try { + transformerFactory.setAttribute(name, value); + } catch (Throwable t) { + flag.set(false); + LOG.debug("Issue setting TransformerFactory attribute {}: {}", name, t.toString()); + } + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java index 48cef5f06fcc9..59bfb9428f50b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java @@ -27,10 +27,10 @@ import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Id; -import com.google.common.base.Charsets; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; -import com.google.common.io.Files; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.io.Files; /** * Utilities for working with ZooKeeper. @@ -89,6 +89,7 @@ public static int removeSpecificPerms(int perms, int remove) { * Parse comma separated list of ACL entries to secure generated nodes, e.g. * sasl:hdfs/host1@MY.DOMAIN:cdrwa,sasl:hdfs/host2@MY.DOMAIN:cdrwa * + * @param aclString aclString. 
* @return ACL list * @throws BadAclFormatException if an ACL is invalid */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/Key.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/Key.java index e8ad18cfc87e3..e2299365f8e22 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/Key.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/Key.java @@ -100,8 +100,8 @@ public Key(byte[] value, double weight) { } /** - * @param value - * @param weight + * @param value value. + * @param weight weight. */ public void set(byte[] value, double weight) { if (value == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGet.java index 9304b483952d0..35ec9115a5f4c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGet.java @@ -52,7 +52,13 @@ R get(long timeout, TimeUnit unit) /** Utility */ class Util { - /** Use {@link #get(long, TimeUnit)} timeout parameters to wait. */ + /** + * Use {@link #get(long, TimeUnit)} timeout parameters to wait. + * @param obj object. + * @param timeout timeout. + * @param unit unit. + * @throws InterruptedException if the thread is interrupted. 
+ */ public static void wait(Object obj, long timeout, TimeUnit unit) throws InterruptedException { if (timeout < 0) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGetFuture.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGetFuture.java index 61eb777d4806d..46fc8df37d3f3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGetFuture.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/AsyncGetFuture.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.util.concurrent; -import com.google.common.util.concurrent.AbstractFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.AbstractFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/ExecutorHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/ExecutorHelper.java index 0b349518e4c2b..5cc92eb71bb64 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/ExecutorHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/ExecutorHelper.java @@ -47,12 +47,12 @@ static void logThrowableFromAfterExecute(Runnable r, Throwable t) { try { ((Future) r).get(); } catch (ExecutionException ee) { - LOG.warn( - "Execution exception when running task in " + Thread.currentThread() + LOG.debug( + "Execution exception when running task in {}", Thread.currentThread() .getName()); t = ee.getCause(); } catch (InterruptedException ie) { - LOG.warn("Thread (" + Thread.currentThread() + ") interrupted: ", ie); + LOG.debug("Thread ( {} ) interrupted: ", Thread.currentThread(), ie); Thread.currentThread().interrupt(); } catch (Throwable throwable) { t = throwable; @@ -60,8 +60,8 @@ static void 
logThrowableFromAfterExecute(Runnable r, Throwable t) { } if (t != null) { - LOG.warn("Caught exception in thread " + Thread - .currentThread().getName() + ": ", t); + LOG.warn("Caught exception in thread {}: ", Thread + .currentThread().getName(), t); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/HadoopExecutors.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/HadoopExecutors.java index 0bbceb59c31e7..6e2838bfe9c97 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/HadoopExecutors.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/HadoopExecutors.java @@ -115,9 +115,8 @@ public static void shutdown(ExecutorService executorService, Logger logger, try { executorService.shutdown(); - logger.debug( - "Gracefully shutting down executor service. Waiting max {} {}", - timeout, unit); + logger.debug("Gracefully shutting down executor service {}. Waiting max {} {}", + executorService, timeout, unit); if (!executorService.awaitTermination(timeout, unit)) { logger.debug( "Executor service has not shutdown yet. Forcing. " diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ChildReaper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ChildReaper.java deleted file mode 100644 index 86142fb6d3a4f..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ChildReaper.java +++ /dev/null @@ -1,234 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hadoop.util.curator; - -import com.google.common.base.Preconditions; -import org.apache.curator.framework.recipes.locks.Reaper; -import org.apache.curator.utils.CloseableUtils; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.utils.CloseableScheduledExecutorService; -import org.apache.curator.utils.ThreadUtils; -import org.apache.curator.utils.ZKPaths; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.zookeeper.data.Stat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.Closeable; -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.Future; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import org.apache.curator.utils.PathUtils; - -/** - * This is a copy of Curator 2.7.1's ChildReaper class, modified to work with - * Guava 11.0.2. The problem is the 'paths' Collection, which calls Guava's - * Sets.newConcurrentHashSet(), which was added in Guava 15.0. - *

      - * Utility to reap empty child nodes of a parent node. Periodically calls getChildren on - * the node and adds empty nodes to an internally managed {@link Reaper} - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class ChildReaper implements Closeable -{ - private final Logger log = LoggerFactory.getLogger(getClass()); - private final Reaper reaper; - private final AtomicReference state = new AtomicReference(State.LATENT); - private final CuratorFramework client; - private final Collection paths = newConcurrentHashSet(); - private final Reaper.Mode mode; - private final CloseableScheduledExecutorService executor; - private final int reapingThresholdMs; - - private volatile Future task; - - // This is copied from Curator's Reaper class - static final int DEFAULT_REAPING_THRESHOLD_MS = (int)TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES); - - // This is copied from Guava - /** - * Creates a thread-safe set backed by a hash map. The set is backed by a - * {@link ConcurrentHashMap} instance, and thus carries the same concurrency - * guarantees. - * - *

      Unlike {@code HashSet}, this class does NOT allow {@code null} to be - * used as an element. The set is serializable. - * - * @return a new, empty thread-safe {@code Set} - * @since 15.0 - */ - public static Set newConcurrentHashSet() { - return Collections.newSetFromMap(new ConcurrentHashMap()); - } - - private enum State - { - LATENT, - STARTED, - CLOSED - } - - /** - * @param client the client - * @param path path to reap children from - * @param mode reaping mode - */ - public ChildReaper(CuratorFramework client, String path, Reaper.Mode mode) - { - this(client, path, mode, newExecutorService(), DEFAULT_REAPING_THRESHOLD_MS, null); - } - - /** - * @param client the client - * @param path path to reap children from - * @param reapingThresholdMs threshold in milliseconds that determines that a path can be deleted - * @param mode reaping mode - */ - public ChildReaper(CuratorFramework client, String path, Reaper.Mode mode, int reapingThresholdMs) - { - this(client, path, mode, newExecutorService(), reapingThresholdMs, null); - } - - /** - * @param client the client - * @param path path to reap children from - * @param executor executor to use for background tasks - * @param reapingThresholdMs threshold in milliseconds that determines that a path can be deleted - * @param mode reaping mode - */ - public ChildReaper(CuratorFramework client, String path, Reaper.Mode mode, ScheduledExecutorService executor, int reapingThresholdMs) - { - this(client, path, mode, executor, reapingThresholdMs, null); - } - - /** - * @param client the client - * @param path path to reap children from - * @param executor executor to use for background tasks - * @param reapingThresholdMs threshold in milliseconds that determines that a path can be deleted - * @param mode reaping mode - * @param leaderPath if not null, uses a leader selection so that only 1 reaper is active in the cluster - */ - public ChildReaper(CuratorFramework client, String path, Reaper.Mode mode, 
ScheduledExecutorService executor, int reapingThresholdMs, String leaderPath) - { - this.client = client; - this.mode = mode; - this.executor = new CloseableScheduledExecutorService(executor); - this.reapingThresholdMs = reapingThresholdMs; - this.reaper = new Reaper(client, executor, reapingThresholdMs, leaderPath); - addPath(path); - } - - /** - * The reaper must be started - * - * @throws Exception errors - */ - public void start() throws Exception - { - Preconditions.checkState(state.compareAndSet(State.LATENT, State.STARTED), "Cannot be started more than once"); - - task = executor.scheduleWithFixedDelay - ( - new Runnable() - { - @Override - public void run() - { - doWork(); - } - }, - reapingThresholdMs, - reapingThresholdMs, - TimeUnit.MILLISECONDS - ); - - reaper.start(); - } - - @Override - public void close() throws IOException - { - if ( state.compareAndSet(State.STARTED, State.CLOSED) ) - { - CloseableUtils.closeQuietly(reaper); - task.cancel(true); - } - } - - /** - * Add a path to reap children from - * - * @param path the path - * @return this for chaining - */ - public ChildReaper addPath(String path) - { - paths.add(PathUtils.validatePath(path)); - return this; - } - - /** - * Remove a path from reaping - * - * @param path the path - * @return true if the path existed and was removed - */ - public boolean removePath(String path) - { - return paths.remove(PathUtils.validatePath(path)); - } - - private static ScheduledExecutorService newExecutorService() - { - return ThreadUtils.newFixedThreadScheduledPool(2, "ChildReaper"); - } - - private void doWork() - { - for ( String path : paths ) - { - try - { - List children = client.getChildren().forPath(path); - for ( String name : children ) - { - String thisPath = ZKPaths.makePath(path, name); - Stat stat = client.checkExists().forPath(thisPath); - if ( (stat != null) && (stat.getNumChildren() == 0) ) - { - reaper.addPath(thisPath, mode); - } - } - } - catch ( Exception e ) - { - log.error("Could not 
get children for path: " + path, e); - } - } - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java index 36dade27dd6f7..8fcf456c76a82 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java @@ -28,18 +28,23 @@ import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.api.transaction.CuratorOp; import org.apache.curator.retry.RetryNTimes; +import org.apache.curator.utils.ZookeeperFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.authentication.util.JaasConfiguration; import org.apache.hadoop.util.ZKUtil; import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Helper class that provides utility methods specific to ZK operations. @@ -79,8 +84,11 @@ public void close() { /** * Utility method to fetch the ZK ACLs from the configuration. + * + * @param conf configuration. * @throws java.io.IOException if the Zookeeper ACLs configuration file * cannot be read + * @return acl list. */ public static List getZKAcls(Configuration conf) throws IOException { // Parse authentication from configuration. 
@@ -98,9 +106,12 @@ public static List getZKAcls(Configuration conf) throws IOException { /** * Utility method to fetch ZK auth info from the configuration. + * + * @param conf configuration. * @throws java.io.IOException if the Zookeeper ACLs configuration file * cannot be read * @throws ZKUtil.BadAuthFormatException if the auth format is invalid + * @return ZKAuthInfo List. */ public static List getZKAuths(Configuration conf) throws IOException { @@ -148,6 +159,10 @@ public void start(List authInfos) throws IOException { CuratorFramework client = CuratorFrameworkFactory.builder() .connectString(zkHostPort) + .zookeeperFactory(new HadoopZookeeperFactory( + conf.get(CommonConfigurationKeys.ZK_SERVER_PRINCIPAL), + conf.get(CommonConfigurationKeys.ZK_KERBEROS_PRINCIPAL), + conf.get(CommonConfigurationKeys.ZK_KERBEROS_KEYTAB))) .sessionTimeoutMs(zkSessionTimeout) .retryPolicy(retryPolicy) .authorization(authInfos) @@ -161,7 +176,7 @@ public void start(List authInfos) throws IOException { * Get ACLs for a ZNode. * @param path Path of the ZNode. * @return The list of ACLs. - * @throws Exception + * @throws Exception If it cannot contact Zookeeper. */ public List getACL(final String path) throws Exception { return curator.getACL().forPath(path); @@ -180,7 +195,7 @@ public byte[] getData(final String path) throws Exception { /** * Get the data in a ZNode. * @param path Path of the ZNode. - * @param stat + * @param stat stat. * @return The data in the ZNode. * @throws Exception If it cannot contact Zookeeper. */ @@ -357,7 +372,10 @@ public void safeCreate(String path, byte[] data, List acl, /** * Deletes the path. Checks for existence of path as well. + * * @param path Path to be deleted. + * @param fencingNodePath fencingNodePath. + * @param fencingACL fencingACL. * @throws Exception if any problem occurs while performing deletion. 
*/ public void safeDelete(final String path, List fencingACL, @@ -428,4 +446,61 @@ public void setData(String path, byte[] data, int version) .forPath(path, data)); } } + + public static class HadoopZookeeperFactory implements ZookeeperFactory { + public final static String JAAS_CLIENT_ENTRY = "HadoopZookeeperClient"; + private final String zkPrincipal; + private final String kerberosPrincipal; + private final String kerberosKeytab; + + public HadoopZookeeperFactory(String zkPrincipal) { + this(zkPrincipal, null, null); + } + + public HadoopZookeeperFactory(String zkPrincipal, String kerberosPrincipal, + String kerberosKeytab) { + this.zkPrincipal = zkPrincipal; + this.kerberosPrincipal = kerberosPrincipal; + this.kerberosKeytab = kerberosKeytab; + } + + @Override + public ZooKeeper newZooKeeper(String connectString, int sessionTimeout, + Watcher watcher, boolean canBeReadOnly + ) throws Exception { + ZKClientConfig zkClientConfig = new ZKClientConfig(); + if (zkPrincipal != null) { + LOG.info("Configuring zookeeper to use {} as the server principal", + zkPrincipal); + zkClientConfig.setProperty(ZKClientConfig.ZK_SASL_CLIENT_USERNAME, + zkPrincipal); + } + if (zkClientConfig.isSaslClientEnabled() && !isJaasConfigurationSet(zkClientConfig)) { + setJaasConfiguration(zkClientConfig); + } + return new ZooKeeper(connectString, sessionTimeout, watcher, + canBeReadOnly, zkClientConfig); + } + + private boolean isJaasConfigurationSet(ZKClientConfig zkClientConfig) { + String clientConfig = zkClientConfig.getProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, + ZKClientConfig.LOGIN_CONTEXT_NAME_KEY_DEFAULT); + return javax.security.auth.login.Configuration.getConfiguration() + .getAppConfigurationEntry(clientConfig) != null; + } + + private void setJaasConfiguration(ZKClientConfig zkClientConfig) throws IOException { + if (kerberosPrincipal == null || kerberosKeytab == null) { + LOG.warn("JaasConfiguration has not been set since kerberos principal " + + "or keytab is not 
specified"); + return; + } + + String principal = SecurityUtil.getServerPrincipal(kerberosPrincipal, ""); + JaasConfiguration jconf = new JaasConfiguration(JAAS_CLIENT_ENTRY, principal, + kerberosKeytab); + javax.security.auth.login.Configuration.setConfiguration(jconf); + zkClientConfig.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, JAAS_CLIENT_ENTRY); + } + } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java new file mode 100644 index 0000000000000..ea17c16d01e87 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; + +/** + * Function of arity 2 which may raise an IOException. + * @param type of arg1 + * @param type of arg2 + * @param type of return value. + */ +@FunctionalInterface +public interface BiFunctionRaisingIOE { + + /** + * Apply the function. 
+ * @param t argument 1 + * @param u argument 2 + * @return result + * @throws IOException Any IO failure + */ + R apply(T t, U u) throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java new file mode 100644 index 0000000000000..65b3a63b2b9a0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; + +/** + * This is a callable which only raises an IOException. + * @param return type + */ +@FunctionalInterface +public interface CallableRaisingIOE { + + /** + * Apply the operation. 
+ * @return result + * @throws IOException Any IO failure + */ + R apply() throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CloseableTaskPoolSubmitter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CloseableTaskPoolSubmitter.java new file mode 100644 index 0000000000000..695da7e932279 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CloseableTaskPoolSubmitter.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.Closeable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static java.util.Objects.requireNonNull; + +/** + * A task submitter which is closeable, and whose close() call + * shuts down the pool. This can help manage + * thread pool lifecycles. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class CloseableTaskPoolSubmitter implements TaskPool.Submitter, + Closeable { + + /** Executors. */ + private ExecutorService pool; + + /** + * Constructor. + * @param pool non-null executor. + */ + public CloseableTaskPoolSubmitter(final ExecutorService pool) { + this.pool = requireNonNull(pool); + } + + /** + * Get the pool. + * @return the pool. + */ + public ExecutorService getPool() { + return pool; + } + + /** + * Shut down the pool. + */ + @Override + public void close() { + if (pool != null) { + pool.shutdown(); + pool = null; + } + } + + @Override + public Future submit(final Runnable task) { + return pool.submit(task); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java new file mode 100644 index 0000000000000..67299ef96aec6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util.functional; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.Executor; +import java.util.function.Supplier; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.util.DurationInfo; + +import static org.apache.hadoop.util.functional.FutureIO.raiseInnerCause; + +/** + * A bridge from Callable to Supplier; catching exceptions + * raised by the callable and wrapping them as appropriate. + * @param return type. + */ +public final class CommonCallableSupplier implements Supplier { + + private static final Logger LOG = + LoggerFactory.getLogger(CommonCallableSupplier.class); + + private final Callable call; + + /** + * Create. + * @param call call to invoke. + */ + public CommonCallableSupplier(final Callable call) { + this.call = call; + } + + @Override + public Object get() { + try { + return call.call(); + } catch (RuntimeException e) { + throw e; + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (Exception e) { + throw new UncheckedIOException(new IOException(e)); + } + } + + /** + * Submit a callable into a completable future. + * RTEs are rethrown. + * Non RTEs are caught and wrapped; IOExceptions to + * {@code RuntimeIOException} instances. + * @param executor executor. + * @param call call to invoke + * @param type + * @return the future to wait for + */ + @SuppressWarnings("unchecked") + public static CompletableFuture submit(final Executor executor, + final Callable call) { + return CompletableFuture + .supplyAsync(new CommonCallableSupplier(call), executor); + } + + /** + * Wait for a list of futures to complete. If the list is empty, + * return immediately. 
+ * + * @param futures list of futures. + * @param Generics Type T. + * @throws IOException if one of the called futures raised an IOE. + * @throws RuntimeException if one of the futures raised one. + */ + public static void waitForCompletion( + final List> futures) throws IOException { + if (futures.isEmpty()) { + return; + } + // await completion + waitForCompletion( + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))); + } + + /** + * Wait for a single of future to complete, extracting IOEs afterwards. + * + * @param Generics Type T. + * @param future future to wait for. + * @throws IOException if one of the called futures raised an IOE. + * @throws RuntimeException if one of the futures raised one. + */ + public static void waitForCompletion(final CompletableFuture future) + throws IOException { + try (DurationInfo ignore = new DurationInfo(LOG, false, + "Waiting for task completion")) { + future.join(); + } catch (CancellationException e) { + throw new IOException(e); + } catch (CompletionException e) { + raiseInnerCause(e); + } + } + + /** + * Wait for a single of future to complete, ignoring exceptions raised. + * @param future future to wait for. + * @param Generics Type T. + */ + public static void waitForCompletionIgnoringExceptions( + @Nullable final CompletableFuture future) { + if (future != null) { + try (DurationInfo ignore = new DurationInfo(LOG, false, + "Waiting for task completion")) { + future.join(); + } catch (Exception e) { + LOG.debug("Ignoring exception raised in task completion: "); + } + } + } + + /** + * Block awaiting completion for any non-null future passed in; + * No-op if a null arg was supplied. + * @param future future + * @throws IOException if one of the called futures raised an IOE. + * @throws RuntimeException if one of the futures raised one. 
+ */ + public static void maybeAwaitCompletion( + @Nullable final CompletableFuture future) throws IOException { + if (future != null) { + waitForCompletion(future); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/ConsumerRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/ConsumerRaisingIOE.java new file mode 100644 index 0000000000000..24a3b55c58d4a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/ConsumerRaisingIOE.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; + +/** + * Version of java.util.function.Consumer which raises + * exceptions. + * @param type of argument,. + */ +@FunctionalInterface +public interface ConsumerRaisingIOE { + + /** + * Process the argument. + * @param t type + * @throws IOException if needed + */ + void accept(T t) throws IOException; + + /** + * after calling {@link #accept(Object)}, + * invoke the next consumer in the chain. + * @param next next consumer + * @return the chain. 
+ */ + default ConsumerRaisingIOE andThen( + ConsumerRaisingIOE next) { + return (T t) -> { + accept(t); + next.accept(t); + }; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Function4RaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Function4RaisingIOE.java new file mode 100644 index 0000000000000..f0cd5c08c572b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Function4RaisingIOE.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; + +/** + * Function of arity 4 which may raise an IOException. + * @param type of arg1. + * @param type of arg2. + * @param type of arg3. + * @param type of arg4. + * @param return type. + */ +public interface Function4RaisingIOE { + + /** + * Apply the function. + * @param i1 argument 1. + * @param i2 argument 2. + * @param i3 argument 3. + * @param i4 argument 4. + * @return return value. + * @throws IOException any IOE. 
+ */ + R apply(I1 i1, I2 i2, I3 i3, I4 i4) throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java new file mode 100644 index 0000000000000..83e041e2b3160 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; + +/** + * Function of arity 1 which may raise an IOException. + * @param type of arg1 + * @param type of return value. + */ +@FunctionalInterface +public interface FunctionRaisingIOE { + + /** + * Apply the function. 
+ * @param t argument 1 + * @return result + * @throws IOException Any IO failure + */ + R apply(T t) throws IOException; +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java new file mode 100644 index 0000000000000..c3fda19d8d73b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.io.UncheckedIOException; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSBuilder; + +/** + * Future IO Helper methods. + *

      <p> + * Contains methods promoted from + * {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they + * are a key part of integrating async IO in application code. + * </p>

      + * <p>

      + * One key feature is that the {@link #awaitFuture(Future)} and + * {@link #awaitFuture(Future, long, TimeUnit)} calls will + * extract and rethrow exceptions raised in the future's execution, + * including extracting the inner IOException of any + * {@code UncheckedIOException} raised in the future. + * This makes it somewhat easier to execute IOException-raising + * code inside futures. + * </p>

      + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class FutureIO { + + private FutureIO() { + } + + /** + * Given a future, evaluate it. + *

      <p> + * Any exception generated in the future is + * extracted and rethrown. + * </p>

      + * @param future future to evaluate + * @param type of the result. + * @return the result, if all went well. + * @throws InterruptedIOException future was interrupted + * @throws IOException if something went wrong + * @throws RuntimeException any nested RTE thrown + */ + public static T awaitFuture(final Future future) + throws InterruptedIOException, IOException, RuntimeException { + try { + return future.get(); + } catch (InterruptedException e) { + throw (InterruptedIOException) new InterruptedIOException(e.toString()) + .initCause(e); + } catch (ExecutionException e) { + return raiseInnerCause(e); + } + } + + /** + * Given a future, evaluate it. + *

      <p> + * Any exception generated in the future is + * extracted and rethrown. + * </p>

      + * @param future future to evaluate + * @param timeout timeout to wait + * @param unit time unit. + * @param type of the result. + * @return the result, if all went well. + * @throws InterruptedIOException future was interrupted + * @throws IOException if something went wrong + * @throws RuntimeException any nested RTE thrown + * @throws TimeoutException the future timed out. + */ + public static T awaitFuture(final Future future, + final long timeout, + final TimeUnit unit) + throws InterruptedIOException, IOException, RuntimeException, + TimeoutException { + try { + return future.get(timeout, unit); + } catch (InterruptedException e) { + throw (InterruptedIOException) new InterruptedIOException(e.toString()) + .initCause(e); + } catch (ExecutionException e) { + return raiseInnerCause(e); + } + } + + /** + * From the inner cause of an execution exception, extract the inner cause + * if it is an IOE or RTE. + * This will always raise an exception, either the inner IOException, + * an inner RuntimeException, or a new IOException wrapping the raised + * exception. + * + * @param e exception. + * @param type of return value. + * @return nothing, ever. + * @throws IOException either the inner IOException, or a wrapper around + * any non-Runtime-Exception + * @throws RuntimeException if that is the inner cause. + */ + public static T raiseInnerCause(final ExecutionException e) + throws IOException { + throw unwrapInnerException(e); + } + + /** + * Extract the cause of a completion failure and rethrow it if an IOE + * or RTE. + * @param e exception. + * @param type of return value. + * @return nothing, ever. + * @throws IOException either the inner IOException, or a wrapper around + * any non-Runtime-Exception + * @throws RuntimeException if that is the inner cause. 
+ */ + public static T raiseInnerCause(final CompletionException e) + throws IOException { + throw unwrapInnerException(e); + } + + /** + * From the inner cause of an execution exception, extract the inner cause + * to an IOException, raising RuntimeExceptions and Errors immediately. + *
        <ol>
      + * <li>If it is an IOE: Return.</li>
      + * <li>If it is a {@link UncheckedIOException}: return the cause</li>
      + * <li>Completion/Execution Exceptions: extract and repeat</li>
      + * <li>If it is an RTE or Error: throw.</li>
      + * <li>Any other type: wrap in an IOE</li>
      + * </ol>
      + * + * Recursively handles wrapped Execution and Completion Exceptions in + * case something very complicated has happened. + * @param e exception. + * @return an IOException extracted or built from the cause. + * @throws RuntimeException if that is the inner cause. + * @throws Error if that is the inner cause. + */ + @SuppressWarnings("ChainOfInstanceofChecks") + public static IOException unwrapInnerException(final Throwable e) { + Throwable cause = e.getCause(); + if (cause instanceof IOException) { + return (IOException) cause; + } else if (cause instanceof UncheckedIOException) { + // this is always an IOException + return ((UncheckedIOException) cause).getCause(); + } else if (cause instanceof CompletionException) { + return unwrapInnerException(cause); + } else if (cause instanceof ExecutionException) { + return unwrapInnerException(cause); + } else if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else if (cause instanceof Error) { + throw (Error) cause; + } else if (cause != null) { + // other type: wrap with a new IOE + return new IOException(cause); + } else { + // this only happens if there was no cause. + return new IOException(e); + } + } + + /** + * Propagate options to any builder, converting everything with the + * prefix to an option where, if there were 2+ dot-separated elements, + * it is converted to a schema. + * See {@link #propagateOptions(FSBuilder, Configuration, String, boolean)}. + * @param builder builder to modify + * @param conf configuration to read + * @param optionalPrefix prefix for optional settings + * @param mandatoryPrefix prefix for mandatory settings + * @param type of result + * @param type of builder + * @return the builder passed in. 
+ */ + public static > + FSBuilder propagateOptions( + final FSBuilder builder, + final Configuration conf, + final String optionalPrefix, + final String mandatoryPrefix) { + propagateOptions(builder, conf, + optionalPrefix, false); + propagateOptions(builder, conf, + mandatoryPrefix, true); + return builder; + } + + /** + * Propagate options to any builder, converting everything with the + * prefix to an option where, if there were 2+ dot-separated elements, + * it is converted to a schema. + *
      <pre>
      +   *   fs.example.s3a.option becomes "s3a.option"
      +   *   fs.example.fs.io.policy becomes "fs.io.policy"
      +   *   fs.example.something becomes "something"
      +   * </pre>
      + * @param builder builder to modify + * @param conf configuration to read + * @param prefix prefix to scan/strip + * @param mandatory are the options to be mandatory or optional? + */ + public static void propagateOptions( + final FSBuilder builder, + final Configuration conf, + final String prefix, + final boolean mandatory) { + + final String p = prefix.endsWith(".") ? prefix : (prefix + "."); + final Map propsWithPrefix = conf.getPropsWithPrefix(p); + for (Map.Entry entry : propsWithPrefix.entrySet()) { + // change the schema off each entry + String key = entry.getKey(); + String val = entry.getValue(); + if (mandatory) { + builder.must(key, val); + } else { + builder.opt(key, val); + } + } + } + + /** + * Evaluate a CallableRaisingIOE in the current thread, + * converting IOEs to RTEs and propagating. + * @param callable callable to invoke + * @param Return type. + * @return the evaluated result. + * @throws UnsupportedOperationException fail fast if unsupported + * @throws IllegalArgumentException invalid argument + */ + public static CompletableFuture eval( + CallableRaisingIOE callable) { + CompletableFuture result = new CompletableFuture<>(); + try { + result.complete(callable.apply()); + } catch (UnsupportedOperationException | IllegalArgumentException tx) { + // fail fast here + throw tx; + } catch (Throwable tx) { + // fail lazily here to ensure callers expect all File IO operations to + // surface later + result.completeExceptionally(tx); + } + return result; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/InvocationRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/InvocationRaisingIOE.java new file mode 100644 index 0000000000000..b59dabea89ea9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/InvocationRaisingIOE.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; + +/** + * This is a lambda-expression which may raises an IOException. + * This is a recurrent design patten in the hadoop codebase, e.g + * {@code LambdaTestUtils.VoidCallable} and + * the S3A {@code Invoker.VoidOperation}}. Hopefully this should + * be the last. + * Note for implementors of methods which take this as an argument: + * don't use method overloading to determine which specific functional + * interface is to be used. + */ +@FunctionalInterface +public interface InvocationRaisingIOE { + + /** + * Apply the operation. + * @throws IOException Any IO failure + */ + void apply() throws IOException; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/RemoteIterators.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/RemoteIterators.java new file mode 100644 index 0000000000000..9b1611d7dadb0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/RemoteIterators.java @@ -0,0 +1,840 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.io.IOUtils; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtDebug; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; + +/** + * A set of remote iterators supporting transformation and filtering, + * with IOStatisticsSource passthrough, and of conversions of + * the iterators to lists/arrays and of performing actions + * on the values. + *

      + * This aims to make it straightforward to use lambda-expressions to + * transform the results of an iterator, without losing the statistics + * in the process, and to chain the operations together. + *

      + * The closeable operation will be passed through RemoteIterators which + * wrap other RemoteIterators. This is to support any iterator which + * can be closed to release held connections, file handles etc. + * Unless client code is written to assume that RemoteIterator instances + * may be closed, this is not likely to be broadly used. It is added + * to make it possible to adopt this feature in a managed way. + *

      + * One notable feature is that the + * {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will + * LOG at debug any IOStatistics provided by the iterator, if such + * statistics are provided. There's no attempt at retrieval and logging + * if the LOG is not set to debug, so it is a zero cost feature unless + * the logger {@code org.apache.hadoop.fs.functional.RemoteIterators} + * is at DEBUG. + *

      + * Based on the S3A Listing code, and some some work on moving other code + * to using iterative listings so as to pick up the statistics. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class RemoteIterators { + + /** + * Log used for logging any statistics in + * {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} + * at DEBUG. + */ + private static final Logger LOG = LoggerFactory.getLogger( + RemoteIterators.class); + + private RemoteIterators() { + } + + /** + * Create an iterator from a singleton. + * @param singleton instance + * @param type + * @return a remote iterator + */ + public static RemoteIterator remoteIteratorFromSingleton( + @Nullable T singleton) { + return new SingletonIterator<>(singleton); + } + + /** + * Create a remote iterator from a java.util.Iterator. + * @param type + * @param iterator iterator. + * @return a remote iterator + */ + public static RemoteIterator remoteIteratorFromIterator( + Iterator iterator) { + return new WrappedJavaIterator<>(iterator); + } + + /** + * Create a remote iterator from a java.util.Iterable -e.g. a list + * or other collection. + * @param type + * @param iterable iterable. + * @return a remote iterator + */ + public static RemoteIterator remoteIteratorFromIterable( + Iterable iterable) { + return new WrappedJavaIterator<>(iterable.iterator()); + } + + /** + * Create a remote iterator from an array. + * @param type + * @param array array. + * @return a remote iterator + */ + public static RemoteIterator remoteIteratorFromArray(T[] array) { + return new WrappedJavaIterator<>(Arrays.stream(array).iterator()); + } + + /** + * Create an iterator from an iterator and a transformation function. 
+ * @param source type + * @param result type + * @param iterator source + * @param mapper transformation + * @return a remote iterator + */ + public static RemoteIterator mappingRemoteIterator( + RemoteIterator iterator, + FunctionRaisingIOE mapper) { + return new MappingRemoteIterator<>(iterator, mapper); + } + + /** + * Create a RemoteIterator from a RemoteIterator, casting the + * type in the process. This is to help with filesystem API + * calls where overloading causes confusion (e.g. listStatusIterator()) + * @param source type + * @param result type + * @param iterator source + * @return a remote iterator + */ + public static RemoteIterator typeCastingRemoteIterator( + RemoteIterator iterator) { + return new TypeCastingRemoteIterator<>(iterator); + } + + /** + * Create a RemoteIterator from a RemoteIterator and a filter + * function which returns true for every element to be passed + * through. + *

      + * Elements are filtered in the hasNext() method; if not used + * the filtering will be done on demand in the {@code next()} + * call. + *

      + * @param type + * @param iterator source + * @param filter filter + * @return a remote iterator + */ + public static RemoteIterator filteringRemoteIterator( + RemoteIterator iterator, + FunctionRaisingIOE filter) { + return new FilteringRemoteIterator<>(iterator, filter); + } + + /** + * This adds an extra close operation alongside the passthrough + * to any Closeable.close() method supported by the source iterator. + * @param iterator source + * @param toClose extra object to close. + * @param source type. + * @return a new iterator + */ + public static RemoteIterator closingRemoteIterator( + RemoteIterator iterator, + Closeable toClose) { + return new CloseRemoteIterator<>(iterator, toClose); + } + + /** + * Wrap an iterator with one which adds a continuation probe. + * This allows work to exit fast without complicated breakout logic. + * @param iterator source + * @param continueWork predicate which will trigger a fast halt if it returns false. + * @param source type. + * @return a new iterator + */ + public static RemoteIterator haltableRemoteIterator( + final RemoteIterator iterator, + final CallableRaisingIOE continueWork) { + return new HaltableRemoteIterator<>(iterator, continueWork); + } + + /** + * A remote iterator which simply counts up, stopping once the + * value is equal to or greater than the value of {@code excludedFinish}. + * This is primarily for tests or when submitting work into a TaskPool. + * Equivalent to: + *
      +   *   for(long l = start; l < excludedFinish; l++) yield l;
      +   * 
      + * @param start start value + * @param excludedFinish excluded finish + * @return an iterator which returns longs from [start, finish) + */ + public static RemoteIterator rangeExcludingIterator( + final long start, final long excludedFinish) { + return new RangeExcludingLongIterator(start, excludedFinish); + } + + /** + * Build a list from a RemoteIterator. + * @param source source iterator + * @param type + * @return a list of the values. + * @throws IOException if the source RemoteIterator raises it. + */ + public static List toList(RemoteIterator source) + throws IOException { + List l = new ArrayList<>(); + foreach(source, l::add); + return l; + } + + /** + * Build an array from a RemoteIterator. + * @param source source iterator + * @param a destination array; if too small a new array + * of the same type is created + * @param type + * @return an array of the values. + * @throws IOException if the source RemoteIterator raises it. + */ + public static T[] toArray(RemoteIterator source, + T[] a) throws IOException { + List list = toList(source); + return list.toArray(a); + } + + /** + * Apply an operation to all values of a RemoteIterator. + * + * If the iterator is an IOStatisticsSource returning a non-null + * set of statistics, and this classes log is set to DEBUG, + * then the statistics of the operation are evaluated and logged at + * debug. + *

      + * The number of entries processed is returned, as it is useful to + * know this, especially during tests or when reporting values + * to users. + *

      + * The iterator is cleaned up afterwards, which closes it if it is Closeable. + * @param source iterator source + * @param consumer consumer of the values. + * @return the number of elements processed + * @param type of source + * @throws IOException if the source RemoteIterator or the consumer raises one. + */ + public static long foreach( + RemoteIterator source, + ConsumerRaisingIOE consumer) throws IOException { + long count = 0; + + try { + while (source.hasNext()) { + count++; + consumer.accept(source.next()); + } + + } finally { + cleanupRemoteIterator(source); + } + return count; + } + + /** + * Clean up after an iteration. + * If the log is at debug, calculate and log the IOStatistics. + * If the iterator is Closeable, it is closed through IOUtils.cleanupWithLogger(). + * @param source iterator source + * @param type of source + */ + public static void cleanupRemoteIterator(RemoteIterator source) { + // maybe log the results + logIOStatisticsAtDebug(LOG, "RemoteIterator Statistics: {}", source); + if (source instanceof Closeable) { + /* source is closeable, so close.*/ + IOUtils.cleanupWithLogger(LOG, (Closeable) source); + } + } + + /** + * A remote iterator from a singleton. It has a single next() + * value, after which hasNext() returns false and next() fails. + *

      + * If it is a source of + * remote statistics, these are returned. + * @param type. + */ + private static final class SingletonIterator + implements RemoteIterator, IOStatisticsSource { + + /** + * Single entry. + */ + private final T singleton; + + /** Has the entry been processed? */ + private boolean processed; + + /** + * Instantiate. + * @param singleton single value...may be null + */ + private SingletonIterator(@Nullable T singleton) { + this.singleton = singleton; + // if the entry is null, consider it processed. + this.processed = singleton == null; + } + + @Override + public boolean hasNext() throws IOException { + return !processed; + } + + @SuppressWarnings("NewExceptionWithoutArguments") + @Override + public T next() throws IOException { + if (hasNext()) { + processed = true; + return singleton; + } else { + throw new NoSuchElementException(); + } + } + + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(singleton); + } + + @Override + public String toString() { + return "SingletonIterator{" + + (singleton != null ? singleton : "") + + '}'; + } + + } + + /** + * Create a remote iterator from a simple java.util.Iterator, or + * an iterable. + *

      + * If the iterator is a source of statistics that is passed through. + *

      + * The {@link #close()} will close the source iterator if it is + * Closeable; + * @param iterator type. + */ + private static final class WrappedJavaIterator + implements RemoteIterator, IOStatisticsSource, Closeable { + + /** + * inner iterator.. + */ + private final Iterator source; + + private final Closeable sourceToClose; + + + /** + * Construct from an interator. + * @param source source iterator. + */ + private WrappedJavaIterator(Iterator source) { + this.source = requireNonNull(source); + sourceToClose = new MaybeClose(source); + } + + @Override + public boolean hasNext() { + return source.hasNext(); + } + + @Override + public T next() { + return source.next(); + } + + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(source); + } + + @Override + public String toString() { + return "FromIterator{" + source + '}'; + } + + @Override + public void close() throws IOException { + sourceToClose.close(); + + } + } + + /** + * Wrapper of another remote iterator; IOStatistics + * and Closeable methods are passed down if implemented. + * This class may be subclassed within the hadoop codebase + * if custom iterators are needed. + * @param source type + * @param type of returned value + */ + public static abstract class WrappingRemoteIterator + implements RemoteIterator, IOStatisticsSource, Closeable { + + /** + * Source iterator. + */ + private final RemoteIterator source; + + private final Closeable sourceToClose; + + protected WrappingRemoteIterator(final RemoteIterator source) { + this.source = requireNonNull(source); + sourceToClose = new MaybeClose(source); + } + + protected RemoteIterator getSource() { + return source; + } + + @Override + public IOStatistics getIOStatistics() { + return retrieveIOStatistics(source); + } + + @Override + public void close() throws IOException { + sourceToClose.close(); + } + + /** + * Check for the source having a next element. 
+ * If it does not, this object's close() method + * is called and false returned. + * @return true if there is a new value + * @throws IOException failure to retrieve next value; this iterator is closed first + */ + protected boolean sourceHasNext() throws IOException { + boolean hasNext; + try { + hasNext = getSource().hasNext(); + } catch (IOException e) { + IOUtils.cleanupWithLogger(LOG, this); + throw e; + } + if (!hasNext) { + // there is nothing left so automatically close. + close(); + } + return hasNext; + } + + /** + * Get the next source value. + * This calls {@link #sourceHasNext()} first to verify + * that there is data. + * @return the next value + * @throws IOException failure; this iterator is closed before rethrowing + * @throws NoSuchElementException no more data + */ + protected S sourceNext() throws IOException { + try { + if (!sourceHasNext()) { + throw new NoSuchElementException(); + } + return getSource().next(); + } catch (NoSuchElementException | IOException e) { + IOUtils.cleanupWithLogger(LOG, this); + throw e; + } + } + + @Override + public String toString() { + return source.toString(); + } + + } + + /** + * Iterator taking a source and a transformational function. + * @param source type + * @param final output type. + */ + private static final class MappingRemoteIterator + extends WrappingRemoteIterator { + + /** + * Mapper to invoke. + */ + private final FunctionRaisingIOE mapper; + + private MappingRemoteIterator( + RemoteIterator source, + FunctionRaisingIOE mapper) { + super(source); + this.mapper = requireNonNull(mapper); + } + + @Override + public boolean hasNext() throws IOException { + return sourceHasNext(); + } + + @Override + public T next() throws IOException { + return mapper.apply(sourceNext()); + } + + @Override + public String toString() { + return "FunctionRemoteIterator{" + getSource() + '}'; + } + } + + /** + * RemoteIterator which can change the type of the input. + * This is useful in some situations. + * @param source type + * @param final output type. 
+ */ + private static final class TypeCastingRemoteIterator + extends WrappingRemoteIterator { + + private TypeCastingRemoteIterator( + RemoteIterator source) { + super(source); + } + + @Override + public boolean hasNext() throws IOException { + return sourceHasNext(); + } + + @Override + public T next() throws IOException { + return (T)sourceNext(); + } + + @Override + public String toString() { + return getSource().toString(); + } + } + + /** + * Extend the wrapped iterator by filtering source values out. + * Only those values for which the filter predicate returns true + * will be returned. + * @param type of iterator. + */ + @SuppressWarnings("NewExceptionWithoutArguments") + private static final class FilteringRemoteIterator + extends WrappingRemoteIterator { + + /** + * Filter Predicate. + * Takes the input type or any superclass. + */ + private final FunctionRaisingIOE + filter; + + /** + * Next value; will be null if none has been evaluated, or the + * last one was already returned by next(). + */ + private S next; + + /** + * An iterator which filters the values of its source. + * All source elements for which filter = true are returned, + * unchanged; all other elements are skipped. + * @param source source iterator. + * @param filter filter predicate. + */ + private FilteringRemoteIterator( + RemoteIterator source, + FunctionRaisingIOE filter) { + super(source); + + this.filter = requireNonNull(filter); + } + + /** + * Fetch: retrieve the next value. + * @return true if a new value was found after filtering. + * @throws IOException failure in retrieval from source or in the filter + */ + private boolean fetch() throws IOException { + while (next == null && sourceHasNext()) { + S candidate = getSource().next(); + if (filter.apply(candidate)) { + next = candidate; + return true; + } + } + return false; + } + + /** + * Trigger a fetch if an entry is needed. 
+ * @return true if there was already an entry ready to return, + * or there was not but one could then be retrieved. + * @throws IOException failure in fetch operation + */ + @Override + public boolean hasNext() throws IOException { + if (next != null) { + return true; + } + return fetch(); + } + + /** + * Return the next value. + * Will retrieve the next elements if needed. + * The cached value is cleared once it has been handed back. + * @return the next element accepted by the filter. + * @throws IOException failure in the fetch operation. + * @throws NoSuchElementException no more data + */ + @Override + public S next() throws IOException { + if (hasNext()) { + S result = next; + next = null; + return result; + } + throw new NoSuchElementException(); + } + + @Override + public String toString() { + return "FilteringRemoteIterator{" + getSource() + '}'; + } + } + + /** + * A wrapping remote iterator which adds another entry to + * close. This is to assist cleanup. + * @param type + */ + private static final class CloseRemoteIterator + extends WrappingRemoteIterator { + + private final MaybeClose toClose; + private boolean closed; + + private CloseRemoteIterator( + final RemoteIterator source, + final Closeable toClose) { + super(source); + this.toClose = new MaybeClose(Objects.requireNonNull(toClose)); + } + + @Override + public boolean hasNext() throws IOException { + return sourceHasNext(); + } + + @Override + public S next() throws IOException { + + return sourceNext(); + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + closed = true; + LOG.debug("Closing {}", this); + try { + super.close(); + } finally { + toClose.close(); + } + } + } + + /** + * Class to help with Closeable logic, where sources may/may not + * be closeable, only one invocation is allowed. + * On the second and later call of close(), it is a no-op. 
+ */ + private static final class MaybeClose implements Closeable { + + private Closeable toClose; + + /** + * Construct. + * @param o object to close. + */ + private MaybeClose(Object o) { + this(o, true); + } + + /** + * Construct; the object is only retained for close() if it is Closeable and close==true. + * @param o object to close. + * @param close should close? + */ + private MaybeClose(Object o, boolean close) { + if (close && o instanceof Closeable) { + this.toClose = (Closeable) o; + } else { + this.toClose = null; + } + } + + @Override + public void close() throws IOException { + if (toClose != null) { + try { + toClose.close(); + } finally { + toClose = null; + } + } + } + } + + /** + * An iterator which allows for a fast exit predicate. + * @param source type + */ + private static final class HaltableRemoteIterator + extends WrappingRemoteIterator { + + /** + * Probe as to whether work should continue. + */ + private final CallableRaisingIOE continueWork; + + /** + * Wrap an iterator with one which adds a continuation probe. + * The probe will be called in the {@link #hasNext()} method, before + * the source iterator is itself checked and in {@link #next()} + * before retrieval. + * That is: it may be called multiple times per iteration. + * @param source source iterator. + * @param continueWork predicate which will trigger a fast halt if it returns false. + */ + private HaltableRemoteIterator( + final RemoteIterator source, + final CallableRaisingIOE continueWork) { + super(source); + this.continueWork = continueWork; + } + + @Override + public boolean hasNext() throws IOException { + return sourceHasNext(); + } + + @Override + public S next() throws IOException { + return sourceNext(); + } + + @Override + protected boolean sourceHasNext() throws IOException { + return continueWork.apply() && super.sourceHasNext(); + } + } + + /** + * A remote iterator which simply counts up, stopping once the + * value is equal to or greater than the finish. 
+ * This is primarily for tests or when submitting work into a TaskPool. + */ + private static final class RangeExcludingLongIterator implements RemoteIterator { + + /** + * Current value. + */ + private long current; + + /** + * End value. + */ + private final long excludedFinish; + + /** + * Construct. + * @param start start value. + * @param excludedFinish halt the iterator once the current value is equal + * to or greater than this. + */ + private RangeExcludingLongIterator(final long start, final long excludedFinish) { + this.current = start; + this.excludedFinish = excludedFinish; + } + + @Override + public boolean hasNext() throws IOException { + return current < excludedFinish; + } + + @Override + public Long next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + final long s = current; + current++; + return s; + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java new file mode 100644 index 0000000000000..c9e6d0b78ac11 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java @@ -0,0 +1,652 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; + +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.util.functional.RemoteIterators.remoteIteratorFromIterable; + +/** + * Utility class for parallel execution, takes closures for the various + * actions. + * There is no retry logic: it is expected to be handled by the closures. + * From {@code org.apache.hadoop.fs.s3a.commit.Tasks} which came from + * the Netflix committer patch. + * Apache Iceberg has its own version of this, with a common ancestor + * at some point in its history. + * A key difference with this class is that the iterator is always, + * internally, an {@link RemoteIterator}. + * This is to allow tasks to be scheduled while incremental operations + * such as paged directory listings are still collecting in results. + * + * While awaiting completion, this thread spins and sleeps a time of + * {@link #SLEEP_INTERVAL_AWAITING_COMPLETION}, which, being a + * busy-wait, is inefficient. + * There's an implicit assumption that remote IO is being performed, and + * so this is not impacting throughput/performance. + * + * History: + * This class came with the Netflix contributions to the S3A committers + * in HADOOP-13786. 
+ * It was moved into hadoop-common for use in the manifest committer and + * anywhere else it is needed, and renamed in the process as + * "Tasks" has too many meanings in the hadoop source. + * The iterator was then changed from a normal java iterable + * to a hadoop {@link org.apache.hadoop.fs.RemoteIterator}. + * This allows a task pool to be supplied with incremental listings + * from object stores, scheduling work as pages of listing + * results come in, rather than blocking until the entire + * directory/directory tree etc has been enumerated. + * + * There is a variant of this in Apache Iceberg in + * {@code org.apache.iceberg.util.Tasks} + * That is not derived from any version in the hadoop codebase, it + * just shares a common ancestor somewhere in the Netflix codebase. + * It is the more sophisticated version. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class TaskPool { + private static final Logger LOG = + LoggerFactory.getLogger(TaskPool.class); + + /** + * Interval in milliseconds to await completion. + */ + private static final int SLEEP_INTERVAL_AWAITING_COMPLETION = 10; + + private TaskPool() { + } + + /** + * Callback invoked to process an item. + * @param item type being processed + * @param exception class which may be raised + */ + @FunctionalInterface + public interface Task { + void run(I item) throws E; + } + + /** + * Callback invoked on a failure. + * @param item type being processed + * @param exception class which may be raised + */ + @FunctionalInterface + public interface FailureTask { + + /** + * process a failure. + * @param item item the task is processing + * @param exception the exception which was raised. + * @throws E Exception of type E + */ + void run(I item, Exception exception) throws E; + } + + /** + * Builder for task execution. 
+ * @param item type + */ + public static class Builder { + private final RemoteIterator items; + private Submitter service = null; + private FailureTask onFailure = null; + private boolean stopOnFailure = false; + private boolean suppressExceptions = false; + private Task revertTask = null; + private boolean stopRevertsOnFailure = false; + private Task abortTask = null; + private boolean stopAbortsOnFailure = false; + private int sleepInterval = SLEEP_INTERVAL_AWAITING_COMPLETION; + + /** + * IOStatisticsContext to switch to in all threads + * taking part in the commit operation. + * This ensures that the IOStatistics collected in the + * worker threads will be aggregated into the total statistics + * of the thread calling the committer commit/abort methods. + */ + private IOStatisticsContext ioStatisticsContext = null; + + /** + * Create the builder. + * @param items items to process + */ + Builder(RemoteIterator items) { + this.items = requireNonNull(items, "items"); + } + + /** + * Create the builder. + * @param items items to process + */ + Builder(Iterable items) { + this(remoteIteratorFromIterable(items)); + } + + /** + * Declare executor service: if null, the tasks are executed in a single + * thread. + * @param submitter service to schedule tasks with. + * @return this builder. + */ + public Builder executeWith(@Nullable Submitter submitter) { + + this.service = submitter; + return this; + } + + /** + * Task to invoke on failure. + * @param task task + * @return the builder + */ + public Builder onFailure(FailureTask task) { + this.onFailure = task; + return this; + } + + public Builder stopOnFailure() { + this.stopOnFailure = true; + return this; + } + + /** + * Suppress exceptions from tasks. + * RemoteIterator exceptions are not suppressable. + * @return the builder. + */ + public Builder suppressExceptions() { + return suppressExceptions(true); + } + + /** + * Suppress exceptions from tasks. + * RemoteIterator exceptions are not suppressable. 
+ * @param suppress new value + * @return the builder. + */ + public Builder suppressExceptions(boolean suppress) { + this.suppressExceptions = suppress; + return this; + } + + /** + * Task to revert with after another task failed. + * @param task task to execute + * @return the builder + */ + public Builder revertWith(Task task) { + this.revertTask = task; + return this; + } + + /** + * Stop trying to revert if one operation fails. + * @return the builder + */ + public Builder stopRevertsOnFailure() { + this.stopRevertsOnFailure = true; + return this; + } + + /** + * Task to abort with after another task failed. + * @param task task to execute + * @return the builder + */ + public Builder abortWith(Task task) { + this.abortTask = task; + return this; + } + + /** + * Stop trying to abort if one operation fails. + * @return the builder + */ + public Builder stopAbortsOnFailure() { + this.stopAbortsOnFailure = true; + return this; + } + + /** + * Set the sleep interval. + * @param value new value + * @return the builder + */ + public Builder sleepInterval(final int value) { + sleepInterval = value; + return this; + } + + /** + * Execute the task across the data. + * @param task task to execute + * @param exception which may be raised in execution. + * @return true if the operation executed successfully + * @throws E any exception raised. + * @throws IOException IOExceptions raised by remote iterator or in execution. + */ + public boolean run(Task task) throws E, IOException { + requireNonNull(items, "items"); + if (!items.hasNext()) { + // if there are no items, return without worrying about + // execution pools, errors etc. + return true; + } + if (service != null) { + // thread pool, so run in parallel + return runParallel(task); + } else { + // single threaded execution. + return runSingleThreaded(task); + } + } + + /** + * Single threaded execution. + * @param task task to execute + * @param exception which may be raised in execution. 
+ * @return true if the operation executed successfully + * @throws E any exception raised. + * @throws IOException IOExceptions raised by remote iterator or in execution. + */ + private boolean runSingleThreaded(Task task) + throws E, IOException { + List succeeded = new ArrayList<>(); + List exceptions = new ArrayList<>(); + + RemoteIterator iterator = items; + boolean threw = true; + try { + while (iterator.hasNext()) { + I item = iterator.next(); + try { + task.run(item); + succeeded.add(item); + + } catch (Exception e) { + exceptions.add(e); + + if (onFailure != null) { + try { + onFailure.run(item, e); + } catch (Exception failException) { + LOG.error("Failed to clean up on failure", e); + // keep going + } + } + + if (stopOnFailure) { + break; + } + } + } + + threw = false; + } catch (IOException iteratorIOE) { + // an IOE is reaised here during iteration + LOG.debug("IOException when iterating through {}", iterator, iteratorIOE); + throw iteratorIOE; + } finally { + // threw handles exceptions that were *not* caught by the catch block, + // and exceptions that were caught and possibly handled by onFailure + // are kept in exceptions. + if (threw || !exceptions.isEmpty()) { + if (revertTask != null) { + boolean failed = false; + for (I item : succeeded) { + try { + revertTask.run(item); + } catch (Exception e) { + LOG.error("Failed to revert task", e); + failed = true; + // keep going + } + if (stopRevertsOnFailure && failed) { + break; + } + } + } + + if (abortTask != null) { + boolean failed = false; + while (iterator.hasNext()) { + try { + abortTask.run(iterator.next()); + } catch (Exception e) { + failed = true; + LOG.error("Failed to abort task", e); + // keep going + } + if (stopAbortsOnFailure && failed) { + break; + } + } + } + } + } + + if (!suppressExceptions && !exceptions.isEmpty()) { + TaskPool.throwOne(exceptions); + } + + return exceptions.isEmpty(); + } + + /** + * Parallel execution. 
+ * All tasks run within the same IOStatisticsContext as the + * thread calling this method. + * @param task task to execute + * @param exception which may be raised in execution. + * @return true if the operation executed successfully + * @throws E any exception raised. + * @throws IOException IOExceptions raised by remote iterator or in execution. + */ + private boolean runParallel(final Task task) + throws E, IOException { + final Queue succeeded = new ConcurrentLinkedQueue<>(); + final Queue exceptions = new ConcurrentLinkedQueue<>(); + final AtomicBoolean taskFailed = new AtomicBoolean(false); + final AtomicBoolean abortFailed = new AtomicBoolean(false); + final AtomicBoolean revertFailed = new AtomicBoolean(false); + + List> futures = new ArrayList<>(); + ioStatisticsContext = IOStatisticsContext.getCurrentIOStatisticsContext(); + + IOException iteratorIOE = null; + final RemoteIterator iterator = this.items; + try { + while (iterator.hasNext()) { + final I item = iterator.next(); + // submit a task for each item that will either run or abort the task + futures.add(service.submit(() -> { + setStatisticsContext(); + try { + if (!(stopOnFailure && taskFailed.get())) { + // prepare and run the task + boolean threw = true; + try { + LOG.debug("Executing task"); + task.run(item); + succeeded.add(item); + LOG.debug("Task succeeded"); + + threw = false; + + } catch (Exception e) { + taskFailed.set(true); + exceptions.add(e); + LOG.info("Task failed {}", e.toString()); + LOG.debug("Task failed", e); + + if (onFailure != null) { + try { + onFailure.run(item, e); + } catch (Exception failException) { + LOG.warn("Failed to clean up on failure", e); + // swallow the exception + } + } + } finally { + if (threw) { + taskFailed.set(true); + } + } + + } else if (abortTask != null) { + // abort the task instead of running it + if (stopAbortsOnFailure && abortFailed.get()) { + return; + } + + boolean failed = true; + try { + LOG.info("Aborting task"); + abortTask.run(item); + 
failed = false; + } catch (Exception e) { + LOG.error("Failed to abort task", e); + // swallow the exception + } finally { + if (failed) { + abortFailed.set(true); + } + } + } + } finally { + resetStatisticsContext(); + } + })); + } + } catch (IOException e) { + // iterator failure. + LOG.debug("IOException when iterating through {}", iterator, e); + iteratorIOE = e; + // mark as a task failure so all submitted tasks will halt/abort + taskFailed.set(true); + } + // let the above tasks complete (or abort) + waitFor(futures, sleepInterval); + int futureCount = futures.size(); + futures.clear(); + + if (taskFailed.get() && revertTask != null) { + // at least one task failed, revert any that succeeded + LOG.info("Reverting all {} succeeded tasks from {} futures", + succeeded.size(), futureCount); + for (final I item : succeeded) { + futures.add(service.submit(() -> { + if (stopRevertsOnFailure && revertFailed.get()) { + return; + } + + boolean failed = true; + setStatisticsContext(); + try { + revertTask.run(item); + failed = false; + } catch (Exception e) { + LOG.error("Failed to revert task", e); + // swallow the exception + } finally { + if (failed) { + revertFailed.set(true); + } + resetStatisticsContext(); + } + })); + } + + // let the revert tasks complete + waitFor(futures, sleepInterval); + } + + // give priority to execution exceptions over + // iterator exceptions. + if (!suppressExceptions && !exceptions.isEmpty()) { + // there's an exception list to build up, cast and throw. + TaskPool.throwOne(exceptions); + } + + // raise any iterator exception. + // this can not be suppressed. + if (iteratorIOE != null) { + throw iteratorIOE; + } + + // return true if all tasks succeeded. + return !taskFailed.get(); + } + + /** + * Set the statistics context for this thread. 
+ */ + private void setStatisticsContext() { + if (ioStatisticsContext != null) { + IOStatisticsContext.setThreadIOStatisticsContext(ioStatisticsContext); + } + } + + /** + * Reset the statistics context if it was set earlier. + * This unbinds the current thread from any statistics + * context. + */ + private void resetStatisticsContext() { + if (ioStatisticsContext != null) { + IOStatisticsContext.setThreadIOStatisticsContext(null); + } + } + } + + /** + * Wait for all the futures to complete; there's a small sleep between + * each iteration; enough to yield the CPU. + * @param futures futures. + * @param sleepInterval Interval in milliseconds to await completion. + */ + private static void waitFor(Collection> futures, int sleepInterval) { + int size = futures.size(); + LOG.debug("Waiting for {} tasks to complete", size); + int oldNumFinished = 0; + while (true) { + int numFinished = (int) futures.stream().filter(Future::isDone).count(); + + if (oldNumFinished != numFinished) { + LOG.debug("Finished count -> {}/{}", numFinished, size); + oldNumFinished = numFinished; + } + + if (numFinished == size) { + // all of the futures are done, stop looping + break; + } else { + try { + Thread.sleep(sleepInterval); + } catch (InterruptedException e) { + futures.forEach(future -> future.cancel(true)); + Thread.currentThread().interrupt(); + break; + } + } + } + } + + /** + * Create a task builder for the iterable. + * @param items item source. + * @param type of result. + * @return builder. + */ + public static Builder foreach(Iterable items) { + return new Builder<>(requireNonNull(items, "items")); + } + + /** + * Create a task builder for the remote iterator. + * @param items item source. + * @param type of result. + * @return builder. 
+ */ + public static Builder foreach(RemoteIterator items) { + return new Builder<>(items); + } + + public static Builder foreach(I[] items) { + return new Builder<>(Arrays.asList(requireNonNull(items, "items"))); + } + + /** + * Throw one exception, adding the others as suppressed + * exceptions attached to the one thrown. + * This method never completes normally. + * @param exceptions collection of exceptions + * @param class of exceptions + * @throws E an extracted exception. + */ + private static void throwOne( + Collection exceptions) + throws E { + Iterator iter = exceptions.iterator(); + Exception e = iter.next(); + Class exceptionClass = e.getClass(); + + while (iter.hasNext()) { + Exception other = iter.next(); + if (!exceptionClass.isInstance(other)) { + e.addSuppressed(other); + } + } + + TaskPool.castAndThrow(e); + } + + /** + * Raise an exception of the declared type. + * This method never completes normally. + * @param e exception + * @param class of exceptions + * @throws E a recast exception. + */ + @SuppressWarnings("unchecked") + private static void castAndThrow(Exception e) throws E { + if (e instanceof RuntimeException) { + throw (RuntimeException) e; + } + throw (E) e; + } + + /** + * Interface to whatever lets us submit tasks. + */ + public interface Submitter { + + /** + * Submit work. + * @param task task to execute + * @return the future of the submitted task. + */ + Future submit(Runnable task); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/package-info.java new file mode 100644 index 0000000000000..18d23b31ff6d7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/package-info.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Support for functional programming within the Hadoop APIs. + * + * Much of this is needed simply to cope with Java's checked exceptions and + * the fact that the java.util.function can only throw runtime exceptions. + *

      + * Pretty much all the Hadoop FS APIs raise IOExceptions, hence the need + * for these classes. If Java had made a different decision about the + * nature of exceptions, life would be better. + *

      + * Do note that the {@link org.apache.hadoop.util.functional.RemoteIterators} + * iterators go beyond that of the java ones, in terms of declaring themselves + * Closeable and implementors of + * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource}; a chain + * of wrapped iterators can supply statistics of the inner iterators, and + * encourage close() to be called after use. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +package org.apache.hadoop.util.functional; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/hash/JenkinsHash.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/hash/JenkinsHash.java index 3f62aef00a5f6..595a09db3f824 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/hash/JenkinsHash.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/hash/JenkinsHash.java @@ -247,7 +247,7 @@ public int hash(byte[] key, int nbytes, int initval) { /** * Compute the hash of the specified file * @param args name of file to compute hash of. - * @throws IOException + * @throws IOException raised on errors performing I/O. */ public static void main(String[] args) throws IOException { if (args.length != 1) { diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index ac3767b276882..36b560305b36a 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -17,7 +17,7 @@ limitations under the License. 
--> - + Release @@ -69,15 +69,6 @@ hadoop - $(CustomSnappyPrefix) - $(CustomSnappyPrefix)\lib - $(CustomSnappyPrefix)\bin - $(CustomSnappyLib) - $(CustomSnappyPrefix) - $(CustomSnappyPrefix)\include - $(CustomSnappyInclude) - true - $(SnappyInclude);$(IncludePath) $(ZLIB_HOME);$(IncludePath) @@ -87,11 +78,6 @@ $(CustomIsalLib) true - - - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" - - - - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" /D HADOOP_ISAL_LIBRARY=L\"isa-l.dll\" @@ -181,7 +156,6 @@ - diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/exception.c b/hadoop-common-project/hadoop-common/src/main/native/src/exception.c index fc072e8002bf2..a25cc3d3b7eef 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/exception.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/exception.c @@ -111,8 +111,8 @@ jthrowable newIOException(JNIEnv* env, const char *fmt, ...) const char* terror(int errnum) { -#if defined(__sun) -// MT-Safe under Solaris which doesn't support sys_errlist/sys_nerr +#if defined(__sun) || defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 32) +// MT-Safe under Solaris or glibc >= 2.32 not supporting sys_errlist/sys_nerr return strerror(errnum); #else if ((errnum < 0) || (errnum >= sys_nerr)) { diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/random/OpensslSecureRandom.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/random/OpensslSecureRandom.c index 26e1fa623e859..3f141be05b549 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/random/OpensslSecureRandom.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/random/OpensslSecureRandom.c @@ -42,16 +42,18 @@ #ifdef UNIX static void * (*dlsym_CRYPTO_malloc) (int, const char *, int); static void (*dlsym_CRYPTO_free) (void *); +#if OPENSSL_VERSION_NUMBER < 
0x10100000L static int (*dlsym_CRYPTO_num_locks) (void); static void (*dlsym_CRYPTO_set_locking_callback) (void (*)()); static void (*dlsym_CRYPTO_set_id_callback) (unsigned long (*)()); static void (*dlsym_ENGINE_load_rdrand) (void); +static void (*dlsym_ENGINE_cleanup) (void); +#endif static ENGINE * (*dlsym_ENGINE_by_id) (const char *); static int (*dlsym_ENGINE_init) (ENGINE *); static int (*dlsym_ENGINE_set_default) (ENGINE *, unsigned int); static int (*dlsym_ENGINE_finish) (ENGINE *); static int (*dlsym_ENGINE_free) (ENGINE *); -static void (*dlsym_ENGINE_cleanup) (void); static int (*dlsym_RAND_bytes) (unsigned char *, int); static unsigned long (*dlsym_ERR_get_error) (void); #endif @@ -113,6 +115,8 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_random_OpensslSecureRandom_ dlerror(); // Clear any existing error LOAD_DYNAMIC_SYMBOL(dlsym_CRYPTO_malloc, env, openssl, "CRYPTO_malloc"); LOAD_DYNAMIC_SYMBOL(dlsym_CRYPTO_free, env, openssl, "CRYPTO_free"); +#if OPENSSL_VERSION_NUMBER < 0x10100000L + // pre-1.1.0 LOAD_DYNAMIC_SYMBOL(dlsym_CRYPTO_num_locks, env, openssl, "CRYPTO_num_locks"); LOAD_DYNAMIC_SYMBOL(dlsym_CRYPTO_set_locking_callback, \ env, openssl, "CRYPTO_set_locking_callback"); @@ -120,13 +124,14 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_random_OpensslSecureRandom_ openssl, "CRYPTO_set_id_callback"); LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_load_rdrand, env, \ openssl, "ENGINE_load_rdrand"); + LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_cleanup, env, openssl, "ENGINE_cleanup"); +#endif LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_by_id, env, openssl, "ENGINE_by_id"); LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_init, env, openssl, "ENGINE_init"); LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_set_default, env, \ openssl, "ENGINE_set_default"); LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_finish, env, openssl, "ENGINE_finish"); LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_free, env, openssl, "ENGINE_free"); - LOAD_DYNAMIC_SYMBOL(dlsym_ENGINE_cleanup, env, openssl, "ENGINE_cleanup"); 
LOAD_DYNAMIC_SYMBOL(dlsym_RAND_bytes, env, openssl, "RAND_bytes"); LOAD_DYNAMIC_SYMBOL(dlsym_ERR_get_error, env, openssl, "ERR_get_error"); #endif @@ -303,9 +308,11 @@ static unsigned long pthreads_thread_id(void) */ static ENGINE * openssl_rand_init(void) { +#if OPENSSL_VERSION_NUMBER < 0x10100000L locks_setup(); dlsym_ENGINE_load_rdrand(); +#endif ENGINE *eng = dlsym_ENGINE_by_id("rdrand"); int ret = -1; @@ -340,11 +347,12 @@ static void openssl_rand_clean(ENGINE *eng, int clean_locks) dlsym_ENGINE_finish(eng); dlsym_ENGINE_free(eng); } - +#if OPENSSL_VERSION_NUMBER < 0x10100000L dlsym_ENGINE_cleanup(); if (clean_locks) { locks_cleanup(); } +#endif } static int openssl_rand_bytes(unsigned char *buf, int num) diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c deleted file mode 100644 index 2c8af1b9115d5..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include "org_apache_hadoop.h" -#include "org_apache_hadoop_io_compress_lz4_Lz4Compressor.h" - -#ifdef UNIX -#include "config.h" -#endif // UNIX -#include "lz4.h" -#include "lz4hc.h" - - -static jfieldID Lz4Compressor_uncompressedDirectBuf; -static jfieldID Lz4Compressor_uncompressedDirectBufLen; -static jfieldID Lz4Compressor_compressedDirectBuf; -static jfieldID Lz4Compressor_directBufferSize; - - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_initIDs -(JNIEnv *env, jclass clazz){ - - Lz4Compressor_uncompressedDirectBuf = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Compressor_uncompressedDirectBufLen = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBufLen", "I"); - Lz4Compressor_compressedDirectBuf = (*env)->GetFieldID(env, clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Compressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_compressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes; - char *compressed_bytes; - - // Get members of Lz4Compressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_directBufferSize); - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - 
- compressed_direct_buf_len = LZ4_compress(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len); - if (compressed_direct_buf_len < 0){ - THROW(env, "java/lang/InternalError", "LZ4_compress failed"); - } - - (*env)->SetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen, 0); - - return (jint)compressed_direct_buf_len; -} - -JNIEXPORT jstring JNICALL -Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_getLibraryName( - JNIEnv *env, jclass class - ) { - char version_buf[128]; - snprintf(version_buf, sizeof(version_buf), "revision:%d", LZ4_versionNumber()); - return (*env)->NewStringUTF(env, version_buf); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_compressBytesDirectHC -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes = NULL; - char* compressed_bytes = NULL; - - // Get members of Lz4Compressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_directBufferSize); - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - compressed_direct_buf_len = LZ4_compressHC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len); - if (compressed_direct_buf_len < 0){ - THROW(env, "java/lang/InternalError", "LZ4_compressHC failed"); - } - - (*env)->SetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen, 0); - - return 
(jint)compressed_direct_buf_len; -} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c deleted file mode 100644 index cdeaa315d1e59..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "org_apache_hadoop.h" -#include "org_apache_hadoop_io_compress_lz4_Lz4Decompressor.h" - -#ifdef UNIX -#include "config.h" -#endif // UNIX -#include "lz4.h" - - -static jfieldID Lz4Decompressor_compressedDirectBuf; -static jfieldID Lz4Decompressor_compressedDirectBufLen; -static jfieldID Lz4Decompressor_uncompressedDirectBuf; -static jfieldID Lz4Decompressor_directBufferSize; - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_initIDs -(JNIEnv *env, jclass clazz){ - - Lz4Decompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Decompressor_compressedDirectBufLen = (*env)->GetFieldID(env,clazz, - "compressedDirectBufLen", "I"); - Lz4Decompressor_uncompressedDirectBuf = (*env)->GetFieldID(env,clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Decompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_decompressBytesDirect -(JNIEnv *env, jobject thisj){ - const char *compressed_bytes; - char *uncompressed_bytes; - - // Get members of Lz4Decompressor - jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, Lz4Decompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env,thisj, Lz4Decompressor_compressedDirectBufLen); - jobject uncompressed_direct_buf = (*env)->GetObjectField(env,thisj, Lz4Decompressor_uncompressedDirectBuf); - size_t uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Decompressor_directBufferSize); - - // Get the input direct buffer - compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - 
uncompressed_direct_buf_len = LZ4_decompress_safe(compressed_bytes, uncompressed_bytes, compressed_direct_buf_len, uncompressed_direct_buf_len); - if (uncompressed_direct_buf_len < 0) { - THROW(env, "java/lang/InternalError", "LZ4_uncompress_unknownOutputSize failed."); - } - - (*env)->SetIntField(env, thisj, Lz4Decompressor_compressedDirectBufLen, 0); - - return (jint)uncompressed_direct_buf_len; -} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c deleted file mode 100644 index 34a61733f258e..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c +++ /dev/null @@ -1,897 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Copyright (C) 2011-2014, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ - - - -/************************************** - Tuning Parameter -**************************************/ -#define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8 - - -/************************************** - Memory routines -**************************************/ -#include /* calloc, free */ -#define ALLOCATOR(s) calloc(1,s) -#define FREEMEM free -#include /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** - CPU Feature Detection -**************************************/ -/* 32 or 64 bits ? */ -#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ - || defined(__64BIT__) || defined(__mips64) \ - || defined(__powerpc64__) || defined(__powerpc64le__) \ - || defined(__ppc64__) || defined(__ppc64le__) \ - || defined(__PPC64__) || defined(__PPC64LE__) \ - || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) \ - || defined(__s390x__) ) /* Detects 64 bits mode */ -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -/* - * Little Endian or Big Endian ? 
- * Overwrite the #define below if you know your architecture endianess - */ -#include /* Apparently required to detect endianess */ -#if defined (__GLIBC__) -# include -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ -#endif - -/* - * Unaligned memory access is automatically enabled for "common" CPU, such as x86. - * For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected - * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance - */ -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -/* Define this parameter if your target system or compiler does not support hardware bit count */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -/************************************** - Compiler Options -**************************************/ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -/* "restrict" is a known keyword */ -#else -# define restrict /* Disable restrict */ -#endif - -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include /* For Visual 2005 */ -# if LZ4_ARCH64 /* 64-bits */ -# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ -# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ 
-# else /* 32-bits */ -# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ -# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ -# endif -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable used */ -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - -#ifdef _MSC_VER /* Visual Studio */ -# define lz4_bswap16(x) _byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - - -/************************************** - Includes -**************************************/ -#include "lz4hc.h" -#include "lz4.h" - - -/************************************** - Basic Types -**************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct _U16_S { U16 v; } _PACKED U16_S; -typedef struct _U32_S { U32 v; } _PACKED U32_S; -typedef struct _U64_S { U64 v; } _PACKED U64_S; - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) - - -/************************************** - Constants 
-**************************************/ -#define MINMATCH 4 - -#define DICTIONARY_LOGSIZE 16 -#define MAXD (1<> ((MINMATCH*8)-HASH_LOG)) -#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) -#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) -#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] -#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) - - -/************************************** - Private functions -**************************************/ -#if LZ4_ARCH64 - -FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) -{ -#if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -# else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif -#else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; -# endif -#endif -} - -#else - -FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanReverse( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 
3); -# else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif -#else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif -#endif -} - -#endif - - -int LZ4_sizeofStreamStateHC() -{ - return sizeof(LZ4HC_Data_Structure); -} - -FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base) -{ - MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = base + 1; - hc4->base = base; - hc4->inputBuffer = base; - hc4->end = base; -} - -int LZ4_resetStreamStateHC(void* state, const char* inputBuffer) -{ - if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ - LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer); - return 0; -} - - -void* LZ4_createHC (const char* inputBuffer) -{ - void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); - LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); - return hc4; -} - - -int LZ4_freeHC (void* LZ4HC_Data) -{ - FREEMEM(LZ4HC_Data); - return (0); -} - - -/* Update chains up to ip (excluded) */ -FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) -{ - U16* chainTable = hc4->chainTable; - HTYPE* HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - - while(hc4->nextToUpdate < ip) - { - const BYTE* const p = hc4->nextToUpdate; - size_t delta = (p) - HASH_POINTER(p); - if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; - 
DELTANEXT(p) = (U16)delta; - HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base); - hc4->nextToUpdate++; - } -} - - -char* LZ4_slideInputBufferHC(void* LZ4HC_Data) -{ - LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data; - size_t distance = (hc4->end - 64 KB) - hc4->inputBuffer; - - if (hc4->end <= hc4->inputBuffer + 64 KB) return (char*)(hc4->end); /* no update : less than 64KB within buffer */ - - distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */ - LZ4HC_Insert(hc4, hc4->end - MINMATCH); - memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB); - hc4->nextToUpdate -= distance; - hc4->base -= distance; - if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */ - { - int i; - hc4->base += 1 GB; - for (i=0; ihashTable[i] -= 1 GB; - } - hc4->end -= distance; - return (char*)(hc4->end); -} - - -FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) -{ - const BYTE* p1t = p1; - - while (p1tchainTable; - HTYPE* const HashTable = hc4->hashTable; - const BYTE* ref; - INITBASE(base,hc4->base); - int nbAttempts=maxNbAttempts; - size_t repl=0, ml=0; - U16 delta=0; /* useless assignment, to remove an uninitialization warning */ - - /* HC4 match finder */ - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - -#define REPEAT_OPTIMIZATION -#ifdef REPEAT_OPTIMIZATION - /* Detect repetitive sequences of length <= 4 */ - if ((U32)(ip-ref) <= 4) /* potential repetition */ - { - if (A32(ref) == A32(ip)) /* confirmed */ - { - delta = (U16)(ip-ref); - repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - *matchpos = ref; - } - ref = GETNEXT(ref); - } -#endif - - while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) - { - nbAttempts--; - if (*(ref+ml) == *(ip+ml)) - if (A32(ref) == A32(ip)) - { - size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = ref; } - } - ref = 
GETNEXT(ref); - } - -#ifdef REPEAT_OPTIMIZATION - /* Complete table */ - if (repl) - { - const BYTE* ptr = ip; - const BYTE* end; - - end = ip + repl - (MINMATCH-1); - while(ptr < end-delta) - { - DELTANEXT(ptr) = delta; /* Pre-Load */ - ptr++; - } - do - { - DELTANEXT(ptr) = delta; - HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); /* Head of chain */ - ptr++; - } while(ptr < end); - hc4->nextToUpdate = end; - } -#endif - - return (int)ml; -} - - -FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts) -{ - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - const BYTE* ref; - int nbAttempts = maxNbAttempts; - int delta = (int)(ip-startLimit); - - /* First Match */ - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - - while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) - { - nbAttempts--; - if (*(startLimit + longest) == *(ref - delta + longest)) - if (A32(ref) == A32(ip)) - { -#if 1 - const BYTE* reft = ref+MINMATCH; - const BYTE* ipt = ip+MINMATCH; - const BYTE* startt = ip; - - while (iptstartLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;} - - if ((ipt-startt) > longest) - { - longest = (int)(ipt-startt); - *matchpos = reft; - *startpos = startt; - } - } - ref = GETNEXT(ref); - } - - return longest; -} - - -typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; - -FORCE_INLINE int LZ4HC_encodeSequence ( - const BYTE** ip, - BYTE** op, - const BYTE** anchor, - int matchLength, - const BYTE* ref, - limitedOutput_directive limitedOutputBuffer, - BYTE* oend) -{ - int length; - BYTE* token; - - /* Encode Literal length */ - length = (int)(*ip - *anchor); - token = (*op)++; - if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1; /* 
Check output limit */ - if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } - else *token = (BYTE)(length<>8) > oend)) return 1; /* Check output limit */ - if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } - else *token += (BYTE)(length); - - /* Prepare next loop */ - *ip += matchLength; - *anchor = *ip; - - return 0; -} - - -#define MAX_COMPRESSION_LEVEL 16 -static int LZ4HC_compress_generic ( - void* ctxvoid, - const char* source, - char* dest, - int inputSize, - int maxOutputSize, - int compressionLevel, - limitedOutput_directive limit - ) -{ - LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - const int maxNbAttempts = compressionLevel > MAX_COMPRESSION_LEVEL ? 1 << MAX_COMPRESSION_LEVEL : compressionLevel ? 
1<<(compressionLevel-1) : 1<end) return 0; - ctx->end += inputSize; - - ip++; - - /* Main Loop */ - while (ip < mflimit) - { - ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); - if (!ml) { ip++; continue; } - - /* saved, in case we would skip too much */ - start0 = ip; - ref0 = ref; - ml0 = ml; - -_Search2: - if (ip+ml < mflimit) - ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts); - else ml2 = ml; - - if (ml2 == ml) /* No better match */ - { - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - continue; - } - - if (start0 < ip) - { - if (start2 < ip + ml0) /* empirical */ - { - ip = start0; - ref = ref0; - ml = ml0; - } - } - - /* Here, start0==ip */ - if ((start2 - ip) < 3) /* First Match too small : removed */ - { - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } - -_Search3: - /* - * Currently we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) - */ - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - - if (start2 + ml2 < mflimit) - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); - else ml3 = ml2; - - if (ml3 == ml2) /* No better match : 2 sequences to encode */ - { - /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - ip = start2; - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; - continue; - } - - if 
(start3 < ip+ml+3) /* Not enough space for match 2 : remove it */ - { - if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ - { - if (start2 < ip+ml) - { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - /* - * OK, now we have 3 ascending matches; let's write at least the first one - * ip & ref are known; Now for ml - */ - if (start2 < ip+ml) - { - if ((start2 - ip) < (int)ML_MASK) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = (int)(start2 - ip); - } - } - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - - ip = start2; - ref = ref2; - ml = ml2; - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; - - } - - /* Encode Last Literals */ - { - int lastRun = (int)(iend - anchor); - if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun< The memory position where the next input data block must start is provided as the result of the function. - -Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual. 
- -When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure. -*/ - -int LZ4_sizeofStreamStateHC(void); -int LZ4_resetStreamStateHC(void* state, const char* inputBuffer); - -/* -These functions achieve the same result as : -void* LZ4_createHC (const char* inputBuffer); - -They are provided here to allow the user program to allocate memory using its own routines. - -To know how much space must be allocated, use LZ4_sizeofStreamStateHC(); -Note also that space must be aligned for pointers (32 or 64 bits). - -Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer); -void* state is a pointer to the space allocated. -It must be aligned for pointers (32 or 64 bits), and be large enough. -The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. -The input buffer must be already allocated, and size at least 192KB. -'inputBuffer' will also be the 'const char* source' of the first block. - -The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState(). -return value of LZ4_resetStreamStateHC() must be 0 is OK. -Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)). -*/ - - -#if defined (__cplusplus) -} -#endif diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c deleted file mode 100644 index 9a09f078d8260..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "org_apache_hadoop_io_compress_snappy.h" - -#if defined HADOOP_SNAPPY_LIBRARY - -#include -#include -#include - -#ifdef UNIX -#include -#include "config.h" -#endif // UNIX - -#ifdef WINDOWS -#include "winutils.h" -#endif - -#include "org_apache_hadoop_io_compress_snappy_SnappyCompressor.h" - -#define JINT_MAX 0x7fffffff - -static jfieldID SnappyCompressor_uncompressedDirectBuf; -static jfieldID SnappyCompressor_uncompressedDirectBufLen; -static jfieldID SnappyCompressor_compressedDirectBuf; -static jfieldID SnappyCompressor_directBufferSize; - -#ifdef UNIX -static snappy_status (*dlsym_snappy_compress)(const char*, size_t, char*, size_t*); -#endif - -#ifdef WINDOWS -typedef snappy_status (__cdecl *__dlsym_snappy_compress)(const char*, size_t, char*, size_t*); -static __dlsym_snappy_compress dlsym_snappy_compress; -#endif - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs -(JNIEnv *env, jclass clazz){ -#ifdef UNIX - // Load libsnappy.so - void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); - if (!libsnappy) { - char msg[1000]; - snprintf(msg, 1000, "%s (%s)!", "Cannot load " HADOOP_SNAPPY_LIBRARY, dlerror()); - THROW(env, "java/lang/UnsatisfiedLinkError", msg); - return; - } -#endif - -#ifdef 
WINDOWS - HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); - if (!libsnappy) { - THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); - return; - } -#endif - - // Locate the requisite symbols from libsnappy.so -#ifdef UNIX - dlerror(); // Clear any existing error - LOAD_DYNAMIC_SYMBOL(dlsym_snappy_compress, env, libsnappy, "snappy_compress"); -#endif - -#ifdef WINDOWS - LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_compress, dlsym_snappy_compress, env, libsnappy, "snappy_compress"); -#endif - - SnappyCompressor_uncompressedDirectBuf = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyCompressor_uncompressedDirectBufLen = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBufLen", "I"); - SnappyCompressor_compressedDirectBuf = (*env)->GetFieldID(env, clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyCompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes; - char* compressed_bytes; - snappy_status ret; - // Get members of SnappyCompressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_directBufferSize); - size_t buf_len; - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - 
if (compressed_bytes == 0) { - return (jint)0; - } - - /* size_t should always be 4 bytes or larger. */ - buf_len = (size_t)compressed_direct_buf_len; - ret = dlsym_snappy_compress(uncompressed_bytes, uncompressed_direct_buf_len, - compressed_bytes, &buf_len); - if (ret != SNAPPY_OK){ - THROW(env, "java/lang/InternalError", "Could not compress data. Buffer length is too small."); - return 0; - } - if (buf_len > JINT_MAX) { - THROW(env, "java/lang/InternalError", "Invalid return buffer length."); - return 0; - } - - (*env)->SetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen, 0); - return (jint)buf_len; -} - -JNIEXPORT jstring JNICALL -Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_getLibraryName(JNIEnv *env, jclass class) { -#ifdef UNIX - if (dlsym_snappy_compress) { - Dl_info dl_info; - if(dladdr( - dlsym_snappy_compress, - &dl_info)) { - return (*env)->NewStringUTF(env, dl_info.dli_fname); - } - } - - return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY); -#endif - -#ifdef WINDOWS - LPWSTR filename = NULL; - GetLibraryName(dlsym_snappy_compress, &filename); - if (filename != NULL) { - return (*env)->NewString(env, filename, (jsize) wcslen(filename)); - } else { - return (*env)->NewStringUTF(env, "Unavailable"); - } -#endif -} -#endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c deleted file mode 100644 index 69ec1017526fd..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "org_apache_hadoop_io_compress_snappy.h" - -#if defined HADOOP_SNAPPY_LIBRARY - -#include -#include -#include - -#ifdef UNIX -#include -#include "config.h" -#endif - -#include "org_apache_hadoop_io_compress_snappy_SnappyDecompressor.h" - -static jfieldID SnappyDecompressor_compressedDirectBuf; -static jfieldID SnappyDecompressor_compressedDirectBufLen; -static jfieldID SnappyDecompressor_uncompressedDirectBuf; -static jfieldID SnappyDecompressor_directBufferSize; - -#ifdef UNIX -static snappy_status (*dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); -#endif - -#ifdef WINDOWS -typedef snappy_status (__cdecl *__dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); -static __dlsym_snappy_uncompress dlsym_snappy_uncompress; -#endif - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs -(JNIEnv *env, jclass clazz){ - - // Load libsnappy.so -#ifdef UNIX - void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); - if (!libsnappy) { - char* msg = (char*)malloc(1000); - snprintf(msg, 1000, "%s (%s)!", "Cannot load " HADOOP_SNAPPY_LIBRARY, dlerror()); - THROW(env, "java/lang/UnsatisfiedLinkError", msg); - return; - } -#endif - -#ifdef WINDOWS - HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); - if (!libsnappy) { - THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); - return; - 
} -#endif - - // Locate the requisite symbols from libsnappy.so -#ifdef UNIX - dlerror(); // Clear any existing error - LOAD_DYNAMIC_SYMBOL(dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); - -#endif - -#ifdef WINDOWS - LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_uncompress, dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); -#endif - - SnappyDecompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyDecompressor_compressedDirectBufLen = (*env)->GetFieldID(env,clazz, - "compressedDirectBufLen", "I"); - SnappyDecompressor_uncompressedDirectBuf = (*env)->GetFieldID(env,clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyDecompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* compressed_bytes = NULL; - char* uncompressed_bytes = NULL; - snappy_status ret; - // Get members of SnappyDecompressor - jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env,thisj, SnappyDecompressor_compressedDirectBufLen); - jobject uncompressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_uncompressedDirectBuf); - size_t uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyDecompressor_directBufferSize); - - // Get the input direct buffer - compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, - uncompressed_bytes, 
&uncompressed_direct_buf_len); - if (ret == SNAPPY_BUFFER_TOO_SMALL){ - THROW(env, "java/lang/InternalError", "Could not decompress data. Buffer length is too small."); - } else if (ret == SNAPPY_INVALID_INPUT){ - THROW(env, "java/lang/InternalError", "Could not decompress data. Input is invalid."); - } else if (ret != SNAPPY_OK){ - THROW(env, "java/lang/InternalError", "Could not decompress data."); - } - - (*env)->SetIntField(env, thisj, SnappyDecompressor_compressedDirectBufLen, 0); - - return (jint)uncompressed_direct_buf_len; -} - -#endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h deleted file mode 100644 index 8394efe477462..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H -#define ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H - -#include "org_apache_hadoop.h" - -#ifdef UNIX -#include -#endif - -#include -#include -#include - -#endif //ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.c index 41eb9e2c85a10..6581f292b4a00 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/zstd/ZStandardCompressor.c @@ -219,13 +219,13 @@ JNIEXPORT jint Java_org_apache_hadoop_io_compress_zstd_ZStandardCompressor_defla return (jint) 0; } - bytes_read += input.pos; + bytes_read += input.pos - uncompressed_direct_buf_off; bytes_written += output.pos; (*env)->SetLongField(env, this, ZStandardCompressor_bytesRead, bytes_read); (*env)->SetLongField(env, this, ZStandardCompressor_bytesWritten, bytes_written); (*env)->SetIntField(env, this, ZStandardCompressor_uncompressedDirectBufOff, input.pos); - (*env)->SetIntField(env, this, ZStandardCompressor_uncompressedDirectBufLen, input.size - input.pos); + (*env)->SetIntField(env, this, ZStandardCompressor_uncompressedDirectBufLen, input.size); return (jint) output.pos; } diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_common.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_common.c index 9cca6dd754b09..816536b637d39 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_common.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_common.c @@ -92,6 +92,7 @@ 
void getInputs(JNIEnv *env, jobjectArray inputs, jintArray inputOffsets, destInputs[i] = NULL; } } + (*env)->ReleaseIntArrayElements(env, inputOffsets, tmpInputOffsets, 0); } void getOutputs(JNIEnv *env, jobjectArray outputs, jintArray outputOffsets, @@ -112,4 +113,5 @@ void getOutputs(JNIEnv *env, jobjectArray outputs, jintArray outputOffsets, byteBuffer)); destOutputs[i] += tmpOutputOffsets[i]; } -} \ No newline at end of file + (*env)->ReleaseIntArrayElements(env, outputOffsets, tmpOutputOffsets, 0); +} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_rs_decoder.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_rs_decoder.c index 52d255afd58d8..72314d2ad545a 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_rs_decoder.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/jni_rs_decoder.c @@ -66,6 +66,7 @@ jintArray outputOffsets) { decode(&rsDecoder->decoder, rsDecoder->inputs, tmpErasedIndexes, numErased, rsDecoder->outputs, chunkSize); + (*env)->ReleaseIntArrayElements(env, erasedIndexes, tmpErasedIndexes, 0); } JNIEXPORT void JNICALL diff --git a/hadoop-common-project/hadoop-common/src/main/proto/FSProtos.proto b/hadoop-common-project/hadoop-common/src/main/proto/FSProtos.proto index c895bce757b77..17bbcf8f48707 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/FSProtos.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/FSProtos.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/GenericRefreshProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/GenericRefreshProtocol.proto index 6296f88da69b8..91d2e2e6c4c4e 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/GenericRefreshProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/GenericRefreshProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/GetUserMappingsProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/GetUserMappingsProtocol.proto index cb91a13b04875..bccb57dd86832 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/GetUserMappingsProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/GetUserMappingsProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto index 5a88a7ff03f02..d9c7d70c0e904 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. 
- * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/IpcConnectionContext.proto b/hadoop-common-project/hadoop-common/src/main/proto/IpcConnectionContext.proto index 16e2fb7c4db75..d853cf3afb3ca 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/IpcConnectionContext.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/IpcConnectionContext.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto b/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto index fa11313402758..8cace2d454ae9 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax = "proto2"; @@ -64,4 +64,7 @@ message RequestHeaderProto { /** protocol version of class declaring the called method */ required uint64 clientProtocolVersion = 3; + + /** protocol extensions */ + extensions 1000 to max; } diff --git a/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine2.proto b/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine2.proto new file mode 100644 index 0000000000000..0e38070ab33fa --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine2.proto @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * These .proto interfaces are private and stable. + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html + * for what changes are allowed for a *stable* .proto interface. + */ +syntax = "proto2"; +/** + * These are the messages used by Hadoop RPC for the Rpc Engine Protocol Buffer + * to marshal the request and response in the RPC layer. 
+ * The messages are sent in addition to the normal RPC header as + * defined in RpcHeader.proto + */ +option java_package = "org.apache.hadoop.ipc.protobuf"; +option java_outer_classname = "ProtobufRpcEngine2Protos"; +option java_generate_equals_and_hash = true; +package hadoop.common; + +/** + * This message is the header for the Protobuf Rpc Engine + * when sending a RPC request from RPC client to the RPC server. + * The actual request (serialized as protobuf) follows this request. + * + * No special header is needed for the Rpc Response for Protobuf Rpc Engine. + * The normal RPC response header (see RpcHeader.proto) are sufficient. + */ +message RequestHeaderProto { + /** Name of the RPC method */ + required string methodName = 1; + + /** + * RPCs for a particular interface (ie protocol) are done using a + * IPC connection that is setup using rpcProxy. + * The rpcProxy's has a declared protocol name that is + * sent form client to server at connection time. + * + * Each Rpc call also sends a protocol name + * (called declaringClassprotocolName). This name is usually the same + * as the connection protocol name except in some cases. + * For example metaProtocols such ProtocolInfoProto which get metainfo + * about the protocol reuse the connection but need to indicate that + * the actual protocol is different (i.e. 
the protocol is + * ProtocolInfoProto) since they reuse the connection; in this case + * the declaringClassProtocolName field is set to the ProtocolInfoProto + */ + required string declaringClassProtocolName = 2; + + /** protocol version of class declaring the called method */ + required uint64 clientProtocolVersion = 3; + + /** protocol extensions */ + extensions 1000 to max; +} diff --git a/hadoop-common-project/hadoop-common/src/main/proto/ProtocolInfo.proto b/hadoop-common-project/hadoop-common/src/main/proto/ProtocolInfo.proto index 0e9d0d4baa413..77d883227e5fe 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/ProtocolInfo.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/ProtocolInfo.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RefreshAuthorizationPolicyProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/RefreshAuthorizationPolicyProtocol.proto index f57c6d6303916..7bf69a70ef07f 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/RefreshAuthorizationPolicyProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/RefreshAuthorizationPolicyProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RefreshCallQueueProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/RefreshCallQueueProtocol.proto index 463b7c548fe22..138ede842d554 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/RefreshCallQueueProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/RefreshCallQueueProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RefreshUserMappingsProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/RefreshUserMappingsProtocol.proto index a1130f5c2d96d..a0f9e41b7082d 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/RefreshUserMappingsProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/RefreshUserMappingsProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto index 4705b4276b876..d9becf722e982 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. 
- * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; @@ -63,7 +63,7 @@ enum RpcKindProto { message RPCTraceInfoProto { optional int64 traceId = 1; // parentIdHigh optional int64 parentId = 2; // parentIdLow - + optional bytes spanContext = 3; // Trace SpanContext } /** @@ -91,6 +91,10 @@ message RpcRequestHeaderProto { // the header for the RpcRequest optional RPCTraceInfoProto traceInfo = 6; // tracing info optional RPCCallerContextProto callerContext = 7; // call context optional int64 stateId = 8; // The last seen Global State ID + // Alignment context info for use with routers. + // The client should not interpret these bytes, but only forward bytes + // received from RpcResponseHeaderProto.routerFederatedState. + optional bytes routerFederatedState = 9; } @@ -157,6 +161,10 @@ message RpcResponseHeaderProto { optional bytes clientId = 7; // Globally unique client ID optional sint32 retryCount = 8 [default = -1]; optional int64 stateId = 9; // The last written Global State ID + // Alignment context info for use with routers. + // The client should not interpret these bytes, but only + // forward them to the router using RpcRequestHeaderProto.routerFederatedState. + optional bytes routerFederatedState = 10; } message RpcSaslProto { diff --git a/hadoop-common-project/hadoop-common/src/main/proto/Security.proto b/hadoop-common-project/hadoop-common/src/main/proto/Security.proto index 5177a86ef113e..37dbf7f18a9c7 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/Security.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/Security.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. 
- * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/TraceAdmin.proto b/hadoop-common-project/hadoop-common/src/main/proto/TraceAdmin.proto index 8cf131bfb460a..390040cc9d152 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/TraceAdmin.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/TraceAdmin.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/ZKFCProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/ZKFCProtocol.proto index 98bc05f4a360e..8f9d9f0d9e7f4 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/ZKFCProtocol.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/ZKFCProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax = "proto2"; diff --git a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory deleted file mode 100644 index f0054fedb8e1c..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -org.apache.hadoop.fs.FileSystemMultipartUploader$Factory diff --git a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.security.alias.CredentialProviderFactory b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.security.alias.CredentialProviderFactory index f673cf4cae427..1c6fc74d33406 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.security.alias.CredentialProviderFactory +++ b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.security.alias.CredentialProviderFactory @@ -16,3 +16,5 @@ org.apache.hadoop.security.alias.JavaKeyStoreProvider$Factory org.apache.hadoop.security.alias.LocalJavaKeyStoreProvider$Factory org.apache.hadoop.security.alias.UserProvider$Factory +org.apache.hadoop.security.alias.BouncyCastleFipsKeyStoreProvider$Factory +org.apache.hadoop.security.alias.LocalBouncyCastleFipsKeyStoreProvider$Factory diff --git a/hadoop-common-project/hadoop-common/src/main/resources/common-version-info.properties b/hadoop-common-project/hadoop-common/src/main/resources/common-version-info.properties index 6f8558b8d4fe9..0f075c8139a32 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/common-version-info.properties +++ b/hadoop-common-project/hadoop-common/src/main/resources/common-version-info.properties @@ -24,3 +24,4 @@ date=${version-info.build.time} url=${version-info.scm.uri} srcChecksum=${version-info.source.md5} protocVersion=${hadoop.protobuf.version} +compilePlatform=${os.detected.classifier} diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index aba3b52dfc9b5..b1a25ce1f0081 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -48,6 +48,14 
@@ ordering of the filters. + + hadoop.http.idle_timeout.ms + 60000 + + NN/JN/DN Server connection timeout in milliseconds. + + + @@ -113,6 +121,14 @@ + + hadoop.security.resolver.impl + org.apache.hadoop.net.DNSDomainNameResolver + + The resolver implementation used to resolve FQDN for Kerberos + + + hadoop.security.dns.log-slow-lookups.enabled false @@ -659,6 +675,8 @@ ssl.keystore.pass$ fs.s3a.server-side-encryption.key fs.s3a.*.server-side-encryption.key + fs.s3a.encryption.algorithm + fs.s3a.encryption.key fs.s3a.secret.key fs.s3a.*.secret.key fs.s3a.session.key @@ -668,6 +686,9 @@ fs.azure.account.key.* fs.azure.oauth2.* fs.adl.oauth2.* + fs.gs.encryption.* + fs.gs.proxy.* + fs.gs.auth.* credential$ oauth.*secret oauth.*password @@ -681,6 +702,27 @@ + + hadoop.security.token.service.use_ip + true + + Controls whether tokens always use IP addresses. + DNS changes will not be detected if this option is enabled. + Existing client connections that break will always reconnect + to the IP of the original host. New clients will connect + to the host's new IP but fail to locate a token. + Disabling this option will allow existing and new clients + to detect an IP change and continue to locate the new host's token. + + In secure multi-homed environments, this parameter will need to + be set to false on both cluster servers and clients (see HADOOP-7733). + If it is not set correctly, the symptom will be inability to + submit an application to YARN from an external client + (with error "client host not a member of the Hadoop cluster"), + or even from an in-cluster client if server failover occurs. + + + hadoop.workaround.non.threadsafe.getpwuid true @@ -944,6 +986,125 @@ + + fs.viewfs.overload.scheme.target.hdfs.impl + org.apache.hadoop.hdfs.DistributedFileSystem + The DistributedFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are hdfs. 
+ + + + + fs.viewfs.overload.scheme.target.s3a.impl + org.apache.hadoop.fs.s3a.S3AFileSystem + The S3AFileSystem for view file system overload scheme when + child file system and ViewFSOverloadScheme's schemes are s3a. + + + + fs.viewfs.overload.scheme.target.ofs.impl + org.apache.hadoop.fs.ozone.RootedOzoneFileSystem + The RootedOzoneFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are ofs. + + + + + fs.viewfs.overload.scheme.target.o3fs.impl + org.apache.hadoop.fs.ozone.OzoneFileSystem + The OzoneFileSystem for view file system overload scheme when + child file system and ViewFSOverloadScheme's schemes are o3fs. + + + + fs.viewfs.overload.scheme.target.ftp.impl + org.apache.hadoop.fs.ftp.FTPFileSystem + The FTPFileSystem for view file system overload scheme when + child file system and ViewFSOverloadScheme's schemes are ftp. + + + + + fs.viewfs.overload.scheme.target.webhdfs.impl + org.apache.hadoop.hdfs.web.WebHdfsFileSystem + The WebHdfsFileSystem for view file system overload scheme when + child file system and ViewFSOverloadScheme's schemes are webhdfs. + + + + + fs.viewfs.overload.scheme.target.swebhdfs.impl + org.apache.hadoop.hdfs.web.SWebHdfsFileSystem + The SWebHdfsFileSystem for view file system overload scheme when + child file system and ViewFSOverloadScheme's schemes are swebhdfs. + + + + + fs.viewfs.overload.scheme.target.file.impl + org.apache.hadoop.fs.LocalFileSystem + The LocalFileSystem for view file system overload scheme when + child file system and ViewFSOverloadScheme's schemes are file. + + + + + fs.viewfs.overload.scheme.target.abfs.impl + org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem + The AzureBlobFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are abfs. 
+ + + + + fs.viewfs.overload.scheme.target.abfss.impl + org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem + The SecureAzureBlobFileSystem for view file system overload + scheme when child file system and ViewFSOverloadScheme's schemes are abfss. + + + + + fs.viewfs.overload.scheme.target.wasb.impl + org.apache.hadoop.fs.azure.NativeAzureFileSystem + The NativeAzureFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are wasb. + + + + + fs.viewfs.overload.scheme.target.oss.impl + org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem + The AliyunOSSFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are oss. + + + + + fs.viewfs.overload.scheme.target.http.impl + org.apache.hadoop.fs.http.HttpFileSystem + The HttpFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are http. + + + + + fs.viewfs.overload.scheme.target.gs.impl + com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS + The GoogleHadoopFS/Google Cloud Storage file system for view + file system overload scheme when child file system and ViewFSOverloadScheme's + schemes are gs. + + + + + fs.viewfs.overload.scheme.target.https.impl + org.apache.hadoop.fs.http.HttpsFileSystem + The HttpsFileSystem for view file system overload scheme + when child file system and ViewFSOverloadScheme's schemes are https. + + + fs.AbstractFileSystem.ftp.impl org.apache.hadoop.fs.ftp.FtpFs @@ -1020,12 +1181,6 @@ File space usage statistics refresh interval in msec. - - fs.swift.impl - org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem - The implementation class of the OpenStack Swift Filesystem - - fs.automatic.close true @@ -1059,7 +1214,7 @@ com.amazonaws.auth.AWSCredentialsProvider. When S3A delegation tokens are not enabled, this list will be used - to directly authenticate with S3 and DynamoDB services. 
+ to directly authenticate with S3 and other AWS services. When S3A Delegation tokens are enabled, depending upon the delegation token binding it may be used to communicate wih the STS endpoint to request session/role @@ -1188,12 +1343,6 @@ - - fs.s3a.delegation.tokens.enabled - false - - - fs.s3a.delegation.token.binding @@ -1213,7 +1362,7 @@ fs.s3a.connection.maximum - 48 + 96 Controls the maximum number of simultaneous connections to S3. This must be bigger than the value of fs.s3a.threads.max so as to stop threads being blocked waiting for new HTTPS connections. @@ -1382,7 +1531,9 @@ fs.s3a.acl.default Set a canned ACL for newly created and copied objects. Value may be Private, PublicRead, PublicReadWrite, AuthenticatedRead, LogDeliveryWrite, BucketOwnerRead, - or BucketOwnerFullControl. + or BucketOwnerFullControl. + If set, caller IAM role must have "s3:PutObjectAcl" permission on the bucket. + @@ -1408,20 +1559,22 @@ - fs.s3a.server-side-encryption-algorithm - Specify a server-side encryption algorithm for s3a: file system. - Unset by default. It supports the following values: 'AES256' (for SSE-S3), - 'SSE-KMS' and 'SSE-C'. + fs.s3a.encryption.algorithm + Specify a server-side encryption or client-side + encryption algorithm for s3a: file system. Unset by default. It supports the + following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'SSE-C', and 'CSE-KMS' - fs.s3a.server-side-encryption.key - Specific encryption key to use if fs.s3a.server-side-encryption-algorithm - has been set to 'SSE-KMS' or 'SSE-C'. In the case of SSE-C, the value of this property - should be the Base64 encoded key. If you are using SSE-KMS and leave this property empty, - you'll be using your default's S3 KMS key, otherwise you should set this property to - the specific KMS key id. + fs.s3a.encryption.key + Specific encryption key to use if fs.s3a.encryption.algorithm + has been set to 'SSE-KMS', 'SSE-C' or 'CSE-KMS'. 
In the case of SSE-C + , the value of this property should be the Base64 encoded key. If you are + using SSE-KMS and leave this property empty, you'll be using your default's + S3 KMS key, otherwise you should set this property to the specific KMS key + id. In case of 'CSE-KMS' this value needs to be the AWS-KMS Key ID + generated from AWS console. @@ -1431,6 +1584,14 @@ implementations can still be used + + fs.s3a.accesspoint.required + false + Require that all S3 access is made through Access Points and not through + buckets directly. If enabled, use per-bucket overrides to allow bucket access to a specific set + of buckets. + + fs.s3a.block.size 32M @@ -1441,9 +1602,12 @@ fs.s3a.buffer.dir - ${hadoop.tmp.dir}/s3a + ${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/s3a Comma separated list of directories that will be used to buffer file - uploads to. + uploads to. + Yarn container path will be used as default value on yarn applications, + otherwise fall back to hadoop.tmp.dir + @@ -1510,180 +1674,18 @@ - - fs.s3a.metadatastore.authoritative - false - - When true, allow MetadataStore implementations to act as source of - truth for getting file status and directory listings. Even if this - is set to true, MetadataStore implementations may choose not to - return authoritative results. If the configured MetadataStore does - not support being authoritative, this setting will have no effect. - - - - - fs.s3a.metadatastore.metadata.ttl - 15m - - This value sets how long an entry in a MetadataStore is valid. - - - - - fs.s3a.metadatastore.impl - org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore - - Fully-qualified name of the class that implements the MetadataStore - to be used by s3a. The default class, NullMetadataStore, has no - effect: s3a will continue to treat the backing S3 service as the one - and only source of truth for file and directory metadata. 
- - - - - fs.s3a.metadatastore.fail.on.write.error - true - - When true (default), FileSystem write operations generate - org.apache.hadoop.fs.s3a.MetadataPersistenceException if the metadata - cannot be saved to the metadata store. When false, failures to save to - metadata store are logged at ERROR level, but the overall FileSystem - write operation succeeds. - - - - - fs.s3a.s3guard.cli.prune.age - 86400000 - - Default age (in milliseconds) after which to prune metadata from the - metadatastore when the prune command is run. Can be overridden on the - command-line. - - - - fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem The implementation class of the S3A Filesystem - - fs.s3a.s3guard.ddb.region - - - AWS DynamoDB region to connect to. An up-to-date list is - provided in the AWS Documentation: regions and endpoints. Without this - property, the S3Guard will operate table in the associated S3 bucket region. - - - - - fs.s3a.s3guard.ddb.table - - - The DynamoDB table name to operate. Without this property, the respective - S3 bucket name will be used. - - - - - fs.s3a.s3guard.ddb.table.create - false - - If true, the S3A client will create the table if it does not already exist. - - - - - fs.s3a.s3guard.ddb.table.capacity.read - 0 - - Provisioned throughput requirements for read operations in terms of capacity - units for the DynamoDB table. This config value will only be used when - creating a new DynamoDB table. - If set to 0 (the default), new tables are created with "per-request" capacity. - If a positive integer is provided for this and the write capacity, then - a table with "provisioned capacity" will be created. - You can change the capacity of an existing provisioned-capacity table - through the "s3guard set-capacity" command. - - - - - fs.s3a.s3guard.ddb.table.capacity.write - 0 - - Provisioned throughput requirements for write operations in terms of - capacity units for the DynamoDB table. 
- If set to 0 (the default), new tables are created with "per-request" capacity. - Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read - - - - - fs.s3a.s3guard.ddb.table.sse.enabled - false - - Whether server-side encryption (SSE) is enabled or disabled on the table. - By default it's disabled, meaning SSE is set to AWS owned CMK. - - - - - fs.s3a.s3guard.ddb.table.sse.cmk - - - The KMS Customer Master Key (CMK) used for the KMS encryption on the table. - To specify a CMK, this config value can be its key ID, Amazon Resource Name - (ARN), alias name, or alias ARN. Users only need to provide this config if - the key is different from the default DynamoDB KMS Master Key, which is - alias/aws/dynamodb. - - - - - fs.s3a.s3guard.ddb.max.retries - 9 - - Max retries on throttled/incompleted DynamoDB operations - before giving up and throwing an IOException. - Each retry is delayed with an exponential - backoff timer which starts at 100 milliseconds and approximately - doubles each time. The minimum wait before throwing an exception is - sum(100, 200, 400, 800, .. 100*2^N-1 ) == 100 * ((2^N)-1) - - - - - fs.s3a.s3guard.ddb.throttle.retry.interval - 100ms - - Initial interval to retry after a request is throttled events; - the back-off policy is exponential until the number of retries of - fs.s3a.s3guard.ddb.max.retries is reached. - - - - - fs.s3a.s3guard.ddb.background.sleep - 25ms - - Length (in milliseconds) of pause between each batch of deletes when - pruning metadata. Prevents prune operations (which can typically be low - priority background operations) from overly interfering with other I/O - operations. - - - fs.s3a.retry.limit 7 Number of times to retry any repeatable S3 client request on failure, - excluding throttling requests and S3Guard inconsistency resolution. + excluding throttling requests. 
@@ -1692,7 +1694,7 @@ 500ms Initial retry interval when retrying operations for any reason other - than S3 throttle errors and S3Guard inconsistency resolution. + than S3 throttle errors. @@ -1715,27 +1717,6 @@ - - fs.s3a.s3guard.consistency.retry.limit - 7 - - Number of times to retry attempts to read/open/copy files when - S3Guard believes a specific version of the file to be available, - but the S3 request does not find any version of a file, or a different - version. - - - - - fs.s3a.s3guard.consistency.retry.interval - 2s - - Initial interval between attempts to retry operations while waiting for S3 - to become consistent with the S3Guard data. - An exponential back-off is used here: every failure doubles the delay. - - - fs.s3a.committer.name file @@ -1747,11 +1728,9 @@ fs.s3a.committer.magic.enabled - false + true - Enable support in the filesystem for the S3 "Magic" committer. - When working with AWS S3, S3Guard must be enabled for the destination - bucket, as consistent metadata listings are required. + Enable support in the S3A filesystem for the "Magic" committer. @@ -1799,20 +1778,13 @@ - fs.s3a.committer.staging.abort.pending.uploads + fs.s3a.committer.abort.pending.uploads true - Should the staging committers abort all pending uploads to the destination + Should the committers abort all pending uploads to the destination directory? - Changing this if more than one partitioned committer is - writing to the same destination tree simultaneously; otherwise - the first job to complete will cancel all outstanding uploads from the - others. However, it may lead to leaked outstanding uploads from failed - tasks. If disabled, configure the bucket lifecycle to remove uploads - after a time period, and/or set up a workflow to explicitly delete - entries. Otherwise there is a risk that uncommitted uploads may run up - bills. + Set to false if more than one job is writing to the same directory tree. 
@@ -2032,7 +2004,27 @@ - + + fs.s3a.downgrade.syncable.exceptions + true + + Warn but continue when applications use Syncable.hsync when writing + to S3A. + + + + + + fs.s3a.audit.enabled + true + + Should auditing of S3A requests be enabled? + + + + fs.AbstractFileSystem.wasb.impl org.apache.hadoop.fs.azure.Wasb @@ -2138,6 +2130,26 @@ This setting provides better performance compared to blob-specific saskeys. + + + fs.azure.buffer.dir + ${hadoop.tmp.dir}/abfs + Directory path for buffer files needed to upload data blocks + in AbfsOutputStream. + + + + fs.AbstractFileSystem.gs.impl + com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS + The AbstractFileSystem for gs: uris. + + + + fs.azure.enable.readahead + true + Enabled readahead/prefetching in AbfsInputStream. + + io.seqfile.compress.blocksize 1000000 @@ -2291,6 +2303,14 @@ + + ipc.server.purge.interval + 15 + Define how often calls are cleaned up in the server. + The default is 15 minutes. The unit is minutes. + + + ipc.maximum.data.length 134217728 @@ -2719,7 +2739,7 @@ ${user.home}/hadoop-http-auth-signature-secret The signature secret for signing the authentication tokens. - The same secret should be used for RM/NM/NN/DN configurations. + A different secret should be used for each service. @@ -2960,14 +2980,6 @@ - - hadoop.ssl.enabled - false - - Deprecated. Use dfs.http.policy and yarn.http.policy instead. - - - hadoop.ssl.enabled.protocols TLSv1.2 @@ -3144,6 +3156,21 @@ + + rpc.metrics.timeunit + MILLISECONDS + + This property is used to configure timeunit for various RPC Metrics + e.g rpcQueueTime, rpcLockWaitTime, rpcProcessingTime, + deferredRpcProcessingTime. In the absence of this property, + default timeunit used is milliseconds. + The value of this property should match to any one value of enum: + java.util.concurrent.TimeUnit. + Some of the valid values: NANOSECONDS, MICROSECONDS, MILLISECONDS, + SECONDS etc. + + + rpc.metrics.percentiles.intervals @@ -3692,6 +3719,16 @@ in audit logs. 
+ + hadoop.caller.context.separator + , + + The separator between the fields of a caller context, which may contain many fields. For example, + if the separator is ',' and there are two key/value fields in the context, + the context string is "key1:value1,key2:value2". The + separator should not contain '\t', '\n', '='. + + seq.io.sort.mb diff --git a/hadoop-common-project/hadoop-common/src/main/resources/org.apache.hadoop.application-classloader.properties b/hadoop-common-project/hadoop-common/src/main/resources/org.apache.hadoop.application-classloader.properties index cbbb88764d1f8..dc37949851cf5 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/org.apache.hadoop.application-classloader.properties +++ b/hadoop-common-project/hadoop-common/src/main/resources/org.apache.hadoop.application-classloader.properties @@ -19,7 +19,7 @@ # contains key properties for setting up the application classloader system.classes.default=java.,\ javax.accessibility.,\ - javax.activation.,\ + -javax.activation.,\ javax.activity.,\ javax.annotation.,\ javax.annotation.processing.,\ diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/AdminCompatibilityGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/AdminCompatibilityGuide.md index 67f9c907ff97c..5d2c38d4c5646 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/AdminCompatibilityGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/AdminCompatibilityGuide.md @@ -137,7 +137,8 @@ internal state stores: * The internal MapReduce state data will remain compatible across minor releases within the same major version to facilitate rolling upgrades while MapReduce workloads execute. * HDFS maintains metadata about the data stored in HDFS in a private, internal format that is versioned. In the event of an incompatible change, the store's version number will be incremented. When upgrading an existing cluster, the metadata store will automatically be upgraded if possible.
After the metadata store has been upgraded, it is always possible to reverse the upgrade process. -* The AWS S3A guard keeps a private, internal metadata store that is versioned. Incompatible changes will cause the version number to be incremented. If an upgrade requires reformatting the store, it will be indicated in the release notes. +* The AWS S3A guard kept a private, internal metadata store. + Now that the feature has been removed, the store is obsolete and can be deleted. * The YARN resource manager keeps a private, internal state store of application and scheduler information that is versioned. Incompatible changes will cause the version number to be incremented. If an upgrade requires reformatting the store, it will be indicated in the release notes. * The YARN node manager keeps a private, internal state store of application information that is versioned. Incompatible changes will cause the version number to be incremented. If an upgrade requires reformatting the store, it will be indicated in the release notes. * The YARN federation service keeps a private, internal state store of application and cluster information that is versioned. Incompatible changes will cause the version number to be incremented. If an upgrade requires reformatting the store, it will be indicated in the release notes. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md index 7f61d3bd45592..9095d6f98903d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md @@ -22,7 +22,17 @@ Purpose This document describes how to install and configure Hadoop clusters ranging from a few nodes to extremely large clusters with thousands of nodes. To play with Hadoop, you may first want to install it on a single machine (see [Single Node Setup](./SingleCluster.html)). 
-This document does not cover advanced topics such as [Security](./SecureMode.html) or High Availability. +This document does not cover advanced topics such as High Availability. + +*Important*: all production Hadoop clusters use Kerberos to authenticate callers +and secure access to HDFS data as well as to restrict access to computation +services (YARN etc.). + +These instructions do not cover integration with any Kerberos services; +everyone bringing up a production cluster should include connecting to their +organisation's Kerberos infrastructure as a key part of the deployment. + +See [Security](./SecureMode.html) for details on how to secure a cluster. Prerequisites ------------- @@ -156,7 +166,7 @@ This section deals with important parameters to be specified in the given config | `yarn.nodemanager.remote-app-log-dir` | */logs* | HDFS directory where the application logs are moved on application completion. Need to set appropriate permissions. Only applicable if log-aggregation is enabled. | | `yarn.nodemanager.remote-app-log-dir-suffix` | *logs* | Suffix appended to the remote log dir. Logs will be aggregated to ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam} Only applicable if log-aggregation is enabled. | | `yarn.nodemanager.aux-services` | mapreduce\_shuffle | Shuffle service that needs to be set for Map Reduce applications. | -| `yarn.nodemanager.env-whitelist` | Environment properties to be inherited by containers from NodeManagers | For mapreduce application in addition to the default values HADOOP\_MAPRED_HOME should to be added. Property value should JAVA\_HOME,HADOOP\_COMMON\_HOME,HADOOP\_HDFS\_HOME,HADOOP\_CONF\_DIR,CLASSPATH\_PREPEND\_DISTCACHE,HADOOP\_YARN\_HOME,HADOOP\_MAPRED\_HOME | +| `yarn.nodemanager.env-whitelist` | Environment properties to be inherited by containers from NodeManagers | For mapreduce application in addition to the default values HADOOP\_MAPRED_HOME should be added.
Property value should JAVA\_HOME,HADOOP\_COMMON\_HOME,HADOOP\_HDFS\_HOME,HADOOP\_CONF\_DIR,CLASSPATH\_PREPEND\_DISTCACHE,HADOOP\_YARN\_HOME,HADOOP\_HOME,PATH,LANG,TZ,HADOOP\_MAPRED\_HOME | * Configurations for History Server (Needs to be moved elsewhere): @@ -237,7 +247,7 @@ To start a Hadoop cluster you will need to start both the HDFS and YARN cluster. The first time you bring up HDFS, it must be formatted. Format a new distributed filesystem as *hdfs*: - [hdfs]$ $HADOOP_HOME/bin/hdfs namenode -format + [hdfs]$ $HADOOP_HOME/bin/hdfs namenode -format Start the HDFS NameNode with the following command on the designated node as *hdfs*: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md b/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md index 0bda253fc8b54..4842d5b86d621 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md @@ -60,7 +60,7 @@ Many subcommands honor a common set of configuration options to alter their beha | `-files ` | Specify comma separated files to be copied to the map reduce cluster. Applies only to job. | | `-fs or ` | Specify default filesystem URL to use. Overrides 'fs.defaultFS' property from configurations. | | `-jt or ` | Specify a ResourceManager. Applies only to job. | -| `-libjars ` | Specify comma separated jar files to include in the classpath. Applies only to job. | +| `-libjars ` | Specify comma separated jar files to include in the classpath. Applies only to job. 
| Hadoop Common Commands ====================== diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md b/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md index 03d162a18acc2..0ccb6a8b5c3ca 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md @@ -477,19 +477,12 @@ rolled back to the older layout. ##### AWS S3A Guard Metadata -For each operation in the Hadoop S3 client (s3a) that reads or modifies -file metadata, a shadow copy of that file metadata is stored in a separate -metadata store, which offers HDFS-like consistency for the metadata, and may -also provide faster lookups for things like file status or directory listings. -S3A guard tables are created with a version marker which indicates -compatibility. +The S3Guard metastore used to store metadata in DynamoDB tables; +as such it had to maintain a compatibility strategy. +Now that S3Guard is removed, the tables are not needed. -###### Policy - -The S3A guard metadata schema SHALL be considered -[Private](./InterfaceClassification.html#Private) and -[Unstable](./InterfaceClassification.html#Unstable). Any incompatible change -to the schema MUST result in the version number of the schema being incremented. +Applications configured to use an S3A metadata store other than +the "null" store will fail. ##### YARN Resource Manager State Store diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md b/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md index 0c5f4861c1afb..0de09250b582b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md @@ -133,18 +133,21 @@ In order to indicate a particular provider type and location, the user must prov 1. 
The `UserProvider`, which is represented by the provider URI `user:///`, is used to retrieve credentials from a user's Credentials file. This file is used to store various tokens, secrets and passwords that are needed by executing jobs and applications. 2. The `JavaKeyStoreProvider`, which is represented by the provider URI `jceks://SCHEME/path-to-keystore`, is used to retrieve credentials from a Java keystore file in a filesystem `` The underlying use of the Hadoop filesystem API allows credentials to be stored on the local filesystem or within cluster stores. -3. The `LocalJavaKeyStoreProvider`, which is represented by the provider URI `localjceks://file/path-to-keystore`, is used to access credentials from a Java keystore that is must be stored on the local filesystem. This is needed for credentials that would result in a recursive dependency on accessing HDFS. Anytime that your credential is required to gain access to HDFS we can't depend on getting a credential out of HDFS to do so. +3. The `LocalJavaKeyStoreProvider`, which is represented by the provider URI `localjceks://file/path-to-keystore`, is used to access credentials from a Java keystore that must be stored on the local filesystem. This is needed for credentials that would result in a recursive dependency on accessing HDFS. Anytime that your credential is required to gain access to HDFS we can't depend on getting a credential out of HDFS to do so. +4. The `BouncyCastleFIPSKeyStoreProvider`, which is represented by the provider URI `bcfks://SCHEME/path-to-keystore`, is used to retrieve credentials from a Bouncy Castle FIPS keystore file in a file system `` + The underlying use of the Hadoop filesystem API allows credentials to be stored on the local filesystem or within cluster stores. +5. 
The `LocalBouncyCastleFIPSKeyStoreProvider`, which is represented by the provider URI `localbcfks://file/path-to-keystore`, is used to access credentials from a Bouncy Castle FIPS keystore that must be stored on the local filesystem. This is needed for credentials that would result in a recursive dependency on accessing HDFS. Anytime that your credential is required to gain access to HDFS we can't depend on getting a credential out of HDFS to do so. When credentials are stored in a filesystem, the following rules apply: -* Credentials stored in local `localjceks://` files are loaded in the process reading in the configuration. +* Credentials stored in local `localjceks://` or `localbcfks://` files are loaded in the process reading in the configuration. For use in a YARN application, this means that they must be visible across the entire cluster, in the local filesystems of the hosts. -* Credentials stored with the `jceks://` provider can be stored in the cluster filesystem, +* Credentials stored with the `jceks://` or `bcfks://` provider can be stored in the cluster filesystem, and so visible across the cluster —but not in the filesystem which requires the specific credentials for their access. -To wrap filesystem URIs with a `jceks` URI follow these steps: +To wrap filesystem URIs with a `jceks` URI follow these steps. The Bouncy Castle FIPS provider follows the same steps, replacing `jceks` with `bcfks`, and additionally requires an OS/JDK level FIPS provider to be configured. 1. Take a filesystem URI such as `hdfs://namenode:9001/users/alice/secrets.jceks` 1.
Place `jceks://` in front of the URL: `jceks://hdfs://namenode:9001/users/alice/secrets.jceks` diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/DownstreamDev.md b/hadoop-common-project/hadoop-common/src/site/markdown/DownstreamDev.md index b04bc2488f8ae..e38dfc4c88bc7 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/DownstreamDev.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/DownstreamDev.md @@ -300,7 +300,7 @@ that conflicts with a property defined by Hadoop can lead to unexpected and undesirable results. Users are encouraged to avoid using custom configuration property names that conflict with the namespace of Hadoop-defined properties and thus should avoid using any prefixes used by Hadoop, -e.g. hadoop, io, ipc, fs, net, file, ftp, kfs, ha, file, dfs, mapred, +e.g. hadoop, io, ipc, fs, net, ftp, ha, file, dfs, mapred, mapreduce, and yarn. ### Logging Configuration Files diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md index 7df2cce574b68..382a6df2104c7 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md @@ -122,30 +122,17 @@ Options copyFromLocal ------------- -Usage: `hadoop fs -copyFromLocal URI` - -Similar to the `fs -put` command, except that the source is restricted to a local file reference. - -Options: - -* `-p` : Preserves access and modification times, ownership and the permissions. -(assuming the permissions can be propagated across filesystems) -* `-f` : Overwrites the destination if it already exists. -* `-l` : Allow DataNode to lazily persist the file to disk, Forces a replication - factor of 1. This flag will result in reduced durability. Use with care. -* `-d` : Skip creation of temporary file with the suffix `._COPYING_`. +Identical to the -put command. 
copyToLocal ----------- -Usage: `hadoop fs -copyToLocal [-ignorecrc] [-crc] URI ` - -Similar to get command, except that the destination is restricted to a local file reference. +Identical to the -get command. count ----- -Usage: `hadoop fs -count [-q] [-h] [-v] [-x] [-t []] [-u] [-e] ` +Usage: `hadoop fs -count [-q] [-h] [-v] [-x] [-t []] [-u] [-e] [-s] ` Count the number of directories, files and bytes under the paths that match the specified file pattern. Get the quota and the usage. The output columns with -count are: DIR\_COUNT, FILE\_COUNT, CONTENT\_SIZE, PATHNAME @@ -169,6 +156,8 @@ The output columns with -count -e are: DIR\_COUNT, FILE\_COUNT, CONTENT_SIZE, ER The ERASURECODING\_POLICY is name of the policy for the file. If a erasure coding policy is setted on that file, it will return name of the policy. If no erasure coding policy is setted, it will return \"Replicated\" which means it use replication storage strategy. +The -s option shows the snapshot counts for each directory. + Example: * `hadoop fs -count hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2` @@ -179,6 +168,7 @@ Example: * `hadoop fs -count -u -h hdfs://nn1.example.com/file1` * `hadoop fs -count -u -h -v hdfs://nn1.example.com/file1` * `hadoop fs -count -e hdfs://nn1.example.com/file1` +* `hadoop fs -count -s hdfs://nn1.example.com/file1` Exit Code: @@ -187,7 +177,7 @@ Returns 0 on success and -1 on error. cp ---- -Usage: `hadoop fs -cp [-f] [-p | -p[topax]] URI [URI ...] ` +Usage: `hadoop fs -cp [-f] [-p | -p[topax]] [-t ] [-q ] URI [URI ...] ` Copy files from source to destination. This command allows multiple sources as well in which case the destination must be a directory. @@ -195,13 +185,18 @@ Copy files from source to destination. This command allows multiple sources as w Options: -* The -f option will overwrite the destination if it already exists. -* The -p option will preserve file attributes [topx] (timestamps, ownership, permission, ACL, XAttr). 
If -p is specified with no *arg*, then preserves timestamps, ownership, permission. If -pa is specified, then preserves permission also because ACL is a super-set of permission. Determination of whether raw namespace extended attributes are preserved is independent of the -p flag. +* `-f` : Overwrite the destination if it already exists. +* `-d` : Skip creation of temporary file with the suffix `._COPYING_`. +* `-p` : Preserve file attributes [topx] (timestamps, ownership, permission, ACL, XAttr). If -p is specified with no *arg*, then preserves timestamps, ownership, permission. If -pa is specified, then preserves permission also because ACL is a super-set of permission. Determination of whether raw namespace extended attributes are preserved is independent of the -p flag. +* `-t ` : Number of threads to be used, default is 1. Useful when copying directories containing more than 1 file. +* `-q ` : Thread pool queue size to be used, default is 1024. It takes effect only when thread count greater than 1. Example: * `hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2` * `hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir` +* `hadoop fs -cp -t 5 /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir` +* `hadoop fs -cp -t 10 -q 2048 /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir` Exit Code: @@ -333,27 +328,33 @@ Returns 0 on success and -1 on error. get --- -Usage: `hadoop fs -get [-ignorecrc] [-crc] [-p] [-f] ` +Usage: `hadoop fs -get [-ignorecrc] [-crc] [-p] [-f] [-t ] [-q ] ... ` Copy files to the local file system. Files that fail the CRC check may be copied with the -ignorecrc option. Files and CRCs may be copied using the -crc option. +Options: + +* `-p` : Preserves access and modification times, ownership and the permissions. + (assuming the permissions can be propagated across filesystems) +* `-f` : Overwrites the destination if it already exists. +* `-ignorecrc` : Skip CRC checks on the file(s) downloaded. 
+* `-crc`: write CRC checksums for the files downloaded. +* `-t ` : Number of threads to be used, default is 1. + Useful when downloading directories containing more than 1 file. +* `-q ` : Thread pool queue size to be used, default is 1024. + It takes effect only when thread count greater than 1. + Example: * `hadoop fs -get /user/hadoop/file localfile` * `hadoop fs -get hdfs://nn.example.com/user/hadoop/file localfile` +* `hadoop fs -get -t 10 hdfs://nn.example.com/user/hadoop/dir1 localdir` +* `hadoop fs -get -t 10 -q 2048 hdfs://nn.example.com/user/hadoop/dir* localdir` Exit Code: Returns 0 on success and -1 on error. -Options: - -* `-p` : Preserves access and modification times, ownership and the permissions. -(assuming the permissions can be propagated across filesystems) -* `-f` : Overwrites the destination if it already exists. -* `-ignorecrc` : Skip CRC checks on the file(s) downloaded. -* `-crc`: write CRC checksums for the files downloaded. - getfacl ------- @@ -535,7 +536,7 @@ Returns 0 on success and -1 on error. put --- -Usage: `hadoop fs -put [-f] [-p] [-l] [-d] [ - | .. ]. ` +Usage: `hadoop fs -put [-f] [-p] [-l] [-d] [-t ] [-q ] [ - | ...] ` Copy single src, or multiple srcs from local file system to the destination file system. Also reads input from stdin and writes to destination file system if the source is set to "-" @@ -550,6 +551,10 @@ Options: * `-l` : Allow DataNode to lazily persist the file to disk, Forces a replication factor of 1. This flag will result in reduced durability. Use with care. * `-d` : Skip creation of temporary file with the suffix `._COPYING_`. +* `-t ` : Number of threads to be used, default is 1. + Useful when uploading directories containing more than 1 file. +* `-q ` : Thread pool queue size to be used, default is 1024. + It takes effect only when thread count greater than 1. 
Examples: @@ -558,6 +563,8 @@ Examples: * `hadoop fs -put -f localfile1 localfile2 /user/hadoop/hadoopdir` * `hadoop fs -put -d localfile hdfs://nn.example.com/hadoop/hadoopfile` * `hadoop fs -put - hdfs://nn.example.com/hadoop/hadoopfile` Reads the input from stdin. +* `hadoop fs -put -t 5 localdir hdfs://nn.example.com/hadoop/hadoopdir` +* `hadoop fs -put -t 10 -q 2048 localdir1 localdir2 hdfs://nn.example.com/hadoop/hadoopdir` Exit Code: @@ -768,7 +775,7 @@ timestamp of that URI. * Use -a option to change only the access time * Use -m option to change only the modification time -* Use -t option to specify timestamp (in format yyyyMMddHHmmss) instead of current time +* Use -t option to specify timestamp (in format yyyyMMdd:HHmmss) instead of current time * Use -c option to not create file if it does not exist The timestamp format is as follows @@ -778,13 +785,13 @@ The timestamp format is as follows * HH Two digit hour of the day using 24 hour notation (e.g. 23 stands for 11 pm, 11 stands for 11 am) * mm Two digit minutes of the hour * ss Two digit seconds of the minute -e.g. 20180809230000 represents August 9th 2018, 11pm +e.g. 20180809:230000 represents August 9th 2018, 11pm Example: * `hadoop fs -touch pathname` -* `hadoop fs -touch -m -t 20180809230000 pathname` -* `hadoop fs -touch -t 20180809230000 pathname` +* `hadoop fs -touch -m -t 20180809:230000 pathname` +* `hadoop fs -touch -t 20180809:230000 pathname` * `hadoop fs -touch -a pathname` Exit Code: Returns 0 on success and -1 on error. @@ -821,6 +828,18 @@ Example: * `hadoop fs -truncate 55 /user/hadoop/file1 /user/hadoop/file2` * `hadoop fs -truncate -w 127 hdfs://nn1.example.com/user/hadoop/file1` +concat +-------- + +Usage: `hadoop fs -concat ` + +Concatenate existing source files into the target file. Target file and source +files should be in the same directory. 
+ +Example: + +* `hadoop fs -concat hdfs://cluster/user/hadoop/target-file hdfs://cluster/user/hadoop/file-0 hdfs://cluster/user/hadoop/file-1` + usage ----- @@ -833,7 +852,7 @@ Return the help for an individual command. ==================================================== The Hadoop FileSystem shell works with Object Stores such as Amazon S3, -Azure WASB and OpenStack Swift. +Azure ABFS and Google GCS. @@ -1100,6 +1119,7 @@ actually fail. | `setfattr` | generally unsupported permissions model | | `setrep`| has no effect | | `truncate` | generally unsupported | +| `concat` | generally unsupported | Different object store clients *may* support these commands: do consult the documentation and test against the target store. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md b/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md index ca5ce4898aa71..0c131ef3ea32b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md @@ -43,7 +43,7 @@ The following properties should be in the `core-site.xml` of all the nodes in th | `hadoop.http.authentication.type` | `simple` | Defines authentication used for the HTTP web-consoles. The supported values are: `simple` \| `kerberos` \| `#AUTHENTICATION_HANDLER_CLASSNAME#`. | | `hadoop.http.authentication.token.validity` | `36000` | Indicates how long (in seconds) an authentication token is valid before it has to be renewed. | | `hadoop.http.authentication.token.max-inactive-interval` | `-1` (disabled) | Specifies the time, in seconds, between client requests the server will invalidate the token. | -| `hadoop.http.authentication.signature.secret.file` | `$user.home/hadoop-http-auth-signature-secret` | The signature secret file for signing the authentication tokens. 
The same secret should be used for all nodes in the cluster, ResourceManager, NameNode, DataNode and NodeManager. This file should be readable only by the Unix user running the daemons. | +| `hadoop.http.authentication.signature.secret.file` | `$user.home/hadoop-http-auth-signature-secret` | The signature secret file for signing the authentication tokens. A different secret should be used for each service in the cluster, ResourceManager, NameNode, DataNode and NodeManager. This file should be readable only by the Unix user running the daemons. | | `hadoop.http.authentication.cookie.domain` | | The domain to use for the HTTP cookie that stores the authentication token. For authentication to work correctly across all nodes in the cluster the domain must be correctly set. There is no default value, the HTTP cookie will not have a domain working only with the hostname issuing the HTTP cookie. | | `hadoop.http.authentication.cookie.persistent` | `false` (session cookie) | Specifies the persistence of the HTTP cookie. If the value is true, the cookie is a persistent one. Otherwise, it is a session cookie. *IMPORTANT*: when using IP addresses, browsers ignore cookies with domain settings. For this setting to work properly all nodes in the cluster must be configured to generate URLs with `hostname.domain` names on it. | | `hadoop.http.authentication.simple.anonymous.allowed` | `true` | Indicates whether anonymous requests are allowed when using 'simple' authentication. | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 8210eee038421..92b3ea452bf74 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -65,6 +65,8 @@ rpc --- Each metrics record contains tags such as Hostname and port (number to which server is bound) as additional information along with metrics. 
+`rpc.metrics.timeunit` config can be used to configure timeunit for RPC metrics. +The default timeunit used for RPC metrics is milliseconds (as per the below description). | Name | Description | |:---- |:---- | @@ -101,6 +103,8 @@ Each metrics record contains tags such as Hostname and port (number to which ser | `rpcLockWaitTime`*num*`s90thPercentileLatency` | Shows the 90th percentile of RPC lock wait time in milliseconds (*num* seconds granularity) if `rpc.metrics.quantile.enable` is set to true. *num* is specified by `rpc.metrics.percentiles.intervals`. | | `rpcLockWaitTime`*num*`s95thPercentileLatency` | Shows the 95th percentile of RPC lock wait time in milliseconds (*num* seconds granularity) if `rpc.metrics.quantile.enable` is set to true. *num* is specified by `rpc.metrics.percentiles.intervals`. | | `rpcLockWaitTime`*num*`s99thPercentileLatency` | Shows the 99th percentile of RPC lock wait time in milliseconds (*num* seconds granularity) if `rpc.metrics.quantile.enable` is set to true. *num* is specified by `rpc.metrics.percentiles.intervals`. | +| `TotalRequests` | Total num of requests served by the RPC server. | +| `TotalRequestsPerSeconds` | Total num of requests per second served by the RPC server. | RetryCache/NameNodeRetryCache ----------------------------- @@ -228,6 +232,7 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby NameNode | | `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby NameNode in milliseconds | | `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. 
| +| `PendingEditsCount` | Current number of pending edits | FSNamesystem ------------ @@ -282,6 +287,8 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state | | `FSState` | Current state of the file system: Safemode or Operational | | `LockQueueLength` | Number of threads waiting to acquire FSNameSystem lock | +| `ReadLockLongHoldCount` | The number of times the read lock has been held for longer than the threshold | +| `WriteLockLongHoldCount` | The number of times the write lock has been held for longer than the threshold | | `TotalSyncCount` | Total number of sync operations performed by edit log | | `TotalSyncTimes` | Total number of milliseconds spent by various edit logs in sync operation| | `NameDirSize` | NameNode name directories size in bytes | @@ -361,6 +368,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i |:---- |:---- | | `BytesWritten` | Total number of bytes written to DataNode | | `BytesRead` | Total number of bytes read from DataNode | +| `ReadTransferRateNumOps` | Total number of data read transfers | +| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second. | +| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second. 
| | `BlocksWritten` | Total number of blocks written to DataNode | | `BlocksRead` | Total number of blocks read from DataNode | | `BlocksReplicated` | Total number of blocks replicated | @@ -449,6 +459,7 @@ Each metrics record contains tags such as SessionId and Hostname as additional i | `BlocksDeletedInPendingIBR` | Number of blocks at deleted status in pending incremental block report (IBR) | | `EcReconstructionTasks` | Total number of erasure coding reconstruction tasks | | `EcFailedReconstructionTasks` | Total number of erasure coding failed reconstruction tasks | +| `EcInvalidReconstructionTasks` | Total number of erasure coding invalidated reconstruction tasks | | `EcDecodingTimeNanos` | Total number of nanoseconds spent by decoding tasks | | `EcReconstructionBytesRead` | Total number of bytes read by erasure coding worker | | `EcReconstructionBytesWritten` | Total number of bytes written by erasure coding worker | @@ -486,6 +497,12 @@ contains tags such as Hostname as additional information along with metrics. | `WriteIoRateNumOps` | The number of file write io operations within an interval time of metric | | `WriteIoRateAvgTime` | Mean time of file write io operations in milliseconds | | `WriteIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file write io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `TransferIoRateNumOps` | The number of file transfer io operations within an interval time of metric | +| `TransferIoRateAvgTime` | Mean time of file transfer io operations in milliseconds | +| `TransferIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file transfer io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. 
The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `NativeCopyIoRateNumOps` | The number of file nativeCopy io operations within an interval time of metric | +| `NativeCopyIoRateAvgTime` | Mean time of file nativeCopy io operations in milliseconds | +| `NativeCopyIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file nativeCopy io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `TotalFileIoErrors` | Total number (monotonically increasing) of file io error operations | | `FileIoErrorRateNumOps` | The number of file io error operations within an interval time of metric | | `FileIoErrorRateAvgTime` | It measures the mean time in milliseconds from the start of an operation to hitting a failure | @@ -505,9 +522,12 @@ RBFMetrics shows the metrics which are the aggregated values of sub-clusters' in | `NumInMaintenanceLiveDataNodes` | Number of live Datanodes which are in maintenance state | | `NumInMaintenanceDeadDataNodes` | Number of dead Datanodes which are in maintenance state | | `NumEnteringMaintenanceDataNodes` | Number of Datanodes that are entering the maintenance state | -| `TotalCapacity` | Current raw capacity of DataNodes in bytes | -| `UsedCapacity` | Current used capacity across all DataNodes in bytes | -| `RemainingCapacity` | Current remaining capacity in bytes | +| `TotalCapacity` | Current raw capacity of DataNodes in bytes (long primitive, may overflow) | +| `UsedCapacity` | Current used capacity across all DataNodes in bytes (long primitive, may overflow) | +| `RemainingCapacity` | Current remaining capacity in bytes (long primitive, may overflow) | +| `TotalCapacityBigInt` | Current raw capacity of DataNodes in bytes (using BigInteger) | +| `UsedCapacityBigInt` | Current used capacity across all DataNodes in bytes (using BigInteger) | 
+| `RemainingCapacityBigInt` | Current remaining capacity in bytes (using BigInteger) | | `NumOfMissingBlocks` | Current number of missing blocks | | `NumLiveNodes` | Number of datanodes which are currently live | | `NumDeadNodes` | Number of datanodes which are currently dead | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm index e4f720cee8ce1..1e62e94394f91 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm @@ -47,7 +47,7 @@ Components The native hadoop library includes various components: -* Compression Codecs (bzip2, lz4, snappy, zlib) +* Compression Codecs (bzip2, lz4, zlib) * Native IO utilities for [HDFS Short-Circuit Local Reads](../hadoop-hdfs/ShortCircuitLocalReads.html) and [Centralized Cache Management in HDFS](../hadoop-hdfs/CentralizedCacheManagement.html) * CRC32 checksum implementation @@ -117,7 +117,6 @@ NativeLibraryChecker is a tool to check whether native libraries are loaded corr Native library checking: hadoop: true /home/ozawa/hadoop/lib/native/libhadoop.so.1.0.0 zlib: true /lib/x86_64-linux-gnu/libz.so.1 - snappy: true /usr/lib/libsnappy.so.1 zstd: true /usr/lib/libzstd.so.1 lz4: true revision:99 bzip2: false diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/SecureMode.md b/hadoop-common-project/hadoop-common/src/site/markdown/SecureMode.md index 856861f29e3c2..45b0f1c83b7c1 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/SecureMode.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/SecureMode.md @@ -20,7 +20,9 @@ Hadoop in Secure Mode Introduction ------------ -This document describes how to configure authentication for Hadoop in secure mode. When Hadoop is configured to run in secure mode, each Hadoop service and each user must be authenticated by Kerberos. 
+In its default configuration, we expect you to make sure attackers don't have access to your Hadoop cluster by restricting all network access. If you want any restrictions on who can remotely access data or submit work, you MUST secure authentication and access for your Hadoop cluster as described in this document. + +When Hadoop is configured to run in secure mode, each Hadoop service and each user must be authenticated by Kerberos. Forward and reverse host lookup for all service hosts must be configured correctly to allow services to authenticate with each other. Host lookups may be configured using either DNS or `/etc/hosts` files. Working knowledge of Kerberos and DNS is recommended before attempting to configure Hadoop services in Secure Mode. @@ -267,9 +269,8 @@ The following settings allow configuring SSL access to the NameNode web UI (opti | Parameter | Value | Notes | |:-----------------------------|:------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `dfs.http.policy` | `HTTP_ONLY` or `HTTPS_ONLY` or `HTTP_AND_HTTPS` | `HTTPS_ONLY` turns off http access. This option takes precedence over the deprecated configuration dfs.https.enable and hadoop.ssl.enabled. If using SASL to authenticate data transfer protocol instead of running DataNode as root and using privileged ports, then this property must be set to `HTTPS_ONLY` to guarantee authentication of HTTP servers. (See `dfs.data.transfer.protection`.) | +| `dfs.http.policy` | `HTTP_ONLY` or `HTTPS_ONLY` or `HTTP_AND_HTTPS` | `HTTPS_ONLY` turns off http access. 
If using SASL to authenticate data transfer protocol instead of running DataNode as root and using privileged ports, then this property must be set to `HTTPS_ONLY` to guarantee authentication of HTTP servers. (See `dfs.data.transfer.protection`.) | | `dfs.namenode.https-address` | `0.0.0.0:9871` | This parameter is used in non-HA mode and without federation. See [HDFS High Availability](../hadoop-hdfs/HDFSHighAvailabilityWithNFS.html#Deployment) and [HDFS Federation](../hadoop-hdfs/Federation.html#Federation_Configuration) for details. | -| `dfs.https.enable` | `true` | This value is deprecated. `Use dfs.http.policy` | ### Secondary NameNode diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm index 45c084bb543be..8153dce5c3f82 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm @@ -26,6 +26,17 @@ Purpose This document describes how to set up and configure a single-node Hadoop installation so that you can quickly perform simple operations using Hadoop MapReduce and the Hadoop Distributed File System (HDFS). + +*Important*: all production Hadoop clusters use Kerberos to authenticate callers +and secure access to HDFS data as well as restricting access to computation +services (YARN etc.). + +These instructions do not cover integration with any Kerberos services, +-everyone bringing up a production cluster should include connecting to their +organisation's Kerberos infrastructure as a key part of the deployment. + +See [Security](./SecureMode.html) for details on how to secure a cluster. + Prerequisites ------------- @@ -33,8 +44,6 @@ $H3 Supported Platforms * GNU/Linux is supported as a development and production platform. Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes. 
-* Windows is also a supported platform but the followings steps are for Linux only. To set up Hadoop on Windows, see [wiki page](http://wiki.apache.org/hadoop/Hadoop2OnWindows). - $H3 Required Software Required software for Linux include: @@ -148,8 +157,7 @@ The following instructions are to run a MapReduce job locally. If you want to ex 4. Make the HDFS directories required to execute MapReduce jobs: - $ bin/hdfs dfs -mkdir /user - $ bin/hdfs dfs -mkdir /user/ + $ bin/hdfs dfs -mkdir -p /user/ 5. Copy the input files into the distributed filesystem: @@ -206,7 +214,7 @@ The following instructions assume that 1. ~ 4. steps of [the above instructions] yarn.nodemanager.env-whitelist - JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME + JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Superusers.md b/hadoop-common-project/hadoop-common/src/site/markdown/Superusers.md index 678d56b123c0f..56a763ad08e88 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Superusers.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Superusers.md @@ -80,7 +80,7 @@ If more lax security is preferred, the wildcard value \* may be used to allow im * -The `hadoop.proxyuser.$superuser.hosts` accepts list of ip addresses, ip address ranges in CIDR format and/or host names. For example, by specifying as below, user named `super` accessing from hosts in the range `10.222.0.0-15` and `10.113.221.221` can impersonate `user1` and `user2`. +The `hadoop.proxyuser.$superuser.hosts` accepts list of ip addresses, ip address ranges in CIDR format and/or host names. For example, by specifying as below, user named `super` accessing from hosts in the range `10.222.0.0-10.222.255.255` and `10.113.221.221` can impersonate `user1` and `user2`. 
hadoop.proxyuser.super.hosts diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md index ffe2aec96ab7a..ca32fd8ee2f95 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md @@ -134,7 +134,7 @@ Apache Hadoop's shell code has a [function library](./UnixShellAPI.html) that is The shell code allows for core functions to be overridden. However, not all functions can be or are safe to be replaced. If a function is not safe to replace, it will have an attribute of Replaceable: No. If a function is safe to replace, it will have the attribute of Replaceable: Yes. -In order to replace a function, create a file called `hadoop-user-functions.sh` in the `${HADOOP_CONF_DIR}` directory. Simply define the new, replacement function in this file and the system will pick it up automatically. There may be as many replacement functions as needed in this file. Examples of function replacement are in the `hadoop-user-functions.sh.examples` file. +In order to replace a function, create a file called `hadoop-user-functions.sh` in the `${HADOOP_CONF_DIR}` directory. Simply define the new, replacement function in this file and the system will pick it up automatically. There may be as many replacement functions as needed in this file. Examples of function replacement are in the `hadoop-user-functions.sh.example` file. Functions that are marked Public and Stable are safe to use in shell profiles as-is. Other functions may change in a minor release. 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/abortable.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/abortable.md new file mode 100644 index 0000000000000..7e6ea01a8fe9b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/abortable.md @@ -0,0 +1,186 @@ + + + + + + + +# interface `org.apache.hadoop.fs.Abortable` + + + +Abort the active operation such that the output does not become +manifest. + +Specifically, if supported on an [output stream](outputstream.html), +a successful `abort()` MUST guarantee that the stream will not be made visible in the `close()` +operation. + +```java + +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface Abortable { + + /** + * Abort the active operation without the output becoming visible. + * + * This is to provide ability to cancel the write on stream; once + * a stream is aborted, the write MUST NOT become visible. + * + * @throws UnsupportedOperationException if the operation is not supported. + * @return the result. + */ + AbortableResult abort(); + + /** + * Interface for the result of aborts; allows subclasses to extend + * (IOStatistics etc) or for future enhancements if ever needed. + */ + interface AbortableResult { + + /** + * Was the stream already closed/aborted? + * @return true if a close/abort operation had already + * taken place. + */ + boolean alreadyClosed(); + + /** + * Any exception caught during cleanup operations, + * exceptions whose raising/catching does not change + * the semantics of the abort. + * @return an exception or null. + */ + IOException anyCleanupException(); + } +} +``` + +## Method `abort()` + +Aborts the ongoing operation such that no output SHALL become visible +when the operation is completed. + +Unless and until other File System classes implement `Abortable`, the +interface is specified purely for output streams. 
+ +## Method `abort()` on an output stream + +`Abortable.abort()` MUST only be supported on output streams +whose output is only made visible when `close()` is called, +for example, output streams returned by the S3A FileSystem. + +## Preconditions + +The stream MUST implement `Abortable` and `StreamCapabilities`. + +```python + if unsupported: + throw UnsupportedOperationException + +if not isOpen(stream): + no-op + +StreamCapabilities.hasCapability("fs.capability.outputstream.abortable") == True + +``` + + +## Postconditions + +After `abort()` returns, the filesystem MUST be unchanged: + +``` +FS' = FS +``` + +A successful `abort()` operation MUST guarantee that +when the stream's `close()` is invoked no output shall be manifest. + +* The stream MUST retry any remote calls needed to force the abort outcome. +* If any file was present at the destination path, it MUST remain unchanged. + +Strictly then: + +> if `Abortable.abort()` does not raise `UnsupportedOperationException` +> then returns, then it guarantees that the write SHALL NOT become visible +> and that any existing data in the filesystem at the destination path SHALL +> continue to be available. + + +1. Calls to `write()` methods MUST fail. +1. Calls to `flush()` MUST be no-ops (applications sometimes call this on closed streams) +1. Subsequent calls to `abort()` MUST be no-ops. +1. `close()` MUST NOT manifest the file, and MUST NOT raise an exception + +That is, the postconditions of `close()` become: + +``` +FS' = FS +``` + +### Cleanup + +* If temporary data is stored in the local filesystem or in the store's upload + infrastructure then this MAY be cleaned up; best-effort is expected here. + +* The stream SHOULD NOT retry cleanup operations; any failure there MUST be + caught and added to `AbortableResult` + +#### Returned `AbortableResult` + +The `AbortableResult` value returned is primarily for testing and logging. 
+ +`alreadyClosed()`: MUST return `true` if the write had already been aborted or closed; + +`anyCleanupException()`: SHOULD return any IOException raised during any optional +cleanup operations. + + +### Thread safety and atomicity + +Output streams themselves aren't formally required to be thread safe, +but as applications do sometimes assume they are, this call MUST be thread safe. + +## Path/Stream capability "fs.capability.outputstream.abortable" + + +An application MUST be able to verify that a stream supports the `Abortable.abort()` +operation without actually calling it. This is done through the `StreamCapabilities` +interface. + +1. If a stream instance supports `Abortable` then it MUST return `true` +in the probe `hasCapability("fs.capability.outputstream.abortable")` + +1. If a stream instance does not support `Abortable` then it MUST return `false` +in the probe `hasCapability("fs.capability.outputstream.abortable")` + +That is: if a stream declares its support for the feature, a call to `abort()` +SHALL meet the defined semantics of the operation. + +FileSystem/FileContext implementations SHOULD declare support similarly, to +allow for applications to probe for the feature in the destination directory/path. + +If a filesystem supports `Abortable` under a path `P` then it SHOULD return `true` to +`PathCapabilities.hasPathCapability(path, "fs.capability.outputstream.abortable")` +This is to allow applications to verify that the store supports the feature. 
+ +If a filesystem does not support `Abortable` under a path `P` then it MUST +return `false` to +`PathCapabilities.hasPathCapability(path, "fs.capability.outputstream.abortable")` + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 665e328447d5b..9fd14f2218939 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -116,6 +116,36 @@ for both files and directories, MUST always return `true` to the `isEncrypted()` predicate. This can be done by setting the `encrypted` flag to true when creating the `FileStatus` instance. + +### `msync()` + +Synchronize metadata state of the client with the latest state of the metadata +service of the FileSystem. + +In highly available FileSystems a standby service can be used as a read-only +metadata replica. This call is essential to guarantee consistency of +reads from the standby replica and to avoid stale reads. + +It is currently only implemented for HDFS and others will just throw +`UnsupportedOperationException`. + +#### Preconditions + + +#### Postconditions + +This call internally records the state of the metadata service at the time of +the call. This guarantees consistency of subsequent reads from any metadata +replica. It assures the client will never access the state of the metadata that +preceded the recorded state. + +#### HDFS implementation notes + +HDFS supports `msync()` in HA mode by calling the Active NameNode and requesting +its latest journal transaction ID. 
For more details see HDFS documentation +[Consistent Reads from HDFS Observer NameNode](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html) + + ### `Path getHomeDirectory()` The function `getHomeDirectory` returns the home directory for the FileSystem @@ -294,6 +324,24 @@ any optimizations. The atomicity and consistency constraints are as for `listStatus(Path, PathFilter)`. +### `RemoteIterator listStatusIterator(Path p)` + +Return an iterator enumerating the `FileStatus` entries under +a path. This is similar to `listStatus(Path)` except the fact that +rather than returning an entire list, an iterator is returned. +The result is exactly the same as `listStatus(Path)`, provided no other +caller updates the directory during the listing. Having said that, this does +not guarantee atomicity if other callers are adding/deleting the files +inside the directory while listing is being performed. Different filesystems +may provide a more efficient implementation, for example S3A does the +listing in pages and fetches the next pages asynchronously while a +page is getting processed. + +Note that now since the initial listing is async, bucket/path existence +exception may show up later during next() call. + +Callers should prefer using listStatusIterator over listStatus as it +is incremental in nature. ### `FileStatus[] listStatus(Path[] paths)` @@ -405,6 +453,26 @@ The function `getLocatedFileStatus(FS, d)` is as defined in The atomicity and consistency constraints are as for `listStatus(Path, PathFilter)`. + +### `ContentSummary getContentSummary(Path path)` + +Given a path return its content summary. + +`getContentSummary()` first checks if the given path is a file and if yes, it returns 0 for directory count +and 1 for file count. 
+ +#### Preconditions + + exists(FS, path) else raise FileNotFoundException + +#### Postconditions + +Returns a `ContentSummary` object with information such as directory count +and file count for a given path. + +The atomicity and consistency constraints are as for +`listStatus(Path, PathFilter)`. + ### `BlockLocation[] getFileBlockLocations(FileStatus f, int s, int l)` #### Preconditions @@ -616,11 +684,15 @@ For instance, HDFS may raise an `InvalidPathException`. result = FSDataOutputStream -The updated (valid) FileSystem must contains all the parent directories of the path, as created by `mkdirs(parent(p))`. +A zero byte file MUST exist at the end of the specified path, visible to all. + +The updated (valid) FileSystem MUST contain all the parent directories of the path, as created by `mkdirs(parent(p))`. The result is `FSDataOutputStream`, which through its operations may generate new filesystem states with updated values of `FS.Files[p]` +The behavior of the returned stream is covered in [Output](outputstream.html). + #### Implementation Notes * Some implementations split the create into a check for the file existing @@ -629,10 +701,18 @@ The result is `FSDataOutputStream`, which through its operations may generate ne clients creating files with `overwrite==true` to fail if the file is created by another client between the two tests. -* S3A, Swift and potentially other Object Stores do not currently change the FS state +* The S3A and potentially other Object Store connectors do not currently change the `FS` state until the output stream `close()` operation is completed. -This MAY be a bug, as it allows >1 client to create a file with `overwrite==false`, - and potentially confuse file/directory logic +This is a significant difference between the behavior of object stores +and that of filesystems, as it allows >1 client to create a file with `overwrite=false`, +and potentially confuse file/directory logic. 
In particular, using `create()` to acquire +an exclusive lock on a file (whoever creates the file without an error is considered +the holder of the lock) may not be a safe algorithm to use when working with object stores. + +* Object stores may create an empty file as a marker when a file is created. +However, object stores with `overwrite=true` semantics may not implement this atomically, +so creating files with `overwrite=false` cannot be used as an implicit exclusion +mechanism between processes. * The Local FileSystem raises a `FileNotFoundException` when trying to create a file over a directory, hence it is listed as an exception that MAY be raised when @@ -644,6 +724,8 @@ this precondition fails. Make a `FSDataOutputStreamBuilder` to specify the parameters to create a file. +The behavior of the returned stream is covered in [Output](outputstream.html). + #### Implementation Notes `createFile(p)` returns a `FSDataOutputStreamBuilder` only and does not make @@ -669,17 +751,21 @@ Implementations without a compliant call SHOULD throw `UnsupportedOperationExcep #### Postconditions - FS + FS' = FS result = FSDataOutputStream Return: `FSDataOutputStream`, which can update the entry `FS.Files[p]` by appending data to the existing list. +The behavior of the returned stream is covered in [Output](outputstream.html). + ### `FSDataOutputStreamBuilder appendFile(Path p)` Make a `FSDataOutputStreamBuilder` to specify the parameters to append to an existing file. +The behavior of the returned stream is covered in [Output](outputstream.html). + #### Implementation Notes `appendFile(p)` returns a `FSDataOutputStreamBuilder` only and does not make @@ -728,97 +814,11 @@ exists in the metadata, but no copies of any its blocks can be located; ### `FSDataInputStreamBuilder openFile(Path path)` -Creates a [`FSDataInputStreamBuilder`](fsdatainputstreambuilder.html) -to construct a operation to open the file at `path` for reading. 
- -When `build()` is invoked on the returned `FSDataInputStreamBuilder` instance, -the builder parameters are verified and -`openFileWithOptions(Path, OpenFileParameters)` invoked. - -This (protected) operation returns a `CompletableFuture` -which, when its `get()` method is called, either returns an input -stream of the contents of opened file, or raises an exception. - -The base implementation of the `openFileWithOptions(PathHandle, OpenFileParameters)` -ultimately invokes `open(Path, int)`. - -Thus the chain `openFile(path).build().get()` has the same preconditions -and postconditions as `open(Path p, int bufferSize)` - -However, there is one difference which implementations are free to -take advantage of: - -The returned stream MAY implement a lazy open where file non-existence or -access permission failures may not surface until the first `read()` of the -actual data. - -The `openFile()` operation may check the state of the filesystem during its -invocation, but as the state of the filesystem may change betwen this call and -the actual `build()` and `get()` operations, this file-specific -preconditions (file exists, file is readable, etc) MUST NOT be checked here. - -FileSystem implementations which do not implement `open(Path, int)` -MAY postpone raising an `UnsupportedOperationException` until either the -`FSDataInputStreamBuilder.build()` or the subsequent `get()` call, -else they MAY fail fast in the `openFile()` call. - -### Implementors notes +See [openFile()](openfile.html). -The base implementation of `openFileWithOptions()` actually executes -the `open(path)` operation synchronously, yet still returns the result -or any failures in the `CompletableFuture<>`, so as to ensure that users -code expecting this. - -Any filesystem where the time to open a file may be significant SHOULD -execute it asynchronously by submitting the operation in some executor/thread -pool. 
This is particularly recommended for object stores and other filesystems -likely to be accessed over long-haul connections. - -Arbitrary filesystem-specific options MAY be supported; these MUST -be prefixed with either the filesystem schema, e.g. `hdfs.` -or in the "fs.SCHEMA" format as normal configuration settings `fs.hdfs`). The -latter style allows the same configuration option to be used for both -filesystem configuration and file-specific configuration. - -It SHOULD be possible to always open a file without specifying any options, -so as to present a consistent model to users. However, an implementation MAY -opt to require one or more mandatory options to be set. - -The returned stream may perform "lazy" evaluation of file access. This is -relevant for object stores where the probes for existence are expensive, and, -even with an asynchronous open, may be considered needless. - ### `FSDataInputStreamBuilder openFile(PathHandle)` -Creates a `FSDataInputStreamBuilder` to build an operation to open a file. -Creates a [`FSDataInputStreamBuilder`](fsdatainputstreambuilder.html) -to construct a operation to open the file identified by the given `PathHandle` for reading. - -When `build()` is invoked on the returned `FSDataInputStreamBuilder` instance, -the builder parameters are verified and -`openFileWithOptions(PathHandle, OpenFileParameters)` invoked. - -This (protected) operation returns a `CompletableFuture` -which, when its `get()` method is called, either returns an input -stream of the contents of opened file, or raises an exception. - -The base implementation of the `openFileWithOptions(PathHandle, OpenFileParameters)` method -returns a future which invokes `open(Path, int)`. 
- -Thus the chain `openFile(pathhandle).build().get()` has the same preconditions -and postconditions as `open(Pathhandle, int)` - -As with `FSDataInputStreamBuilder openFile(PathHandle)`, the `openFile()` -call must not be where path-specific preconditions are checked -that -is postponed to the `build()` and `get()` calls. - -FileSystem implementations which do not implement `open(PathHandle handle, int bufferSize)` -MAY postpone raising an `UnsupportedOperationException` until either the -`FSDataInputStreamBuilder.build()` or the subsequent `get()` call, -else they MAY fail fast in the `openFile()` call. - -The base implementation raises this exception in the `build()` operation; -other implementations SHOULD copy this. +See [openFile()](openfile.html). ### `PathHandle getPathHandle(FileStatus stat, HandleOpt... options)` @@ -1098,7 +1098,7 @@ deletion, preventing the stores' use as drop-in replacements for HDFS. ### `boolean rename(Path src, Path d)` -In terms of its specification, `rename()` is one of the most complex operations within a filesystem . +In terms of its specification, `rename()` is one of the most complex operations within a filesystem. In terms of its implementation, it is the one with the most ambiguity regarding when to return false versus raising an exception. @@ -1107,7 +1107,7 @@ Rename includes the calculation of the destination path. If the destination exists and is a directory, the final destination of the rename becomes the destination + the filename of the source path. 
- let dest = if (isDir(FS, src) and d != src) : + let dest = if (isDir(FS, d) and d != src) : d + [filename(src)] else : d @@ -1121,7 +1121,6 @@ Source `src` must exist: exists(FS, src) else raise FileNotFoundException - `dest` cannot be a descendant of `src`: if isDescendant(FS, src, dest) : raise IOException @@ -1175,7 +1174,7 @@ Renaming a file where the destination is a directory moves the file as a child FS' where: not exists(FS', src) and exists(FS', dest) - and data(FS', dest) == data (FS, dest) + and data(FS', dest) == data (FS, source) result = True @@ -1186,10 +1185,10 @@ If `src` is a directory then all its children will then exist under `dest`, whil `src` and its descendants will no longer exist. The names of the paths under `dest` will match those under `src`, as will the contents: - if isDir(FS, src) isDir(FS, dest) and src != dest : + if isDir(FS, src) and isDir(FS, dest) and src != dest : FS' where: not exists(FS', src) - and dest in FS'.Directories] + and dest in FS'.Directories and forall c in descendants(FS, src) : not exists(FS', c)) and forall c in descendants(FS, src) where isDir(FS, c): @@ -1217,7 +1216,16 @@ that the parent directories of the destination also exist. exists(FS', parent(dest)) -*Other Filesystems (including Swift) * +*S3A FileSystem* + +The outcome is as a normal rename, with the additional (implicit) feature that +the parent directories of the destination then exist: +`exists(FS', parent(dest))` + +There is a check for and rejection if the `parent(dest)` is a file, but +no checks for any other ancestors. + +*Other Filesystems* Other filesystems strictly reject the operation, raising a `FileNotFoundException` @@ -1345,6 +1353,112 @@ operations related to the part of the file being truncated is undefined. +### `boolean copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)` + +The source file or directory at `src` is on the local disk and is copied into the file system at +destination `dst`. 
If the source must be deleted after the move then `delSrc` flag must be +set to TRUE. If destination already exists, and the destination contents must be overwritten +then `overwrite` flag must be set to TRUE. + +#### Preconditions +Source and destination must be different +```python +if src == dest : raise FileExistsException +``` + +Destination and source must not be descendants of one another +```python +if isDescendant(src, dest) or isDescendant(dest, src) : raise IOException +``` + +The source file or directory must exist locally: +```python +if not exists(LocalFS, src) : raise FileNotFoundException +``` + +Directories cannot be copied into files regardless of what the overwrite flag is set to: + +```python +if isDir(LocalFS, src) and isFile(FS, dst) : raise PathExistsException +``` + +For all cases, except the one for which the above precondition throws, the overwrite flag must be +set to TRUE for the operation to succeed if destination exists. This will also overwrite any files + / directories at the destination: + +```python +if exists(FS, dst) and not overwrite : raise PathExistsException +``` + +#### Determining the final name of the copy +Given a base path on the source `base` and a child path `child` where `base` is in +`ancestors(child) + child`: + +```python +def final_name(base, child, dest): + if base == child: + return dest + else: + return dest + childElements(base, child) +``` + +#### Outcome where source is a file `isFile(LocalFS, src)` +For a file, data at destination becomes that of the source. All ancestors are directories.
+```python +if isFile(LocalFS, src) and (not exists(FS, dest) or (exists(FS, dest) and overwrite)): + FS' = FS where: + FS'.Files[dest] = LocalFS.Files[src] + FS'.Directories = FS.Directories + ancestors(FS, dest) + LocalFS' = LocalFS where + not delSrc or (delSrc = true and delete(LocalFS, src, false)) +else if isFile(LocalFS, src) and isDir(FS, dest): + FS' = FS where: + let d = final_name(src, dest) + FS'.Files[d] = LocalFS.Files[src] + LocalFS' = LocalFS where: + not delSrc or (delSrc = true and delete(LocalFS, src, false)) +``` +There are no expectations that the file changes are atomic for both local `LocalFS` and remote `FS`. + +#### Outcome where source is a directory `isDir(LocalFS, src)` +```python +if isDir(LocalFS, src) and (isFile(FS, dest) or isFile(FS, dest + childElements(src))): + raise FileAlreadyExistsException +else if isDir(LocalFS, src): + if exists(FS, dest): + dest' = dest + childElements(src) + if exists(FS, dest') and not overwrite: + raise PathExistsException + else: + dest' = dest + + FS' = FS where: + forall c in descendants(LocalFS, src): + not exists(FS', final_name(c)) or overwrite + and forall c in descendants(LocalFS, src) where isDir(LocalFS, c): + FS'.Directories = FS'.Directories + (dest' + childElements(src, c)) + and forall c in descendants(LocalFS, src) where isFile(LocalFS, c): + FS'.Files[final_name(c, dest')] = LocalFS.Files[c] + LocalFS' = LocalFS where + not delSrc or (delSrc = true and delete(LocalFS, src, true)) +``` +There are no expectations of operation isolation / atomicity. +This means files can change in source or destination while the operation is executing. +No guarantees are made for the final state of the file or directory after a copy other than it is +best effort. E.g.: when copying a directory, one file can be moved from source to destination but +there's nothing stopping the new file at destination being updated while the copy operation is still +in place. 
+ +#### Implementation + +The default HDFS implementation is to recurse through each file and folder found at `src` and +copy them sequentially to their final destination (relative to `dst`). + +Object store based file systems should be mindful of what limitations arise from the above +implementation and could take advantage of parallel uploads and possible re-ordering of files copied +into the store to maximize throughput. + + ## interface `RemoteIterator` The `RemoteIterator` interface is used as a remote-access equivalent @@ -1518,3 +1632,92 @@ in:readahead | READAHEAD | CanSetReadahead | Set the readahead on the input st dropbehind | DROPBEHIND | CanSetDropBehind | Drop the cache. in:unbuffer | UNBUFFER | CanUnbuffer | Reduce the buffering on the input stream. +## Etag probes through the interface `EtagSource` + +FileSystem implementations MAY support querying HTTP etags from `FileStatus` +entries. If so, the requirements are as follows: + +### Etag support MUST BE across all list/`getFileStatus()` calls. + +That is: when adding etag support, all operations which return `FileStatus` or `ListLocatedStatus` +entries MUST return subclasses which are instances of `EtagSource`. + +### FileStatus instances MUST have etags whenever the remote store provides them. + +To support etags, they MUST BE provided in both `getFileStatus()` +and list calls. + +Implementors note: the core APIs which MUST BE overridden to achieve this are as follows: + +```java +FileStatus getFileStatus(Path) +FileStatus[] listStatus(Path) +RemoteIterator listStatusIterator(Path) +RemoteIterator listFiles(Path, boolean) +``` + + +### Etags of files MUST BE Consistent across all list/getFileStatus operations. + +The value of `EtagSource.getEtag()` MUST be the same for list* queries which return etags as for calls of `getFileStatus()` for the specific object.
+ +```java +((EtagSource)getFileStatus(path)).getEtag() == ((EtagSource)listStatus(path)[0]).getEtag() +``` + +Similarly, the same value MUST BE returned for `listFiles()`, `listStatusIterator()` of the path and +when listing the parent path, of all files in the listing. + +### Etags MUST BE different for different file contents. + +Two different arrays of data written to the same path MUST have different etag values when probed. +This is a requirement of the HTTP specification. + +### Etags of files SHOULD BE preserved across rename operations + +After a file is renamed, the value of `((EtagSource)getFileStatus(dest)).getEtag()` +SHOULD be the same as the value of `((EtagSource)getFileStatus(source)).getEtag()` +was before the rename took place. + +This is an implementation detail of the store; it does not hold for AWS S3. + +If and only if the store consistently meets this requirement, the filesystem SHOULD +declare in `hasPathCapability()` that it supports +`fs.capability.etags.preserved.in.rename` + +### Directories MAY have etags + +Directory entries MAY return etags in listing/probe operations; these entries MAY be preserved across renames. + +Equally, directory entries MAY NOT provide such entries, MAY NOT preserve them across renames, +and MAY NOT guarantee consistency over time. + +Note: special mention of the root path "/". +As that isn't a real "directory", nobody should expect it to have an etag. + +### All etag-aware `FileStatus` subclasses MUST BE `Serializable`; MAY BE `Writable` + +The base `FileStatus` class implements `Serializable` and `Writable` and marshalls its fields appropriately. + +Subclasses MUST support java serialization (Some Apache Spark applications use it), preserving the etag. +This is a matter of making the etag field non-static and adding a `serialVersionUID`.
+ +The `Writable` support was used for marshalling status data over Hadoop IPC calls; +in Hadoop 3 that is implemented through `org/apache/hadoop/fs/protocolPB/PBHelper.java` and the methods are deprecated. +Subclasses MAY override the deprecated methods to add etag marshalling. +However, there is no expectation of this and such marshalling is unlikely to ever take place. + +### Appropriate etag Path Capabilities SHOULD BE declared + +1. `hasPathCapability(path, "fs.capability.etags.available")` MUST return true iff + the filesystem returns valid (non-empty) etags on file status/listing operations. +2. `hasPathCapability(path, "fs.capability.etags.preserved.in.rename")` MUST return + true if and only if etags are preserved across renames. + +### Non-requirements of etag support + +* There is no requirement/expectation that `FileSystem.getFileChecksum(Path)` returns + a checksum value related to the etag of an object, if any value is returned. +* If the same data is uploaded twice to the same or a different path, + the etag of the second upload MAY NOT match that of the first upload. + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md index 090696483be34..f64a2bd03b63b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md @@ -443,6 +443,52 @@ The semantics of this are exactly equivalent to That is, the buffer is filled entirely with the contents of the input source from position `position` +### `default void readVectored(List ranges, IntFunction allocate)` + +Read data fully for a list of ranges asynchronously.
The default implementation +iterates through the ranges, tries to coalesce the ranges based on values of +`minSeekForVectorReads` and `maxReadSizeForVectorReads` and then read each merged +ranges synchronously, but the intent is sub classes can implement efficient +implementation. Reading in both direct and heap byte buffers are supported. +Also, clients are encouraged to use `WeakReferencedElasticByteBufferPool` for +allocating buffers such that even direct buffers are garbage collected when +they are no longer referenced. + +The position returned by `getPos()` after `readVectored()` is undefined. + +If a file is changed while the `readVectored()` operation is in progress, the output is +undefined. Some ranges may have old data, some may have new, and some may have both. + +While a `readVectored()` operation is in progress, normal read api calls may block. + +Note: Don't use direct buffers for reading from ChecksumFileSystem as that may +lead to memory fragmentation explained in HADOOP-18296. + + +#### Preconditions + +For each requested range: + + range.getOffset >= 0 else raise IllegalArgumentException + range.getLength >= 0 else raise EOFException + +#### Postconditions + +For each requested range: + + range.getData() returns CompletableFuture which will have data + from range.getOffset to range.getLength. + +### `minSeekForVectorReads()` + +The smallest reasonable seek. Two ranges won't be merged together if the difference between +end of first and start of next range is more than this value. + +### `maxReadSizeForVectorReads()` + +Maximum number of bytes which can be read in one go after merging the ranges. +Two ranges won't be merged if the combined data to be read is more than this value. +Essentially setting this to 0 will disable the merging of ranges. 
## Consistency diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md index eadba174fc1a6..22bec19e5b4f4 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md @@ -13,10 +13,10 @@ --> - + -# class `org.apache.hadoop.fs.FSDataInputStreamBuilder` +# class `org.apache.hadoop.fs.FutureDataInputStreamBuilder` @@ -25,9 +25,58 @@ references to `FSDataInputStream` and its subclasses. It is used to initate a (potentially asynchronous) operation to open an existing file for reading. + +## History + +### Hadoop 3.3.0: API introduced + +[HADOOP-15229](https://issues.apache.org/jira/browse/HADOOP-15229) +_Add FileSystem builder-based openFile() API to match createFile()_ + +* No `opt(String key, long value)` method was available. +* the `withFileStatus(status)` call required a non-null parameter. +* Sole Filesystem to process options and file status was S3A; +* Only the s3a specific options were the S3 select and `fs.s3a.experimental.input.fadvise` +* S3A Filesystem raised `IllegalArgumentException` if a file status was passed in + and the path of the filestatus did not match the path of the `openFile(path)` call. + +This is the baseline implementation. To write code guaranteed to compile against this version, +use the `opt(String, String)` and `must(String, String)` methods, converting numbers to +string explicitly. 
+ +```java +fs.openFile(new Path("s3a://bucket/file")) + .opt("fs.option.openfile.length", Long.toString(length)) + .build().get() +``` + +### Hadoop 3.3.5: standardization and expansion + +[HADOOP-16202](https://issues.apache.org/jira/browse/HADOOP-16202) +_Enhance openFile() for better read performance against object stores_ + +* `withFileStatus(null)` required to be accepted (and ignored) +* only the filename part of any supplied FileStatus path must match the + filename passed in on `openFile(path)`. +* An `opt(String key, long value)` option was added. *This is now deprecated as it +caused a regression.* +* Standard `fs.option.openfile` options defined. +* S3A FS to use openfile length option, seek start/end options not _yet_ used. +* Azure ABFS connector takes a supplied `VersionedFileStatus` and omits any + HEAD probe for the object. + +### Hadoop 3.3.6: API change to address operator overload bugs. + +New `optLong()`, `optDouble()`, `mustLong()` and `mustDouble()` builder methods. + +* See [HADOOP-18724](https://issues.apache.org/jira/browse/HADOOP-18724) _Open file fails with NumberFormatException for S3AFileSystem_, + which was somehow caused by the overloaded `opt(long)`. +* Specification updated to declare that unparseable numbers MUST be treated as "unset" and the default + value used instead. + ## Invariants -The `FSDataInputStreamBuilder` interface does not require parameters or +The `FutureDataInputStreamBuilder` interface does not require parameters or or the state of `FileSystem` until [`build()`](#build) is invoked and/or during the asynchronous open operation itself. @@ -36,14 +85,14 @@ Some aspects of the state of the filesystem, MAY be checked in the initial change between `openFile()` and the `build().get()` sequence. For example, path validation. -## Implementation-agnostic parameters. +## Implementation-agnostic parameters.
-### `FSDataInputStreamBuilder bufferSize(int bufSize)` +### `FutureDataInputStreamBuilder bufferSize(int bufSize)` Set the size of the buffer to be used. -### `FSDataInputStreamBuilder withFileStatus(FileStatus status)` +### `FutureDataInputStreamBuilder withFileStatus(FileStatus status)` A `FileStatus` instance which refers to the file being opened. @@ -53,7 +102,7 @@ So potentially saving on remote calls especially to object stores. Requirements: * `status != null` -* `status.getPath()` == the resolved path of the file being opened. +* `status.getPath().getName()` == the name of the file being opened. The path validation MUST take place if the store uses the `FileStatus` when it opens files, and MAY be performed otherwise. The validation @@ -65,37 +114,112 @@ If a filesystem implementation extends the `FileStatus` returned in its implementation MAY use this information when opening the file. This is relevant with those stores which return version/etag information, -including the S3A and ABFS connectors -they MAY use this to guarantee that -the file they opened is exactly the one returned in the listing. +-they MAY use this to guarantee that the file they opened +is exactly the one returned in the listing. + + +The final `status.getPath().getName()` element of the supplied status MUST equal +the name value of the path supplied to the `openFile(path)` call. + +Filesystems MUST NOT validate the rest of the path. +This is needed to support viewfs and other mount-point wrapper filesystems +where schemas and paths are different. These often create their own FileStatus results + +Preconditions + +```python +status == null or status.getPath().getName() == path.getName() + +``` -### Set optional or mandatory parameters +Filesystems MUST NOT require the class of `status` to equal +that of any specific subclass their implementation returns in filestatus/list +operations. This is to support wrapper filesystems and serialization/deserialization +of the status. 
- FSDataInputStreamBuilder opt(String key, ...) - FSDataInputStreamBuilder must(String key, ...) + +### Set optional or mandatory parameters + +```java +FutureDataInputStreamBuilder opt(String key, String value) +FutureDataInputStreamBuilder opt(String key, int value) +FutureDataInputStreamBuilder opt(String key, boolean value) +FutureDataInputStreamBuilder optLong(String key, long value) +FutureDataInputStreamBuilder optDouble(String key, double value) +FutureDataInputStreamBuilder must(String key, String value) +FutureDataInputStreamBuilder must(String key, int value) +FutureDataInputStreamBuilder must(String key, boolean value) +FutureDataInputStreamBuilder mustLong(String key, long value) +FutureDataInputStreamBuilder mustDouble(String key, double value) +``` Set optional or mandatory parameters to the builder. Using `opt()` or `must()`, client can specify FS-specific parameters without inspecting the concrete type of `FileSystem`. +Example: + ```java out = fs.openFile(path) - .opt("fs.s3a.experimental.input.fadvise", "random") - .must("fs.s3a.readahead.range", 256 * 1024) + .must("fs.option.openfile.read.policy", "random") + .optLong("fs.http.connection.timeout", 30_000L) .withFileStatus(statusFromListing) .build() .get(); ``` -#### Implementation Notes +Here the read policy of `random` has been specified, +with the requirement that the filesystem implementation must understand the option. +An http-specific option has been supplied which may be interpreted by any store; +If the filesystem opening the file does not recognize the option, it can safely be +ignored. + +### When to use `opt` versus `must` + +The difference between `opt` versus `must` is how the FileSystem opening +the file must react to an option which it does not recognize. 
+ +```python + +def must(name, value): + if not name in known_keys: + raise IllegalArgumentException + if not name in supported_keys: + raise UnsupportedException + + +def opt(name, value): + if not name in known_keys: + # ignore option + +``` + +For any known key, the validation of the `value` argument MUST be the same +irrespective of how the (key, value) pair was declared. + +1. For a filesystem-specific option, it is the choice of the implementation + how to validate the entry. +1. For standard options, the specification of what is a valid `value` is + defined in this filesystem specification, validated through contract + tests. + +## Implementation Notes Checking for supported options must be performed in the `build()` operation. 1. If a mandatory parameter declared via `must(key, value)`) is not recognized, `IllegalArgumentException` MUST be thrown. -1. If a mandatory parameter declared via `must(key, value)`) relies on +1. If a mandatory parameter declared via `must(key, value)` relies on a feature which is recognized but not supported in the specific -Filesystem/FileContext instance `UnsupportedException` MUST be thrown. +`FileSystem`/`FileContext` instance `UnsupportedException` MUST be thrown. + +Parsing of numeric values SHOULD trim any string and if the value +cannot be parsed as a number, downgrade to any default value supplied. +This is to address [HADOOP-18724](https://issues.apache.org/jira/browse/HADOOP-18724) +_Open file fails with NumberFormatException for S3AFileSystem_, which was cause by the overloaded `opt()` +builder parameter binding to `opt(String, double)` rather than `opt(String, long)` when a long +value was passed in. The behavior of resolving the conflicts between the parameters set by builder methods (i.e., `bufferSize()`) and `opt()`/`must()` is as follows: @@ -110,15 +234,20 @@ custom subclasses. 
This is critical to ensure safe use of the feature: directory listing/ status serialization/deserialization can result result in the `withFileStatus()` -argumennt not being the custom subclass returned by the Filesystem instance's +argument not being the custom subclass returned by the Filesystem instance's own `getFileStatus()`, `listFiles()`, `listLocatedStatus()` calls, etc. In such a situation the implementations must: -1. Validate the path (always). -1. Use the status/convert to the custom type, *or* simply discard it. +1. Verify that `status.getPath().getName()` matches the current `path.getName()` + value. The rest of the path MUST NOT be validated. +1. Use any status fields as desired -for example the file length. + +Even if not values of the status are used, the presence of the argument +can be interpreted as the caller declaring that they believe the file +to be present and of the given size. -## Builder interface +## Builder interface ### `CompletableFuture build()` @@ -128,26 +257,494 @@ completed, returns an input stream which can read data from the filesystem. The `build()` operation MAY perform the validation of the file's existence, its kind, so rejecting attempts to read from a directory or non-existent -file. **Alternatively**, the `build()` operation may delay all checks -until an asynchronous operation whose outcome is provided by the `Future` +file. Alternatively +* file existence/status checks MAY be performed asynchronously within the returned + `CompletableFuture<>`. +* file existence/status checks MAY be postponed until the first byte is read in + any of the read such as `read()` or `PositionedRead`. That is, the precondition `exists(FS, path)` and `isFile(FS, path)` are -only guaranteed to have been met after the `get()` on the returned future is successful. +only guaranteed to have been met after the `get()` called on returned future +and an attempt has been made to read the stream. 
-Thus, if even a file does not exist, the following call will still succeed, returning -a future to be evaluated. +Thus, even when a file does not exist, or is a directory rather than a file, +the following call MUST succeed, returning a `CompletableFuture` to be evaluated. ```java Path p = new Path("file://tmp/file-which-does-not-exist"); CompletableFuture future = p.getFileSystem(conf) .openFile(p) - .build; + .build(); +``` + +The inability to access/read a file MUST raise an `IOException` or subclass +in either the future's `get()` call, or, for late binding operations, +when an operation to read data is invoked. + +Therefore the following sequence SHALL fail when invoked on the +`future` returned by the previous example. + +```java + future.get().read(); +``` + +Access permission checks have the same visibility requirements: permission failures +MUST be delayed until the `get()` call and MAY be delayed into subsequent operations. + +Note: some operations on the input stream, such as `seek()` may not attempt any IO +at all. Such operations MAY NOT raise exceptions when interacting with +nonexistent/unreadable files. + +## Standard `openFile()` options since Hadoop 3.3.3 + +These are options which `FileSystem` and `FileContext` implementations +MUST recognise and MAY support by changing the behavior of +their input streams as appropriate. + +Hadoop 3.3.0 added the `openFile()` API; these standard options were defined in +a later release. Therefore, although they are "well known", unless confident that +the application will only be executed against releases of Hadoop which know of +the options -applications SHOULD set the options via `opt()` calls rather than `must()`. + +When opening a file through the `openFile()` builder API, callers MAY use +both `.opt(key, value)` and `.must(key, value)` calls to set standard and +filesystem-specific options.
+ +If set as an `opt()` parameter, unsupported "standard" options MUST be ignored, +as MUST unrecognized standard options. + +If set as a `must()` parameter, unsupported "standard" options MUST be ignored. +unrecognized standard options MUST be rejected. + +The standard `openFile()` options are defined +in `org.apache.hadoop.fs.OpenFileOptions`; they all SHALL start +with `fs.option.openfile.`. + +Note that while all `FileSystem`/`FileContext` instances SHALL support these +options to the extent that `must()` declarations SHALL NOT fail, the +implementations MAY support them to the extent of interpreting the values. This +means that it is not a requirement for the stores to actually read the read +policy or file length values and use them when opening files. + +Unless otherwise stated, they SHOULD be viewed as hints. + +Note: if a standard option is added such that if set but not +supported would be an error, then implementations SHALL reject it. For example, +the S3A filesystem client supports the ability to push down SQL commands. If +something like that were ever standardized, then the use of the option, either +in `opt()` or `must()` argument MUST be rejected for filesystems which don't +support the feature. + +### Option: `fs.option.openfile.buffer.size` + +Read buffer size in bytes. + +This overrides the default value set in the configuration with the option +`io.file.buffer.size`. + +It is supported by all filesystem clients which allow for stream-specific buffer +sizes to be set via `FileSystem.open(path, buffersize)`. + +### Option: `fs.option.openfile.read.policy` + +Declare the read policy of the input stream. This is a hint as to what the +expected read pattern of an input stream will be. This MAY control readahead, +buffering and other optimizations. + +Sequential reads may be optimized with prefetching data and/or reading data in +larger blocks. Some applications (e.g. distCp) perform sequential IO even over +columnar data. 
+ +In contrast, random IO reads data in different parts of the file using a +sequence of `seek()/read()` +or via the `PositionedReadable` or `ByteBufferPositionedReadable` APIs. + +Random IO performance may be best if little/no prefetching takes place, along +with other possible optimizations. + +Queries over columnar formats such as Apache ORC and Apache Parquet perform such +random IO; other data formats may be best read with sequential or whole-file +policies. + +What is key is that optimizing reads for sequential reads may impair random +performance -and vice versa. + +1. The seek policy is a hint; even if declared as a `must()` option, the + filesystem MAY ignore it. +1. The interpretation/implementation of a policy is a filesystem specific + behavior -and it may change with Hadoop releases and/or specific storage + subsystems. +1. If a policy is not recognized, the filesystem client MUST ignore it. + +| Policy | Meaning | +|--------------|----------------------------------------------------------| +| `adaptive` | Any adaptive policy implemented by the store. | +| `default` | The default policy for this store. Generally "adaptive". | +| `random` | Optimize for random access. | +| `sequential` | Optimize for sequential access. | +| `vector` | The Vectored IO API is intended to be used. | +| `whole-file` | The whole file will be read. | + +Choosing the wrong read policy for an input source may be inefficient. + +A list of read policies MAY be supplied; the first one recognized/supported by +the filesystem SHALL be the one used. This allows for custom policies to be +supported, for example an `hbase-hfile` policy optimized for HBase HFiles. + +The S3A and ABFS input streams both implement +the [IOStatisticsSource](iostatistics.html) API, and can be queried for their IO +Performance. + +*Tip:* log the `toString()` value of input streams at `DEBUG`.
The S3A and ABFS +Input Streams log read statistics, which can provide insight about whether reads +are being performed efficiently or not. + +_Further reading_ + +* [Linux fadvise()](https://linux.die.net/man/2/fadvise). +* [Windows `CreateFile()`](https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea#caching-behavior) + +#### Read Policy `adaptive` + +Try to adapt the seek policy to the read pattern of the application. + +The `normal` policy of the S3A client and the sole policy supported by +the `wasb:` client are both adaptive -they assume sequential IO, but once a +backwards seek/positioned read call is made the stream switches to random IO. + +Other filesystem implementations may wish to adopt similar strategies, and/or +extend the algorithms to detect forward seeks and/or switch from random to +sequential IO if that is considered more efficient. + +Adaptive read policies exist because of the absence of the ability to +declare the seek policy in the `open()` API, so requiring it to be declared, if +configurable, in the cluster/application configuration. However, the switch from +sequential to random seek policies may be expensive. + +When applications explicitly set the `fs.option.openfile.read.policy` option, if +they know their read plan, they SHOULD declare which policy is most appropriate. + +#### Read Policy `default` + +The default policy for the filesystem instance. +Implementation/installation-specific. + +#### Read Policy `sequential` + +Expect sequential reads from the first byte read to the end of the file/until +the stream is closed. + +#### Read Policy `random` + +Expect `seek()/read()` sequences, or use of `PositionedReadable` +or `ByteBufferPositionedReadable` APIs. + + +#### Read Policy `vector` + +This declares that the caller intends to use the Vectored read API of +[HADOOP-11867](https://issues.apache.org/jira/browse/HADOOP-11867) +_Add a high-performance vectored read API_.
+ +This is a hint: it is not a requirement when using the API. +It does inform the implementations that the stream should be +configured for optimal vectored IO performance, if such a +feature has been implemented. + +It is *not* exclusive: the same stream may still be used for +classic `InputStream` and `PositionedReadable` API calls. +Implementations SHOULD use the `random` read policy +with these operations. + +#### Read Policy `whole-file` + + +This declares that the whole file is to be read end-to-end; the file system client is free to enable +whatever strategies maximise performance for this. In particular, larger ranged reads/GETs can +deliver high bandwidth by reducing socket/TLS setup costs and providing a connection long-lived +enough for TCP flow control to determine the optimal download rate. + +Strategies can include: + +* Initiate an HTTP GET of the entire file in the `openFile()` operation. +* Prefetch data in large blocks, possibly in parallel read operations. + +Applications which know that the entire file is to be read from an opened stream SHOULD declare this +read policy. + +### Option: `fs.option.openfile.length` + +Declare the length of a file. + +This can be used by clients to skip querying a remote store for the size +of/existence of a file when opening it, similar to declaring a file status +through the `withFileStatus()` option. + +If supported by a filesystem connector, this option MUST be interpreted as +declaring the minimum length of the file: + +1. If the value is negative, the option SHALL be considered unset. +2. It SHALL NOT be an error if the actual length of the file is greater than + this value. +3. `read()`, `seek()` and positioned read calls MAY use a position across/beyond + this length but below the actual length of the file. Implementations MAY + raise `EOFException` in such cases, or they MAY return data.
+ +If this option is used by the FileSystem implementation + +*Implementor's Notes* + +* A value of `fs.option.openfile.length` < 0 MUST be ignored. +* If a file status is supplied along with a value in `fs.option.openfile.length`, + the file status values take precedence. + +### Options: `fs.option.openfile.split.start` and `fs.option.openfile.split.end` + +Declare the start and end of the split when a file has been split for processing +in pieces. + +1. If a value is negative, the option SHALL be considered unset. +1. Filesystems MAY assume that the length of the file is greater than or equal + to the value of `fs.option.openfile.split.end`. +1. And that they MAY raise an exception if the client application reads past the + value set in `fs.option.openfile.split.end`. +1. The pair of options MAY be used to optimise the read plan, such as setting + the content range for GET requests, or using the split end as an implicit + declaration of the guaranteed minimum length of the file. +1. If both options are set, and the split start is declared as greater than the + split end, then the split start SHOULD just be reset to zero, rather than + rejecting the operation. + +The split end value can provide a hint as to the end of the input stream. The +split start can be used to optimize any initial read offset for filesystem +clients. + +*Note for implementors*: applications will read past the end of a split when they +need to read to the end of a record/line which begins before the end of the +split. + +Therefore clients MUST be allowed to `seek()`/`read()` past the length +set in `fs.option.openfile.split.end` if the file is actually longer +than that value. + +## S3A-specific options + +The S3A Connector supports custom options for readahead and seek policy.
+ +| Name | Type | Meaning | +|--------------------------------------|----------|---------------------------------------------------------------------------| +| `fs.s3a.readahead.range` | `long` | readahead range in bytes | +| `fs.s3a.experimental.input.fadvise` | `String` | seek policy. Superceded by `fs.option.openfile.read.policy` | +| `fs.s3a.input.async.drain.threshold` | `long` | threshold to switch to asynchronous draining of the stream. (Since 3.3.5) | + +If the option set contains a SQL statement in the `fs.s3a.select.sql` statement, +then the file is opened as an S3 Select query. +Consult the S3A documentation for more details. + +## ABFS-specific options + +The ABFS Connector supports custom input stream options. + +| Name | Type | Meaning | +|-----------------------------------|-----------|----------------------------------------------------| +| `fs.azure.buffered.pread.disable` | `boolean` | disable caching on the positioned read operations. | + + +Disables caching on data read through the [PositionedReadable](fsdatainputstream.html#PositionedReadable) +APIs. + +Consult the ABFS Documentation for more details. + +## Examples + +#### Declaring seek policy and split limits when opening a file. + +Here is an example from a proof of +concept `org.apache.parquet.hadoop.util.HadoopInputFile` +reader which uses a (nullable) file status and a split start/end. + +The `FileStatus` value is always passed in -but if it is null, then the split +end is used to declare the length of the file. 
+ +```java +protected SeekableInputStream newStream(Path path, FileStatus stat, + long splitStart, long splitEnd) + throws IOException { + + FutureDataInputStreamBuilder builder = fs.openFile(path) + .opt("fs.option.openfile.read.policy", "vector, random") + .withFileStatus(stat); + + builder.optLong("fs.option.openfile.split.start", splitStart); + builder.optLong("fs.option.openfile.split.end", splitEnd); + CompletableFuture streamF = builder.build(); + return HadoopStreams.wrap(FutureIO.awaitFuture(streamF)); +} +``` + +As a result, whether driven directly by a file listing, or when opening a file +from a query plan of `(path, splitStart, splitEnd)`, there is no need to probe +the remote store for the length of the file. When working with remote object +stores, this can save tens to hundreds of milliseconds, even if such a probe is +done asynchronously. + +If both the file length and the split end is set, then the file length MUST be +considered "more" authoritative, that is it really SHOULD be defining the file +length. If the split end is set, the caller MAY ot read past it. + +The `CompressedSplitLineReader` can read past the end of a split if it is +partway through processing a compressed record. That is: it assumes an +incomplete record read means that the file length is greater than the split +length, and that it MUST read the entirety of the partially read record. Other +readers may behave similarly. + +Therefore + +1. File length as supplied in a `FileStatus` or in `fs.option.openfile.length` + SHALL set the strict upper limit on the length of a file +2. The split end as set in `fs.option.openfile.split.end` MUST be viewed as a + hint, rather than the strict end of the file. + +### Opening a file with both standard and non-standard options + +Standard and non-standard options MAY be combined in the same `openFile()` +operation. 
+ +```java +Future<FSDataInputStream> f = openFile(path) + .must("fs.option.openfile.read.policy", "random, adaptive") + .opt("fs.s3a.readahead.range", 1024 * 1024) + .build(); + +FSDataInputStream is = f.get(); +``` + +The option set in `must()` MUST be understood, or at least recognized and +ignored by all filesystems. In this example, the S3A-specific option MAY be +ignored by all other filesystem clients. + +### Opening a file with older releases + +Not all hadoop releases recognize the `fs.option.openfile.read.policy` option. + +The option can be safely used in application code if it is added via the `opt()` +builder argument, as it will be treated as an unknown optional key which can +then be discarded. + +```java +Future<FSDataInputStream> f = openFile(path) + .opt("fs.option.openfile.read.policy", "vector, random, adaptive") + .build(); + +FSDataInputStream is = f.get(); ``` -The preconditions for opening the file are checked during the asynchronous -evaluation, and so will surface when the future is completed: +*Note 1* if the option name is set by a reference to a constant in +`org.apache.hadoop.fs.Options.OpenFileOptions`, then the program will not link +against versions of Hadoop without the specific option. Therefore for resilient +linking against older releases -use a copy of the value. + +*Note 2* as option validation is performed in the FileSystem connector, +a third-party connector designed to work with multiple hadoop versions +MAY NOT support the option. + +### Passing options in to MapReduce + +Hadoop MapReduce will automatically read MR Job Options with the prefixes +`mapreduce.job.input.file.option.` and `mapreduce.job.input.file.must.`, +and apply these values as `.opt()` and `.must()` respectively, after +removing the mapreduce-specific prefixes. + +This makes passing options in to MR jobs straightforward.
For example, to +declare that a job should read its data using random IO: + +```java +JobConf jobConf = (JobConf) job.getConfiguration(); +jobConf.set( + "mapreduce.job.input.file.option.fs.option.openfile.read.policy", + "random"); +``` + +### MapReduce input format propagating options + +An example of a record reader passing in options to the file it opens. + +```java + public void initialize(InputSplit genericSplit, + TaskAttemptContext context) throws IOException { + FileSplit split = (FileSplit)genericSplit; + Configuration job = context.getConfiguration(); + start = split.getStart(); + end = start + split.getLength(); + Path file = split.getPath(); + + // open the file and seek to the start of the split + FutureDataInputStreamBuilder builder = + file.getFileSystem(job).openFile(file); + // the start and end of the split may be used to build + // an input strategy. + builder.optLong("fs.option.openfile.split.start", start); + builder.optLong("fs.option.openfile.split.end", end); + FutureIO.propagateOptions(builder, job, + "mapreduce.job.input.file.option", + "mapreduce.job.input.file.must"); + + fileIn = FutureIO.awaitFuture(builder.build()); + fileIn.seek(start); + /* Rest of the operation on the opened stream */ + } +``` + +### `FileContext.openFile` + +From `org.apache.hadoop.fs.AvroFSInput`; a file is opened with sequential input. +Because the file length has already been probed for, the length is passed down. + +```java + public AvroFSInput(FileContext fc, Path p) throws IOException { + FileStatus status = fc.getFileStatus(p); + this.len = status.getLen(); + this.stream = awaitFuture(fc.openFile(p) + .opt("fs.option.openfile.read.policy", + "sequential") + .opt("fs.option.openfile.length", + Long.toString(status.getLen())) + .build()); + fc.open(p); + } +``` + +In this example, the length is passed down as a string (via `Long.toString()`) +rather than directly as a long.
This is to ensure that the input format will +link against versions of Hadoop which do not have the +`opt(String, long)` and `must(String, long)` builder parameters. Similarly, the +values are passed as optional, so that if unrecognized the application will +still succeed. + +### Example: reading a whole file + +This is from `org.apache.hadoop.util.JsonSerialization`. + +Its `load(FileSystem, Path, FileStatus)` method +* declares the whole file is to be read end to end. +* passes down the file status ```java -FSDataInputStream in = future.get(); +public T load(FileSystem fs, + Path path, + FileStatus status) + throws IOException { + + try (FSDataInputStream dataInputStream = + awaitFuture(fs.openFile(path) + .opt("fs.option.openfile.read.policy", "whole-file") + .withFileStatus(status) + .build())) { + return fromJsonStream(dataInputStream); + } catch (JsonProcessingException e) { + throw new PathIOException(path.toString(), + "Failed to read JSON file " + e, e); + } +} ``` diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md index 64dda2df8c63c..59a93c5887a1f 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md @@ -26,7 +26,7 @@ create a new file or open an existing file on `FileSystem` for write. ## Invariants The `FSDataOutputStreamBuilder` interface does not validate parameters -and modify the state of `FileSystem` until [`build()`](#Builder.build) is +and modify the state of `FileSystem` until `build()` is invoked. ## Implementation-agnostic parameters. @@ -110,7 +110,7 @@ of `FileSystem`.
#### Implementation Notes The concrete `FileSystem` and/or `FSDataOutputStreamBuilder` implementation -MUST verify that implementation-agnostic parameters (i.e., "syncable") or +MUST verify that implementation-agnostic parameters (i.e., "syncable`) or implementation-specific parameters (i.e., "foofs:cache") are supported. `FileSystem` will satisfy optional parameters (via `opt(key, ...)`) on best effort. If the mandatory parameters (via `must(key, ...)`) can not be satisfied @@ -182,3 +182,58 @@ see `FileSystem#create(path, ...)` and `FileSystem#append()`. result = FSDataOutputStream The result is `FSDataOutputStream` to be used to write data to filesystem. + + +## S3A-specific options + +Here are the custom options which the S3A Connector supports. + +| Name | Type | Meaning | +|-----------------------------|-----------|----------------------------------------| +| `fs.s3a.create.performance` | `boolean` | create a file with maximum performance | +| `fs.s3a.create.header` | `string` | prefix for user supplied headers | + +### `fs.s3a.create.performance` + +Prioritize file creation performance over safety checks for filesystem consistency. + +This: +1. Skips the `LIST` call which makes sure a file is being created over a directory. + Risk: a file is created over a directory. +1. Ignores the overwrite flag. +1. Never issues a `DELETE` call to delete parent directory markers. + +It is possible to probe an S3A Filesystem instance for this capability through +the `hasPathCapability(path, "fs.s3a.create.performance")` check. + +Creating files with this option over existing directories is likely +to make S3A filesystem clients behave inconsistently. + +Operations optimized for directories (e.g. listing calls) are likely +to see the directory tree not the file; operations optimized for +files (`getFileStatus()`, `isFile()`) more likely to see the file. 
+The exact form of the inconsistencies, and which operations/parameters +trigger this are undefined and may change between even minor releases. + +Using this option is the equivalent of pressing and holding down the +"Electronic Stability Control" +button on a rear-wheel drive car for five seconds: the safety checks are off. +Things will be faster if the driver knew what they were doing. +If they didn't, the fact they had held the button down will +be used as evidence at the inquest as proof that they made a +conscious decision to choose speed over safety and +that the outcome was their own fault. + +Accordingly: *Use if and only if you are confident that the conditions are met.* + +### `fs.s3a.create.header` User-supplied header support + +Options with the prefix `fs.s3a.create.header.` will be added to the +S3 object metadata as "user defined metadata". +This metadata is visible to all applications. It can also be retrieved through the +FileSystem/FileContext `listXAttrs()` and `getXAttrs()` API calls with the prefix `header.` + +When an object is renamed, the metadata is propagated to the copy created. + +It is possible to probe an S3A Filesystem instance for this capability through +the `hasPathCapability(path, "fs.s3a.create.header")` check. \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md index df538ee6cf96b..df39839e831c8 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md @@ -32,9 +32,15 @@ HDFS as these are commonly expected by Hadoop client applications. 1. [Notation](notation.html) 1. [Model](model.html) 1. [FileSystem class](filesystem.html) +1. [OutputStream, Syncable and `StreamCapabilities`](outputstream.html) +1. [Abortable](abortable.html) 1. [FSDataInputStream class](fsdatainputstream.html) 1.
[PathCapabilities interface](pathcapabilities.html) 1. [FSDataOutputStreamBuilder class](fsdataoutputstreambuilder.html) 2. [Testing with the Filesystem specification](testing.html) 2. [Extending the specification and its tests](extending.html) 1. [Uploading a file using Multiple Parts](multipartuploader.html) +1. [IOStatistics](iostatistics.html) +1. [openFile()](openfile.html) +1. [SafeMode](safemode.html) +1. [LeaseRecoverable](leaserecoverable.html) \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md index 37191a5b2a69a..76782b45409ad 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md @@ -30,8 +30,8 @@ are places where HDFS diverges from the expected behaviour of a POSIX filesystem. The bundled S3A FileSystem clients make Amazon's S3 Object Store ("blobstore") -accessible through the FileSystem API. The Swift FileSystem driver provides similar -functionality for the OpenStack Swift blobstore. The Azure WASB and ADL object +accessible through the FileSystem API. +The Azure ABFS, WASB and ADL object storage FileSystems talks to Microsoft's Azure storage. All of these bind to object stores, which do have different behaviors, especially regarding consistency guarantees, and atomicity of operations. @@ -314,10 +314,10 @@ child entries This specification refers to *Object Stores* in places, often using the term *Blobstore*. Hadoop does provide FileSystem client classes for some of these -even though they violate many of the requirements. This is why, although -Hadoop can read and write data in an object store, the two which Hadoop ships -with direct support for — Amazon S3 and OpenStack Swift — cannot -be used as direct replacements for HDFS. 
+even though they violate many of the requirements. + +Consult the documentation for a specific store to determine its compatibility +with specific applications and services. *What is an Object Store?* @@ -343,7 +343,7 @@ stores pretend that they are a FileSystem, a FileSystem with the same features and operations as HDFS. This is —ultimately—a pretence: they have different characteristics and occasionally the illusion fails. -1. **Consistency**. Object stores are generally *Eventually Consistent*: it +1. **Consistency**. Object may be *Eventually Consistent*: it can take time for changes to objects —creation, deletion and updates— to become visible to all callers. Indeed, there is no guarantee a change is immediately visible to the client which just made the change. As an example, @@ -447,10 +447,6 @@ Object stores have an even vaguer view of time, which can be summarized as * The timestamp is likely to be in UTC or the TZ of the object store. If the client is in a different timezone, the timestamp of objects may be ahead or behind that of the client. - * Object stores with cached metadata databases (for example: AWS S3 with - an in-memory or a DynamoDB metadata store) may have timestamps generated - from the local system clock, rather than that of the service. - This is an optimization to avoid round-trip calls to the object stores. + A file's modification time is often the same as its creation time. + The `FileSystem.setTimes()` operation to set file timestamps *may* be ignored. * `FileSystem.chmod()` may update modification times (example: Azure `wasb://`). 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/iostatistics.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/iostatistics.md new file mode 100644 index 0000000000000..bd77dc7e0f8a7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/iostatistics.md @@ -0,0 +1,432 @@ + + +# Statistic collection with the IOStatistics API + +```java +@InterfaceAudience.Public +@InterfaceStability.Unstable +``` + +The `IOStatistics` API is intended to provide statistics on individual IO +classes -such as input and output streams, *in a standard way which +applications can query* + +Many filesystem-related classes have implemented statistics gathering +and provided private/unstable ways to query this, but as they were +not common across implementations it was unsafe for applications +to reference these values. Example: `S3AInputStream` and its statistics +API. This is used in internal tests, but cannot be used downstream in +applications such as Apache Hive or Apache HBase. + +The IOStatistics API is intended to + +1. Be instance specific, rather than shared across multiple instances + of a class, or thread local. +1. Be public and stable enough to be used by applications. +1. Be easy to use in applications written in Java, Scala, and, via libhdfs, C/C++ +1. Have foundational interfaces and classes in the `hadoop-common` JAR. + +## Core Model + +Any class *may* implement `IOStatisticsSource` in order to +provide statistics. + +Wrapper I/O Classes such as `FSDataInputStream` and `FSDataOutputStream` *should* +implement the interface and forward it to the wrapped class, if they also +implement it -and return `null` if they do not. + +`IOStatisticsSource` implementations `getIOStatistics()` return an +instance of `IOStatistics` enumerating the statistics of that specific +instance.
+ +The `IOStatistics` Interface exports five kinds of statistic: + + +| Category | Type | Description | +|------|------|-------------| +| `counter` | `long` | a counter which may increase in value; SHOULD BE >= 0 | +| `gauge` | `long` | an arbitrary value which can go down as well as up; SHOULD BE >= 0 | +| `minimum` | `long` | a minimum value; MAY BE negative | +| `maximum` | `long` | a maximum value; MAY BE negative | +| `meanStatistic` | `MeanStatistic` | an arithmetic mean and sample size; mean MAY BE negative | + +Four are simple `long` values, with variations in how they are likely to +change and how they are aggregated. + + +#### Aggregation of Statistic Values + +For the different statistic categories, the result of `aggregate(x, y)` is + +| Category | Aggregation | +|------------------|-------------| +| `counter` | `max(0, x) + max(0, y)` | +| `gauge` | `max(0, x) + max(0, y)` | +| `minimum` | `min(x, y)` | +| `maximum` | `max(x, y)` | +| `meanStatistic` | calculation of the mean of `x` and `y` | + + +#### Class `MeanStatistic` + +## package `org.apache.hadoop.fs.statistics` + +This package contains the public statistics APIs intended +for use by applications. + + + + + +`MeanStatistic` is a tuple of `(mean, samples)` to support aggregation. + +A `MeanStatistic` with a sample of `0` is considered an empty statistic. + +All `MeanStatistic` instances where `sample = 0` are considered equal, +irrespective of the `mean` value. + +Algorithm to calculate the mean : + +```python +if x.samples = 0: + y +else if y.samples = 0 : + x +else: + samples' = x.samples + y.samples + mean' = ((x.mean * x.samples) + (y.mean * y.samples)) / samples' + (mean', samples') +``` + +Implicitly, this means that if both samples are empty, then the aggregate value is also empty. + +```java +public final class MeanStatistic implements Serializable, Cloneable { + /** + * Arithmetic mean. + */ + private double mean; + + /** + * Number of samples used to calculate + * the mean.
+ */ + private long samples; + + /** + * Get the mean value. + * @return the mean + */ + public double getMean() { + return mean; + } + + /** + * Get the sample count. + * @return the sample count; 0 means empty + */ + public long getSamples() { + return samples; + } + + /** + * Is a statistic empty? + * @return true if the sample count is 0 + */ + public boolean isEmpty() { + return samples == 0; + } + /** + * Add another mean statistic to create a new statistic. + * When adding two statistics, if either is empty then + * a copy of the non-empty statistic is returned. + * If both are empty then a new empty statistic is returned. + * + * @param other other value + * @return the aggregate mean + */ + public MeanStatistic add(final MeanStatistic other) { + /* Implementation elided. */ + } + @Override + public int hashCode() { + return Objects.hash(mean, samples); + } + + @Override + public boolean equals(final Object o) { + if (this == o) { return true; } + if (o == null || getClass() != o.getClass()) { return false; } + MeanStatistic that = (MeanStatistic) o; + if (this.isEmpty()) { + return that.isEmpty(); + } + return Double.compare(that.mean, mean) == 0 && + samples == that.samples; + } + + @Override + public MeanStatistic clone() { + return new MeanStatistic(this); + } + + public MeanStatistic copy() { + return new MeanStatistic(this); + } + +} +``` + + + + + +### class `org.apache.hadoop.fs.statistics.IOStatisticsSource` + +```java + +/** + * A source of IO statistics. + * These statistics MUST be instance specific, not thread local. + */ +@InterfaceStability.Unstable +public interface IOStatisticsSource { + + /** + * Return a statistics instance. + * It is not a requirement that the same instance is returned every time. + * {@link IOStatisticsSource}. + * If the object implementing this is Closeable, this method + * may return null if invoked on a closed object, even if + * it returns a valid instance when called earlier. 
+ * @return an IOStatistics instance or null + */ + IOStatistics getIOStatistics(); +} +``` + +This is the interface which an object instance MUST implement if it is a source of +IOStatistics information. + +#### Invariants + +The result of `getIOStatistics()` must be one of + +* `null` +* an immutable `IOStatistics` for which each map of entries is +an empty map. +* an instance of an `IOStatistics` whose statistics MUST BE unique to that +instance of the class implementing `IOStatisticsSource`. + +Less formally: if the statistics maps returned are non-empty, all the statistics +must be collected from the current instance, and not from any other instances, the way +some of the `FileSystem` statistics are collected. + + +The result of `getIOStatistics()`, if non-null, MAY be a different instance +on every invocation. + + + + + + +### class `org.apache.hadoop.fs.statistics.IOStatistics` + +These are per-instance statistics provided by an object which +implements `IOStatisticsSource`. + +```java +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface IOStatistics { + + /** + * Map of counters. + * @return the current map of counters. + */ + Map<String, Long> counters(); + + /** + * Map of gauges. + * @return the current map of gauges. + */ + Map<String, Long> gauges(); + + /** + * Map of minimums. + * @return the current map of minimums. + */ + Map<String, Long> minimums(); + + /** + * Map of maximums. + * @return the current map of maximums. + */ + Map<String, Long> maximums(); + + /** + * Map of meanStatistics. + * @return the current map of MeanStatistic statistics. + */ + Map<String, MeanStatistic> meanStatistics(); + +} +``` + +### Statistic Naming + +The naming policy of statistics is designed to be readable, shareable +and ideally consistent across `IOStatisticsSource` implementations. + +* Characters in key names MUST match the regular expression + `[a-z|0-9|_]` with the exception of the first character, which + MUST be in the range `[a-z]`.
Thus the full regular expression + for a valid statistic name is: + + [a-z][a-z|0-9|_]+ + +* Where possible, the names of statistics SHOULD be those defined + with common names. + + org.apache.hadoop.fs.statistics.StreamStatisticNames + org.apache.hadoop.fs.statistics.StoreStatisticNames + + Note 1.: these are evolving; for clients to safely reference their + statistics by name they SHOULD be copied to the application. + (i.e. for an application compiled hadoop 3.4.2 to link against hadoop 3.4.1, + copy the strings). + + Note 2: keys defined in these classes SHALL NOT be removed + from subsequent Hadoop releases. + +* A common statistic name MUST NOT be used to report any other statistic and + MUST use the pre-defined unit of measurement. + +* A statistic name in one of the maps SHOULD NOT be re-used in another map. + This aids diagnostics of logged statistics. + +### Statistic Maps + +For each map of statistics returned: + +* The operations to add/remove entries are unsupported: the map returned + MAY be mutable by the source of statistics. + +* The map MAY be empty. + +* The map keys each represent a measured statistic. + +* The set of keys in a map SHOULD remain unchanged, and MUST NOT remove keys. + +* The statistics SHOULD be dynamic: every lookup of an entry SHOULD + return the latest value. + +* The values MAY change across invocations of `Map.values()` and `Map.entries()` + +* The update MAY be in the `iterable()` calls of the iterators returned, + or MAY be in the actual `iterable.next()` operation. That is: there is + no guarantee as to when the evaluation takes place. + +* The returned `Map.Entry` instances MUST return the same value on + repeated `getValue()` calls. (i.e once you have the entry, it is immutable). + +* Queries of statistics SHOULD be fast and non-blocking to the extent + that if invoked during a long operation, they will prioritize + returning fast over most timely values. 
+ +* The statistics MAY lag; especially for statistics collected in separate + operations (e.g. stream IO statistics as provided by a filesystem + instance). + +* Statistics which represent time SHOULD use milliseconds as their unit. + +* Statistics which represent time and use a different unit MUST document + the unit used. + +### Thread Model + +1. An instance of `IOStatistics` can be shared across threads. + +1. Read access to the supplied statistics maps MUST be thread safe. + +1. Iterators returned from the maps MUST NOT be shared across threads. + +1. The statistics collected MUST include all operations which took + place across all threads performing work for the monitored object. + +1. The statistics reported MUST NOT be local to the active thread. + +This is different from the `FileSystem.Statistics` behavior where per-thread statistics +are collected and reported. + +That mechanism supports collecting limited read/write statistics for different +worker threads sharing the same FS instance, but as the collection is thread local, +it invariably under-reports IO performed in other threads on behalf of a worker thread. + + +## Statistic Snapshot + +A snapshot of the current statistic values MAY be obtained by calling +`IOStatisticsSupport.snapshotIOStatistics()`. + +```java + public static X + snapshotIOStatistics(IOStatistics statistics) +``` + +This snapshot is serializable through Java serialization and through +Jackson to/from JSON. + +## Helper Classes + + +### class `org.apache.hadoop.fs.statistics.IOStatisticsSupport` + +This provides helper methods to work with IOStatistics sources and instances. + +Consult the javadocs for its operations. + +### class `org.apache.hadoop.fs.statistics.IOStatisticsLogging` + +Support for efficiently logging `IOStatistics`/`IOStatisticsSource` +instances. + +These are intended for assisting logging, including only enumerating the +state of an `IOStatistics` instance when the log level needs it.
+ +```java +LOG.info("IOStatistics after upload: {}", demandStringify(iostats)); + +// or even better, as it results in only a single object creation +Object latest = demandStringify(iostats); +LOG.info("IOStatistics : {}", latest); +/* do some work. */ +LOG.info("IOStatistics : {}", latest); + +``` + +## Package `org.apache.hadoop.fs.statistics.impl` + +This contains implementation classes to support providing statistics to applications. + +These MUST NOT BE used by applications. If a feature is needed from this package then +the provisioning of a public implementation MAY BE raised via the Hadoop development +channels. + +These MAY be used by those implementations of the Hadoop `FileSystem`, `AbstractFileSystem` +and related classes which are not in the hadoop source tree. Implementors MUST BE +aware that the implementation of this code is unstable and may change across +minor point releases of Hadoop. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/leaserecoverable.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/leaserecoverable.md new file mode 100644 index 0000000000000..5640e7c49c861 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/leaserecoverable.md @@ -0,0 +1,52 @@ + + +# interface `LeaseRecoverable` + +The `LeaseRecoverable` interface tells whether a given path of the current filesystem can perform lease +recovery for an open file whose lease is not explicitly renewed or whose client holding it goes away. + +This interface should be implemented accordingly when necessary to any Filesystem that supports +lease recovery, e.g. `DistributedFileSystem` (HDFS) and `ViewDistributedFileSystem`. + +```java +public interface LeaseRecoverable { + boolean recoverLease(Path file) throws IOException; + boolean isFileClosed(Path file) throws IOException; +} +``` + +There are two main functions of this interface, one performs lease recovery and another one +verifies if a file has been closed.
+ +### boolean recoverLease(Path file) + +This function performs the lease recovery for the given file path, and it does not support +directory path recovery. +1. Return `true`, if the file has already been closed, or does not require lease recovery. +1. Return `false`, if the lease recovery is not yet completed. +1. Throw `IOException` if a directory path is given as input. + +### boolean isFileClosed(Path file) + +This function only checks if the given file path has been closed, and it does not support directory +verification. +1. Return `true`, if the file has been closed. +1. Return `false`, if the file is still open. +1. Throw `IOException` if a directory path is given as input. + +### Path Capabilities SHOULD BE declared + +If a filesystem supports `LeaseRecoverable`, it should return `true` to +`PathCapabilities.hasPathCapability(path, "fs.capability.lease.recoverable")` for a given path. \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/multipartuploader.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/multipartuploader.md index 629c0c418fdf2..906c592eea09d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/multipartuploader.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/multipartuploader.md @@ -14,14 +14,14 @@ - + -# class `org.apache.hadoop.fs.MultipartUploader` +# interface `org.apache.hadoop.fs.MultipartUploader` -The abstract `MultipartUploader` class is the original class to upload a file +The `MultipartUploader` can upload a file using multiple parts to Hadoop-supported filesystems. The benefits of a multipart upload is that the file can be uploaded from multiple clients or processes in parallel and the results will not be visible to other clients until @@ -30,13 +30,12 @@ the `complete` function is called. When implemented by an object store, uploaded data may incur storage charges, even before it is visible in the filesystems.
Users of this API must be diligent and always perform best-effort attempts to complete or abort the upload. +The `abortUploadsUnderPath(path)` operation can help here. ## Invariants -All the requirements of a valid MultipartUploader are considered implicit +All the requirements of a valid `MultipartUploader` are considered implicit econditions and postconditions: -all operations on a valid MultipartUploader MUST result in a new -MultipartUploader that is also valid. The operations of a single multipart upload may take place across different instance of a multipart uploader, across different processes and hosts. @@ -45,16 +44,28 @@ It is therefore a requirement that: 1. All state needed to upload a part, complete an upload or abort an upload must be contained within or retrievable from an upload handle. -1. If an upload handle is marshalled to another process, then, if the -receiving process has the correct permissions, it may participate in the -upload, by uploading one or more parts, by completing an upload, and/or by -aborting the upload. +1. That handle MUST be serializable; it MUST be deserializable to different +processes executing the exact same version of Hadoop. + +1. different hosts/processes MAY upload different parts, sequentially or +simultaneously. The order in which they are uploaded to the filesystem +MUST NOT constrain the order in which the data is stored in the final file. + +1. An upload MAY be completed on a different instance than any which uploaded +parts. + +1. The output of an upload MUST NOT be visible at the final destination +until the upload may complete. + +1. It is not an error if a single multipart uploader instance initiates +or completes multiple uploads files to the same destination sequentially, +irrespective of whether or not the store supports concurrent uploads. ## Concurrency Multiple processes may upload parts of a multipart upload simultaneously. 
-If a call is made to `initialize(path)` to a destination where an active +If a call is made to `startUpload(path)` to a destination where an active upload is in progress, implementations MUST perform one of the two operations. * Reject the call as a duplicate. @@ -70,9 +81,17 @@ the in-progress upload, if it has not completed, must not be included in the final file, in whole or in part. Implementations SHOULD raise an error in the `putPart()` operation. +# Serialization Compatibility + +Users MUST NOT expect that serialized PathHandle versions are compatible across +* different multipart uploader implementations. +* different versions of the same implementation. + +That is: all clients MUST use the exact same version of Hadoop. + ## Model -A File System which supports Multipart Uploads extends the existing model +A FileSystem/FileContext which supports Multipart Uploads extends the existing model `(Directories, Files, Symlinks)` to one of `(Directories, Files, Symlinks, Uploads)` `Uploads` of type `Map[UploadHandle -> Map[PartHandle -> UploadPart]`. @@ -112,11 +131,40 @@ However, if Part Handles are rapidly recycled, there is a risk that the nominall idempotent operation `abort(FS, uploadHandle)` could unintentionally cancel a successor operation which used the same Upload Handle. +## Asynchronous API + +All operations return `CompletableFuture<>` types which must be +subsequently evaluated to get their return values. + +1. The execution of the operation MAY be a blocking operation in on the call thread. +1. If not, it SHALL be executed in a separate thread and MUST complete by the time the +future evaluation returns. +1. Some/All preconditions MAY be evaluated at the time of initial invocation, +1. All those which are not evaluated at that time, MUST Be evaluated during the execution +of the future. 
+ + +What this means is that when an implementation interacts with a fast file system/store all preconditions +including the existence of files MAY be evaluated early, whereas an implementation interacting with a +remote object store whose probes are slow MAY verify preconditions in the asynchronous phase -especially +those which interact with the remote store. + +Java CompletableFutures do not work well with checked exceptions. The Hadoop codebase is still evolving the +details of the exception handling here, as more use is made of the asynchronous APIs. Assume that any +precondition failure which declares that an `IOException` MUST be raised may have that operation wrapped in a +`RuntimeException` of some form if evaluated in the future; this also holds for any other `IOException` +raised during the operations. + +### `close()` + +Applications MUST call `close()` after using an uploader; this is so it may release other +objects, update statistics, etc. + ## State Changing Operations -### `UploadHandle initialize(Path path)` +### `CompletableFuture startUpload(Path)` -Initialized a Multipart Upload, returning an upload handle for use in +Starts a Multipart Upload, ultimately returning an `UploadHandle` for use in subsequent operations. #### Preconditions @@ -128,17 +176,15 @@ if exists(FS, path) and not isFile(FS, path) raise PathIsDirectoryException, IOE ``` If a filesystem does not support concurrent uploads to a destination, -then the following precondition is added +then the following precondition is added: ```python if path in values(FS.Uploads) raise PathExistsException, IOException - ``` - #### Postconditions -The outcome of this operation is that the filesystem state is updated with a new +Once the initialization operation completes, the filesystem state is updated with a new active upload, with a new handle, this handle being returned to the caller. 
```python @@ -147,9 +193,10 @@ FS' = FS where FS'.Uploads(handle') == {} result = handle' ``` -### `PartHandle putPart(Path path, InputStream inputStream, int partNumber, UploadHandle uploadHandle, long lengthInBytes)` +### `CompletableFuture putPart(UploadHandle uploadHandle, int partNumber, Path filePath, InputStream inputStream, long lengthInBytes)` -Upload a part for the multipart upload. +Upload a part for the specific multipart upload, eventually being returned an opaque part handle +represting this part of the specified upload. #### Preconditions @@ -170,10 +217,12 @@ FS' = FS where FS'.uploads(uploadHandle).parts(partHandle') == data' result = partHandle' ``` -The data is stored in the filesystem, pending completion. +The data is stored in the filesystem, pending completion. It MUST NOT be visible at the destination path. +It MAY be visible in a temporary path somewhere in the file system; +This is implementation-specific and MUST NOT be relied upon. -### `PathHandle complete(Path path, Map parts, UploadHandle multipartUploadId)` +### ` CompletableFuture complete(UploadHandle uploadId, Path filePath, Map handles)` Complete the multipart upload. @@ -188,11 +237,23 @@ uploadHandle in keys(FS.Uploads) else raise FileNotFoundException FS.Uploads(uploadHandle).path == path if exists(FS, path) and not isFile(FS, path) raise PathIsDirectoryException, IOException parts.size() > 0 +forall k in keys(parts): k > 0 +forall k in keys(parts): + not exists(k2 in keys(parts)) where (parts[k] == parts[k2]) ``` -If there are handles in the MPU which aren't included in the map, then the omitted -parts will not be a part of the resulting file. It is up to the implementation -of the MultipartUploader to make sure the leftover parts are cleaned up. +All keys MUST be greater than zero, and there MUST not be any duplicate +references to the same parthandle. +These validations MAY be performed at any point during the operation. 
+After a failure, there is no guarantee that a `complete()` call for this +upload with a valid map of paths will complete. +Callers SHOULD invoke `abort()` after any such failure to ensure cleanup. + +If `putPart()` operations for this `uploadHandle` were performed but whose +`PartHandle` handles were not included in this request, the omitted +parts SHALL NOT be a part of the resulting file. + +The MultipartUploader MUST clean up any such outstanding entries. In the case of backing stores that support directories (local filesystem, HDFS, etc), if, at the point of completion, there is now a directory at the @@ -206,14 +267,14 @@ exists(FS', path') and result = PathHandle(path') FS' = FS where FS.Files(path) == UploadData' and not uploadHandle in keys(FS'.uploads) ``` -The PathHandle is returned by the complete operation so subsequent operations +The `PathHandle` is returned by the complete operation so subsequent operations will be able to identify that the data has not changed in the meantime. The order of parts in the uploaded by file is that of the natural order of -parts: part 1 is ahead of part 2, etc. +parts in the map: part 1 is ahead of part 2, etc. -### `void abort(Path path, UploadHandle multipartUploadId)` +### `CompletableFuture abort(UploadHandle uploadId, Path filePath)` Abort a multipart upload. The handle becomes invalid and not subject to reuse. @@ -233,3 +294,23 @@ FS' = FS where not uploadHandle in keys(FS'.uploads) ``` A subsequent call to `abort()` with the same handle will fail, unless the handle has been recycled. + +### `CompletableFuture abortUploadsUnderPath(Path path)` + +Perform a best-effort cleanup of all uploads under a path. + +Returns a future which resolves to: + + -1 if unsupported + >= 0 if supported + +Because it is best effort a strict postcondition isn't possible. 
+The ideal postcondition is all uploads under the path are aborted, +and the count is the number of uploads aborted: + +```python +FS'.uploads forall upload in FS.uploads: + not isDescendant(FS, path, upload.path) +return len(forall upload in FS.uploads: + isDescendant(FS, path, upload.path)) +``` diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/openfile.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/openfile.md new file mode 100644 index 0000000000000..afb3245c5105f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/openfile.md @@ -0,0 +1,122 @@ + + +# `FileSystem.openFile()`/`FileContext.openFile()` + +This is a method provided by both FileSystem and FileContext for +advanced file opening options and, where implemented, +an asynchronous/lazy opening of a file. + +Creates a builder to open a file, supporting options +both standard and filesystem specific. The return +value of the `build()` call is a `Future`, +which must be waited on. The file opening may be +asynchronous, and it may actually be postponed (including +permission/existence checks) until reads are actually +performed. + +This API call was added to `FileSystem` and `FileContext` in +Hadoop 3.3.0; it was tuned in Hadoop 3.3.1 as follows. + +* Added `opt(key, long)` and `must(key, long)`. +* Declared that `withFileStatus(null)` is allowed. +* Declared that `withFileStatus(status)` only checks + the filename of the path, not the full path. + This is needed to support passthrough/mounted filesystems. +* Added standard option keys. + +### `FutureDataInputStreamBuilder openFile(Path path)` + +Creates a [`FutureDataInputStreamBuilder`](fsdatainputstreambuilder.html) +to construct an operation to open the file at `path` for reading. 
+ +When `build()` is invoked on the returned `FutureDataInputStreamBuilder` instance, +the builder parameters are verified and +`FileSystem.openFileWithOptions(Path, OpenFileParameters)` or +`AbstractFileSystem.openFileWithOptions(Path, OpenFileParameters)` invoked. + +These protected methods returns a `CompletableFuture` +which, when its `get()` method is called, either returns an input +stream of the contents of opened file, or raises an exception. + +The base implementation of the `FileSystem.openFileWithOptions(PathHandle, OpenFileParameters)` +ultimately invokes `FileSystem.open(Path, int)`. + +Thus the chain `FileSystem.openFile(path).build().get()` has the same preconditions +and postconditions as `FileSystem.open(Path p, int bufferSize)` + +However, there is one difference which implementations are free to +take advantage of: + +The returned stream MAY implement a lazy open where file non-existence or +access permission failures may not surface until the first `read()` of the +actual data. + +This saves network IO on object stores. + +The `openFile()` operation MAY check the state of the filesystem during its +invocation, but as the state of the filesystem may change between this call and +the actual `build()` and `get()` operations, this file-specific +preconditions (file exists, file is readable, etc) MUST NOT be checked here. + +FileSystem implementations which do not implement `open(Path, int)` +MAY postpone raising an `UnsupportedOperationException` until either the +`FutureDataInputStreamBuilder.build()` or the subsequent `get()` call, +else they MAY fail fast in the `openFile()` call. + +Consult [`FutureDataInputStreamBuilder`](fsdatainputstreambuilder.html) for details +on how to use the builder, and for standard options which may be passed in. 
+ +### `FutureDataInputStreamBuilder openFile(PathHandle)` + +Creates a [`FutureDataInputStreamBuilder`](fsdatainputstreambuilder.html) +to construct a operation to open the file identified by the given `PathHandle` for reading. + +If implemented by a filesystem, the semantics of [`openFile(Path)`](#openfile_path_) +Thus the chain `openFile(pathhandle).build().get()` has the same preconditions and postconditions +as `open(Pathhandle, int)` + +FileSystem implementations which do not implement `open(PathHandle handle, int bufferSize)` +MAY postpone raising an `UnsupportedOperationException` until either the +`FutureDataInputStreamBuilder.build()` or the subsequent `get()` call, else they MAY fail fast in +the `openFile(PathHandle)` call. + +The base implementation raises this exception in the `build()` operation; other implementations +SHOULD copy this. + +### Implementors notes + +The base implementation of `openFileWithOptions()` actually executes +the `open(path)` operation synchronously, yet still returns the result +or any failures in the `CompletableFuture<>`, so as to provide a consistent +lifecycle across all filesystems. + +Any filesystem client where the time to open a file may be significant SHOULD +execute it asynchronously by submitting the operation in some executor/thread +pool. This is particularly recommended for object stores and other filesystems +likely to be accessed over long-haul connections. + +Arbitrary filesystem-specific options MAY be supported; these MUST +be prefixed with either the filesystem schema, e.g. `hdfs.` +or in the `fs.SCHEMA` format as normal configuration settings `fs.hdfs`. The +latter style allows the same configuration option to be used for both +filesystem configuration and file-specific configuration. + +It SHOULD be possible to always open a file without specifying any options, +so as to present a consistent model to users. However, an implementation MAY +opt to require one or more mandatory options to be set. 
+ +The returned stream may perform "lazy" evaluation of file access. This is +relevant for object stores where the probes for existence are expensive, and, +even with an asynchronous open, may be considered needless. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/outputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/outputstream.md new file mode 100644 index 0000000000000..8d0d4c4354f0b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/outputstream.md @@ -0,0 +1,1016 @@ + + + + +# Output: `OutputStream`, `Syncable` and `StreamCapabilities` + +## Introduction + +This document covers the Output Streams within the context of the +[Hadoop File System Specification](index.html). + +It uses the filesystem model defined in [A Model of a Hadoop Filesystem](model.html) +with the notation defined in [notation](Notation.md). + +The target audiences are: +1. Users of the APIs. While `java.io.OutputStream` is a standard interface, +this document clarifies how it is implemented in HDFS and elsewhere. +The Hadoop-specific interfaces `Syncable` and `StreamCapabilities` are new; +`Syncable` is notable in offering durability and visibility guarantees which +exceed those of `OutputStream`. +1. Implementors of File Systems and clients. + +## How data is written to a filesystem + +The core mechanism to write data to files through the Hadoop FileSystem APIs +is through `OutputStream` subclasses obtained through calls to +`FileSystem.create()`, `FileSystem.append()`, +or `FSDataOutputStreamBuilder.build()`. + +These all return instances of `FSDataOutputStream`, through which data +can be written through various `write()` methods. +After a stream's `close()` method is called, all data written to the +stream MUST BE persisted to the filesystem and visible to all other +clients attempting to read data from that path via `FileSystem.open()`. 
+ +As well as operations to write the data, Hadoop's `OutputStream` implementations +provide methods to flush buffered data back to the filesystem, +so as to ensure that the data is reliably persisted and/or visible +to other callers. This is done via the `Syncable` interface. It was +originally intended that the presence of this interface could be interpreted +as a guarantee that the stream supported its methods. However, this has proven +impossible to guarantee as the static nature of the interface is incompatible +with filesystems whose syncability semantics may vary on a store/path basis. +As an example, erasure coded files in HDFS do not support the Sync operations, +even though they are implemented as subclass of an output stream which is `Syncable`. + +A new interface: `StreamCapabilities`. This allows callers +to probe the exact capabilities of a stream, even transitively +through a chain of streams. + +## Output Stream Model + +For this specification, an output stream can be viewed as a list of bytes +stored in the client; `hsync()` and `hflush()` are operations the actions +which propagate the data to be visible to other readers of the file and/or +made durable. + +```python +buffer: List[byte] +``` + +A flag, `open` tracks whether the stream is open: after the stream +is closed no more data may be written to it: + +```python +open: bool +buffer: List[byte] +``` + +The destination path of the stream, `path`, can be tracked to form a triple +`path, open, buffer` + +```python +Stream = (path: Path, open: Boolean, buffer: byte[]) +``` + +#### Visibility of Flushed Data + +(Immediately) after `Syncable` operations which flush data to the filesystem, +the data at the stream's destination path MUST match that of +`buffer`. That is, the following condition MUST hold: + +```python +FS'.Files(path) == buffer +``` + +Any client reading the data at the path MUST see the new data. 
+The `Syncable` operations differ in their durability +guarantees, not visibility of data. + +### State of Stream and File System after `FileSystem.create()` + +The output stream returned by a `FileSystem.create(path)` or +`FileSystem.createFile(path).build()` within a filesystem `FS`, +can be modeled as a triple containing an empty array of no data: + +```python +Stream' = (path, true, []) +``` + +The filesystem `FS'` MUST contain a 0-byte file at the path: + +```python +FS' = FS where data(FS', path) == [] +``` + +Thus, the initial state of `Stream'.buffer` is implicitly +consistent with the data at the filesystem. + + +*Object Stores*: see caveats in the "Object Stores" section below. + +### State of Stream and File System after `FileSystem.append()` + +The output stream returned from a call of + `FileSystem.append(path, buffersize, progress)` within a filesystem `FS`, +can be modelled as a stream whose `buffer` is initialized to that of +the original file: + +```python +Stream' = (path, true, data(FS, path)) +``` + +#### Persisting data + +When the stream writes data back to its store, be it in any +supported flush operation, in the `close()` operation, or at any other +time the stream chooses to do so, the contents of the file +are replaced with the current buffer: + +```python +Stream' = (path, true, buffer) +FS' = FS where data(FS', path) == buffer +``` + +After a call to `close()`, the stream is closed for all operations other +than `close()`; they MAY fail with `IOException` or `RuntimeException`. + +```python +Stream' = (path, false, []) +``` + +The `close()` operation MUST be idempotent with the sole attempt to write the +data made in the first invocation. + +1. If `close()` succeeds, subsequent calls are no-ops. +1. If `close()` fails, again, subsequent calls are no-ops. They MAY rethrow +the previous exception, but they MUST NOT retry the write. 
+ + + + + +## Class `FSDataOutputStream` + +```java +public class FSDataOutputStream + extends DataOutputStream + implements Syncable, CanSetDropBehind, StreamCapabilities { + // ... +} +``` + +The `FileSystem.create()`, `FileSystem.append()` and +`FSDataOutputStreamBuilder.build()` calls return an instance +of a class `FSDataOutputStream`, a subclass of `java.io.OutputStream`. + +The base class wraps an `OutputStream` instance, one which may implement `Syncable`, +`CanSetDropBehind` and `StreamCapabilities`. + +This document covers the requirements of such implementations. + +HDFS's `FileSystem` implementation, `DistributedFileSystem`, returns an instance +of `HdfsDataOutputStream`. This implementation has at least two behaviors +which are not explicitly declared by the base Java implementation: + +1. Writes are synchronized: more than one thread can write to the same +output stream. This is a use pattern which HBase relies on. + +1. `OutputStream.flush()` is a no-op when the file is closed. Apache Druid +has made such a call on this in the past +[HADOOP-14346](https://issues.apache.org/jira/browse/HADOOP-14346). + + +As the HDFS implementation is considered the de-facto specification of +the FileSystem APIs, the fact that `write()` is thread-safe is significant. + +For compatibility, not only SHOULD other FS clients be thread-safe, +but new HDFS features, such as encryption and Erasure Coding SHOULD also +implement consistent behavior with the core HDFS output stream. + +Put differently: + +*It isn't enough for Output Streams to implement the core semantics +of `java.io.OutputStream`: they need to implement the extra semantics +of `HdfsDataOutputStream`, especially for HBase to work correctly.* + +The concurrent `write()` call is the most significant tightening of +the Java specification. + +## Class `java.io.OutputStream` + +A Java `OutputStream` allows applications to write a sequence of bytes to a destination. 
+In a Hadoop filesystem, that destination is the data under a path in the filesystem. + +```java +public abstract class OutputStream implements Closeable, Flushable { + public abstract void write(int b) throws IOException; + public void write(byte b[]) throws IOException; + public void write(byte b[], int off, int len) throws IOException; + public void flush() throws IOException; + public void close() throws IOException; +} +``` +### `write(Stream, data)` + +Writes a byte of data to the stream. + +#### Preconditions + +```python +Stream.open else raise ClosedChannelException, PathIOException, IOException +``` + +The exception `java.nio.channels.ClosedChannelException` is +raised in the HDFS output streams when trying to write to a closed file. +This exception does not include the destination path; and +`Exception.getMessage()` is `null`. It is therefore of limited value in stack +traces. Implementors may wish to raise exceptions with more detail, such +as a `PathIOException`. + + +#### Postconditions + +The buffer has the lower 8 bits of the data argument appended to it. + +```python +Stream'.buffer = Stream.buffer + [data & 0xff] +``` + +There may be an explicit limit on the size of cached data, or an implicit +limit based on the available capacity of the destination filesystem. +When a limit is reached, `write()` SHOULD fail with an `IOException`. + +### `write(Stream, byte[] data, int offset, int len)` + + +#### Preconditions + +The preconditions are all defined in `OutputStream.write()` + +```python +Stream.open else raise ClosedChannelException, PathIOException, IOException +data != null else raise NullPointerException +offset >= 0 else raise IndexOutOfBoundsException +len >= 0 else raise IndexOutOfBoundsException +offset <= data.length else raise IndexOutOfBoundsException +offset + len <= data.length else raise IndexOutOfBoundsException +``` + +After the operation has returned, the buffer may be re-used. 
The outcome +of updates to the buffer while the `write()` operation is in progress is undefined. + +#### Postconditions + +```python +Stream'.buffer = Stream.buffer + data[offset...(offset + len)] +``` + +### `write(byte[] data)` + +This is defined as the equivalent of: + +```python +write(data, 0, data.length) +``` + +### `flush()` + +Requests that the data is flushed. The specification of `OutputStream.flush()` +declares that this SHOULD write data to the "intended destination". + +It explicitly precludes any guarantees about durability. + +For that reason, this document doesn't provide any normative +specifications of behaviour. + +#### Preconditions + +None. + +#### Postconditions + +None. + +If the implementation chooses to implement a stream-flushing operation, +the data may be saved to the file system such that it becomes visible to +others: + +```python +FS' = FS where data(FS', path) == buffer +``` + +When a stream is closed, `flush()` SHOULD downgrade to being a no-op, if it was not +one already. This is to work with applications and libraries which can invoke +it in exactly this way. + + +*Issue*: Should `flush()` forward to `hflush()`? + +No. Or at least, make it optional. + +There's a lot of application code which assumes that `flush()` is low cost +and should be invoked after writing every single line of output, after +writing small 4KB blocks or similar. + +Forwarding this to a full flush across a distributed filesystem, or worse, +a distant object store, is very inefficient. +Filesystem clients which convert a `flush()` to an `hflush()` will eventually +have to roll back that feature: +[HADOOP-16548](https://issues.apache.org/jira/browse/HADOOP-16548). + +### `close()` + +The `close()` operation saves all data to the filesystem and +releases any resources used for writing data. + +The `close()` call is expected to block +until the write has completed (as with `Syncable.hflush()`), possibly +until it has been written to durable storage. 
+ +After `close()` completes, the data in a file MUST be visible and consistent +with the data most recently written. The metadata of the file MUST be consistent +with the data and the write history itself (i.e. any modification time fields +updated). + +After `close()` is invoked, all subsequent `write()` calls on the stream +MUST fail with an `IOException`. + +Any locking/leaseholding mechanism MUST release its lock/lease. + +```python +Stream'.open = false +FS' = FS where data(FS', path) == buffer +``` + +The `close()` call MAY fail during its operation. + +1. Callers of the API MUST expect some calls to `close()` to fail and SHOULD code appropriately. +Catching and swallowing exceptions, while common, is not always the ideal solution. +1. Even after a failure, `close()` MUST place the stream into a closed state. +Follow-on calls to `close()` are ignored, and calls to other methods +rejected. That is: callers cannot be expected to call `close()` repeatedly +until it succeeds. +1. The duration of the `close()` operation is undefined. Operations which rely +on acknowledgements from remote systems to meet the persistence guarantees +implicitly have to await these acknowledgements. Some Object Store output streams +upload the entire data file in the `close()` operation. This can take a large amount +of time. The fact that many user applications assume that `close()` is both fast +and does not fail means that this behavior is dangerous. + +Recommendations for safe use by callers: + +* Do plan for exceptions being raised, either in catching and logging or +by throwing the exception further up. Catching and silently swallowing exceptions +may hide serious problems. +* Heartbeat operations SHOULD take place on a separate thread, so that a long +delay in `close()` does not block the thread so long that the heartbeat times +out. 
+ +Implementors: + +* Have a look at [HADOOP-16785](https://issues.apache.org/jira/browse/HADOOP-16785) +to see examples of complications in close. +* Incrementally writing blocks before a close operation results in a behavior which +matches client expectations better: write failures to surface earlier and close +to be more housekeeping than the actual upload. +* If block uploads are executed in separate threads, the output stream `close()` +call MUST block until all the asynchronous uploads have completed; any error raised +MUST be reported. +If multiple errors were raised, the stream can choose which to propagate. +What is important is: when `close()` returns without an error, applications expect +the data to have been successfully written. + +### HDFS and `OutputStream.close()` + +HDFS does not immediately `sync()` the output of a written file to disk on +`OutputStream.close()` unless configured with `dfs.datanode.synconclose` +is true. This has caused [problems in some applications](https://issues.apache.org/jira/browse/ACCUMULO-1364). + +Applications which absolutely require the guarantee that a file has been persisted +MUST call `Syncable.hsync()` *before* the file is closed. + + +## `org.apache.hadoop.fs.Syncable` + +```java +@InterfaceAudience.Public +@InterfaceStability.Stable +public interface Syncable { + + + /** Flush out the data in client's user buffer. After the return of + * this call, new readers will see the data. + * @throws IOException if any error occurs + */ + void hflush() throws IOException; + + /** Similar to posix fsync, flush out the data in client's user buffer + * all the way to the disk device (but the disk may have it in its cache). + * @throws IOException if error occurs + */ + void hsync() throws IOException; +} +``` + +The purpose of `Syncable` interface is to provide guarantees that data is written +to a filesystem for both visibility and durability. 
+ +*SYNC-1*: An `OutputStream` which implements `Syncable` and does not raise +`UnsupportedOperationException` on invocations is +making an explicit declaration that it can meet those guarantees. + +*SYNC-2*: If a stream, declares the interface as implemented, but does not +provide durability, the interface's methods MUST raise +`UnsupportedOperationException`. + +The `Syncable` interface has been implemented by other classes than +subclasses of `OutputStream`, such as `org.apache.hadoop.io.SequenceFile.Writer`. + +*SYNC-3* The fact that a class implements `Syncable` does not guarantee +that `extends OutputStream` holds. + +That is, for any class `C`: `(C instanceof Syncable)` does not imply +`(C instanceof OutputStream)` + +This specification only covers the required behavior of `OutputStream` subclasses +which implement `Syncable`. + + +*SYNC-4:* The return value of `FileSystem.create(Path)` is an instance +of `FSDataOutputStream`. + +*SYNC-5:* `FSDataOutputStream implements Syncable` + + +SYNC-5 and SYNC-1 imply that all output streams which can be created +with `FileSystem.create(Path)` must support the semantics of `Syncable`. +This is demonstrably not true: `FSDataOutputStream` simply downgrades +to a `flush()` if its wrapped stream is not `Syncable`. +Therefore the declarations SYNC-1 and SYNC-2 do not hold: you cannot trust `Syncable`. + +Put differently: *callers MUST NOT rely on the presence of the interface +as evidence that the semantics of `Syncable` are supported*. Instead +they MUST be dynamically probed for using the `StreamCapabilities` +interface, where available. + + +### `Syncable.hflush()` + +Flush out the data in client's user buffer. After the return of +this call, new readers will see the data. The `hflush()` operation +does not contain any guarantees as to the durability of the data. only +its visibility. + +Thus implementations may cache the written data in memory +—visible to all, but not yet persisted. 
+ +#### Preconditions + +```python +hasCapability(Stream, "hflush") +Stream.open else raise IOException +``` + + +#### Postconditions + +```python +FS' = FS where data(path) == cache +``` + + +After the call returns, the data MUST be visible to all new callers +of `FileSystem.open(path)` and `FileSystem.openFile(path).build()`. + +There is no requirement or guarantee that clients with an existing +`DataInputStream` created by a call to `(FS, path)` will see the updated +data, nor is there a guarantee that they *will not* in a current or subsequent +read. + +Implementation note: as a correct `hsync()` implementation MUST also +offer all the semantics of an `hflush()` call, implementations of `hflush()` +may just invoke `hsync()`: + +```java +public void hflush() throws IOException { + hsync(); +} +``` + +#### `hflush()` Performance + +The `hflush()` call MUST block until the store has acknowledge that the +data has been received and is now visible to others. This can be slow, +as it will include the time to upload any outstanding data from the +client, and for the filesystem itself to process it. + +Often Filesystems only offer the `Syncable.hsync()` guarantees: persistence as +well as visibility. This means the time to return can be even greater. + +Application code MUST NOT call `hflush()` or `hsync()` at the end of every line +or, unless they are writing a WAL, at the end of every record. Use with care. + + +### `Syncable.hsync()` + +Similar to POSIX `fsync()`, this call saves the data in client's user buffer +all the way to the disk device (but the disk may have it in its cache). + +That is: it is a requirement for the underlying FS To save all the data to +the disk hardware itself, where it is expected to be durable. 
+ +#### Preconditions + +```python +hasCapability(Stream, "hsync") +Stream.open else raise IOException +``` + +#### Postconditions + +```python +FS' = FS where data(path) == buffer +``` + +_Implementations are required to block until that write has been +acknowledged by the store._ + +This is so the caller can be confident that once the call has +returned successfully, the data has been written. + + + +## Interface `StreamCapabilities` + +```java +@InterfaceAudience.Public +@InterfaceStability.Evolving +``` + +The `org.apache.hadoop.fs.StreamCapabilities` interface exists to allow callers to dynamically +determine the behavior of a stream. + +```java + public boolean hasCapability(String capability) { + switch (capability.toLowerCase(Locale.ENGLISH)) { + case StreamCapabilities.HSYNC: + case StreamCapabilities.HFLUSH: + return supportFlush; + default: + return false; + } + } +``` + +Once a stream has been closed, a `hasCapability()` call MUST do one of + +* return the capabilities of the open stream. +* return false. + +That is: it MUST NOT raise an exception about the file being closed; + +See [pathcapabilities](pathcapabilities.html) for specifics on the `PathCapabilities` API; +the requirements are similar: a stream MUST NOT return true for a capability +for which it lacks support, be it because + +* The capability is unknown. +* The capability is known and known to be unsupported. + +Standard stream capabilities are defined in `StreamCapabilities`; +consult the javadocs for the complete set of options. + +| Name | Probes for support of | +|-------|---------| +| `dropbehind` | `CanSetDropBehind.setDropBehind()` | +| `hsync` | `Syncable.hsync()` | +| `hflush` | `Syncable.hflush()`. Deprecated: probe for `HSYNC` only. 
| +| `in:readahead` | `CanSetReadahead.setReadahead()` | +| `in:unbuffer"` | `CanUnbuffer.unbuffer()` | +| `in:readbytebuffer` | `ByteBufferReadable#read(ByteBuffer)` | +| `in:preadbytebuffer` | `ByteBufferPositionedReadable#read(long, ByteBuffer)` | + +Stream implementations MAY add their own custom options. +These MUST be prefixed with `fs.SCHEMA.`, where `SCHEMA` is the schema of the filesystem. + +## interface `CanSetDropBehind` + +```java +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface CanSetDropBehind { + /** + * Configure whether the stream should drop the cache. + * + * @param dropCache Whether to drop the cache. null means to use the + * default value. + * @throws IOException If there was an error changing the dropBehind + * setting. + * UnsupportedOperationException If this stream doesn't support + * setting the drop-behind. + */ + void setDropBehind(Boolean dropCache) + throws IOException, UnsupportedOperationException; +} +``` + +This interface allows callers to change policies used inside HDFS. + +Implementations MUST return `true` for the call + +```java +StreamCapabilities.hasCapability("dropbehind"); +``` + + +## Durability, Concurrency, Consistency and Visibility of stream output. + +These are the aspects of the system behaviour which are not directly +covered in this (very simplistic) filesystem model, but which are visible +in production. + + +### Durability + +1. `OutputStream.write()` MAY persist the data, synchronously or asynchronously +1. `OutputStream.flush()` flushes data to the destination. There +are no strict persistence requirements. +1. `Syncable.hflush()` synchronously sends all outstaning data to the destination +filesystem. After returning to the caller, the data MUST be visible to other readers, +it MAY be durable. That is: it does not have to be persisted, merely guaranteed +to be consistently visible to all clients attempting to open a new stream reading +data at the path. +1. 
`Syncable.hsync()` MUST transmit the data as per `hflush` and persist + that data to the underlying durable storage. +1. `close()` The first call to `close()` MUST flush out all remaining data in +the buffers, and persist it, as a call to `hsync()`. + + +Many applications call `flush()` far too often -such as at the end of every line written. +If this triggered an update of the data in persistent storage and any accompanying +metadata, distributed stores would overload fast. +Thus: `flush()` is often treated at most as a cue to flush data to the network +buffers -but not commit to writing any data. + +It is only the `Syncable` interface which offers guarantees. + +The two `Syncable` operations `hsync()` and `hflush()` differ purely by the extra guarantee of `hsync()`: the data must be persisted. +If `hsync()` is implemented, then `hflush()` can be implemented simply +by invoking `hsync()` + +```java +public void hflush() throws IOException { + hsync(); +} +``` + +This is perfectly acceptable as an implementation: the semantics of `hflush()` +are satisifed. +What is not acceptable is downgrading `hsync()` to `hflush()`, as the durability guarantee is no longer met. + + +### Concurrency + +1. The outcome of more than one process writing to the same file is undefined. + +1. An input stream opened to read a file *before the file was opened for writing* +MAY fetch data updated by writes to an OutputStream. +Because of buffering and caching, this is not a requirement +—and if an input stream does pick up updated data, the point at +which the updated data is read is undefined. This surfaces in object stores +where a `seek()` call which closes and re-opens the connection may pick up +updated data, while forward stream reads do not. Similarly, in block-oriented +filesystems, the data may be cached a block at a time —and changes only picked +up when a different block is read. + +1. 
A filesystem MAY allow the destination path to be manipulated while a stream +is writing to it —for example, `rename()` of the path or a parent; `delete()` of +a path or parent. In such a case, the outcome of future write operations on +the output stream is undefined. Some filesystems MAY implement locking to +prevent conflict. However, this tends to be rare on distributed filesystems, +for reasons well known in the literature. + +1. The Java API specification of `java.io.OutputStream` does not require +an instance of the class to be thread safe. +However, `org.apache.hadoop.hdfs.DFSOutputStream` +has a stronger thread safety model (possibly unintentionally). This fact is +relied upon in Apache HBase, as discovered in HADOOP-11708. Implementations +SHOULD be thread safe. *Note*: even the `DFSOutputStream` synchronization +model permits the output stream to have `close()` invoked while awaiting an +acknowledgement from datanode or namenode writes in an `hsync()` operation. + +### Consistency and Visibility + +There is no requirement for the data to be immediately visible to other applications +—not until a specific call to flush buffers or persist it to the underlying storage +medium are made. + +If an output stream is created with `FileSystem.create(path, overwrite==true)` +and there is an existing file at the path, that is `exists(FS, path)` holds, +then, the existing data is immediately unavailable; the data at the end of the +path MUST consist of an empty byte sequence `[]`, with consistent metadata. + + +```python +exists(FS, path) +(Stream', FS') = create(FS, path) +exists(FS', path) +getFileStatus(FS', path).getLen() = 0 +``` + +The metadata of a file (`length(FS, path)` in particular) SHOULD be consistent +with the contents of the file after `flush()` and `sync()`. 
+ +```python +(Stream', FS') = create(FS, path) +(Stream'', FS'') = write(Stream', data) +(Stream''', FS''') hsync(Stream'') +exists(FS''', path) +getFileStatus(FS''', path).getLen() = len(data) +``` + +*HDFS does not do this except when the write crosses a block boundary*; to do +otherwise would overload the Namenode. Other stores MAY copy this behavior. + +As a result, while a file is being written +`length(Filesystem, Path)` MAY be less than the length of `data(Filesystem, Path)`. + +The metadata MUST be consistent with the contents of a file after the `close()` +operation. + +After the contents of an output stream have been persisted (`hflush()/hsync()`) +all new `open(FS, Path)` operations MUST return the updated data. + +After `close()` has been invoked on an output stream, +a call to `getFileStatus(path)` MUST return the final metadata of the written file, +including length and modification time. +The metadata of the file returned in any of the FileSystem `list` operations +MUST be consistent with this metadata. + +The value of `getFileStatus(path).getModificationTime()` is not defined +while a stream is being written to. +The timestamp MAY be updated while a file is being written, +especially after a `Syncable.hsync()` call. +The timestamps MUST be updated after the file is closed +to that of a clock value observed by the server during the `close()` call. +It is *likely* to be in the time and time zone of the filesystem, rather +than that of the client. + +Formally, if a `close()` operation triggers an interaction with a server +which starts at server-side time `t1` and completes at time `t2` with a successfully +written file, then the last modification time SHOULD be a time `t` where +`t1 <= t <= t2` + +## Issues with the Hadoop Output Stream model. + +There are some known issues with the output stream model as offered by Hadoop, +specifically about the guarantees about when data is written and persisted +—and when the metadata is synchronized. 
+These are where implementation aspects of HDFS and the "Local" filesystem +do not follow the simple model of the filesystem used in this specification. + +### HDFS + +#### HDFS: `hsync()` only syncs the latest block + +The reference implementation, `DFSOutputStream` will block until an +acknowledgement is received from the datanodes: that is, all hosts in the +replica write chain have successfully written the file. + +That means that the expectation callers may have is that the return of the +method call contains visibility and durability guarantees which other +implementations must maintain. + +Note, however, that the reference `DFSOutputStream.hsync()` call only actually +persists *the current block*. If there have been a series of writes since the +last sync, such that a block boundary has been crossed. The `hsync()` call +claims only to write the most recent. + +From the javadocs of `DFSOutputStream.hsync(EnumSet syncFlags)` + +> Note that only the current block is flushed to the disk device. +> To guarantee durable sync across block boundaries the stream should +> be created with {@link CreateFlag#SYNC_BLOCK}. + + +This is an important HDFS implementation detail which must not be ignored by +anyone relying on HDFS to provide a Write-Ahead-Log or other database structure +where the requirement of the application is that +"all preceeding bytes MUST have been persisted before the commit flag in the WAL +is flushed" + +See [Stonebraker81], Michael Stonebraker, _Operating System Support for Database Management_, +1981, for a discussion on this topic. + +If you do need `hsync()` to have synced every block in a very large write, call +it regularly. + +#### HDFS: delayed visibility of metadata updates. + +That HDFS file metadata often lags the content of a file being written +to is not something everyone expects, nor convenient for any program trying +to pick up updated data in a file being written. 
Most visible is the length +of a file returned in the various `list` commands and `getFileStatus` —this +is often out of date. + +As HDFS only supports file growth in its output operations, this means +that the size of the file as listed in the metadata may be less than or equal +to the number of available bytes —but never larger. This is a guarantee which +is also held + +One algorithm to determine whether a file in HDFS is updated is: + +1. Remember the last read position `pos` in the file, using `0` if this is the initial +read. +1. Use `getFileStatus(FS, Path)` to query the updated length of the file as +recorded in the metadata. +1. If `Status.length > pos`, the file has grown. +1. If the number has not changed, then + 1. Reopen the file. + 1. `seek(pos)` to that location + 1. If `read() != -1`, there is new data. + +This algorithm works for filesystems which are consistent with metadata and +data, as well as HDFS. What is important to know is that, for an open file +`getFileStatus(FS, path).getLen() == 0` does not imply that `data(FS, path)` is +empty. + +When an output stream in HDFS is closed; the newly written data is not immediately +written to disk unless HDFS is deployed with `dfs.datanode.synconclose` set to +true. Otherwise it is cached and written to disk later. + +### Local Filesystem, `file:` + +`LocalFileSystem`, `file:`, (or any other `FileSystem` implementation based on +`ChecksumFileSystem`) has a different issue. If an output stream +is obtained from `create()` and `FileSystem.setWriteChecksum(false)` has +*not* been called on the filesystem, then the stream only flushes as much +local data as can be written to full checksummed blocks of data. + +That is, the hsync/hflush operations are not guaranteed to write all the pending +data until the file is finally closed. 
For this reason, the local filesystem accessed via `file://` URLs +does not support `Syncable` unless `setWriteChecksum(false)` was +called on that FileSystem instance so as to disable checksum creation.
The check for existing data in a `create()` call with `overwrite=False`, may +take place in the `create()` call itself, in the `close()` call prior to/during +the write, or at some point in between. In the special case that the +object store supports an atomic `PUT` operation, the check for existence of +existing data and the subsequent creation of data at the path contains a race +condition: other clients may create data at the path between the existence check +and the subsequent write. + +1. Calls to `create(FS, Path, overwrite=false)` MAY succeed, returning a new +`OutputStream`, even while another stream is open and writing to the destination +path. + +This allows for the following sequence of operations, which would +raise an exception in the second `open()` call if invoked against HDFS: + +```python +Stream1 = open(FS, path, false) +sleep(200) +Stream2 = open(FS, path, false) +Stream.write('a') +Stream1.close() +Stream2.close() +``` + +For anyone wondering why the clients don't create a 0-byte file in the `create()` call, +it would cause problems after `close()` —the marker file could get +returned in `open()` calls instead of the final data. + +#### Visibility of the output of a stream after `close()` + +One guarantee which Object Stores SHOULD make is the same as those of POSIX +filesystems: After a stream `close()` call returns, the data MUST be persisted +durably and visible to all callers. Unfortunately, even that guarantee is +not always met: + +1. Existing data on a path MAY be visible for an indeterminate period of time. + +1. If the store has any form of create inconsistency or buffering of negative +existence probes, then even after the stream's `close()` operation has returned, +`getFileStatus(FS, path)` and `open(FS, path)` may fail with a `FileNotFoundException`. 
+ +In their favour, the atomicity of the store's PUT operations do offer their +own guarantee: a newly created object is either absent or all of its data +is present: the act of instantiating the object, while potentially exhibiting +create inconsistency, is atomic. Applications may be able to use that fact +to their advantage. + +The [Abortable](abortable.html) interface exposes this ability to abort an output +stream before its data is made visible, so can be used for checkpointing and similar +operations. + +## Implementors notes. + +### Always implement `Syncable` -even if just to throw `UnsupportedOperationException` + +Because `FSDataOutputStream` silently downgrades `Syncable.hflush()` +and `Syncable.hsync()` to `wrappedStream.flush()`, callers of the +API MAY be misled into believing that their data has been flushed/synced +after syncing to a stream which does not support the APIs. + +Implementations SHOULD implement the API but +throw `UnsupportedOperationException`. + +### `StreamCapabilities` + +Implementors of filesystem clients SHOULD implement the `StreamCapabilities` +interface and its `hasCapabilities()` method to to declare whether or not +an output streams offer the visibility and durability guarantees of `Syncable`. + +Implementors of `StreamCapabilities.hasCapabilities()` MUST NOT declare that +they support the `hflush` and `hsync` capabilities on streams where this is not true. + +Sometimes streams pass their data to store, but the far end may not +sync it all the way to disk. That is not something the client can determine. +Here: if the client code is making the hflush/hsync passes these requests +on to the distributed FS, it SHOULD declare that it supports them. + +### Metadata updates + +Implementors MAY NOT update a file's metadata (length, date, ...) after +every `hsync()` call. HDFS doesn't, except when the written data crosses +a block boundary. + + + +### Does `close()` synchronize and persist data? 
By default, HDFS does not immediately write data to disk when a stream is closed; it will +be asynchronously saved to disk.
a feature becoming availaible/unavailable due to operational changes, system upgrades, etc.) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/safemode.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/safemode.md new file mode 100644 index 0000000000000..22ab4aa75cdb9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/safemode.md @@ -0,0 +1,45 @@ + + +# interface `SafeMode` + +The `SafeMode` interface provides a way to perform safe mode actions and obtain the +status after such actions performed to the `FileSystem`. + +This is admin only interface, should be implemented accordingly when necessary to +Filesystem that support safe mode, e.g. `DistributedFileSystem` (HDFS) and +`ViewDistributedFileSystem`. + +```java +public interface SafeMode { + default boolean setSafeMode(SafeModeAction action) throws IOException { + return setSafeMode(action, false); + } + boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException; +} +``` + +The goals of this interface is allow any file system implementation to share the +same concept of safe mode with the following actions and states + +### Safe mode actions +1. `GET`, get the safe mode status of the file system. +1. `ENTER`, enter the safe mode for the file system. +1. `LEAVE`, exit safe mode for the file system gracefully. +1. `FORCE_EXIT`, exit safe mode for the file system even if there is any ongoing data process. + +### Safe mode states +1. return true, when safe mode is on. +1. return false, when safe mode is off, usually it's the result of safe mode actions +with `GET`, `LEAVE`, `FORCE_EXIT`. 
\ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md index 4c6fa3ff0f6d4..53eb9870bc17d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md @@ -66,55 +66,6 @@ Example: - -### swift:// - -The OpenStack Swift login details must be defined in the file -`/hadoop-tools/hadoop-openstack/src/test/resources/contract-test-options.xml`. -The standard hadoop-common `contract-test-options.xml` resource file cannot be -used, as that file does not get included in `hadoop-common-test.jar`. - - -In `/hadoop-tools/hadoop-openstack/src/test/resources/contract-test-options.xml` -the Swift bucket name must be defined in the property `fs.contract.test.fs.swift`, -along with the login details for the specific Swift service provider in which the -bucket is posted. - - - - fs.contract.test.fs.swift - swift://swiftbucket.rackspace/ - - - - fs.swift.service.rackspace.auth.url - https://auth.api.rackspacecloud.com/v2.0/tokens - Rackspace US (multiregion) - - - - fs.swift.service.rackspace.username - this-is-your-username - - - - fs.swift.service.rackspace.region - DFW - - - - fs.swift.service.rackspace.apikey - ab0bceyoursecretapikeyffef - - - - -1. Often the different public cloud Swift infrastructures exhibit different behaviors -(authentication and throttling in particular). We recommand that testers create -accounts on as many of these providers as possible and test against each of them. -1. They can be slow, especially remotely. Remote links are also the most likely -to make eventual-consistency behaviors visible, which is a mixed benefit. 
- ## Testing a new filesystem The core of adding a new FileSystem to the contract tests is adding a @@ -228,8 +179,6 @@ Passing all the FileSystem contract tests does not mean that a filesystem can be * Scalability: does it support files as large as HDFS, or as many in a single directory? * Durability: do files actually last -and how long for? -Proof that this is is true is the fact that the Amazon S3 and OpenStack Swift object stores are eventually consistent object stores with non-atomic rename and delete operations. Single threaded test cases are unlikely to see some of the concurrency issues, while consistency is very often only visible in tests that span a datacenter. - There are also some specific aspects of the use of the FileSystem API: * Compatibility with the `hadoop -fs` CLI. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.2/CHANGELOG.3.2.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.2/CHANGELOG.3.2.2.md new file mode 100644 index 0000000000000..4d6a0f1102981 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.2/CHANGELOG.3.2.2.md @@ -0,0 +1,576 @@ + + +# Apache Hadoop Changelog + +## Release 3.2.2 - 2021-01-03 + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15691](https://issues.apache.org/jira/browse/HADOOP-15691) | Add PathCapabilities to FS and FC to complement StreamCapabilities | Major | . | Steve Loughran | Steve Loughran | +| [YARN-9760](https://issues.apache.org/jira/browse/YARN-9760) | Support configuring application priorities on a workflow level | Major | . 
| Jonathan Hung | Varun Saxena | +| [HDFS-14905](https://issues.apache.org/jira/browse/HDFS-14905) | Backport HDFS persistent memory read cache support to branch-3.2 | Major | caching, datanode | Feilong He | Feilong He | +| [HDFS-12943](https://issues.apache.org/jira/browse/HDFS-12943) | Consistent Reads from Standby Node | Major | hdfs | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-16790](https://issues.apache.org/jira/browse/HADOOP-16790) | Add Write Convenience Methods | Minor | . | David Mollitor | David Mollitor | +| [HADOOP-17210](https://issues.apache.org/jira/browse/HADOOP-17210) | backport HADOOP-15691 PathCapabilities API to branch-3.2 | Major | fs, fs/s3 | Steve Loughran | Steve Loughran | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8750](https://issues.apache.org/jira/browse/YARN-8750) | Refactor TestQueueMetrics | Minor | resourcemanager | Szilard Nemeth | Szilard Nemeth | +| [HADOOP-15849](https://issues.apache.org/jira/browse/HADOOP-15849) | Upgrade netty version to 3.10.6 | Major | . | Xiao Chen | Xiao Chen | +| [HDFS-12946](https://issues.apache.org/jira/browse/HDFS-12946) | Add a tool to check rack configuration against EC policies | Major | erasure-coding | Xiao Chen | Kitti Nanasi | +| [HDFS-14113](https://issues.apache.org/jira/browse/HDFS-14113) | EC : Add Configuration to restrict UserDefined Policies | Major | erasure-coding | Ayush Saxena | Ayush Saxena | +| [HDFS-14006](https://issues.apache.org/jira/browse/HDFS-14006) | Refactor name node to allow different token verification implementations | Major | . 
| CR Hota | CR Hota | +| [HADOOP-15909](https://issues.apache.org/jira/browse/HADOOP-15909) | KeyProvider class should implement Closeable | Major | kms | Kuhu Shukla | Kuhu Shukla | +| [HDFS-14061](https://issues.apache.org/jira/browse/HDFS-14061) | Check if the cluster topology supports the EC policy before setting, enabling or adding it | Major | erasure-coding, hdfs | Kitti Nanasi | Kitti Nanasi | +| [HDFS-14187](https://issues.apache.org/jira/browse/HDFS-14187) | Make warning message more clear when there are not enough data nodes for EC write | Major | erasure-coding | Kitti Nanasi | Kitti Nanasi | +| [HDFS-14125](https://issues.apache.org/jira/browse/HDFS-14125) | Use parameterized log format in ECTopologyVerifier | Trivial | erasure-coding | Kitti Nanasi | Kitti Nanasi | +| [HDFS-14188](https://issues.apache.org/jira/browse/HDFS-14188) | Make hdfs ec -verifyClusterSetup command accept an erasure coding policy as a parameter | Major | erasure-coding | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-16126](https://issues.apache.org/jira/browse/HADOOP-16126) | ipc.Client.stop() may sleep too long to wait for all connections | Major | ipc | Tsz-wo Sze | Tsz-wo Sze | +| [HADOOP-15014](https://issues.apache.org/jira/browse/HADOOP-15014) | KMS should log the IP address of the clients | Major | kms | Zsombor Gegesy | Zsombor Gegesy | +| [HDFS-14460](https://issues.apache.org/jira/browse/HDFS-14460) | DFSUtil#getNamenodeWebAddr should return HTTPS address based on policy configured | Major | . 
| CR Hota | CR Hota | +| [HDFS-14624](https://issues.apache.org/jira/browse/HDFS-14624) | When decommissioning a node, log remaining blocks to replicate periodically | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-13693](https://issues.apache.org/jira/browse/HDFS-13693) | Remove unnecessary search in INodeDirectory.addChild during image loading | Major | namenode | zhouyingchao | Lisheng Sun | +| [HDFS-14313](https://issues.apache.org/jira/browse/HDFS-14313) | Get hdfs used space from FsDatasetImpl#volumeMap#ReplicaInfo in memory instead of df/du | Major | datanode, performance | Lisheng Sun | Lisheng Sun | +| [HDFS-14678](https://issues.apache.org/jira/browse/HDFS-14678) | Allow triggerBlockReport to a specific namenode | Major | datanode | Leon Gao | Leon Gao | +| [HDFS-14523](https://issues.apache.org/jira/browse/HDFS-14523) | Remove excess read lock for NetworkToplogy | Major | . | Wu Weiwei | Wu Weiwei | +| [HDFS-14497](https://issues.apache.org/jira/browse/HDFS-14497) | Write lock held by metasave impact following RPC processing | Major | namenode | Xiaoqiao He | Xiaoqiao He | +| [HADOOP-16531](https://issues.apache.org/jira/browse/HADOOP-16531) | Log more detail for slow RPC | Major | . | Chen Zhang | Chen Zhang | +| [YARN-9764](https://issues.apache.org/jira/browse/YARN-9764) | Print application submission context label in application summary | Major | . | Jonathan Hung | Manoj Kumar | +| [YARN-9824](https://issues.apache.org/jira/browse/YARN-9824) | Fall back to configured queue ordering policy class name | Major | . | Jonathan Hung | Jonathan Hung | +| [HADOOP-16069](https://issues.apache.org/jira/browse/HADOOP-16069) | Support configure ZK\_DTSM\_ZK\_KERBEROS\_PRINCIPAL in ZKDelegationTokenSecretManager using principal with Schema /\_HOST | Minor | common | luhuachao | luhuachao | +| [YARN-9762](https://issues.apache.org/jira/browse/YARN-9762) | Add submission context label to audit logs | Major | . 
| Jonathan Hung | Manoj Kumar | +| [HDFS-14850](https://issues.apache.org/jira/browse/HDFS-14850) | Optimize FileSystemAccessService#getFileSystemConfiguration | Major | httpfs, performance | Lisheng Sun | Lisheng Sun | +| [HDFS-14192](https://issues.apache.org/jira/browse/HDFS-14192) | Track missing DFS operations in Statistics and StorageStatistics | Major | . | Ayush Saxena | Ayush Saxena | +| [YARN-9356](https://issues.apache.org/jira/browse/YARN-9356) | Add more tests to ratio method in TestResourceCalculator | Major | . | Szilard Nemeth | Zoltan Siegl | +| [HADOOP-16643](https://issues.apache.org/jira/browse/HADOOP-16643) | Update netty4 to the latest 4.1.42 | Major | . | Wei-Chiu Chuang | Lisheng Sun | +| [HADOOP-16640](https://issues.apache.org/jira/browse/HADOOP-16640) | WASB: Override getCanonicalServiceName() to return full url of WASB filesystem | Major | fs/azure | Da Zhou | Da Zhou | +| [HDFS-14915](https://issues.apache.org/jira/browse/HDFS-14915) | Move Superuser Check Before Taking Lock For Encryption API | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-14921](https://issues.apache.org/jira/browse/HDFS-14921) | Remove SuperUser Check in Setting Storage Policy in FileStatus During Listing | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-14923](https://issues.apache.org/jira/browse/HDFS-14923) | Remove dead code from HealthMonitor | Minor | . | Hui Fei | Hui Fei | +| [YARN-9914](https://issues.apache.org/jira/browse/YARN-9914) | Use separate configs for free disk space checking for full and not-full disks | Minor | yarn | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7208](https://issues.apache.org/jira/browse/MAPREDUCE-7208) | Tuning TaskRuntimeEstimator | Minor | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-14942](https://issues.apache.org/jira/browse/HDFS-14942) | Change Log Level to debug in JournalNodeSyncer#syncWithJournalAtIndex | Minor | . 
| Lisheng Sun | Lisheng Sun | +| [HDFS-14979](https://issues.apache.org/jira/browse/HDFS-14979) | [Observer Node] Balancer should submit getBlocks to Observer Node when possible | Major | balancer & mover, hdfs | Erik Krogen | Erik Krogen | +| [HADOOP-16705](https://issues.apache.org/jira/browse/HADOOP-16705) | MBeanInfoBuilder puts unnecessary memory pressure on the system with a debug log | Major | metrics | Lukas Majercak | Lukas Majercak | +| [HADOOP-16712](https://issues.apache.org/jira/browse/HADOOP-16712) | Config ha.failover-controller.active-standby-elector.zk.op.retries is not in core-default.xml | Trivial | . | Wei-Chiu Chuang | Xieming Li | +| [HDFS-14952](https://issues.apache.org/jira/browse/HDFS-14952) | Skip safemode if blockTotal is 0 in new NN | Trivial | namenode | Rajesh Balamohan | Xiaoqiao He | +| [YARN-8842](https://issues.apache.org/jira/browse/YARN-8842) | Expose metrics for custom resource types in QueueMetrics | Major | . | Szilard Nemeth | Szilard Nemeth | +| [YARN-9966](https://issues.apache.org/jira/browse/YARN-9966) | Code duplication in UserGroupMappingPlacementRule | Major | . | Szilard Nemeth | Kevin Su | +| [YARN-9937](https://issues.apache.org/jira/browse/YARN-9937) | Add missing queue configs in RMWebService#CapacitySchedulerQueueInfo | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-16718](https://issues.apache.org/jira/browse/HADOOP-16718) | Allow disabling Server Name Indication (SNI) for Jetty | Major | . 
| Siyao Meng | Aravindan Vijayan | +| [HADOOP-16729](https://issues.apache.org/jira/browse/HADOOP-16729) | Extract version numbers to head of pom.xml | Minor | build | Tamas Penzes | Tamas Penzes | +| [HADOOP-16735](https://issues.apache.org/jira/browse/HADOOP-16735) | Make it clearer in config default that EnvironmentVariableCredentialsProvider supports AWS\_SESSION\_TOKEN | Minor | documentation, fs/s3 | Mingliang Liu | Mingliang Liu | +| [YARN-10012](https://issues.apache.org/jira/browse/YARN-10012) | Guaranteed and max capacity queue metrics for custom resources | Major | . | Jonathan Hung | Manikandan R | +| [HDFS-15050](https://issues.apache.org/jira/browse/HDFS-15050) | Optimize log information when DFSInputStream meet CannotObtainBlockLengthException | Major | dfsclient | Xiaoqiao He | Xiaoqiao He | +| [YARN-10033](https://issues.apache.org/jira/browse/YARN-10033) | TestProportionalCapacityPreemptionPolicy not initializing vcores for effective max resources | Major | capacity scheduler, test | Eric Payne | Eric Payne | +| [YARN-10039](https://issues.apache.org/jira/browse/YARN-10039) | Allow disabling app submission from REST endpoints | Major | . | Jonathan Hung | Jonathan Hung | +| [YARN-9894](https://issues.apache.org/jira/browse/YARN-9894) | CapacitySchedulerPerf test for measuring hundreds of apps in a large number of queues. 
| Major | capacity scheduler, test | Eric Payne | Eric Payne | +| [HADOOP-16771](https://issues.apache.org/jira/browse/HADOOP-16771) | Update checkstyle to 8.26 and maven-checkstyle-plugin to 3.1.0 | Major | build | Andras Bokor | Andras Bokor | +| [YARN-10009](https://issues.apache.org/jira/browse/YARN-10009) | In Capacity Scheduler, DRC can treat minimum user limit percent as a max when custom resource is defined | Critical | capacity scheduler | Eric Payne | Eric Payne | +| [HDFS-12999](https://issues.apache.org/jira/browse/HDFS-12999) | When reach the end of the block group, it may not need to flush all the data packets(flushAllInternals) twice. | Major | erasure-coding, hdfs-client | lufei | lufei | +| [HDFS-15074](https://issues.apache.org/jira/browse/HDFS-15074) | DataNode.DataTransfer thread should catch all the exception and log it. | Major | datanode | Surendra Singh Lilhore | Hemanth Boyina | +| [HDFS-14740](https://issues.apache.org/jira/browse/HDFS-14740) | Recover data blocks from persistent memory read cache during datanode restarts | Major | caching, datanode | Feilong He | Feilong He | +| [HADOOP-16775](https://issues.apache.org/jira/browse/HADOOP-16775) | DistCp reuses the same temp file within the task attempt for different files. 
| Major | tools/distcp | Amir Shenavandeh | Amir Shenavandeh | +| [HDFS-15097](https://issues.apache.org/jira/browse/HDFS-15097) | Purge log in KMS and HttpFS | Minor | httpfs, kms | Doris Gu | Doris Gu | +| [HADOOP-16753](https://issues.apache.org/jira/browse/HADOOP-16753) | Refactor HAAdmin | Major | ha | Akira Ajisaka | Xieming Li | +| [HDFS-14968](https://issues.apache.org/jira/browse/HDFS-14968) | Add ability to know datanode staleness | Minor | datanode, logging, namenode | Ahmed Hussein | Ahmed Hussein | +| [YARN-7913](https://issues.apache.org/jira/browse/YARN-7913) | Improve error handling when application recovery fails with exception | Major | resourcemanager | Gergo Repas | Wilfred Spiegelenburg | +| [HDFS-15117](https://issues.apache.org/jira/browse/HDFS-15117) | EC: Add getECTopologyResultForPolicies to DistributedFileSystem | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15119](https://issues.apache.org/jira/browse/HDFS-15119) | Allow expiration of cached locations in DFSInputStream | Minor | dfsclient | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7262](https://issues.apache.org/jira/browse/MAPREDUCE-7262) | MRApp helpers block for long intervals (500ms) | Minor | mr-am | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7260](https://issues.apache.org/jira/browse/MAPREDUCE-7260) | Cross origin request support for Job history server web UI | Critical | jobhistoryserver | Adam Antal | Adam Antal | +| [YARN-10084](https://issues.apache.org/jira/browse/YARN-10084) | Allow inheritance of max app lifetime / default app lifetime | Major | capacity scheduler | Eric Payne | Eric Payne | +| [HDFS-12491](https://issues.apache.org/jira/browse/HDFS-12491) | Support wildcard in CLASSPATH for libhdfs | Major | libhdfs | John Zhuge | Muhammad Samir Khan | +| [YARN-10116](https://issues.apache.org/jira/browse/YARN-10116) | Expose diagnostics in RMAppManager summary | Major | . 
| Jonathan Hung | Jonathan Hung | +| [HADOOP-16739](https://issues.apache.org/jira/browse/HADOOP-16739) | Fix native build failure of hadoop-pipes on CentOS 8 | Major | tools/pipes | Masatake Iwasaki | Masatake Iwasaki | +| [HADOOP-16847](https://issues.apache.org/jira/browse/HADOOP-16847) | Test TestGroupsCaching fail if HashSet iterates in a different order | Minor | test | testfixer0 | testfixer0 | +| [HDFS-14758](https://issues.apache.org/jira/browse/HDFS-14758) | Decrease lease hard limit | Minor | . | Eric Payne | Hemanth Boyina | +| [HDFS-15086](https://issues.apache.org/jira/browse/HDFS-15086) | Block scheduled counter never get decremet if the block got deleted before replication. | Major | 3.1.1 | Surendra Singh Lilhore | Hemanth Boyina | +| [HDFS-15174](https://issues.apache.org/jira/browse/HDFS-15174) | Optimize ReplicaCachingGetSpaceUsed by reducing unnecessary io operations | Major | . | Lisheng Sun | Lisheng Sun | +| [YARN-9018](https://issues.apache.org/jira/browse/YARN-9018) | Add functionality to AuxiliaryLocalPathHandler to return all locations to read for a given path | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HDFS-14861](https://issues.apache.org/jira/browse/HDFS-14861) | Reset LowRedundancyBlocks Iterator periodically | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-16899](https://issues.apache.org/jira/browse/HADOOP-16899) | Update HdfsDesign.md to reduce ambiguity | Minor | documentation | Akshay Nehe | Akshay Nehe | +| [HADOOP-16772](https://issues.apache.org/jira/browse/HADOOP-16772) | Extract version numbers to head of pom.xml (addendum) | Major | build | Tamas Penzes | Tamas Penzes | +| [HDFS-15197](https://issues.apache.org/jira/browse/HDFS-15197) | [SBN read] Change ObserverRetryOnActiveException log to debug | Minor | hdfs | Chen Liang | Chen Liang | +| [HADOOP-16935](https://issues.apache.org/jira/browse/HADOOP-16935) | Backport HADOOP-10848. 
Cleanup calling of sun.security.krb5.Config to branch-3.2 | Minor | . | Siyao Meng | Siyao Meng | +| [YARN-10200](https://issues.apache.org/jira/browse/YARN-10200) | Add number of containers to RMAppManager summary | Major | . | Jonathan Hung | Jonathan Hung | +| [HADOOP-16952](https://issues.apache.org/jira/browse/HADOOP-16952) | Add .diff to gitignore | Minor | . | Ayush Saxena | Ayush Saxena | +| [MAPREDUCE-7266](https://issues.apache.org/jira/browse/MAPREDUCE-7266) | historyContext doesn't need to be a class attribute inside JobHistoryServer | Minor | jobhistoryserver | Siddharth Ahuja | Siddharth Ahuja | +| [YARN-10003](https://issues.apache.org/jira/browse/YARN-10003) | YarnConfigurationStore#checkVersion throws exception that belongs to RMStateStore | Major | . | Szilard Nemeth | Benjamin Teke | +| [YARN-10212](https://issues.apache.org/jira/browse/YARN-10212) | Create separate configuration for max global AM attempts | Major | . | Jonathan Hung | Bilwa S T | +| [YARN-5277](https://issues.apache.org/jira/browse/YARN-5277) | When localizers fail due to resource timestamps being out, provide more diagnostics | Major | nodemanager | Steve Loughran | Siddharth Ahuja | +| [YARN-9995](https://issues.apache.org/jira/browse/YARN-9995) | Code cleanup in TestSchedConfCLI | Minor | . | Szilard Nemeth | Bilwa S T | +| [YARN-9354](https://issues.apache.org/jira/browse/YARN-9354) | Resources should be created with ResourceTypesTestHelper instead of TestUtils | Trivial | . | Szilard Nemeth | Andras Gyori | +| [YARN-10002](https://issues.apache.org/jira/browse/YARN-10002) | Code cleanup and improvements in ConfigurationStoreBaseTest | Minor | . | Szilard Nemeth | Benjamin Teke | +| [YARN-9954](https://issues.apache.org/jira/browse/YARN-9954) | Configurable max application tags and max tag length | Major | . 
| Jonathan Hung | Bilwa S T | +| [YARN-10001](https://issues.apache.org/jira/browse/YARN-10001) | Add explanation of unimplemented methods in InMemoryConfigurationStore | Major | . | Szilard Nemeth | Siddharth Ahuja | +| [HADOOP-17001](https://issues.apache.org/jira/browse/HADOOP-17001) | The suffix name of the unified compression class | Major | io | bianqi | bianqi | +| [YARN-9997](https://issues.apache.org/jira/browse/YARN-9997) | Code cleanup in ZKConfigurationStore | Minor | . | Szilard Nemeth | Andras Gyori | +| [YARN-9996](https://issues.apache.org/jira/browse/YARN-9996) | Code cleanup in QueueAdminConfigurationMutationACLPolicy | Major | . | Szilard Nemeth | Siddharth Ahuja | +| [YARN-9998](https://issues.apache.org/jira/browse/YARN-9998) | Code cleanup in LeveldbConfigurationStore | Minor | . | Szilard Nemeth | Benjamin Teke | +| [YARN-9999](https://issues.apache.org/jira/browse/YARN-9999) | TestFSSchedulerConfigurationStore: Extend from ConfigurationStoreBaseTest, general code cleanup | Minor | . | Szilard Nemeth | Benjamin Teke | +| [HDFS-15295](https://issues.apache.org/jira/browse/HDFS-15295) | AvailableSpaceBlockPlacementPolicy should use chooseRandomWithStorageTypeTwoTrial() for better performance. | Minor | . | Jinglun | Jinglun | +| [YARN-10189](https://issues.apache.org/jira/browse/YARN-10189) | Code cleanup in LeveldbRMStateStore | Minor | . | Benjamin Teke | Benjamin Teke | +| [HADOOP-16886](https://issues.apache.org/jira/browse/HADOOP-16886) | Add hadoop.http.idle\_timeout.ms to core-default.xml | Major | . | Wei-Chiu Chuang | Lisheng Sun | +| [YARN-10260](https://issues.apache.org/jira/browse/YARN-10260) | Allow transitioning queue from DRAINING to RUNNING state | Major | . 
| Jonathan Hung | Bilwa S T | +| [HADOOP-17042](https://issues.apache.org/jira/browse/HADOOP-17042) | Hadoop distcp throws "ERROR: Tools helper ///usr/lib/hadoop/libexec/tools/hadoop-distcp.sh was not found" | Minor | tools/distcp | Aki Tanaka | Aki Tanaka | +| [HADOOP-14698](https://issues.apache.org/jira/browse/HADOOP-14698) | Make copyFromLocal's -t option available for put as well | Major | . | Andras Bokor | Andras Bokor | +| [YARN-6492](https://issues.apache.org/jira/browse/YARN-6492) | Generate queue metrics for each partition | Major | capacity scheduler | Jonathan Hung | Manikandan R | +| [HADOOP-17047](https://issues.apache.org/jira/browse/HADOOP-17047) | TODO comments exist in trunk while the related issues are already fixed. | Trivial | . | Rungroj Maipradit | Rungroj Maipradit | +| [HDFS-15406](https://issues.apache.org/jira/browse/HDFS-15406) | Improve the speed of Datanode Block Scan | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HADOOP-17090](https://issues.apache.org/jira/browse/HADOOP-17090) | Increase precommit job timeout from 5 hours to 20 hours | Major | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10297](https://issues.apache.org/jira/browse/YARN-10297) | TestContinuousScheduling#testFairSchedulerContinuousSchedulingInitTime fails intermittently | Major | . | Jonathan Hung | Jim Brennan | +| [HADOOP-17127](https://issues.apache.org/jira/browse/HADOOP-17127) | Use RpcMetrics.TIMEUNIT to initialize rpc queueTime and processingTime | Minor | common | Jim Brennan | Jim Brennan | +| [HDFS-15404](https://issues.apache.org/jira/browse/HDFS-15404) | ShellCommandFencer should expose info about source | Major | . 
| Chen Liang | Chen Liang | +| [HADOOP-17147](https://issues.apache.org/jira/browse/HADOOP-17147) | Dead link in hadoop-kms/index.md.vm | Minor | documentation, kms | Akira Ajisaka | Xieming Li | +| [YARN-10343](https://issues.apache.org/jira/browse/YARN-10343) | Legacy RM UI should include labeled metrics for allocated, total, and reserved resources. | Major | . | Eric Payne | Eric Payne | +| [YARN-1529](https://issues.apache.org/jira/browse/YARN-1529) | Add Localization overhead metrics to NM | Major | nodemanager | Gera Shegalov | Jim Brennan | +| [YARN-10251](https://issues.apache.org/jira/browse/YARN-10251) | Show extended resources on legacy RM UI. | Major | . | Eric Payne | Eric Payne | +| [HADOOP-17159](https://issues.apache.org/jira/browse/HADOOP-17159) | Make UGI support forceful relogin from keytab ignoring the last login time | Major | security | Sandeep Guggilam | Sandeep Guggilam | +| [YARN-10353](https://issues.apache.org/jira/browse/YARN-10353) | Log vcores used and cumulative cpu in containers monitor | Minor | yarn | Jim Brennan | Jim Brennan | +| [YARN-10369](https://issues.apache.org/jira/browse/YARN-10369) | Make NMTokenSecretManagerInRM sending NMToken for nodeId DEBUG | Minor | yarn | Jim Brennan | Jim Brennan | +| [YARN-10390](https://issues.apache.org/jira/browse/YARN-10390) | LeafQueue: retain user limits cache across assignContainers() calls | Major | capacity scheduler, capacityscheduler | Muhammad Samir Khan | Muhammad Samir Khan | +| [HDFS-15574](https://issues.apache.org/jira/browse/HDFS-15574) | Remove unnecessary sort of block list in DirectoryScanner | Major | . 
| Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15583](https://issues.apache.org/jira/browse/HDFS-15583) | Backport DirectoryScanner improvements HDFS-14476, HDFS-14751 and HDFS-15048 to branch 3.2 and 3.1 | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15581](https://issues.apache.org/jira/browse/HDFS-15581) | Access Controlled HTTPFS Proxy | Minor | httpfs | Richard | Richard | +| [HDFS-15415](https://issues.apache.org/jira/browse/HDFS-15415) | Reduce locking in Datanode DirectoryScanner | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17287](https://issues.apache.org/jira/browse/HADOOP-17287) | Support new Instance by non default constructor by ReflectionUtils | Major | . | Baolong Mao | Baolong Mao | +| [YARN-10451](https://issues.apache.org/jira/browse/YARN-10451) | RM (v1) UI NodesPage can NPE when yarn.io/gpu resource type is defined. | Major | . | Eric Payne | Eric Payne | +| [YARN-9667](https://issues.apache.org/jira/browse/YARN-9667) | Container-executor.c duplicates messages to stdout | Major | nodemanager, yarn | Adam Antal | Peter Bacsko | +| [MAPREDUCE-7301](https://issues.apache.org/jira/browse/MAPREDUCE-7301) | Expose Mini MR Cluster attribute for testing | Minor | test | Swaroopa Kadam | Swaroopa Kadam | +| [HDFS-15567](https://issues.apache.org/jira/browse/HDFS-15567) | [SBN Read] HDFS should expose msync() API to allow downstream applications call it explicitly. 
| Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [YARN-10450](https://issues.apache.org/jira/browse/YARN-10450) | Add cpu and memory utilization per node and cluster-wide metrics | Minor | yarn | Jim Brennan | Jim Brennan | +| [YARN-10475](https://issues.apache.org/jira/browse/YARN-10475) | Scale RM-NM heartbeat interval based on node utilization | Minor | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15665](https://issues.apache.org/jira/browse/HDFS-15665) | Balancer logging improvement | Major | balancer & mover | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-17342](https://issues.apache.org/jira/browse/HADOOP-17342) | Creating a token identifier should not do kerberos name resolution | Major | common | Jim Brennan | Jim Brennan | +| [YARN-10479](https://issues.apache.org/jira/browse/YARN-10479) | RMProxy should retry on SocketTimeout Exceptions | Major | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15623](https://issues.apache.org/jira/browse/HDFS-15623) | Respect configured values of rpc.engine | Major | hdfs | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-14395](https://issues.apache.org/jira/browse/HDFS-14395) | Remove WARN Logging From Interrupts in DataStreamer | Minor | hdfs-client | David Mollitor | David Mollitor | +| [HADOOP-17367](https://issues.apache.org/jira/browse/HADOOP-17367) | Add InetAddress api to ProxyUsers.authorize | Major | performance, security | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7304](https://issues.apache.org/jira/browse/MAPREDUCE-7304) | Enhance the map-reduce Job end notifier to be able to notify the given URL via a custom class | Major | mrv2 | Daniel Fritsi | Zoltán Erdmann | +| [MAPREDUCE-7309](https://issues.apache.org/jira/browse/MAPREDUCE-7309) | Improve performance of reading resource request for mapper/reducers from config | Major | applicationmaster | Wangda Tan | Peter Bacsko | +| [HADOOP-17389](https://issues.apache.org/jira/browse/HADOOP-17389) | KMS should log 
full UGI principal | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15717](https://issues.apache.org/jira/browse/HDFS-15717) | Improve fsck logging | Major | logging, namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15751](https://issues.apache.org/jira/browse/HDFS-15751) | Add documentation for msync() API to filesystem.md | Major | documentation | Konstantin Shvachko | Konstantin Shvachko | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15418](https://issues.apache.org/jira/browse/HADOOP-15418) | Hadoop KMSAuthenticationFilter needs to use getPropsByPrefix instead of iterator to avoid ConcurrentModificationException | Major | common | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-14004](https://issues.apache.org/jira/browse/HDFS-14004) | TestLeaseRecovery2#testCloseWhileRecoverLease fails intermittently in trunk | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-13959](https://issues.apache.org/jira/browse/HDFS-13959) | TestUpgradeDomainBlockPlacementPolicy is flaky | Major | . | Ayush Saxena | Ayush Saxena | +| [YARN-8948](https://issues.apache.org/jira/browse/YARN-8948) | PlacementRule interface should be for all YarnSchedulers | Major | . | Bibin Chundatt | Bibin Chundatt | +| [HADOOP-16013](https://issues.apache.org/jira/browse/HADOOP-16013) | DecayRpcScheduler decay thread should run as a daemon | Major | ipc | Erik Krogen | Erik Krogen | +| [HDFS-14175](https://issues.apache.org/jira/browse/HDFS-14175) | EC: Native XOR decoder should reset the output buffer before using it. | Major | ec, hdfs | Surendra Singh Lilhore | Ayush Saxena | +| [HDFS-14202](https://issues.apache.org/jira/browse/HDFS-14202) | "dfs.disk.balancer.max.disk.throughputInMBperSec" property is not working as per set value. 
| Major | diskbalancer | Ranith Sardar | Ranith Sardar | +| [HADOOP-16127](https://issues.apache.org/jira/browse/HADOOP-16127) | In ipc.Client, put a new connection could happen after stop | Major | ipc | Tsz-wo Sze | Tsz-wo Sze | +| [YARN-4901](https://issues.apache.org/jira/browse/YARN-4901) | QueueMetrics needs to be cleared before MockRM is initialized | Major | scheduler | Daniel Templeton | Peter Bacsko | +| [HADOOP-16161](https://issues.apache.org/jira/browse/HADOOP-16161) | NetworkTopology#getWeightUsingNetworkLocation return unexpected result | Major | net | Xiaoqiao He | Xiaoqiao He | +| [HDFS-14434](https://issues.apache.org/jira/browse/HDFS-14434) | webhdfs that connect secure hdfs should not use user.name parameter | Minor | webhdfs | KWON BYUNGCHANG | KWON BYUNGCHANG | +| [HDFS-14527](https://issues.apache.org/jira/browse/HDFS-14527) | Stop all DataNodes may result in NN terminate | Major | namenode | Xiaoqiao He | Xiaoqiao He | +| [HDFS-14494](https://issues.apache.org/jira/browse/HDFS-14494) | Move Server logging of StatedId inside receiveRequestState() | Major | . | Konstantin Shvachko | Shweta | +| [HDFS-14599](https://issues.apache.org/jira/browse/HDFS-14599) | HDFS-12487 breaks test TestDiskBalancer.testDiskBalancerWithFedClusterWithOneNameServiceEmpty | Major | diskbalancer | Wei-Chiu Chuang | Xiaoqiao He | +| [HDFS-14618](https://issues.apache.org/jira/browse/HDFS-14618) | Incorrect synchronization of ArrayList field (ArrayList is thread-unsafe). | Critical | . | Paul Ward | Paul Ward | +| [HDFS-14610](https://issues.apache.org/jira/browse/HDFS-14610) | HashMap is not thread safe. Field storageMap is typically synchronized by storageMap. However, in one place, field storageMap is not protected with synchronized. | Critical | . 
| Paul Ward | Paul Ward | +| [HDFS-14499](https://issues.apache.org/jira/browse/HDFS-14499) | Misleading REM\_QUOTA value with snapshot and trash feature enabled for a directory | Major | snapshots | Shashikant Banerjee | Shashikant Banerjee | +| [HADOOP-16451](https://issues.apache.org/jira/browse/HADOOP-16451) | Update jackson-databind to 2.9.9.1 | Major | . | Wei-Chiu Chuang | Siyao Meng | +| [HDFS-14647](https://issues.apache.org/jira/browse/HDFS-14647) | NPE during secure namenode startup | Major | hdfs | Fengnan Li | Fengnan Li | +| [HADOOP-16461](https://issues.apache.org/jira/browse/HADOOP-16461) | Regression: FileSystem cache lock parses XML within the lock | Major | fs | Gopal Vijayaraghavan | Gopal Vijayaraghavan | +| [HDFS-14660](https://issues.apache.org/jira/browse/HDFS-14660) | [SBN Read] ObserverNameNode should throw StandbyException for requests not from ObserverProxyProvider | Major | . | Chao Sun | Chao Sun | +| [HADOOP-16460](https://issues.apache.org/jira/browse/HADOOP-16460) | ABFS: fix for Server Name Indication (SNI) | Major | fs/azure | Thomas Marqardt | Sneha Vijayarajan | +| [HDFS-14569](https://issues.apache.org/jira/browse/HDFS-14569) | Result of crypto -listZones is not formatted properly | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HADOOP-12282](https://issues.apache.org/jira/browse/HADOOP-12282) | Connection thread's name should be updated after address changing is detected | Major | ipc | zhouyingchao | Lisheng Sun | +| [HDFS-14686](https://issues.apache.org/jira/browse/HDFS-14686) | HttpFS: HttpFSFileSystem#getErasureCodingPolicy always returns null | Major | httpfs | Siyao Meng | Siyao Meng | +| [HADOOP-15865](https://issues.apache.org/jira/browse/HADOOP-15865) | ConcurrentModificationException in Configuration.overlay() method | Major | . | Oleksandr Shevchenko | Oleksandr Shevchenko | +| [HADOOP-16487](https://issues.apache.org/jira/browse/HADOOP-16487) | Update jackson-databind to 2.9.9.2 | Critical | . 
| Siyao Meng | Siyao Meng | +| [HDFS-14759](https://issues.apache.org/jira/browse/HDFS-14759) | HDFS cat logs an info message | Major | . | Eric Badger | Eric Badger | +| [HADOOP-16533](https://issues.apache.org/jira/browse/HADOOP-16533) | Update jackson-databind to 2.9.9.3 | Major | . | Akira Ajisaka | Akira Ajisaka | +| [HDFS-14699](https://issues.apache.org/jira/browse/HDFS-14699) | Erasure Coding: Storage not considered in live replica when replication streams hard limit reached to threshold | Critical | ec | Zhao Yi Ming | Zhao Yi Ming | +| [YARN-9833](https://issues.apache.org/jira/browse/YARN-9833) | Race condition when DirectoryCollection.checkDirs() runs during container launch | Major | . | Peter Bacsko | Peter Bacsko | +| [YARN-9837](https://issues.apache.org/jira/browse/YARN-9837) | YARN Service fails to fetch status for Stopped apps with bigger spec files | Major | yarn-native-services | Tarun Parimi | Tarun Parimi | +| [YARN-2255](https://issues.apache.org/jira/browse/YARN-2255) | YARN Audit logging not added to log4j.properties | Major | . | Varun Saxena | Aihua Xu | +| [HDFS-14836](https://issues.apache.org/jira/browse/HDFS-14836) | FileIoProvider should not increase FileIoErrors metric in datanode volume metric | Minor | . | Aiphago | Aiphago | +| [HADOOP-16582](https://issues.apache.org/jira/browse/HADOOP-16582) | LocalFileSystem's mkdirs() does not work as expected under viewfs. | Major | . | Kihwal Lee | Kihwal Lee | +| [HADOOP-16581](https://issues.apache.org/jira/browse/HADOOP-16581) | ValueQueue does not trigger an async refill when number of values falls below watermark | Major | common, kms | Yuval Degani | Yuval Degani | +| [HDFS-14853](https://issues.apache.org/jira/browse/HDFS-14853) | NPE in DFSNetworkTopology#chooseRandomWithStorageType() when the excludedNode is not present | Major | . 
| Ranith Sardar | Ranith Sardar | +| [HDFS-13660](https://issues.apache.org/jira/browse/HDFS-13660) | DistCp job fails when new data is appended in the file while the distCp copy job is running | Critical | distcp | Mukund Thakur | Mukund Thakur | +| [HDFS-14808](https://issues.apache.org/jira/browse/HDFS-14808) | EC: Improper size values for corrupt ec block in LOG | Major | ec | Harshakiran Reddy | Ayush Saxena | +| [HDFS-14849](https://issues.apache.org/jira/browse/HDFS-14849) | Erasure Coding: the internal block is replicated many times when datanode is decommissioning | Major | ec, erasure-coding | HuangTao | HuangTao | +| [YARN-9858](https://issues.apache.org/jira/browse/YARN-9858) | Optimize RMContext getExclusiveEnforcedPartitions | Major | . | Jonathan Hung | Jonathan Hung | +| [HDFS-14492](https://issues.apache.org/jira/browse/HDFS-14492) | Snapshot memory leak | Major | snapshots | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-14418](https://issues.apache.org/jira/browse/HDFS-14418) | Remove redundant super user priveledge checks from namenode. | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-16619](https://issues.apache.org/jira/browse/HADOOP-16619) | Upgrade jackson and jackson-databind to 2.9.10 | Major | . | Siyao Meng | Siyao Meng | +| [HDFS-14637](https://issues.apache.org/jira/browse/HDFS-14637) | Namenode may not replicate blocks to meet the policy after enabling upgradeDomain | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-14879](https://issues.apache.org/jira/browse/HDFS-14879) | Header was wrong in Snapshot web UI | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-14655](https://issues.apache.org/jira/browse/HDFS-14655) | [SBN Read] Namenode crashes if one of The JN is down | Critical | . 
| Harshakiran Reddy | Ayush Saxena | +| [HDFS-14859](https://issues.apache.org/jira/browse/HDFS-14859) | Prevent unnecessary evaluation of costly operation getNumLiveDataNodes when dfs.namenode.safemode.min.datanodes is not zero | Major | hdfs | Srinivasu Majeti | Srinivasu Majeti | +| [YARN-6715](https://issues.apache.org/jira/browse/YARN-6715) | Fix documentation about NodeHealthScriptRunner | Major | documentation, nodemanager | Peter Bacsko | Peter Bacsko | +| [YARN-9552](https://issues.apache.org/jira/browse/YARN-9552) | FairScheduler: NODE\_UPDATE can cause NoSuchElementException | Major | fairscheduler | Peter Bacsko | Peter Bacsko | +| [HDFS-14754](https://issues.apache.org/jira/browse/HDFS-14754) | Erasure Coding : The number of Under-Replicated Blocks never reduced | Critical | ec | Hemanth Boyina | Hemanth Boyina | +| [HDFS-14245](https://issues.apache.org/jira/browse/HDFS-14245) | Class cast error in GetGroups with ObserverReadProxyProvider | Major | . | Shen Yinjie | Erik Krogen | +| [HDFS-14373](https://issues.apache.org/jira/browse/HDFS-14373) | EC : Decoding is failing when block group last incomplete cell fall in to AlignedStripe | Critical | ec, hdfs-client | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-14509](https://issues.apache.org/jira/browse/HDFS-14509) | DN throws InvalidToken due to inequality of password when upgrade NN 2.x to 3.x | Blocker | . | Yuxuan Wang | Yuxuan Wang | +| [HDFS-14886](https://issues.apache.org/jira/browse/HDFS-14886) | In NameNode Web UI's Startup Progress page, Loading edits always shows 0 sec | Major | . | Hemanth Boyina | Hemanth Boyina | +| [YARN-8453](https://issues.apache.org/jira/browse/YARN-8453) | Additional Unit tests to verify queue limit and max-limit with multiple resource types | Major | capacity scheduler | Sunil G | Adam Antal | +| [HDFS-14890](https://issues.apache.org/jira/browse/HDFS-14890) | Setting permissions on name directory fails on non posix compliant filesystems | Blocker | . 
| hirik | Siddharth Wagle | +| [HADOOP-16580](https://issues.apache.org/jira/browse/HADOOP-16580) | Disable retry of FailoverOnNetworkExceptionRetry in case of AccessControlException | Major | common | Adam Antal | Adam Antal | +| [HDFS-14909](https://issues.apache.org/jira/browse/HDFS-14909) | DFSNetworkTopology#chooseRandomWithStorageType() should not decrease storage count for excluded node which is already part of excluded scope | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HADOOP-16662](https://issues.apache.org/jira/browse/HADOOP-16662) | Remove unnecessary InnerNode check in NetworkTopology#add() | Minor | . | Lisheng Sun | Lisheng Sun | +| [HDFS-14847](https://issues.apache.org/jira/browse/HDFS-14847) | Erasure Coding: Blocks are over-replicated while EC decommissioning | Critical | ec | Hui Fei | Hui Fei | +| [HDFS-14913](https://issues.apache.org/jira/browse/HDFS-14913) | Correct the value of available count in DFSNetworkTopology#chooseRandomWithStorageType() | Major | . | Ayush Saxena | Ayush Saxena | +| [YARN-9915](https://issues.apache.org/jira/browse/YARN-9915) | Fix FindBug issue in QueueMetrics | Minor | . | Prabhu Joseph | Prabhu Joseph | +| [HDFS-12749](https://issues.apache.org/jira/browse/HDFS-12749) | DN may not send block report to NN after NN restart | Major | datanode | TanYuxin | Xiaoqiao He | +| [HDFS-13901](https://issues.apache.org/jira/browse/HDFS-13901) | INode access time is ignored because of race between open and rename | Major | . | Jinglun | Jinglun | +| [HDFS-14910](https://issues.apache.org/jira/browse/HDFS-14910) | Rename Snapshot with Pre Descendants Fail With IllegalArgumentException. | Blocker | . 
| Íñigo Goiri | Wei-Chiu Chuang | +| [HDFS-14308](https://issues.apache.org/jira/browse/HDFS-14308) | DFSStripedInputStream curStripeBuf is not freed by unbuffer() | Major | ec | Joe McDonnell | Zhao Yi Ming | +| [HDFS-14931](https://issues.apache.org/jira/browse/HDFS-14931) | hdfs crypto commands limit column width | Major | . | Eric Badger | Eric Badger | +| [HADOOP-16669](https://issues.apache.org/jira/browse/HADOOP-16669) | TestRawLocalFileSystemContract.testPermission fails if no native library | Minor | common, test | Steve Loughran | Steve Loughran | +| [HDFS-14920](https://issues.apache.org/jira/browse/HDFS-14920) | Erasure Coding: Decommission may hang If one or more datanodes are out of service during decommission | Major | ec | Hui Fei | Hui Fei | +| [HDFS-13736](https://issues.apache.org/jira/browse/HDFS-13736) | BlockPlacementPolicyDefault can not choose favored nodes when 'dfs.namenode.block-placement-policy.default.prefer-local-node' set to false | Major | . | hu xiaodong | hu xiaodong | +| [HDFS-14925](https://issues.apache.org/jira/browse/HDFS-14925) | rename operation should check nest snapshot | Major | namenode | Junwang Zhao | Junwang Zhao | +| [YARN-9949](https://issues.apache.org/jira/browse/YARN-9949) | Add missing queue configs for root queue in RMWebService#CapacitySchedulerInfo | Minor | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [HDFS-14945](https://issues.apache.org/jira/browse/HDFS-14945) | Revise PacketResponder's log. | Minor | datanode | Xudong Cao | Xudong Cao | +| [HDFS-14946](https://issues.apache.org/jira/browse/HDFS-14946) | Erasure Coding: Block recovery failed during decommissioning | Major | . | Hui Fei | Hui Fei | +| [HDFS-14384](https://issues.apache.org/jira/browse/HDFS-14384) | When lastLocatedBlock token expire, it will take 1~3s second to refetch it. 
| Major | hdfs-client | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-14806](https://issues.apache.org/jira/browse/HDFS-14806) | Bootstrap standby may fail if used in-progress tailing | Major | namenode | Chen Liang | Chen Liang | +| [HDFS-14941](https://issues.apache.org/jira/browse/HDFS-14941) | Potential editlog race condition can cause corrupted file | Major | namenode | Chen Liang | Chen Liang | +| [HDFS-14958](https://issues.apache.org/jira/browse/HDFS-14958) | TestBalancerWithNodeGroup is not using NetworkTopologyWithNodeGroup | Minor | hdfs | Jim Brennan | Jim Brennan | +| [HDFS-14720](https://issues.apache.org/jira/browse/HDFS-14720) | DataNode shouldn't report block as bad block if the block length is Long.MAX\_VALUE. | Major | datanode | Surendra Singh Lilhore | Hemanth Boyina | +| [HADOOP-16676](https://issues.apache.org/jira/browse/HADOOP-16676) | Backport HADOOP-16152 to branch-3.2 | Major | common | DW | Siyao Meng | +| [HADOOP-16677](https://issues.apache.org/jira/browse/HADOOP-16677) | Recalculate the remaining timeout millis correctly while throwing an InterupptedException in SocketIOWithTimeout. 
| Minor | common | Xudong Cao | Xudong Cao | +| [HDFS-14884](https://issues.apache.org/jira/browse/HDFS-14884) | Add sanity check that zone key equals feinfo key while setting Xattrs | Major | encryption, hdfs | Mukul Kumar Singh | Mukul Kumar Singh | +| [HADOOP-15097](https://issues.apache.org/jira/browse/HADOOP-15097) | AbstractContractDeleteTest::testDeleteNonEmptyDirRecursive with misleading path | Minor | fs, test | zhoutai.zt | Xieming Li | +| [HADOOP-16710](https://issues.apache.org/jira/browse/HADOOP-16710) | testing\_azure.md documentation is misleading | Major | fs/azure, test | Andras Bokor | Andras Bokor | +| [YARN-9984](https://issues.apache.org/jira/browse/YARN-9984) | FSPreemptionThread can cause NullPointerException while app is unregistered with containers running on a node | Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-9983](https://issues.apache.org/jira/browse/YARN-9983) | Typo in YARN Service overview documentation | Trivial | documentation | Denes Gerencser | Denes Gerencser | +| [HADOOP-16719](https://issues.apache.org/jira/browse/HADOOP-16719) | Remove the disallowed element config within maven-checkstyle-plugin | Major | . | Wanqiang Ji | Wanqiang Ji | +| [HADOOP-16700](https://issues.apache.org/jira/browse/HADOOP-16700) | RpcQueueTime may be negative when the response has to be sent later | Minor | . 
| xuzq | xuzq | +| [HADOOP-15686](https://issues.apache.org/jira/browse/HADOOP-15686) | Supress bogus AbstractWadlGeneratorGrammarGenerator in KMS stderr | Major | kms | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-14940](https://issues.apache.org/jira/browse/HDFS-14940) | HDFS Balancer : Do not allow to set balancer maximum network bandwidth more than 1TB | Minor | balancer & mover | Souryakanta Dwivedy | Hemanth Boyina | +| [YARN-9838](https://issues.apache.org/jira/browse/YARN-9838) | Fix resource inconsistency for queues when moving app with reserved container to another queue | Critical | capacity scheduler | jiulongzhu | jiulongzhu | +| [YARN-9968](https://issues.apache.org/jira/browse/YARN-9968) | Public Localizer is exiting in NodeManager due to NullPointerException | Major | nodemanager | Tarun Parimi | Tarun Parimi | +| [YARN-9011](https://issues.apache.org/jira/browse/YARN-9011) | Race condition during decommissioning | Major | nodemanager | Peter Bacsko | Peter Bacsko | +| [HDFS-14973](https://issues.apache.org/jira/browse/HDFS-14973) | Balancer getBlocks RPC dispersal does not function properly | Major | balancer & mover | Erik Krogen | Erik Krogen | +| [HADOOP-16685](https://issues.apache.org/jira/browse/HADOOP-16685) | FileSystem#listStatusIterator does not check if given path exists | Major | fs | Sahil Takiar | Sahil Takiar | +| [MAPREDUCE-7240](https://issues.apache.org/jira/browse/MAPREDUCE-7240) | Exception ' Invalid event: TA\_TOO\_MANY\_FETCH\_FAILURE at SUCCESS\_FINISHING\_CONTAINER' cause job error | Critical | . 
| luhuachao | luhuachao | +| [MAPREDUCE-7249](https://issues.apache.org/jira/browse/MAPREDUCE-7249) | Invalid event TA\_TOO\_MANY\_FETCH\_FAILURE at SUCCESS\_CONTAINER\_CLEANUP causes job failure | Critical | applicationmaster, mrv2 | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-9993](https://issues.apache.org/jira/browse/YARN-9993) | Remove incorrectly committed files from YARN-9011 | Major | yarn | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [HDFS-15010](https://issues.apache.org/jira/browse/HDFS-15010) | BlockPoolSlice#addReplicaThreadPool static pool should be initialized by static method | Major | datanode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HADOOP-16744](https://issues.apache.org/jira/browse/HADOOP-16744) | Fix building instruction to enable zstd | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-9985](https://issues.apache.org/jira/browse/YARN-9985) | Unsupported "transitionToObserver" option displaying for rmadmin command | Minor | RM, yarn | Souryakanta Dwivedy | Ayush Saxena | +| [HADOOP-16754](https://issues.apache.org/jira/browse/HADOOP-16754) | Fix docker failed to build yetus/hadoop | Blocker | build | Kevin Su | Kevin Su | +| [HDFS-15032](https://issues.apache.org/jira/browse/HDFS-15032) | Balancer crashes when it fails to contact an unavailable NN via ObserverReadProxyProvider | Major | balancer & mover | Erik Krogen | Erik Krogen | +| [HDFS-15036](https://issues.apache.org/jira/browse/HDFS-15036) | Active NameNode should not silently fail the image transfer | Major | namenode | Konstantin Shvachko | Chen Liang | +| [HDFS-14519](https://issues.apache.org/jira/browse/HDFS-14519) | NameQuota is not update after concat operation, so namequota is wrong | Major | . 
| Ranith Sardar | Ranith Sardar | +| [YARN-10055](https://issues.apache.org/jira/browse/YARN-10055) | bower install fails | Blocker | build, yarn-ui-v2 | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15076](https://issues.apache.org/jira/browse/HDFS-15076) | Fix tests that hold FSDirectory lock, without holding FSNamesystem lock. | Major | test | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-15073](https://issues.apache.org/jira/browse/HDFS-15073) | Replace curator-shaded guava import with the standard one | Minor | hdfs-client | Akira Ajisaka | Chandra Sanivarapu | +| [HADOOP-16042](https://issues.apache.org/jira/browse/HADOOP-16042) | Update the link to HadoopJavaVersion | Minor | documentation | Akira Ajisaka | Chandra Sanivarapu | +| [HDFS-14934](https://issues.apache.org/jira/browse/HDFS-14934) | [SBN Read] Standby NN throws many InterruptedExceptions when dfs.ha.tail-edits.period is 0 | Major | . | Takanobu Asanuma | Ayush Saxena | +| [YARN-10053](https://issues.apache.org/jira/browse/YARN-10053) | Placement rules do not use correct group service init | Major | yarn | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [HDFS-15068](https://issues.apache.org/jira/browse/HDFS-15068) | DataNode could meet deadlock if invoke refreshVolumes when register | Major | datanode | Xiaoqiao He | Aiphago | +| [MAPREDUCE-7255](https://issues.apache.org/jira/browse/MAPREDUCE-7255) | Fix typo in MapReduce documentaion example | Trivial | documentation | Sergey Pogorelov | Sergey Pogorelov | +| [HDFS-15072](https://issues.apache.org/jira/browse/HDFS-15072) | HDFS MiniCluster fails to start when run in directory path with a % | Minor | . 
| Geoffrey Jacoby | Masatake Iwasaki | +| [HDFS-15077](https://issues.apache.org/jira/browse/HDFS-15077) | Fix intermittent failure of TestDFSClientRetries#testLeaseRenewSocketTimeout | Minor | test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15080](https://issues.apache.org/jira/browse/HDFS-15080) | Fix the issue in reading persistent memory cached data with an offset | Major | caching, datanode | Feilong He | Feilong He | +| [YARN-7387](https://issues.apache.org/jira/browse/YARN-7387) | org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestIncreaseAllocationExpirer fails intermittently | Major | . | Miklos Szegedi | Jim Brennan | +| [YARN-8672](https://issues.apache.org/jira/browse/YARN-8672) | TestContainerManager#testLocalingResourceWhileContainerRunning occasionally times out | Major | nodemanager | Jason Darrell Lowe | Chandni Singh | +| [HDFS-14957](https://issues.apache.org/jira/browse/HDFS-14957) | INodeReference Space Consumed was not same in QuotaUsage and ContentSummary | Major | namenode | Hemanth Boyina | Hemanth Boyina | +| [MAPREDUCE-7252](https://issues.apache.org/jira/browse/MAPREDUCE-7252) | Handling 0 progress in SimpleExponential task runtime estimator | Minor | . 
| Ahmed Hussein | Ahmed Hussein | +| [HADOOP-16749](https://issues.apache.org/jira/browse/HADOOP-16749) | Configuration parsing of CDATA values are blank | Major | conf | Jonathan Turner Eagles | Daryn Sharp | +| [HDFS-15095](https://issues.apache.org/jira/browse/HDFS-15095) | Fix accidental comment in flaky test TestDecommissioningStatus | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15099](https://issues.apache.org/jira/browse/HDFS-15099) | [SBN Read] checkOperation(WRITE) should throw ObserverRetryOnActiveException on ObserverNode | Major | namenode | Konstantin Shvachko | Chen Liang | +| [HDFS-14578](https://issues.apache.org/jira/browse/HDFS-14578) | AvailableSpaceBlockPlacementPolicy always prefers local node | Major | block placement | Wei-Chiu Chuang | Ayush Saxena | +| [HADOOP-16683](https://issues.apache.org/jira/browse/HADOOP-16683) | Disable retry of FailoverOnNetworkExceptionRetry in case of wrapped AccessControlException | Major | common | Adam Antal | Adam Antal | +| [MAPREDUCE-7256](https://issues.apache.org/jira/browse/MAPREDUCE-7256) | Fix javadoc error in SimpleExponentialSmoothing | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-8373](https://issues.apache.org/jira/browse/YARN-8373) | RM Received RMFatalEvent of type CRITICAL\_THREAD\_CRASH | Major | fairscheduler, resourcemanager | Girish Bhat | Wilfred Spiegelenburg | +| [MAPREDUCE-7247](https://issues.apache.org/jira/browse/MAPREDUCE-7247) | Modify HistoryServerRest.html content,change The job attempt id‘s datatype from string to int | Major | documentation | zhaoshengjie | zhaoshengjie | +| [YARN-9970](https://issues.apache.org/jira/browse/YARN-9970) | Refactor TestUserGroupMappingPlacementRule#verifyQueueMapping | Major | . 
| Manikandan R | Manikandan R | +| [YARN-8148](https://issues.apache.org/jira/browse/YARN-8148) | Update decimal values for queue capacities shown on queue status CLI | Major | client | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-16808](https://issues.apache.org/jira/browse/HADOOP-16808) | Use forkCount and reuseForks parameters instead of forkMode in the config of maven surefire plugin | Minor | build | Akira Ajisaka | Xieming Li | +| [HADOOP-16793](https://issues.apache.org/jira/browse/HADOOP-16793) | Remove WARN log when ipc connection interrupted in Client#handleSaslConnectionFailure() | Minor | . | Lisheng Sun | Lisheng Sun | +| [YARN-9462](https://issues.apache.org/jira/browse/YARN-9462) | TestResourceTrackerService.testNodeRemovalGracefully fails sporadically | Minor | resourcemanager, test | Prabhu Joseph | Prabhu Joseph | +| [YARN-9790](https://issues.apache.org/jira/browse/YARN-9790) | Failed to set default-application-lifetime if maximum-application-lifetime is less than or equal to zero | Major | . | kyungwan nam | kyungwan nam | +| [HDFS-14993](https://issues.apache.org/jira/browse/HDFS-14993) | checkDiskError doesn't work during datanode startup | Major | datanode | Yang Yun | Yang Yun | +| [HDFS-13179](https://issues.apache.org/jira/browse/HDFS-13179) | TestLazyPersistReplicaRecovery#testDnRestartWithSavedReplicas fails intermittently | Critical | fs | Gabor Bota | Ahmed Hussein | +| [MAPREDUCE-7259](https://issues.apache.org/jira/browse/MAPREDUCE-7259) | testSpeculateSuccessfulWithUpdateEvents fails Intermittently | Minor | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15146](https://issues.apache.org/jira/browse/HDFS-15146) | TestBalancerRPCDelay.testBalancerRPCDelay fails intermittently | Minor | . | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7079](https://issues.apache.org/jira/browse/MAPREDUCE-7079) | JobHistory#ServiceStop implementation is incorrect | Major | . 
| Jason Darrell Lowe | Ahmed Hussein | +| [HDFS-15118](https://issues.apache.org/jira/browse/HDFS-15118) | [SBN Read] Slow clients when Observer reads are enabled but there are no Observers on the cluster. | Major | hdfs-client | Konstantin Shvachko | Chen Liang | +| [HDFS-7175](https://issues.apache.org/jira/browse/HDFS-7175) | Client-side SocketTimeoutException during Fsck | Major | namenode | Carl Steinbach | Stephen O'Donnell | +| [HDFS-15148](https://issues.apache.org/jira/browse/HDFS-15148) | dfs.namenode.send.qop.enabled should not apply to primary NN port | Major | . | Chen Liang | Chen Liang | +| [HADOOP-16410](https://issues.apache.org/jira/browse/HADOOP-16410) | Hadoop 3.2 azure jars incompatible with alpine 3.9 | Minor | fs/azure | Jose Luis Pedrosa | | +| [HDFS-15115](https://issues.apache.org/jira/browse/HDFS-15115) | Namenode crash caused by NPE in BlockPlacementPolicyDefault when dynamically change logger to debug | Major | . | wangzhixiang | wangzhixiang | +| [HDFS-15158](https://issues.apache.org/jira/browse/HDFS-15158) | The number of failed volumes mismatch with volumeFailures of Datanode metrics | Minor | datanode | Yang Yun | Yang Yun | +| [HADOOP-16849](https://issues.apache.org/jira/browse/HADOOP-16849) | start-build-env.sh behaves incorrectly when username is numeric only | Minor | build | Jihyun Cho | Jihyun Cho | +| [HDFS-15161](https://issues.apache.org/jira/browse/HDFS-15161) | When evictableMmapped or evictable size is zero, do not throw NoSuchElementException in ShortCircuitCache#close() | Major | . | Lisheng Sun | Lisheng Sun | +| [HDFS-15164](https://issues.apache.org/jira/browse/HDFS-15164) | Fix TestDelegationTokensWithHA | Major | . 
| Ayush Saxena | Ayush Saxena | +| [HADOOP-16868](https://issues.apache.org/jira/browse/HADOOP-16868) | ipc.Server readAndProcess threw NullPointerException | Major | rpc-server | Tsz-wo Sze | Tsz-wo Sze | +| [HADOOP-16869](https://issues.apache.org/jira/browse/HADOOP-16869) | Upgrade findbugs-maven-plugin to 3.0.5 to fix mvn findbugs:findbugs failure | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15052](https://issues.apache.org/jira/browse/HDFS-15052) | WebHDFS getTrashRoot leads to OOM due to FileSystem object creation | Major | webhdfs | Wei-Chiu Chuang | Masatake Iwasaki | +| [HDFS-15185](https://issues.apache.org/jira/browse/HDFS-15185) | StartupProgress reports edits segments until the entire startup completes | Major | namenode | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-15166](https://issues.apache.org/jira/browse/HDFS-15166) | Remove redundant field fStream in ByteStringLog | Major | . | Konstantin Shvachko | Xieming Li | +| [YARN-10143](https://issues.apache.org/jira/browse/YARN-10143) | YARN-10101 broke Yarn logs CLI | Blocker | yarn | Adam Antal | Adam Antal | +| [HADOOP-16841](https://issues.apache.org/jira/browse/HADOOP-16841) | The description of hadoop.http.authentication.signature.secret.file contains outdated information | Minor | documentation | Akira Ajisaka | Xieming Li | +| [YARN-10156](https://issues.apache.org/jira/browse/YARN-10156) | Fix typo 'complaint' which means quite different in Federation.md | Minor | documentation, federation | Sungpeo Kook | Sungpeo Kook | +| [HDFS-15147](https://issues.apache.org/jira/browse/HDFS-15147) | LazyPersistTestCase wait logic is error-prone | Minor | . 
| Ahmed Hussein | Ahmed Hussein | +| [HDFS-14668](https://issues.apache.org/jira/browse/HDFS-14668) | Support Fuse with Users from multiple Security Realms | Critical | fuse-dfs | Sailesh Patel | István Fajth | +| [HDFS-15111](https://issues.apache.org/jira/browse/HDFS-15111) | stopStandbyServices() should log which service state it is transitioning from. | Major | hdfs, logging | Konstantin Shvachko | Xieming Li | +| [HDFS-15199](https://issues.apache.org/jira/browse/HDFS-15199) | NPE in BlockSender | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-16891](https://issues.apache.org/jira/browse/HADOOP-16891) | Upgrade jackson-databind to 2.9.10.3 | Blocker | . | Siyao Meng | Siyao Meng | +| [HDFS-15204](https://issues.apache.org/jira/browse/HDFS-15204) | TestRetryCacheWithHA testRemoveCacheDescriptor fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-16840](https://issues.apache.org/jira/browse/HADOOP-16840) | AliyunOSS: getFileStatus throws FileNotFoundException in versioning bucket | Major | fs/oss | wujinhu | wujinhu | +| [YARN-9419](https://issues.apache.org/jira/browse/YARN-9419) | Log a warning if GPU isolation is enabled but LinuxContainerExecutor is disabled | Major | . | Szilard Nemeth | Andras Gyori | +| [YARN-9427](https://issues.apache.org/jira/browse/YARN-9427) | TestContainerSchedulerQueuing.testKillOnlyRequiredOpportunisticContainers fails sporadically | Major | scheduler, test | Prabhu Joseph | Ahmed Hussein | +| [HDFS-15135](https://issues.apache.org/jira/browse/HDFS-15135) | EC : ArrayIndexOutOfBoundsException in BlockRecoveryWorker#RecoveryTaskStriped. | Major | erasure-coding | Surendra Singh Lilhore | Ravuri Sushma sree | +| [HDFS-14442](https://issues.apache.org/jira/browse/HDFS-14442) | Disagreement between HAUtil.getAddressOfActive and RpcInvocationHandler.getConnectionId | Major | . 
| Erik Krogen | Ravuri Sushma sree | +| [HDFS-15216](https://issues.apache.org/jira/browse/HDFS-15216) | Wrong Use Case of -showprogress in fsck | Major | . | Ravuri Sushma sree | Ravuri Sushma sree | +| [HDFS-15211](https://issues.apache.org/jira/browse/HDFS-15211) | EC: File write hangs during close in case of Exception during updatePipeline | Critical | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15208](https://issues.apache.org/jira/browse/HDFS-15208) | Suppress bogus AbstractWadlGeneratorGrammarGenerator in KMS stderr in hdfs | Trivial | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15223](https://issues.apache.org/jira/browse/HDFS-15223) | FSCK fails if one namenode is not available | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15232](https://issues.apache.org/jira/browse/HDFS-15232) | Fix libhdfspp test failures with GCC 7 | Major | native, test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15219](https://issues.apache.org/jira/browse/HDFS-15219) | DFS Client will stuck when ResponseProcessor.run throw Error | Major | hdfs-client | zhengchenyu | zhengchenyu | +| [HDFS-15191](https://issues.apache.org/jira/browse/HDFS-15191) | EOF when reading legacy buffer in BlockTokenIdentifier | Major | hdfs | Steven Rand | Steven Rand | +| [YARN-10202](https://issues.apache.org/jira/browse/YARN-10202) | Fix documentation about NodeAttributes. 
| Minor | documentation | Sen Zhao | Sen Zhao | +| [HADOOP-16949](https://issues.apache.org/jira/browse/HADOOP-16949) | pylint fails in the build environment | Critical | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-14836](https://issues.apache.org/jira/browse/HADOOP-14836) | Upgrade maven-clean-plugin to 3.1.0 | Major | build | Allen Wittenauer | Akira Ajisaka | +| [YARN-10207](https://issues.apache.org/jira/browse/YARN-10207) | CLOSE\_WAIT socket connection leaks during rendering of (corrupted) aggregated logs on the JobHistoryServer Web UI | Major | yarn | Siddharth Ahuja | Siddharth Ahuja | +| [HDFS-12862](https://issues.apache.org/jira/browse/HDFS-12862) | CacheDirective becomes invalid when NN restart or failover | Major | caching, hdfs | Wang XL | Wang XL | +| [MAPREDUCE-7272](https://issues.apache.org/jira/browse/MAPREDUCE-7272) | TaskAttemptListenerImpl excessive log messages | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15283](https://issues.apache.org/jira/browse/HDFS-15283) | Cache pool MAXTTL is not persisted and restored on cluster restart | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-16944](https://issues.apache.org/jira/browse/HADOOP-16944) | Use Yetus 0.12.0 in GitHub PR | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15276](https://issues.apache.org/jira/browse/HDFS-15276) | Concat on INodeRefernce fails with illegal state exception | Critical | . 
| Hemanth Boyina | Hemanth Boyina | +| [YARN-10223](https://issues.apache.org/jira/browse/YARN-10223) | Duplicate jersey-test-framework-core dependency in yarn-server-common | Minor | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15281](https://issues.apache.org/jira/browse/HDFS-15281) | ZKFC ignores dfs.namenode.rpc-bind-host and uses dfs.namenode.rpc-address to bind to host address | Major | ha, namenode | Dhiraj Hegde | Dhiraj Hegde | +| [HDFS-15297](https://issues.apache.org/jira/browse/HDFS-15297) | TestNNHandlesBlockReportPerStorage::blockReport\_02 fails intermittently in trunk | Major | datanode, test | Mingliang Liu | Ayush Saxena | +| [HADOOP-17014](https://issues.apache.org/jira/browse/HADOOP-17014) | Upgrade jackson-databind to 2.9.10.4 | Blocker | . | Siyao Meng | Siyao Meng | +| [YARN-9848](https://issues.apache.org/jira/browse/YARN-9848) | revert YARN-4946 | Blocker | log-aggregation, resourcemanager | Steven Rand | Steven Rand | +| [HDFS-15286](https://issues.apache.org/jira/browse/HDFS-15286) | Concat on a same files deleting the file | Critical | . | Hemanth Boyina | Hemanth Boyina | +| [YARN-10256](https://issues.apache.org/jira/browse/YARN-10256) | Refactor TestContainerSchedulerQueuing.testContainerUpdateExecTypeGuaranteedToOpportunistic | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15270](https://issues.apache.org/jira/browse/HDFS-15270) | Account for \*env == NULL in hdfsThreadDestructor | Major | . | Babneet Singh | Babneet Singh | +| [YARN-8959](https://issues.apache.org/jira/browse/YARN-8959) | TestContainerResizing fails randomly | Minor | . 
| Bibin Chundatt | Ahmed Hussein | +| [HDFS-15323](https://issues.apache.org/jira/browse/HDFS-15323) | StandbyNode fails transition to active due to insufficient transaction tailing | Major | namenode, qjm | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-17025](https://issues.apache.org/jira/browse/HADOOP-17025) | Fix invalid metastore configuration in S3GuardTool tests | Minor | fs/s3, test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15339](https://issues.apache.org/jira/browse/HDFS-15339) | TestHDFSCLI fails for user names with the dot/dash character | Major | test | Yan Xiaole | Yan Xiaole | +| [HDFS-15250](https://issues.apache.org/jira/browse/HDFS-15250) | Setting \`dfs.client.use.datanode.hostname\` to true can crash the system because of unhandled UnresolvedAddressException | Major | . | Ctest | Ctest | +| [HDFS-14367](https://issues.apache.org/jira/browse/HDFS-14367) | EC: Parameter maxPoolSize in striped reconstruct thread pool isn't affecting number of threads | Major | ec | Guo Lei | Guo Lei | +| [HADOOP-15565](https://issues.apache.org/jira/browse/HADOOP-15565) | ViewFileSystem.close doesn't close child filesystems and causes FileSystem objects leak. | Major | . | Jinglun | Jinglun | +| [YARN-9444](https://issues.apache.org/jira/browse/YARN-9444) | YARN API ResourceUtils's getRequestedResourcesFromConfig doesn't recognize yarn.io/gpu as a valid resource | Minor | api | Gergely Pollak | Gergely Pollak | +| [HADOOP-17044](https://issues.apache.org/jira/browse/HADOOP-17044) | Revert "HADOOP-8143. 
Change distcp to have -pb on by default" | Major | tools/distcp | Steve Loughran | Steve Loughran | +| [HDFS-15293](https://issues.apache.org/jira/browse/HDFS-15293) | Relax the condition for accepting a fsimage when receiving a checkpoint | Critical | namenode | Chen Liang | Chen Liang | +| [HADOOP-17024](https://issues.apache.org/jira/browse/HADOOP-17024) | ListStatus on ViewFS root (ls "/") should list the linkFallBack root (configured target root). | Major | fs, viewfs | Uma Maheswara Rao G | Abhishek Das | +| [HADOOP-17040](https://issues.apache.org/jira/browse/HADOOP-17040) | Fix intermittent failure of ITestBlockingThreadPoolExecutorService | Minor | fs/s3, test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15363](https://issues.apache.org/jira/browse/HDFS-15363) | BlockPlacementPolicyWithNodeGroup should validate if it is initialized by NetworkTopologyWithNodeGroup | Major | . | Hemanth Boyina | Hemanth Boyina | +| [MAPREDUCE-7278](https://issues.apache.org/jira/browse/MAPREDUCE-7278) | Speculative execution behavior is observed even when mapreduce.map.speculative and mapreduce.reduce.speculative are false | Major | task | Tarun Parimi | Tarun Parimi | +| [HADOOP-7002](https://issues.apache.org/jira/browse/HADOOP-7002) | Wrong description of copyFromLocal and copyToLocal in documentation | Minor | . 
| Jingguo Yao | Andras Bokor | +| [HADOOP-17052](https://issues.apache.org/jira/browse/HADOOP-17052) | NetUtils.connect() throws unchecked exception (UnresolvedAddressException) causing clients to abort | Major | net | Dhiraj Hegde | Dhiraj Hegde | +| [HADOOP-17062](https://issues.apache.org/jira/browse/HADOOP-17062) | Fix shelldocs path in Jenkinsfile | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17056](https://issues.apache.org/jira/browse/HADOOP-17056) | shelldoc fails in hadoop-common | Major | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10286](https://issues.apache.org/jira/browse/YARN-10286) | PendingContainers bugs in the scheduler outputs | Critical | . | Adam Antal | Andras Gyori | +| [HDFS-15396](https://issues.apache.org/jira/browse/HDFS-15396) | Fix TestViewFileSystemOverloadSchemeHdfsFileSystemContract#testListStatusRootDir | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15386](https://issues.apache.org/jira/browse/HDFS-15386) | ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories | Major | . | Toshihiro Suzuki | Toshihiro Suzuki | +| [YARN-10300](https://issues.apache.org/jira/browse/YARN-10300) | appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat | Major | . | Eric Badger | Eric Badger | +| [HADOOP-17059](https://issues.apache.org/jira/browse/HADOOP-17059) | ArrayIndexOfboundsException in ViewFileSystem#listStatus | Major | viewfs | Hemanth Boyina | Hemanth Boyina | +| [YARN-10296](https://issues.apache.org/jira/browse/YARN-10296) | Make ContainerPBImpl#getId/setId synchronized | Minor | . 
| Benjamin Teke | Benjamin Teke | +| [YARN-10295](https://issues.apache.org/jira/browse/YARN-10295) | CapacityScheduler NPE can cause apps to get stuck without resources | Major | capacityscheduler | Benjamin Teke | Benjamin Teke | +| [HADOOP-17060](https://issues.apache.org/jira/browse/HADOOP-17060) | listStatus and getFileStatus behave inconsistent in the case of ViewFs implementation for isDirectory | Major | viewfs | Srinivasu Majeti | Uma Maheswara Rao G | +| [YARN-10312](https://issues.apache.org/jira/browse/YARN-10312) | Add support for yarn logs -logFile to retain backward compatibility | Major | client | Jim Brennan | Jim Brennan | +| [HDFS-15403](https://issues.apache.org/jira/browse/HDFS-15403) | NPE in FileIoProvider#transferToSocketFully | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HADOOP-17029](https://issues.apache.org/jira/browse/HADOOP-17029) | ViewFS does not return correct user/group and ACL | Major | fs, viewfs | Abhishek Das | Abhishek Das | +| [HDFS-15421](https://issues.apache.org/jira/browse/HDFS-15421) | IBR leak causes standby NN to be stuck in safe mode | Blocker | namenode | Kihwal Lee | Akira Ajisaka | +| [YARN-9903](https://issues.apache.org/jira/browse/YARN-9903) | Support reservations continue looking for Node Labels | Major | . 
| Tarun Parimi | Jim Brennan | +| [HADOOP-17032](https://issues.apache.org/jira/browse/HADOOP-17032) | Handle an internal dir in viewfs having multiple children mount points pointing to different filesystems | Major | fs, viewfs | Abhishek Das | Abhishek Das | +| [HDFS-15446](https://issues.apache.org/jira/browse/HDFS-15446) | CreateSnapshotOp fails during edit log loading for /.reserved/raw/path with error java.io.FileNotFoundException: Directory does not exist: /.reserved/raw/path | Major | hdfs | Srinivasu Majeti | Stephen O'Donnell | +| [HADOOP-17081](https://issues.apache.org/jira/browse/HADOOP-17081) | MetricsSystem doesn't start the sink adapters on restart | Minor | metrics | Madhusoodan | Madhusoodan | +| [HDFS-15451](https://issues.apache.org/jira/browse/HDFS-15451) | Restarting name node stuck in safe mode when using provided storage | Major | namenode | shanyu zhao | shanyu zhao | +| [HADOOP-17120](https://issues.apache.org/jira/browse/HADOOP-17120) | Fix failure of docker image creation due to pip2 install error | Major | . | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10347](https://issues.apache.org/jira/browse/YARN-10347) | Fix double locking in CapacityScheduler#reinitialize in branch-3.1 | Critical | capacity scheduler | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10348](https://issues.apache.org/jira/browse/YARN-10348) | Allow RM to always cancel tokens after app completes | Major | yarn | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7284](https://issues.apache.org/jira/browse/MAPREDUCE-7284) | TestCombineFileInputFormat#testMissingBlocks fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-14498](https://issues.apache.org/jira/browse/HDFS-14498) | LeaseManager can loop forever on the file for which create has failed | Major | namenode | Sergey Shelukhin | Stephen O'Donnell | +| [HADOOP-17130](https://issues.apache.org/jira/browse/HADOOP-17130) | Configuration.getValByRegex() shouldn't update the results while fetching. 
| Major | common | Mukund Thakur | Mukund Thakur | +| [HADOOP-17119](https://issues.apache.org/jira/browse/HADOOP-17119) | Jetty upgrade to 9.4.x causes MR app fail with IOException | Major | . | Bilwa S T | Bilwa S T | +| [YARN-4771](https://issues.apache.org/jira/browse/YARN-4771) | Some containers can be skipped during log aggregation after NM restart | Major | nodemanager | Jason Darrell Lowe | Jim Brennan | +| [MAPREDUCE-7051](https://issues.apache.org/jira/browse/MAPREDUCE-7051) | Fix typo in MultipleOutputFormat | Trivial | . | ywheel | ywheel | +| [HDFS-15313](https://issues.apache.org/jira/browse/HDFS-15313) | Ensure inodes in active filesystem are not deleted during snapshot delete | Major | snapshots | Shashikant Banerjee | Shashikant Banerjee | +| [HDFS-14950](https://issues.apache.org/jira/browse/HDFS-14950) | missing libhdfspp libs in dist-package | Major | build, libhdfs++ | Yuan Zhou | Yuan Zhou | +| [HADOOP-17184](https://issues.apache.org/jira/browse/HADOOP-17184) | Add --mvn-custom-repos parameter to yetus calls | Major | build | Mingliang Liu | Mingliang Liu | +| [HDFS-15499](https://issues.apache.org/jira/browse/HDFS-15499) | Clean up httpfs/pom.xml to remove aws-java-sdk-s3 exclusion | Major | httpfs | Mingliang Liu | Mingliang Liu | +| [HADOOP-17164](https://issues.apache.org/jira/browse/HADOOP-17164) | UGI loginUserFromKeytab doesn't set the last login time | Major | security | Sandeep Guggilam | Sandeep Guggilam | +| [YARN-4575](https://issues.apache.org/jira/browse/YARN-4575) | ApplicationResourceUsageReport should return ALL reserved resource | Major | . 
| Bibin Chundatt | Bibin Chundatt | +| [HADOOP-17196](https://issues.apache.org/jira/browse/HADOOP-17196) | Fix C/C++ standard warnings | Major | build | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-17204](https://issues.apache.org/jira/browse/HADOOP-17204) | Fix typo in Hadoop KMS document | Trivial | documentation, kms | Akira Ajisaka | Xieming Li | +| [HADOOP-17209](https://issues.apache.org/jira/browse/HADOOP-17209) | Erasure Coding: Native library memory leak | Major | native | Sean Chow | Sean Chow | +| [HADOOP-16925](https://issues.apache.org/jira/browse/HADOOP-16925) | MetricsConfig incorrectly loads the configuration whose value is String list in the properties file | Major | metrics | Jiayi Liu | Jiayi Liu | +| [HDFS-14852](https://issues.apache.org/jira/browse/HDFS-14852) | Removing from LowRedundancyBlocks does not remove the block from all queues | Major | namenode | Hui Fei | Hui Fei | +| [HDFS-15290](https://issues.apache.org/jira/browse/HDFS-15290) | NPE in HttpServer during NameNode startup | Major | namenode | Konstantin Shvachko | Simbarashe Dzinamarira | +| [YARN-10430](https://issues.apache.org/jira/browse/YARN-10430) | Log improvements in NodeStatusUpdaterImpl | Minor | nodemanager | Bilwa S T | Bilwa S T | +| [MAPREDUCE-7294](https://issues.apache.org/jira/browse/MAPREDUCE-7294) | Only application master should upload resource to Yarn Shared Cache | Major | mrv2 | zhenzhao wang | zhenzhao wang | +| [MAPREDUCE-7289](https://issues.apache.org/jira/browse/MAPREDUCE-7289) | Fix wrong comment in LongLong.java | Trivial | documentation, examples | Akira Ajisaka | Wanqiang Ji | +| [YARN-9809](https://issues.apache.org/jira/browse/YARN-9809) | NMs should supply a health status when registering with RM | Major | . 
| Eric Badger | Eric Badger | +| [YARN-10393](https://issues.apache.org/jira/browse/YARN-10393) | MR job live lock caused by completed state container leak in heartbeat between node manager and RM | Major | nodemanager, yarn | zhenzhao wang | Jim Brennan | +| [YARN-10455](https://issues.apache.org/jira/browse/YARN-10455) | TestNMProxy.testNMProxyRPCRetry is not consistent | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17223](https://issues.apache.org/jira/browse/HADOOP-17223) | update org.apache.httpcomponents:httpclient to 4.5.13 and httpcore to 4.4.13 | Blocker | . | Pranav Bheda | Pranav Bheda | +| [HDFS-15628](https://issues.apache.org/jira/browse/HDFS-15628) | HttpFS server throws NPE if a file is a symlink | Major | fs, httpfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15627](https://issues.apache.org/jira/browse/HDFS-15627) | Audit log deletes before collecting blocks | Major | logging, namenode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17309](https://issues.apache.org/jira/browse/HADOOP-17309) | Javadoc warnings and errors are ignored in the precommit jobs | Major | build, documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15639](https://issues.apache.org/jira/browse/HDFS-15639) | [JDK 11] Fix Javadoc errors in hadoop-hdfs-client | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-15618](https://issues.apache.org/jira/browse/HDFS-15618) | Improve datanode shutdown latency | Major | datanode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15622](https://issues.apache.org/jira/browse/HDFS-15622) | Deleted blocks linger in the replications queue | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15641](https://issues.apache.org/jira/browse/HDFS-15641) | DataNode could meet deadlock if invoke refreshNameNode | Critical | . 
| Hongbing Wang | Hongbing Wang | +| [HDFS-15644](https://issues.apache.org/jira/browse/HDFS-15644) | Failed volumes can cause DNs to stop block reporting | Major | block placement, datanode | Ahmed Hussein | Ahmed Hussein | +| [YARN-10467](https://issues.apache.org/jira/browse/YARN-10467) | ContainerIdPBImpl objects can be leaked in RMNodeImpl.completedContainers | Major | resourcemanager | Haibo Chen | Haibo Chen | +| [HADOOP-17329](https://issues.apache.org/jira/browse/HADOOP-17329) | mvn site commands fails due to MetricsSystemImpl changes | Major | . | Xiaoqiao He | Xiaoqiao He | +| [YARN-10472](https://issues.apache.org/jira/browse/YARN-10472) | Backport YARN-10314 to branch-3.2 | Blocker | yarn | Siyao Meng | Siyao Meng | +| [HADOOP-17340](https://issues.apache.org/jira/browse/HADOOP-17340) | TestLdapGroupsMapping failing -string mismatch in exception validation | Major | test | Steve Loughran | Steve Loughran | +| [HADOOP-17352](https://issues.apache.org/jira/browse/HADOOP-17352) | Update PATCH\_NAMING\_RULE in the personality file | Minor | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17096](https://issues.apache.org/jira/browse/HADOOP-17096) | ZStandardCompressor throws java.lang.InternalError: Error (generic) | Major | io | Stephen Jung (Stripe) | Stephen Jung (Stripe) | +| [HADOOP-17358](https://issues.apache.org/jira/browse/HADOOP-17358) | Improve excessive reloading of Configurations | Major | conf | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15538](https://issues.apache.org/jira/browse/HDFS-15538) | Fix the documentation for dfs.namenode.replication.max-streams in hdfs-default.xml | Major | . | Xieming Li | Xieming Li | +| [HADOOP-17362](https://issues.apache.org/jira/browse/HADOOP-17362) | Doing hadoop ls on Har file triggers too many RPC calls | Major | fs | Ahmed Hussein | Ahmed Hussein | +| [YARN-10485](https://issues.apache.org/jira/browse/YARN-10485) | TimelineConnector swallows InterruptedException | Major | . 
| Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17360](https://issues.apache.org/jira/browse/HADOOP-17360) | Log the remote address for authentication success | Minor | ipc | Ahmed Hussein | Ahmed Hussein | +| [YARN-10396](https://issues.apache.org/jira/browse/YARN-10396) | Max applications calculation per queue disregards queue level settings in absolute mode | Major | capacity scheduler | Benjamin Teke | Benjamin Teke | +| [HADOOP-17346](https://issues.apache.org/jira/browse/HADOOP-17346) | Fair call queue is defeated by abusive service principals | Major | common, ipc | Ahmed Hussein | Ahmed Hussein | +| [YARN-10470](https://issues.apache.org/jira/browse/YARN-10470) | When building new web ui with root user, the bower install should support it. | Major | build, yarn-ui-v2 | zhuqi | zhuqi | +| [HADOOP-16080](https://issues.apache.org/jira/browse/HADOOP-16080) | hadoop-aws does not work with hadoop-client-api | Major | fs/s3 | Keith Turner | Chao Sun | +| [HDFS-15707](https://issues.apache.org/jira/browse/HDFS-15707) | NNTop counts don't add up as expected | Major | hdfs, metrics, namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15709](https://issues.apache.org/jira/browse/HDFS-15709) | EC: Socket file descriptor leak in StripedBlockChecksumReconstructor | Major | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [HDFS-15240](https://issues.apache.org/jira/browse/HDFS-15240) | Erasure Coding: dirty buffer causes reconstruction block error | Blocker | datanode, erasure-coding | HuangTao | HuangTao | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-10072](https://issues.apache.org/jira/browse/YARN-10072) | TestCSAllocateCustomResource failures | Major | yarn | Jim Brennan | Jim Brennan | +| [YARN-10161](https://issues.apache.org/jira/browse/YARN-10161) | TestRouterWebServicesREST is corrupting STDOUT | Minor | yarn | Jim Brennan | Jim Brennan | +| 
[HADOOP-14206](https://issues.apache.org/jira/browse/HADOOP-14206) | TestSFTPFileSystem#testFileExists failure: Invalid encoding for signature | Major | fs, test | John Zhuge | Jim Brennan | +| [MAPREDUCE-7288](https://issues.apache.org/jira/browse/MAPREDUCE-7288) | Fix TestLongLong#testRightShift | Minor | . | Wanqiang Ji | Wanqiang Ji | +| [HDFS-15514](https://issues.apache.org/jira/browse/HDFS-15514) | Remove useless dfs.webhdfs.enabled | Minor | test | Hui Fei | Hui Fei | +| [HADOOP-17205](https://issues.apache.org/jira/browse/HADOOP-17205) | Move personality file from Yetus to Hadoop repository | Major | test, yetus | Chao Sun | Chao Sun | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15775](https://issues.apache.org/jira/browse/HADOOP-15775) | [JDK9] Add missing javax.activation-api dependency | Critical | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-14096](https://issues.apache.org/jira/browse/HDFS-14096) | [SPS] : Add Support for Storage Policy Satisfier in ViewFs | Major | federation | Ayush Saxena | Ayush Saxena | +| [HADOOP-15787](https://issues.apache.org/jira/browse/HADOOP-15787) | [JDK11] TestIPC.testRTEDuringConnectionSetup fails | Major | . | Akira Ajisaka | Zsolt Venczel | +| [HDFS-14262](https://issues.apache.org/jira/browse/HDFS-14262) | [SBN read] Unclear Log.WARN message in GlobalStateIdContext | Major | hdfs | Shweta | Shweta | +| [YARN-7243](https://issues.apache.org/jira/browse/YARN-7243) | Moving logging APIs over to slf4j in hadoop-yarn-server-resourcemanager | Major | . 
| Yeliang Cang | Prabhu Joseph | +| [HDFS-13404](https://issues.apache.org/jira/browse/HDFS-13404) | RBF: TestRouterWebHDFSContractAppend.testRenameFileBeingAppended fails | Major | test | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-16117](https://issues.apache.org/jira/browse/HADOOP-16117) | Update AWS SDK to 1.11.563 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-14590](https://issues.apache.org/jira/browse/HDFS-14590) | [SBN Read] Add the document link to the top page | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-9791](https://issues.apache.org/jira/browse/YARN-9791) | Queue Mutation API does not allow to remove a config | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [HDFS-14822](https://issues.apache.org/jira/browse/HDFS-14822) | [SBN read] Revisit GlobalStateIdContext locking when getting server state id | Major | hdfs | Chen Liang | Chen Liang | +| [HDFS-14785](https://issues.apache.org/jira/browse/HDFS-14785) | [SBN read] Change client logging to be less aggressive | Major | hdfs | Chen Liang | Chen Liang | +| [YARN-9864](https://issues.apache.org/jira/browse/YARN-9864) | Format CS Configuration present in Configuration Store | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [YARN-9801](https://issues.apache.org/jira/browse/YARN-9801) | SchedConfCli does not work with https mode | Major | . | Prabhu Joseph | Prabhu Joseph | +| [HDFS-14858](https://issues.apache.org/jira/browse/HDFS-14858) | [SBN read] Allow configurably enable/disable AlignmentContext on NameNode | Major | hdfs | Chen Liang | Chen Liang | +| [HDFS-12979](https://issues.apache.org/jira/browse/HDFS-12979) | StandbyNode should upload FsImage to ObserverNode after checkpointing. | Major | hdfs | Konstantin Shvachko | Chen Liang | +| [YARN-9873](https://issues.apache.org/jira/browse/YARN-9873) | Mutation API Config Change need to update Version Number | Major | . 
| Prabhu Joseph | Prabhu Joseph | +| [HDFS-14162](https://issues.apache.org/jira/browse/HDFS-14162) | Balancer should work with ObserverNode | Major | . | Konstantin Shvachko | Erik Krogen | +| [YARN-9773](https://issues.apache.org/jira/browse/YARN-9773) | Add QueueMetrics for Custom Resources | Major | . | Manikandan R | Manikandan R | +| [HADOOP-16598](https://issues.apache.org/jira/browse/HADOOP-16598) | Backport "HADOOP-16558 [COMMON+HDFS] use protobuf-maven-plugin to generate protobuf classes" to all active branches | Major | common | Duo Zhang | Duo Zhang | +| [YARN-9950](https://issues.apache.org/jira/browse/YARN-9950) | Unset Ordering Policy of Leaf/Parent queue converted from Parent/Leaf queue respectively | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [YARN-9900](https://issues.apache.org/jira/browse/YARN-9900) | Revert to previous state when Invalid Config is applied and Refresh Support in SchedulerConfig Format | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-16610](https://issues.apache.org/jira/browse/HADOOP-16610) | Upgrade to yetus 0.11.1 and use emoji vote on github pre commit | Major | build | Duo Zhang | Duo Zhang | +| [YARN-9909](https://issues.apache.org/jira/browse/YARN-9909) | Offline format of YarnConfigurationStore | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [YARN-9836](https://issues.apache.org/jira/browse/YARN-9836) | General usability improvements in showSimulationTrace.html | Minor | scheduler-load-simulator | Adam Antal | Adam Antal | +| [HADOOP-16612](https://issues.apache.org/jira/browse/HADOOP-16612) | Track Azure Blob File System client-perceived latency | Major | fs/azure, hdfs-client | Jeetesh Mangwani | Jeetesh Mangwani | +| [HADOOP-16758](https://issues.apache.org/jira/browse/HADOOP-16758) | Refine testing.md to tell user better how to use auth-keys.xml | Minor | fs/s3 | Mingliang Liu | Mingliang Liu | +| 
[HADOOP-16609](https://issues.apache.org/jira/browse/HADOOP-16609) | Add Jenkinsfile for all active branches | Major | build | Duo Zhang | Akira Ajisaka | +| [HADOOP-16785](https://issues.apache.org/jira/browse/HADOOP-16785) | Improve wasb and abfs resilience on double close() calls | Major | fs/azure | Steve Loughran | Steve Loughran | +| [YARN-10026](https://issues.apache.org/jira/browse/YARN-10026) | Pull out common code pieces from ATS v1.5 and v2 | Major | ATSv2, yarn | Adam Antal | Adam Antal | +| [YARN-10028](https://issues.apache.org/jira/browse/YARN-10028) | Integrate the new abstract log servlet to the JobHistory server | Major | yarn | Adam Antal | Adam Antal | +| [YARN-10083](https://issues.apache.org/jira/browse/YARN-10083) | Provide utility to ask whether an application is in final status | Minor | . | Adam Antal | Adam Antal | +| [YARN-10109](https://issues.apache.org/jira/browse/YARN-10109) | Allow stop and convert from leaf to parent queue in a single Mutation API call | Major | capacity scheduler | Prabhu Joseph | Prabhu Joseph | +| [YARN-10101](https://issues.apache.org/jira/browse/YARN-10101) | Support listing of aggregated logs for containers belonging to an application attempt | Major | log-aggregation, yarn | Adam Antal | Adam Antal | +| [YARN-10022](https://issues.apache.org/jira/browse/YARN-10022) | Create RM Rest API to validate a CapacityScheduler Configuration | Major | . | Kinga Marton | Kinga Marton | +| [HDFS-15173](https://issues.apache.org/jira/browse/HDFS-15173) | RBF: Delete repeated configuration 'dfs.federation.router.metrics.enable' | Minor | documentation, rbf | panlijie | panlijie | +| [YARN-10139](https://issues.apache.org/jira/browse/YARN-10139) | ValidateAndGetSchedulerConfiguration API fails when cluster max allocation \> default 8GB | Major | . | Prabhu Joseph | Prabhu Joseph | +| [HDFS-14731](https://issues.apache.org/jira/browse/HDFS-14731) | [FGL] Remove redundant locking on NameNode. 
| Major | namenode | Konstantin Shvachko | Konstantin Shvachko | +| [YARN-10194](https://issues.apache.org/jira/browse/YARN-10194) | YARN RMWebServices /scheduler-conf/validate leaks ZK Connections | Blocker | capacityscheduler | Akhil PB | Prabhu Joseph | +| [HDFS-14353](https://issues.apache.org/jira/browse/HDFS-14353) | Erasure Coding: metrics xmitsInProgress become to negative. | Major | datanode, erasure-coding | Baolong Mao | Baolong Mao | +| [HDFS-15305](https://issues.apache.org/jira/browse/HDFS-15305) | Extend ViewFS and provide ViewFSOverloadScheme implementation with scheme configurable. | Major | fs, hadoop-client, hdfs-client, viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15306](https://issues.apache.org/jira/browse/HDFS-15306) | Make mount-table to read from central place ( Let's say from HDFS) | Major | configuration, hadoop-client | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-16756](https://issues.apache.org/jira/browse/HADOOP-16756) | distcp -update to S3A; abfs, etc always overwrites due to block size mismatch | Major | fs/s3, tools/distcp | Daisuke Kobayashi | Steve Loughran | +| [HDFS-15322](https://issues.apache.org/jira/browse/HDFS-15322) | Make NflyFS to work when ViewFsOverloadScheme's scheme and target uris schemes are same. 
| Major | fs, nflyFs, viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15321](https://issues.apache.org/jira/browse/HDFS-15321) | Make DFSAdmin tool to work with ViewFSOverloadScheme | Major | dfsadmin, fs, viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15330](https://issues.apache.org/jira/browse/HDFS-15330) | Document the ViewFSOverloadScheme details in ViewFS guide | Major | viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15389](https://issues.apache.org/jira/browse/HDFS-15389) | DFSAdmin should close filesystem and dfsadmin -setBalancerBandwidth should work with ViewFSOverloadScheme | Major | dfsadmin, viewfsOverloadScheme | Ayush Saxena | Ayush Saxena | +| [HDFS-15394](https://issues.apache.org/jira/browse/HDFS-15394) | Add all available fs.viewfs.overload.scheme.target.\.impl classes in core-default.xml bydefault. | Major | configuration, viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15387](https://issues.apache.org/jira/browse/HDFS-15387) | FSUsage$DF should consider ViewFSOverloadScheme in processPath | Minor | viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15418](https://issues.apache.org/jira/browse/HDFS-15418) | ViewFileSystemOverloadScheme should represent mount links as non symlinks | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15427](https://issues.apache.org/jira/browse/HDFS-15427) | Merged ListStatus with Fallback target filesystem and InternalDirViewFS. | Major | viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15429](https://issues.apache.org/jira/browse/HDFS-15429) | mkdirs should work when parent dir is internalDir and fallback configured. | Major | . 
| Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15436](https://issues.apache.org/jira/browse/HDFS-15436) | Default mount table name used by ViewFileSystem should be configurable | Major | viewfs, viewfsOverloadScheme | Virajith Jalaparti | Virajith Jalaparti | +| [HDFS-15450](https://issues.apache.org/jira/browse/HDFS-15450) | Fix NN trash emptier to work if ViewFSOveroadScheme enabled | Major | namenode, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15462](https://issues.apache.org/jira/browse/HDFS-15462) | Add fs.viewfs.overload.scheme.target.ofs.impl to core-default.xml | Major | configuration, viewfs, viewfsOverloadScheme | Siyao Meng | Siyao Meng | +| [HDFS-15464](https://issues.apache.org/jira/browse/HDFS-15464) | ViewFsOverloadScheme should work when -fs option pointing to remote cluster without mount links | Major | viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17101](https://issues.apache.org/jira/browse/HADOOP-17101) | Replace Guava Function with Java8+ Function | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17099](https://issues.apache.org/jira/browse/HADOOP-17099) | Replace Guava Predicate with Java8+ Predicate | Minor | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15478](https://issues.apache.org/jira/browse/HDFS-15478) | When Empty mount points, we are assigning fallback link to self. But it should not use full URI for target fs. | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17100](https://issues.apache.org/jira/browse/HADOOP-17100) | Replace Guava Supplier with Java8+ Supplier in Hadoop | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15515](https://issues.apache.org/jira/browse/HDFS-15515) | mkdirs on fallback should throw IOE out instead of suppressing and returning false | Major | . 
| Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17199](https://issues.apache.org/jira/browse/HADOOP-17199) | Backport HADOOP-13230 list/getFileStatus changes for preserved directory markers | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-8631](https://issues.apache.org/jira/browse/HDFS-8631) | WebHDFS : Support setQuota | Major | . | nijel | Chao Sun | +| [YARN-10332](https://issues.apache.org/jira/browse/YARN-10332) | RESOURCE\_UPDATE event was repeatedly registered in DECOMMISSIONING state | Minor | resourcemanager | yehuanhuan | yehuanhuan | +| [HDFS-15459](https://issues.apache.org/jira/browse/HDFS-15459) | TestBlockTokenWithDFSStriped fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15461](https://issues.apache.org/jira/browse/HDFS-15461) | TestDFSClientRetries#testGetFileChecksum fails intermittently | Major | dfsclient, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-9776](https://issues.apache.org/jira/browse/HDFS-9776) | TestHAAppend#testMultipleAppendsDuringCatchupTailing is flaky | Major | . | Vinayakumar B | Ahmed Hussein | +| [HADOOP-17330](https://issues.apache.org/jira/browse/HADOOP-17330) | Backport HADOOP-16005-"NativeAzureFileSystem does not support setXAttr" to branch-3.2 | Major | fs/azure | Sally Zuo | Sally Zuo | +| [HDFS-15643](https://issues.apache.org/jira/browse/HDFS-15643) | EC: Fix checksum computation in case of native encoders | Blocker | . 
| Ahmed Hussein | Ayush Saxena | +| [HADOOP-17343](https://issues.apache.org/jira/browse/HADOOP-17343) | Upgrade aws-java-sdk to 1.11.901 | Minor | build, fs/s3 | Dongjoon Hyun | Steve Loughran | +| [HADOOP-17325](https://issues.apache.org/jira/browse/HADOOP-17325) | WASB: Test failures | Major | fs/azure, test | Sneha Vijayarajan | Steve Loughran | +| [HDFS-15708](https://issues.apache.org/jira/browse/HDFS-15708) | TestURLConnectionFactory fails by NoClassDefFoundError in branch-3.3 and branch-3.2 | Blocker | test | Akira Ajisaka | Chao Sun | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-14394](https://issues.apache.org/jira/browse/HDFS-14394) | Add -std=c99 / -std=gnu99 to libhdfs compile flags | Major | hdfs-client, libhdfs, native | Sahil Takiar | Sahil Takiar | +| [HADOOP-16365](https://issues.apache.org/jira/browse/HADOOP-16365) | Upgrade jackson-databind to 2.9.9 | Major | build | Shweta | Shweta | +| [HADOOP-16491](https://issues.apache.org/jira/browse/HADOOP-16491) | Upgrade jetty version to 9.3.27 | Major | . | Hrishikesh Gadre | Hrishikesh Gadre | +| [HADOOP-16542](https://issues.apache.org/jira/browse/HADOOP-16542) | Update commons-beanutils version to 1.9.4 | Major | . | Wei-Chiu Chuang | Kevin Su | +| [YARN-9730](https://issues.apache.org/jira/browse/YARN-9730) | Support forcing configured partitions to be exclusive based on app node label | Major | . | Jonathan Hung | Jonathan Hung | +| [HADOOP-16675](https://issues.apache.org/jira/browse/HADOOP-16675) | Upgrade jackson-databind to 2.9.10.1 | Blocker | security | Wei-Chiu Chuang | Lisheng Sun | +| [HDFS-14959](https://issues.apache.org/jira/browse/HDFS-14959) | [SBNN read] access time should be turned off | Major | documentation | Wei-Chiu Chuang | Chao Sun | +| [HADOOP-16784](https://issues.apache.org/jira/browse/HADOOP-16784) | Update the year to 2020 | Major | . 
| Ayush Saxena | Ayush Saxena | +| [HADOOP-16803](https://issues.apache.org/jira/browse/HADOOP-16803) | Upgrade jackson-databind to 2.9.10.2 | Blocker | security | Akira Ajisaka | Masatake Iwasaki | +| [HADOOP-16871](https://issues.apache.org/jira/browse/HADOOP-16871) | Upgrade Netty version to 4.1.45.Final to handle CVE-2019-20444,CVE-2019-16869 | Major | . | Aray Chenchu Sukesh | Aray Chenchu Sukesh | +| [HADOOP-16647](https://issues.apache.org/jira/browse/HADOOP-16647) | Support OpenSSL 1.1.1 LTS | Critical | security | Wei-Chiu Chuang | Rakesh Radhakrishnan | +| [HADOOP-16982](https://issues.apache.org/jira/browse/HADOOP-16982) | Update Netty to 4.1.48.Final | Blocker | . | Wei-Chiu Chuang | Lisheng Sun | +| [HADOOP-16990](https://issues.apache.org/jira/browse/HADOOP-16990) | Update Mockserver | Major | . | Wei-Chiu Chuang | Attila Doroszlai | +| [YARN-10540](https://issues.apache.org/jira/browse/YARN-10540) | Node page is broken in YARN UI1 and UI2 including RMWebService api for nodes | Critical | webapp | Sunil G | Jim Brennan | +| [HADOOP-17445](https://issues.apache.org/jira/browse/HADOOP-17445) | Update the year to 2021 | Major | . | Xiaoqiao He | Xiaoqiao He | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.2/RELEASENOTES.3.2.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.2/RELEASENOTES.3.2.2.md new file mode 100644 index 0000000000000..c4f4aa6c03b3d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.2/RELEASENOTES.3.2.2.md @@ -0,0 +1,86 @@ + + +# Apache Hadoop 3.2.2 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-16460](https://issues.apache.org/jira/browse/HADOOP-16460) | *Major* | **ABFS: fix for Sever Name Indication (SNI)** + +ABFS: Bug fix to support Server Name Indication (SNI). 
+ + +--- + +* [HDFS-14890](https://issues.apache.org/jira/browse/HDFS-14890) | *Blocker* | **Setting permissions on name directory fails on non posix compliant filesystems** + +- Fixed namenode/journal startup on Windows. + + +--- + +* [HDFS-14905](https://issues.apache.org/jira/browse/HDFS-14905) | *Major* | **Backport HDFS persistent memory read cache support to branch-3.2** + +Non-volatile storage class memory (SCM, also known as persistent memory) is supported in HDFS cache. To enable SCM cache, the user just needs to configure an SCM volume for the property “dfs.datanode.cache.pmem.dirs” in hdfs-site.xml. All HDFS cache directives remain unchanged. There are two implementations of the HDFS SCM cache: one is a pure Java implementation and the other is a native PMDK-based implementation. The latter can give users better performance for cache writes and cache reads. If the PMDK native libraries can be loaded, the PMDK-based implementation is used; otherwise HDFS falls back to the pure Java implementation. To enable the PMDK-based implementation, the user should install the PMDK library by referring to the official site http://pmem.io/. Then, build Hadoop with PMDK support by referring to the "PMDK library build options" section in \`BUILDING.txt\` in the source code. If multiple SCM volumes are configured, a round-robin policy is used to select an available volume for caching a block. Consistent with DRAM cache, SCM cache also has no cache eviction mechanism. When DataNode receives a data read request from a client, if the corresponding block is cached in SCM, DataNode will instantiate an InputStream with the block location path on SCM (pure Java implementation) or cache address on SCM (PMDK-based implementation). Once the InputStream is created, DataNode will send the cached data to the client. Please refer to the "Centralized Cache Management" guide for more details. 
+ + +--- + +* [HDFS-12943](https://issues.apache.org/jira/browse/HDFS-12943) | *Major* | **Consistent Reads from Standby Node** + +Observer is a new type of NameNode in addition to Active and Standby Nodes in HA settings. An Observer Node maintains a replica of the namespace, the same as a Standby Node. It additionally allows execution of client read requests. + +To ensure read-after-write consistency within a single client, a state ID is introduced in RPC headers. The Observer responds to the client request only after its own state has caught up with the client’s state ID, which it previously received from the Active NameNode. + +Clients can explicitly invoke a new client protocol call msync(), which ensures that subsequent reads by this client from an Observer are consistent. + +A new client-side ObserverReadProxyProvider is introduced to provide automatic switching between Active and Observer NameNodes for submitting write and read requests, respectively. + + +--- + +* [HADOOP-16771](https://issues.apache.org/jira/browse/HADOOP-16771) | *Major* | **Update checkstyle to 8.26 and maven-checkstyle-plugin to 3.1.0** + +Updated checkstyle to 8.26 and updated maven-checkstyle-plugin to 3.1.0. + + +--- + +* [HDFS-15281](https://issues.apache.org/jira/browse/HDFS-15281) | *Major* | **ZKFC ignores dfs.namenode.rpc-bind-host and uses dfs.namenode.rpc-address to bind to host address** + +ZKFC binds host address to "dfs.namenode.servicerpc-bind-host", if configured. Otherwise, it binds to "dfs.namenode.rpc-bind-host". If neither of those is configured, ZKFC binds itself to NameNode RPC server address (effectively "dfs.namenode.rpc-address"). + + +--- + +* [HADOOP-17024](https://issues.apache.org/jira/browse/HADOOP-17024) | *Major* | **ListStatus on ViewFS root (ls "/") should list the linkFallBack root (configured target root).** + +ViewFS#listStatus on root("/") considers listing from fallbackLink if available. 
If the same directory name is present in configured mount path as well as in fallback link, then only the configured mount path will be listed in the returned result. + + +--- + +* [YARN-9809](https://issues.apache.org/jira/browse/YARN-9809) | *Major* | **NMs should supply a health status when registering with RM** + +Improved node registration with node health status. + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.3/CHANGELOG.3.2.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.3/CHANGELOG.3.2.3.md new file mode 100644 index 0000000000000..34928bf54e50d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.3/CHANGELOG.3.2.3.md @@ -0,0 +1,386 @@ + + +# Apache Hadoop Changelog + +## Release 3.2.3 - 2022-03-02 + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15691](https://issues.apache.org/jira/browse/HADOOP-15691) | Add PathCapabilities to FS and FC to complement StreamCapabilities | Major | . | Steve Loughran | Steve Loughran | +| [HDFS-15711](https://issues.apache.org/jira/browse/HDFS-15711) | Add Metrics to HttpFS Server | Major | httpfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15759](https://issues.apache.org/jira/browse/HDFS-15759) | EC: Verify EC reconstruction correctness on DataNode | Major | datanode, ec, erasure-coding | Toshihiko Uchida | Toshihiko Uchida | +| [HDFS-16337](https://issues.apache.org/jira/browse/HDFS-16337) | Show start time of Datanode on Web | Minor | . 
| tomscut | tomscut | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-16052](https://issues.apache.org/jira/browse/HADOOP-16052) | Remove Subversion and Forrest from Dockerfile | Minor | build | Akira Ajisaka | Xieming Li | +| [YARN-9783](https://issues.apache.org/jira/browse/YARN-9783) | Remove low-level zookeeper test to be able to build Hadoop against zookeeper 3.5.5 | Major | test | Mate Szalay-Beko | Mate Szalay-Beko | +| [HADOOP-16717](https://issues.apache.org/jira/browse/HADOOP-16717) | Remove GenericsUtil isLog4jLogger dependency on Log4jLoggerAdapter | Major | . | David Mollitor | Xieming Li | +| [YARN-10036](https://issues.apache.org/jira/browse/YARN-10036) | Install yarnpkg and upgrade nodejs in Dockerfile | Major | buid, yarn-ui-v2 | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-16811](https://issues.apache.org/jira/browse/HADOOP-16811) | Use JUnit TemporaryFolder Rule in TestFileUtils | Minor | common, test | David Mollitor | David Mollitor | +| [HDFS-15075](https://issues.apache.org/jira/browse/HDFS-15075) | Remove process command timing from BPServiceActor | Major | . | Íñigo Goiri | Xiaoqiao He | +| [HADOOP-16054](https://issues.apache.org/jira/browse/HADOOP-16054) | Update Dockerfile to use Bionic | Major | build, test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15574](https://issues.apache.org/jira/browse/HDFS-15574) | Remove unnecessary sort of block list in DirectoryScanner | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15583](https://issues.apache.org/jira/browse/HDFS-15583) | Backport DirectoryScanner improvements HDFS-14476, HDFS-14751 and HDFS-15048 to branch 3.2 and 3.1 | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15567](https://issues.apache.org/jira/browse/HDFS-15567) | [SBN Read] HDFS should expose msync() API to allow downstream applications call it explicitly. 
| Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-15633](https://issues.apache.org/jira/browse/HDFS-15633) | Avoid redundant RPC calls for getDiskStatus | Major | dfsclient | Ayush Saxena | Ayush Saxena | +| [YARN-10450](https://issues.apache.org/jira/browse/YARN-10450) | Add cpu and memory utilization per node and cluster-wide metrics | Minor | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15652](https://issues.apache.org/jira/browse/HDFS-15652) | Make block size from NNThroughputBenchmark configurable | Minor | benchmarks | Hui Fei | Hui Fei | +| [YARN-10475](https://issues.apache.org/jira/browse/YARN-10475) | Scale RM-NM heartbeat interval based on node utilization | Minor | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15665](https://issues.apache.org/jira/browse/HDFS-15665) | Balancer logging improvement | Major | balancer & mover | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-17342](https://issues.apache.org/jira/browse/HADOOP-17342) | Creating a token identifier should not do kerberos name resolution | Major | common | Jim Brennan | Jim Brennan | +| [YARN-10479](https://issues.apache.org/jira/browse/YARN-10479) | RMProxy should retry on SocketTimeout Exceptions | Major | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15623](https://issues.apache.org/jira/browse/HDFS-15623) | Respect configured values of rpc.engine | Major | hdfs | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-14395](https://issues.apache.org/jira/browse/HDFS-14395) | Remove WARN Logging From Interrupts in DataStreamer | Minor | hdfs-client | David Mollitor | David Mollitor | +| [HADOOP-17367](https://issues.apache.org/jira/browse/HADOOP-17367) | Add InetAddress api to ProxyUsers.authorize | Major | performance, security | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15694](https://issues.apache.org/jira/browse/HDFS-15694) | Avoid calling UpdateHeartBeatState inside DataNodeDescriptor | Major | . 
| Ahmed Hussein | Ahmed Hussein | +| [HDFS-15703](https://issues.apache.org/jira/browse/HDFS-15703) | Don't generate edits for set operations that are no-op | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17392](https://issues.apache.org/jira/browse/HADOOP-17392) | Remote exception messages should not include the exception class | Major | ipc | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15706](https://issues.apache.org/jira/browse/HDFS-15706) | HttpFS: Log more information on request failures | Major | httpfs | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17389](https://issues.apache.org/jira/browse/HADOOP-17389) | KMS should log full UGI principal | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15720](https://issues.apache.org/jira/browse/HDFS-15720) | namenode audit async logger should add some log4j config | Minor | hdfs | Max Xie | | +| [HDFS-15704](https://issues.apache.org/jira/browse/HDFS-15704) | Mitigate lease monitor's rapid infinite loop | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15751](https://issues.apache.org/jira/browse/HDFS-15751) | Add documentation for msync() API to filesystem.md | Major | documentation | Konstantin Shvachko | Konstantin Shvachko | +| [YARN-10538](https://issues.apache.org/jira/browse/YARN-10538) | Add recommissioning nodes to the list of updated nodes returned to the AM | Major | . | Srinivas S T | Srinivas S T | +| [YARN-4589](https://issues.apache.org/jira/browse/YARN-4589) | Diagnostics for localization timeouts is lacking | Major | . 
| Chang Li | Chang Li | +| [YARN-10562](https://issues.apache.org/jira/browse/YARN-10562) | Follow up changes for YARN-9833 | Major | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15783](https://issues.apache.org/jira/browse/HDFS-15783) | Speed up BlockPlacementPolicyRackFaultTolerant#verifyBlockPlacement | Major | block placement | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17478](https://issues.apache.org/jira/browse/HADOOP-17478) | Improve the description of hadoop.http.authentication.signature.secret.file | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15789](https://issues.apache.org/jira/browse/HDFS-15789) | Lease renewal does not require namesystem lock | Major | hdfs | Jim Brennan | Jim Brennan | +| [HADOOP-17501](https://issues.apache.org/jira/browse/HADOOP-17501) | Fix logging typo in ShutdownHookManager | Major | common | Konstantin Shvachko | Fengnan Li | +| [HADOOP-17354](https://issues.apache.org/jira/browse/HADOOP-17354) | Move Jenkinsfile outside of the root directory | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15799](https://issues.apache.org/jira/browse/HDFS-15799) | Make DisallowedDatanodeException terse | Minor | hdfs | Richard | Richard | +| [HDFS-15813](https://issues.apache.org/jira/browse/HDFS-15813) | DataStreamer: keep sending heartbeat packets while streaming | Major | hdfs | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7319](https://issues.apache.org/jira/browse/MAPREDUCE-7319) | Log list of mappers at trace level in ShuffleHandler audit log | Minor | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15821](https://issues.apache.org/jira/browse/HDFS-15821) | Add metrics for in-service datanodes | Minor | . | Zehao Chen | Zehao Chen | +| [YARN-10626](https://issues.apache.org/jira/browse/YARN-10626) | Log resource allocation in NM log at container start time | Major | . 
| Eric Badger | Eric Badger | +| [HDFS-15815](https://issues.apache.org/jira/browse/HDFS-15815) | if required storageType are unavailable, log the failed reason during choosing Datanode | Minor | block placement | Yang Yun | Yang Yun | +| [HDFS-15826](https://issues.apache.org/jira/browse/HDFS-15826) | Solve the problem of incorrect progress of delegation tokens when loading FsImage | Major | . | JiangHua Zhu | JiangHua Zhu | +| [HDFS-15734](https://issues.apache.org/jira/browse/HDFS-15734) | [READ] DirectoryScanner#scan need not check StorageType.PROVIDED | Minor | datanode | Yuxuan Wang | Yuxuan Wang | +| [HADOOP-17538](https://issues.apache.org/jira/browse/HADOOP-17538) | Add kms-default.xml and httpfs-default.xml to site index | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10613](https://issues.apache.org/jira/browse/YARN-10613) | Config to allow Intra- and Inter-queue preemption to enable/disable conservativeDRF | Minor | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne | +| [YARN-10653](https://issues.apache.org/jira/browse/YARN-10653) | Fixed the findbugs issues introduced by YARN-10647. | Major | . | Qi Zhu | Qi Zhu | +| [MAPREDUCE-7324](https://issues.apache.org/jira/browse/MAPREDUCE-7324) | ClientHSSecurityInfo class is in wrong META-INF file | Major | . | Eric Badger | Eric Badger | +| [HADOOP-17546](https://issues.apache.org/jira/browse/HADOOP-17546) | Update Description of hadoop-http-auth-signature-secret in HttpAuthentication.md | Minor | . 
| Ravuri Sushma sree | Ravuri Sushma sree | +| [YARN-10664](https://issues.apache.org/jira/browse/YARN-10664) | Allow parameter expansion in NM\_ADMIN\_USER\_ENV | Major | yarn | Jim Brennan | Jim Brennan | +| [HADOOP-17570](https://issues.apache.org/jira/browse/HADOOP-17570) | Apply YETUS-1102 to re-enable GitHub comments | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17594](https://issues.apache.org/jira/browse/HADOOP-17594) | DistCp: Expose the JobId for applications executing through run method | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15911](https://issues.apache.org/jira/browse/HDFS-15911) | Provide blocks moved count in Balancer iteration result | Major | balancer & mover | Viraj Jasani | Viraj Jasani | +| [HDFS-15919](https://issues.apache.org/jira/browse/HDFS-15919) | BlockPoolManager should log stack trace if unable to get Namenode addresses | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-16870](https://issues.apache.org/jira/browse/HADOOP-16870) | Use spotbugs-maven-plugin instead of findbugs-maven-plugin | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15932](https://issues.apache.org/jira/browse/HDFS-15932) | Improve the balancer error message when process exits abnormally. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-15931](https://issues.apache.org/jira/browse/HDFS-15931) | Fix non-static inner classes for better memory management | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-15942](https://issues.apache.org/jira/browse/HDFS-15942) | Increase Quota initialization threads | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15937](https://issues.apache.org/jira/browse/HDFS-15937) | Reduce memory used during datanode layout upgrade | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17569](https://issues.apache.org/jira/browse/HADOOP-17569) | Building native code fails on Fedora 33 | Major | build, common | Kengo Seki | Masatake Iwasaki | +| [HADOOP-17633](https://issues.apache.org/jira/browse/HADOOP-17633) | Bump json-smart to 2.4.2 and nimbus-jose-jwt to 9.8 due to CVEs | Major | auth, build | helen huang | Viraj Jasani | +| [HADOOP-16822](https://issues.apache.org/jira/browse/HADOOP-16822) | Provide source artifacts for hadoop-client-api | Major | . | Karel Kolman | Karel Kolman | +| [HADOOP-17680](https://issues.apache.org/jira/browse/HADOOP-17680) | Allow ProtobufRpcEngine to be extensible | Major | common | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [YARN-10123](https://issues.apache.org/jira/browse/YARN-10123) | Error message around yarn app -stop/start can be improved to highlight that an implementation at framework level is needed for the stop/start functionality to work | Minor | client, documentation | Siddharth Ahuja | Siddharth Ahuja | +| [HADOOP-17756](https://issues.apache.org/jira/browse/HADOOP-17756) | Increase precommit job timeout from 20 hours to 24 hours. | Major | build | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-16073](https://issues.apache.org/jira/browse/HDFS-16073) | Remove redundant RPC requests for getFileLinkInfo in ClientNamenodeProtocolTranslatorPB | Minor | . | lei w | lei w | +| [HDFS-16074](https://issues.apache.org/jira/browse/HDFS-16074) | Remove an expensive debug string concatenation | Major | . 
| Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15150](https://issues.apache.org/jira/browse/HDFS-15150) | Introduce read write lock to Datanode | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [YARN-10834](https://issues.apache.org/jira/browse/YARN-10834) | Intra-queue preemption: apps that don't use defined custom resource won't be preempted. | Major | . | Eric Payne | Eric Payne | +| [HADOOP-17749](https://issues.apache.org/jira/browse/HADOOP-17749) | Remove lock contention in SelectorPool of SocketIOWithTimeout | Major | common | Xuesen Liang | Xuesen Liang | +| [HADOOP-17775](https://issues.apache.org/jira/browse/HADOOP-17775) | Remove JavaScript package from Docker environment | Major | build | Masatake Iwasaki | Masatake Iwasaki | +| [HADOOP-17794](https://issues.apache.org/jira/browse/HADOOP-17794) | Add a sample configuration to use ZKDelegationTokenSecretManager in Hadoop KMS | Major | documentation, kms, security | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-12665](https://issues.apache.org/jira/browse/HADOOP-12665) | Document hadoop.security.token.service.use\_ip | Major | documentation | Arpit Agarwal | Akira Ajisaka | +| [YARN-10456](https://issues.apache.org/jira/browse/YARN-10456) | RM PartitionQueueMetrics records are named QueueMetrics in Simon metrics registry | Major | resourcemanager | Eric Payne | Eric Payne | +| [HDFS-15650](https://issues.apache.org/jira/browse/HDFS-15650) | Make the socket timeout for computing checksum of striped blocks configurable | Minor | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [YARN-10858](https://issues.apache.org/jira/browse/YARN-10858) | [UI2] YARN-10826 breaks Queue view | Major | yarn-ui-v2 | Andras Gyori | Masatake Iwasaki | +| [YARN-10860](https://issues.apache.org/jira/browse/YARN-10860) | Make max container per heartbeat configs refreshable | Major | . 
| Eric Badger | Eric Badger | +| [HADOOP-17813](https://issues.apache.org/jira/browse/HADOOP-17813) | Checkstyle - Allow line length: 100 | Major | . | Akira Ajisaka | Viraj Jasani | +| [HADOOP-17819](https://issues.apache.org/jira/browse/HADOOP-17819) | Add extensions to ProtobufRpcEngine RequestHeaderProto | Major | common | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16153](https://issues.apache.org/jira/browse/HDFS-16153) | Avoid evaluation of LOG.debug statement in QuorumJournalManager | Trivial | . | wangzhaohui | wangzhaohui | +| [HDFS-16154](https://issues.apache.org/jira/browse/HDFS-16154) | TestMiniJournalCluster failing intermittently because of not resetting UserGroupInformation completely | Minor | . | wangzhaohui | wangzhaohui | +| [HADOOP-17849](https://issues.apache.org/jira/browse/HADOOP-17849) | Exclude spotbugs-annotations from transitive dependencies on branch-3.2 | Major | . | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-16173](https://issues.apache.org/jira/browse/HDFS-16173) | Improve CopyCommands#Put#executor queue configurability | Major | fs | JiangHua Zhu | JiangHua Zhu | +| [HDFS-15160](https://issues.apache.org/jira/browse/HDFS-15160) | ReplicaMap, Disk Balancer, Directory Scanner and various FsDatasetImpl methods should use datanode readlock | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-14997](https://issues.apache.org/jira/browse/HDFS-14997) | BPServiceActor processes commands from NameNode asynchronously | Major | datanode | Xiaoqiao He | Xiaoqiao He | +| [HDFS-16241](https://issues.apache.org/jira/browse/HDFS-16241) | Standby close reconstruction thread | Major | . 
| zhanghuazong | zhanghuazong | +| [HDFS-16286](https://issues.apache.org/jira/browse/HDFS-16286) | Debug tool to verify the correctness of erasure coding on file | Minor | erasure-coding, tools | daimin | daimin | +| [HADOOP-17998](https://issues.apache.org/jira/browse/HADOOP-17998) | Allow get command to run with multi threads. | Major | fs | Chengwei Wang | Chengwei Wang | +| [HADOOP-18023](https://issues.apache.org/jira/browse/HADOOP-18023) | Allow cp command to run with multi threads. | Major | fs | Chengwei Wang | Chengwei Wang | +| [HADOOP-17643](https://issues.apache.org/jira/browse/HADOOP-17643) | WASB : Make metadata checks case insensitive | Major | . | Anoop Sam John | Anoop Sam John | +| [HDFS-16386](https://issues.apache.org/jira/browse/HDFS-16386) | Reduce DataNode load when FsDatasetAsyncDiskService is working | Major | datanode | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin | +| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin | +| [HADOOP-18093](https://issues.apache.org/jira/browse/HADOOP-18093) | Better exception handling for testFileStatusOnMountLink() in ViewFsBaseTest.java | Trivial | . 
| Xing Lin | Xing Lin | +| [HADOOP-18155](https://issues.apache.org/jira/browse/HADOOP-18155) | Refactor tests in TestFileUtil | Trivial | common | Gautham Banasandra | Gautham Banasandra | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15939](https://issues.apache.org/jira/browse/HADOOP-15939) | Filter overlapping objenesis class in hadoop-client-minicluster | Minor | build | Xiaoyu Yao | Xiaoyu Yao | +| [YARN-8936](https://issues.apache.org/jira/browse/YARN-8936) | Bump up Atsv2 hbase versions | Major | . | Rohith Sharma K S | Vrushali C | +| [HDFS-14189](https://issues.apache.org/jira/browse/HDFS-14189) | Fix intermittent failure of TestNameNodeMetrics | Major | . | Ayush Saxena | Ayush Saxena | +| [YARN-9246](https://issues.apache.org/jira/browse/YARN-9246) | NPE when executing a command yarn node -status or -states without additional arguments | Minor | client | Masahiro Tanaka | Masahiro Tanaka | +| [YARN-7266](https://issues.apache.org/jira/browse/YARN-7266) | Timeline Server event handler threads locked | Major | ATSv2, timelineserver | Venkata Puneet Ravuri | Prabhu Joseph | +| [YARN-9990](https://issues.apache.org/jira/browse/YARN-9990) | Testcase fails with "Insufficient configured threads: required=16 \< max=10" | Major | . | Prabhu Joseph | Prabhu Joseph | +| [YARN-10020](https://issues.apache.org/jira/browse/YARN-10020) | Fix build instruction of hadoop-yarn-ui | Minor | yarn-ui-v2 | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10037](https://issues.apache.org/jira/browse/YARN-10037) | Upgrade build tools for YARN Web UI v2 | Major | build, security, yarn-ui-v2 | Akira Ajisaka | Masatake Iwasaki | +| [HDFS-15187](https://issues.apache.org/jira/browse/HDFS-15187) | CORRUPT replica mismatch between namenodes after failover | Critical | . 
| Ayush Saxena | Ayush Saxena | +| [HDFS-15200](https://issues.apache.org/jira/browse/HDFS-15200) | Delete Corrupt Replica Immediately Irrespective of Replicas On Stale Storage | Critical | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15113](https://issues.apache.org/jira/browse/HDFS-15113) | Missing IBR when NameNode restart if open processCommand async feature | Blocker | datanode | Xiaoqiao He | Xiaoqiao He | +| [HDFS-15210](https://issues.apache.org/jira/browse/HDFS-15210) | EC : File write hanged when DN is shutdown by admin command. | Major | ec | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HADOOP-16768](https://issues.apache.org/jira/browse/HADOOP-16768) | SnappyCompressor test cases wrongly assume that the compressed data is always smaller than the input data | Major | io, test | zhao bo | Akira Ajisaka | +| [HDFS-11041](https://issues.apache.org/jira/browse/HDFS-11041) | Unable to unregister FsDatasetState MBean if DataNode is shutdown twice | Trivial | datanode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-17068](https://issues.apache.org/jira/browse/HADOOP-17068) | client fails forever when namenode ipaddr changed | Major | hdfs-client | Sean Chow | Sean Chow | +| [HDFS-15378](https://issues.apache.org/jira/browse/HDFS-15378) | TestReconstructStripedFile#testErasureCodingWorkerXmitsWeight is failing on trunk | Major | . | Hemanth Boyina | Hemanth Boyina | +| [YARN-10331](https://issues.apache.org/jira/browse/YARN-10331) | Upgrade node.js to 10.21.0 | Critical | build, yarn-ui-v2 | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17119](https://issues.apache.org/jira/browse/HADOOP-17119) | Jetty upgrade to 9.4.x causes MR app fail with IOException | Major | . | Bilwa S T | Bilwa S T | +| [HADOOP-17138](https://issues.apache.org/jira/browse/HADOOP-17138) | Fix spotbugs warnings surfaced after upgrade to 4.0.6 | Minor | . 
| Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15439](https://issues.apache.org/jira/browse/HDFS-15439) | Setting dfs.mover.retry.max.attempts to negative value will retry forever. | Major | balancer & mover | AMC-team | AMC-team | +| [YARN-10430](https://issues.apache.org/jira/browse/YARN-10430) | Log improvements in NodeStatusUpdaterImpl | Minor | nodemanager | Bilwa S T | Bilwa S T | +| [HDFS-15438](https://issues.apache.org/jira/browse/HDFS-15438) | Setting dfs.disk.balancer.max.disk.errors = 0 will fail the block copy | Major | balancer & mover | AMC-team | AMC-team | +| [YARN-10438](https://issues.apache.org/jira/browse/YARN-10438) | Handle null containerId in ClientRMService#getContainerReport() | Major | resourcemanager | Raghvendra Singh | Shubham Gupta | +| [HDFS-15628](https://issues.apache.org/jira/browse/HDFS-15628) | HttpFS server throws NPE if a file is a symlink | Major | fs, httpfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15627](https://issues.apache.org/jira/browse/HDFS-15627) | Audit log deletes before collecting blocks | Major | logging, namenode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17309](https://issues.apache.org/jira/browse/HADOOP-17309) | Javadoc warnings and errors are ignored in the precommit jobs | Major | build, documentation | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17310](https://issues.apache.org/jira/browse/HADOOP-17310) | Touch command with -c option is broken | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15639](https://issues.apache.org/jira/browse/HDFS-15639) | [JDK 11] Fix Javadoc errors in hadoop-hdfs-client | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-15622](https://issues.apache.org/jira/browse/HDFS-15622) | Deleted blocks linger in the replications queue | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15641](https://issues.apache.org/jira/browse/HDFS-15641) | DataNode could meet deadlock if invoke refreshNameNode | Critical | . 
| Hongbing Wang | Hongbing Wang | +| [MAPREDUCE-7302](https://issues.apache.org/jira/browse/MAPREDUCE-7302) | Upgrading to JUnit 4.13 causes testcase TestFetcher.testCorruptedIFile() to fail | Major | test | Peter Bacsko | Peter Bacsko | +| [HDFS-15644](https://issues.apache.org/jira/browse/HDFS-15644) | Failed volumes can cause DNs to stop block reporting | Major | block placement, datanode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17236](https://issues.apache.org/jira/browse/HADOOP-17236) | Bump up snakeyaml to 1.26 to mitigate CVE-2017-18640 | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [YARN-10467](https://issues.apache.org/jira/browse/YARN-10467) | ContainerIdPBImpl objects can be leaked in RMNodeImpl.completedContainers | Major | resourcemanager | Haibo Chen | Haibo Chen | +| [HADOOP-17329](https://issues.apache.org/jira/browse/HADOOP-17329) | mvn site command fails due to MetricsSystemImpl changes | Major | . | Xiaoqiao He | Xiaoqiao He | +| [HDFS-15651](https://issues.apache.org/jira/browse/HDFS-15651) | Client could not obtain block when DN CommandProcessingThread exit | Major | . | Yiqun Lin | Mingxiang Li | +| [HADOOP-17340](https://issues.apache.org/jira/browse/HADOOP-17340) | TestLdapGroupsMapping failing -string mismatch in exception validation | Major | test | Steve Loughran | Steve Loughran | +| [HADOOP-17352](https://issues.apache.org/jira/browse/HADOOP-17352) | Update PATCH\_NAMING\_RULE in the personality file | Minor | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15485](https://issues.apache.org/jira/browse/HDFS-15485) | Fix outdated properties of JournalNode when performing rollback | Minor | . 
| Deegue | Deegue | +| [HADOOP-17358](https://issues.apache.org/jira/browse/HADOOP-17358) | Improve excessive reloading of Configurations | Major | conf | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15538](https://issues.apache.org/jira/browse/HDFS-15538) | Fix the documentation for dfs.namenode.replication.max-streams in hdfs-default.xml | Major | . | Xieming Li | Xieming Li | +| [HADOOP-17362](https://issues.apache.org/jira/browse/HADOOP-17362) | Doing hadoop ls on Har file triggers too many RPC calls | Major | fs | Ahmed Hussein | Ahmed Hussein | +| [YARN-10485](https://issues.apache.org/jira/browse/YARN-10485) | TimelineConnector swallows InterruptedException | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17360](https://issues.apache.org/jira/browse/HADOOP-17360) | Log the remote address for authentication success | Minor | ipc | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17346](https://issues.apache.org/jira/browse/HADOOP-17346) | Fair call queue is defeated by abusive service principals | Major | common, ipc | Ahmed Hussein | Ahmed Hussein | +| [YARN-10470](https://issues.apache.org/jira/browse/YARN-10470) | When building new web ui with root user, the bower install should support it. 
| Major | build, yarn-ui-v2 | Qi Zhu | Qi Zhu | +| [YARN-10498](https://issues.apache.org/jira/browse/YARN-10498) | Fix Yarn CapacityScheduler Markdown document | Trivial | documentation | zhaoshengjie | zhaoshengjie | +| [HDFS-15695](https://issues.apache.org/jira/browse/HDFS-15695) | NN should not let the balancer run in safemode | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [YARN-10511](https://issues.apache.org/jira/browse/YARN-10511) | Update yarn.nodemanager.env-whitelist value in docs | Minor | documentation | Andrea Scarpino | Andrea Scarpino | +| [HDFS-15707](https://issues.apache.org/jira/browse/HDFS-15707) | NNTop counts don't add up as expected | Major | hdfs, metrics, namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15709](https://issues.apache.org/jira/browse/HDFS-15709) | EC: Socket file descriptor leak in StripedBlockChecksumReconstructor | Major | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [YARN-10491](https://issues.apache.org/jira/browse/YARN-10491) | Fix deprecation warnings in SLSWebApp.java | Minor | build | Akira Ajisaka | Ankit Kumar | +| [HADOOP-13571](https://issues.apache.org/jira/browse/HADOOP-13571) | ServerSocketUtil.getPort() should use loopback address, not 0.0.0.0 | Major | . | Eric Badger | Eric Badger | +| [HDFS-15725](https://issues.apache.org/jira/browse/HDFS-15725) | Lease Recovery never completes for a committed block which the DNs never finalize | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15170](https://issues.apache.org/jira/browse/HDFS-15170) | EC: Block gets marked as CORRUPT in case of failover and pipeline recovery | Critical | erasure-coding | Ayush Saxena | Ayush Saxena | +| [HDFS-15719](https://issues.apache.org/jira/browse/HDFS-15719) | [Hadoop 3] Both NameNodes can crash simultaneously due to the short JN socket timeout | Critical | . 
| Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-10560](https://issues.apache.org/jira/browse/YARN-10560) | Upgrade node.js to 10.23.1 and yarn to 1.22.5 in Web UI v2 | Major | webapp, yarn-ui-v2 | Akira Ajisaka | Akira Ajisaka | +| [YARN-10528](https://issues.apache.org/jira/browse/YARN-10528) | maxAMShare should only be accepted for leaf queues, not parent queues | Major | . | Siddharth Ahuja | Siddharth Ahuja | +| [HADOOP-17438](https://issues.apache.org/jira/browse/HADOOP-17438) | Increase docker memory limit in Jenkins | Major | build, scripts, test, yetus | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7310](https://issues.apache.org/jira/browse/MAPREDUCE-7310) | Clear the fileMap in JHEventHandlerForSigtermTest | Minor | test | Zhengxi Li | Zhengxi Li | +| [HADOOP-16947](https://issues.apache.org/jira/browse/HADOOP-16947) | Stale record should be removed when MutableRollingAverages generates aggregate data. | Major | . | Haibin Huang | Haibin Huang | +| [HDFS-15632](https://issues.apache.org/jira/browse/HDFS-15632) | AbstractContractDeleteTest should set recursive parameter to true for recursive test cases. | Major | . | Konstantin Shvachko | Anton Kutuzov | +| [HDFS-10498](https://issues.apache.org/jira/browse/HDFS-10498) | Intermittent test failure org.apache.hadoop.hdfs.server.namenode.snapshot.TestSnapshotFileLength.testSnapshotfileLength | Major | hdfs, snapshots | Hanisha Koneru | Jim Brennan | +| [HADOOP-17506](https://issues.apache.org/jira/browse/HADOOP-17506) | Fix typo in BUILDING.txt | Trivial | documentation | Gautham Banasandra | Gautham Banasandra | +| [HDFS-15795](https://issues.apache.org/jira/browse/HDFS-15795) | EC: Wrong checksum when reconstruction was failed by exception | Major | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [HDFS-15779](https://issues.apache.org/jira/browse/HDFS-15779) | EC: fix NPE caused by StripedWriter.clearBuffers during reconstruct block | Major | . 
| Hongbing Wang | Hongbing Wang | +| [HDFS-15798](https://issues.apache.org/jira/browse/HDFS-15798) | EC: Reconstruct task failed, and It would be XmitsInProgress of DN has negative number | Major | . | Haiyang Hu | Haiyang Hu | +| [YARN-10428](https://issues.apache.org/jira/browse/YARN-10428) | Zombie applications in the YARN queue using FAIR + sizebasedweight | Critical | capacityscheduler | Guang Yang | Andras Gyori | +| [YARN-10607](https://issues.apache.org/jira/browse/YARN-10607) | User environment is unable to prepend PATH when mapreduce.admin.user.env also sets PATH | Major | . | Eric Badger | Eric Badger | +| [HADOOP-17516](https://issues.apache.org/jira/browse/HADOOP-17516) | Upgrade ant to 1.10.9 | Major | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10500](https://issues.apache.org/jira/browse/YARN-10500) | TestDelegationTokenRenewer fails intermittently | Major | test | Akira Ajisaka | Masatake Iwasaki | +| [HADOOP-17534](https://issues.apache.org/jira/browse/HADOOP-17534) | Upgrade Jackson databind to 2.10.5.1 | Major | build | Adam Roberts | Akira Ajisaka | +| [MAPREDUCE-7323](https://issues.apache.org/jira/browse/MAPREDUCE-7323) | Remove job\_history\_summary.py | Major | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10647](https://issues.apache.org/jira/browse/YARN-10647) | Fix TestRMNodeLabelsManager failed after YARN-10501. | Major | . | Qi Zhu | Qi Zhu | +| [HADOOP-17510](https://issues.apache.org/jira/browse/HADOOP-17510) | Hadoop prints sensitive Cookie information. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-15422](https://issues.apache.org/jira/browse/HDFS-15422) | Reported IBR is partially replaced with stored info when queuing. | Critical | namenode | Kihwal Lee | Stephen O'Donnell | +| [YARN-10651](https://issues.apache.org/jira/browse/YARN-10651) | CapacityScheduler crashed with NPE in AbstractYarnScheduler.updateNodeResource() | Major | . 
| Haibo Chen | Haibo Chen | +| [MAPREDUCE-7320](https://issues.apache.org/jira/browse/MAPREDUCE-7320) | ClusterMapReduceTestCase does not clean directories | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-14013](https://issues.apache.org/jira/browse/HDFS-14013) | Skip any credentials stored in HDFS when starting ZKFC | Major | hdfs | Krzysztof Adamski | Stephen O'Donnell | +| [HDFS-15849](https://issues.apache.org/jira/browse/HDFS-15849) | ExpiredHeartbeats metric should be of Type.COUNTER | Major | metrics | Konstantin Shvachko | Qi Zhu | +| [YARN-10672](https://issues.apache.org/jira/browse/YARN-10672) | All testcases in TestReservations are flaky | Major | . | Szilard Nemeth | Szilard Nemeth | +| [HADOOP-17557](https://issues.apache.org/jira/browse/HADOOP-17557) | skip-dir option is not processed by Yetus | Major | build, precommit, yetus | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15875](https://issues.apache.org/jira/browse/HDFS-15875) | Check whether file is being truncated before truncate | Major | . | Hui Fei | Hui Fei | +| [HADOOP-17582](https://issues.apache.org/jira/browse/HADOOP-17582) | Replace GitHub App Token with GitHub OAuth token | Major | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10687](https://issues.apache.org/jira/browse/YARN-10687) | Add option to disable/enable free disk space checking and percentage checking for full and not-full disks | Major | nodemanager | Qi Zhu | Qi Zhu | +| [HADOOP-17586](https://issues.apache.org/jira/browse/HADOOP-17586) | Upgrade org.codehaus.woodstox:stax2-api to 4.2.1 | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-17585](https://issues.apache.org/jira/browse/HADOOP-17585) | Correct timestamp format in the docs for the touch command | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [YARN-10588](https://issues.apache.org/jira/browse/YARN-10588) | Percentage of queue and cluster is zero in WebUI | Major | . 
| Bilwa S T | Bilwa S T | +| [MAPREDUCE-7322](https://issues.apache.org/jira/browse/MAPREDUCE-7322) | revisiting TestMRIntermediateDataEncryption | Major | job submission, security, test | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17592](https://issues.apache.org/jira/browse/HADOOP-17592) | Fix the wrong CIDR range example in Proxy User documentation | Minor | documentation | Kwangsun Noh | Kwangsun Noh | +| [YARN-10706](https://issues.apache.org/jira/browse/YARN-10706) | Upgrade com.github.eirslett:frontend-maven-plugin to 1.11.2 | Major | buid | Mingliang Liu | Mingliang Liu | +| [MAPREDUCE-7325](https://issues.apache.org/jira/browse/MAPREDUCE-7325) | Intermediate data encryption is broken in LocalJobRunner | Major | job submission, security | Ahmed Hussein | Ahmed Hussein | +| [YARN-10697](https://issues.apache.org/jira/browse/YARN-10697) | Resources are displayed in bytes in UI for schedulers other than capacity | Major | . | Bilwa S T | Bilwa S T | +| [HADOOP-17602](https://issues.apache.org/jira/browse/HADOOP-17602) | Upgrade JUnit to 4.13.1 | Major | build, security, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15900](https://issues.apache.org/jira/browse/HDFS-15900) | RBF: empty blockpool id on dfsrouter caused by UNAVAILABLE NameNode | Major | rbf | Harunobu Daikoku | Harunobu Daikoku | +| [YARN-10501](https://issues.apache.org/jira/browse/YARN-10501) | Can't remove all node labels after add node label without nodemanager port | Critical | yarn | caozhiqiang | caozhiqiang | +| [YARN-10716](https://issues.apache.org/jira/browse/YARN-10716) | Fix typo in ContainerRuntime | Trivial | documentation | Wanqiang Ji | xishuhai | +| [HDFS-15950](https://issues.apache.org/jira/browse/HDFS-15950) | Remove unused hdfs.proto import | Major | hdfs-client | Gautham Banasandra | Gautham Banasandra | +| [HDFS-15949](https://issues.apache.org/jira/browse/HDFS-15949) | Fix integer overflow | Major | libhdfs++ | Gautham Banasandra | Gautham Banasandra | +| 
[HDFS-15948](https://issues.apache.org/jira/browse/HDFS-15948) | Fix test4tests for libhdfspp | Critical | build, libhdfs++ | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-17608](https://issues.apache.org/jira/browse/HADOOP-17608) | Fix TestKMS failure | Major | kms | Akira Ajisaka | Akira Ajisaka | +| [YARN-10460](https://issues.apache.org/jira/browse/YARN-10460) | Upgrading to JUnit 4.13 causes tests in TestNodeStatusUpdater to fail | Major | nodemanager, test | Peter Bacsko | Peter Bacsko | +| [HADOOP-17641](https://issues.apache.org/jira/browse/HADOOP-17641) | ITestWasbUriAndConfiguration.testCanonicalServiceName() failing now mockaccount exists | Minor | fs/azure, test | Steve Loughran | Steve Loughran | +| [HADOOP-17655](https://issues.apache.org/jira/browse/HADOOP-17655) | Upgrade Jetty to 9.4.40 | Blocker | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10749](https://issues.apache.org/jira/browse/YARN-10749) | Can't remove all node labels after add node label without nodemanager port, broken by YARN-10647 | Major | . | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [HDFS-15621](https://issues.apache.org/jira/browse/HDFS-15621) | Datanode DirectoryScanner uses excessive memory | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [YARN-10756](https://issues.apache.org/jira/browse/YARN-10756) | Remove additional junit 4.11 dependency from javadoc | Major | build, test, timelineservice | ANANDA G B | Akira Ajisaka | +| [YARN-10555](https://issues.apache.org/jira/browse/YARN-10555) | Missing access check before getAppAttempts | Critical | webapp | lujie | lujie | +| [HADOOP-17703](https://issues.apache.org/jira/browse/HADOOP-17703) | checkcompatibility.py errors out when specifying annotations | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-14922](https://issues.apache.org/jira/browse/HADOOP-14922) | Build of Mapreduce Native Task module fails with unknown opcode "bswap" | Major | . 
| Anup Halarnkar | Anup Halarnkar | +| [HADOOP-17718](https://issues.apache.org/jira/browse/HADOOP-17718) | Explicitly set locale in the Dockerfile | Blocker | build | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-17700](https://issues.apache.org/jira/browse/HADOOP-17700) | ExitUtil#halt info log should log HaltException | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-10770](https://issues.apache.org/jira/browse/YARN-10770) | container-executor permission is wrong in SecureContainer.md | Major | documentation | Akira Ajisaka | Siddharth Ahuja | +| [HDFS-15915](https://issues.apache.org/jira/browse/HDFS-15915) | Race condition with async edits logging due to updating txId outside of the namesystem log | Major | hdfs, namenode | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-16040](https://issues.apache.org/jira/browse/HDFS-16040) | RpcQueueTime metric counts requeued calls as unique events. | Major | hdfs | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [YARN-10809](https://issues.apache.org/jira/browse/YARN-10809) | testWithHbaseConfAtHdfsFileSystem consistently failing | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16055](https://issues.apache.org/jira/browse/HDFS-16055) | Quota is not preserved in snapshot INode | Major | hdfs | Siyao Meng | Siyao Meng | +| [HDFS-16068](https://issues.apache.org/jira/browse/HDFS-16068) | WebHdfsFileSystem has a possible connection leak in connection with HttpFS | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-10767](https://issues.apache.org/jira/browse/YARN-10767) | Yarn Logs Command retrying on Standby RM for 30 times | Major | . 
| D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [HDFS-15618](https://issues.apache.org/jira/browse/HDFS-15618) | Improve datanode shutdown latency | Major | datanode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17760](https://issues.apache.org/jira/browse/HADOOP-17760) | Delete hadoop.ssl.enabled and dfs.https.enable from docs and core-default.xml | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13671](https://issues.apache.org/jira/browse/HDFS-13671) | Namenode deletes large dir slowly caused by FoldedTreeSet#removeAndGet | Major | . | Yiqun Lin | Haibin Huang | +| [HDFS-16061](https://issues.apache.org/jira/browse/HDFS-16061) | DFTestUtil.waitReplication can produce false positives | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-14575](https://issues.apache.org/jira/browse/HDFS-14575) | LeaseRenewer#daemon threads leak in DFSClient | Major | . | Tao Yang | Renukaprasad C | +| [YARN-10826](https://issues.apache.org/jira/browse/YARN-10826) | [UI2] Upgrade Node.js to at least v12.22.1 | Major | yarn-ui-v2 | Akira Ajisaka | Masatake Iwasaki | +| [YARN-10828](https://issues.apache.org/jira/browse/YARN-10828) | Backport YARN-9789 to branch-3.2 | Major | . | Tarun Parimi | Tarun Parimi | +| [HADOOP-17769](https://issues.apache.org/jira/browse/HADOOP-17769) | Upgrade JUnit to 4.13.2 | Major | . | Ahmed Hussein | Ahmed Hussein | +| [YARN-10824](https://issues.apache.org/jira/browse/YARN-10824) | Title not set for JHS and NM webpages | Major | . | Rajshree Mishra | Bilwa S T | +| [HDFS-16092](https://issues.apache.org/jira/browse/HDFS-16092) | Avoid creating LayoutFlags redundant objects | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16108](https://issues.apache.org/jira/browse/HDFS-16108) | Incorrect log placeholders used in JournalNodeSyncer | Minor | . | Viraj Jasani | Viraj Jasani | +| [MAPREDUCE-7353](https://issues.apache.org/jira/browse/MAPREDUCE-7353) | Mapreduce job fails when NM is stopped | Major | . 
| Bilwa S T | Bilwa S T | +| [HDFS-16121](https://issues.apache.org/jira/browse/HDFS-16121) | Iterative snapshot diff report can generate duplicate records for creates, deletes and Renames | Major | snapshots | Srinivasu Majeti | Shashikant Banerjee | +| [HDFS-15796](https://issues.apache.org/jira/browse/HDFS-15796) | ConcurrentModificationException error happens on NameNode occasionally | Critical | hdfs | Daniel Ma | Daniel Ma | +| [HADOOP-17793](https://issues.apache.org/jira/browse/HADOOP-17793) | Better token validation | Major | . | Artem Smotrakov | Artem Smotrakov | +| [HDFS-16042](https://issues.apache.org/jira/browse/HDFS-16042) | DatanodeAdminMonitor scan should be delay based | Major | datanode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-16127](https://issues.apache.org/jira/browse/HDFS-16127) | Improper pipeline close recovery causes a permanent write failure or data loss. | Major | . | Kihwal Lee | Kihwal Lee | +| [HADOOP-17028](https://issues.apache.org/jira/browse/HADOOP-17028) | ViewFS should initialize target filesystems lazily | Major | client-mounts, fs, viewfs | Uma Maheswara Rao G | Abhishek Das | +| [HDFS-12920](https://issues.apache.org/jira/browse/HDFS-12920) | HDFS default value change (with adding time unit) breaks old version MR tarball work with Hadoop 3.x | Critical | configuration, hdfs | Junping Du | Akira Ajisaka | +| [YARN-10813](https://issues.apache.org/jira/browse/YARN-10813) | Set default capacity of root for node labels | Major | . | Andras Gyori | Andras Gyori | +| [YARN-9551](https://issues.apache.org/jira/browse/YARN-9551) | TestTimelineClientV2Impl.testSyncCall fails intermittently | Minor | ATSv2, test | Prabhu Joseph | Andras Gyori | +| [HDFS-15175](https://issues.apache.org/jira/browse/HDFS-15175) | Multiple CloseOp shared block instance causes the standby namenode to crash when rolling editlog | Critical | . 
| Yicong Cai | Wan Chang | +| [YARN-10789](https://issues.apache.org/jira/browse/YARN-10789) | RM HA startup can fail due to race conditions in ZKConfigurationStore | Major | . | Tarun Parimi | Tarun Parimi | +| [YARN-6221](https://issues.apache.org/jira/browse/YARN-6221) | Entities missing from ATS when summary log file info got returned to the ATS before the domain log | Critical | yarn | Sushmitha Sreenivasan | Xiaomin Zhang | +| [MAPREDUCE-7258](https://issues.apache.org/jira/browse/MAPREDUCE-7258) | HistoryServerRest.html#Task\_Counters\_API, modify the jobTaskCounters's itemName from "taskcounterGroup" to "taskCounterGroup". | Minor | documentation | jenny | jenny | +| [YARN-8990](https://issues.apache.org/jira/browse/YARN-8990) | Fix fair scheduler race condition in app submit and queue cleanup | Blocker | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-8992](https://issues.apache.org/jira/browse/YARN-8992) | Fair scheduler can delete a dynamic queue while an application attempt is being added to the queue | Major | fairscheduler | Haibo Chen | Wilfred Spiegelenburg | +| [HADOOP-17370](https://issues.apache.org/jira/browse/HADOOP-17370) | Upgrade commons-compress to 1.21 | Major | common | Dongjoon Hyun | Akira Ajisaka | +| [HADOOP-17844](https://issues.apache.org/jira/browse/HADOOP-17844) | Upgrade JSON smart to 2.4.7 | Major | . | Renukaprasad C | Renukaprasad C | +| [HADOOP-17850](https://issues.apache.org/jira/browse/HADOOP-17850) | Upgrade ZooKeeper to 3.4.14 in branch-3.2 | Major | . | Akira Ajisaka | Masatake Iwasaki | +| [HDFS-16177](https://issues.apache.org/jira/browse/HDFS-16177) | Bug fix for Util#receiveFile | Minor | . | tomscut | tomscut | +| [YARN-10814](https://issues.apache.org/jira/browse/YARN-10814) | YARN shouldn't start with empty hadoop.http.authentication.signature.secret.file | Major | . 
| Benjamin Teke | Tamas Domok | +| [HADOOP-17858](https://issues.apache.org/jira/browse/HADOOP-17858) | Avoid possible class loading deadlock with VerifierNone initialization | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17886](https://issues.apache.org/jira/browse/HADOOP-17886) | Upgrade ant to 1.10.11 | Major | . | Ahmed Hussein | Ahmed Hussein | +| [YARN-10901](https://issues.apache.org/jira/browse/YARN-10901) | Permission checking error on an existing directory in LogAggregationFileController#verifyAndCreateRemoteLogDir | Major | nodemanager | Tamas Domok | Tamas Domok | +| [HDFS-16187](https://issues.apache.org/jira/browse/HDFS-16187) | SnapshotDiff behaviour with Xattrs and Acls is not consistent across NN restarts with checkpointing | Major | snapshots | Srinivasu Majeti | Shashikant Banerjee | +| [HDFS-16198](https://issues.apache.org/jira/browse/HDFS-16198) | Short circuit read leaks Slot objects when InvalidToken exception is thrown | Major | . | Eungsop Yoo | Eungsop Yoo | +| [HADOOP-17917](https://issues.apache.org/jira/browse/HADOOP-17917) | Backport HADOOP-15993 to branch-3.2 which address CVE-2014-4611 | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-16233](https://issues.apache.org/jira/browse/HDFS-16233) | Do not use exception handler to implement copy-on-write for EnumCounters | Major | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16235](https://issues.apache.org/jira/browse/HDFS-16235) | Deadlock in LeaseRenewer for static remove method | Major | hdfs | angerszhu | angerszhu | +| [HADOOP-17940](https://issues.apache.org/jira/browse/HADOOP-17940) | Upgrade Kafka to 2.8.1 | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-16272](https://issues.apache.org/jira/browse/HDFS-16272) | Int overflow in computing safe length during EC block recovery | Critical | 3.1.1 | daimin | daimin | +| [HADOOP-17971](https://issues.apache.org/jira/browse/HADOOP-17971) | Exclude IBM Java security classes from being shaded/relocated | Major | build | Nicholas Marion | Nicholas Marion | +| [HADOOP-17972](https://issues.apache.org/jira/browse/HADOOP-17972) | Backport HADOOP-17683 for branch-3.2 | Major | security | Ananya Singh | Ananya Singh | +| [HADOOP-17993](https://issues.apache.org/jira/browse/HADOOP-17993) | Disable JIRA plugin for YETUS on Hadoop | Critical | build | Gautham Banasandra | Gautham Banasandra | +| [HDFS-16182](https://issues.apache.org/jira/browse/HDFS-16182) | numOfReplicas is given the wrong value in BlockPlacementPolicyDefault$chooseTarget can cause DataStreamer to fail with Heterogeneous Storage | Major | namenode | Max Xie | Max Xie | +| [HDFS-16350](https://issues.apache.org/jira/browse/HDFS-16350) | Datanode start time should be set after RPC server starts successfully | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-13500](https://issues.apache.org/jira/browse/HADOOP-13500) | Synchronizing iteration of Configuration properties object | Major | conf | Jason Darrell Lowe | Dhananjay Badaya | +| [HDFS-16317](https://issues.apache.org/jira/browse/HDFS-16317) | Backport HDFS-14729 for branch-3.2 | Major | security | Ananya Singh | Ananya Singh | +| [HDFS-14099](https://issues.apache.org/jira/browse/HDFS-14099) | Unknown frame descriptor when decompressing multiple frames in ZStandardDecompressor | Major | . | xuzq | xuzq | +| [HDFS-16410](https://issues.apache.org/jira/browse/HDFS-16410) | Insecure Xml parsing in OfflineEditsXmlLoader | Minor | . 
| Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16420](https://issues.apache.org/jira/browse/HDFS-16420) | Avoid deleting unique data blocks when deleting redundancy striped blocks | Critical | ec, erasure-coding | qinyuren | Jackson Wang | +| [HDFS-16428](https://issues.apache.org/jira/browse/HDFS-16428) | Source path with storagePolicy cause wrong typeConsumed while rename | Major | hdfs, namenode | lei w | lei w | +| [HDFS-16437](https://issues.apache.org/jira/browse/HDFS-16437) | ReverseXML processor doesn't accept XML files without the SnapshotDiffSection. | Critical | hdfs | yanbin.zhang | yanbin.zhang | +| [HDFS-16422](https://issues.apache.org/jira/browse/HDFS-16422) | Fix thread safety of EC decoding during concurrent preads | Critical | dfsclient, ec, erasure-coding | daimin | daimin | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-9338](https://issues.apache.org/jira/browse/YARN-9338) | Timeline related testcases are failing | Major | . 
| Prabhu Joseph | Abhishek Modi | +| [HDFS-15092](https://issues.apache.org/jira/browse/HDFS-15092) | TestRedudantBlocks#testProcessOverReplicatedAndRedudantBlock sometimes fails | Minor | test | Hui Fei | Hui Fei | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15775](https://issues.apache.org/jira/browse/HADOOP-15775) | [JDK9] Add missing javax.activation-api dependency | Critical | test | Akira Ajisaka | Akira Ajisaka | +| [YARN-9875](https://issues.apache.org/jira/browse/YARN-9875) | FSSchedulerConfigurationStore fails to update with hdfs path | Major | capacityscheduler | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-16764](https://issues.apache.org/jira/browse/HADOOP-16764) | Rewrite Python example codes using Python3 | Minor | documentation | Kengo Seki | Kengo Seki | +| [HADOOP-16905](https://issues.apache.org/jira/browse/HADOOP-16905) | Update jackson-databind to 2.10.3 to relieve us from the endless CVE patches | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-10337](https://issues.apache.org/jira/browse/YARN-10337) | TestRMHATimelineCollectors fails on hadoop trunk | Major | test, yarn | Ahmed Hussein | Bilwa S T | +| [HDFS-15464](https://issues.apache.org/jira/browse/HDFS-15464) | ViewFsOverloadScheme should work when -fs option pointing to remote cluster without mount links | Major | viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15478](https://issues.apache.org/jira/browse/HDFS-15478) | When Empty mount points, we are assigning fallback link to self. But it should not use full URI for target fs. | Major | . 
| Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15459](https://issues.apache.org/jira/browse/HDFS-15459) | TestBlockTokenWithDFSStriped fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15461](https://issues.apache.org/jira/browse/HDFS-15461) | TestDFSClientRetries#testGetFileChecksum fails intermittently | Major | dfsclient, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-9776](https://issues.apache.org/jira/browse/HDFS-9776) | TestHAAppend#testMultipleAppendsDuringCatchupTailing is flaky | Major | . | Vinayakumar B | Ahmed Hussein | +| [HDFS-15457](https://issues.apache.org/jira/browse/HDFS-15457) | TestFsDatasetImpl fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17330](https://issues.apache.org/jira/browse/HADOOP-17330) | Backport HADOOP-16005-"NativeAzureFileSystem does not support setXAttr" to branch-3.2 | Major | fs/azure | Sally Zuo | Sally Zuo | +| [HDFS-15643](https://issues.apache.org/jira/browse/HDFS-15643) | EC: Fix checksum computation in case of native encoders | Blocker | . | Ahmed Hussein | Ayush Saxena | +| [HADOOP-17325](https://issues.apache.org/jira/browse/HADOOP-17325) | WASB: Test failures | Major | fs/azure, test | Sneha Vijayarajan | Steve Loughran | +| [HDFS-15716](https://issues.apache.org/jira/browse/HDFS-15716) | TestUpgradeDomainBlockPlacementPolicy flaky | Major | namenode, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15762](https://issues.apache.org/jira/browse/HDFS-15762) | TestMultipleNNPortQOP#testMultipleNNPortOverwriteDownStream fails intermittently | Minor | . | Toshihiko Uchida | Toshihiko Uchida | +| [HDFS-15672](https://issues.apache.org/jira/browse/HDFS-15672) | TestBalancerWithMultipleNameNodes#testBalancingBlockpoolsWithBlockPoolPolicy fails on trunk | Major | . 
| Ahmed Hussein | Masatake Iwasaki | +| [HDFS-15818](https://issues.apache.org/jira/browse/HDFS-15818) | Fix TestFsDatasetImpl.testReadLockCanBeDisabledByConfig | Minor | test | Leon Gao | Leon Gao | +| [HADOOP-16748](https://issues.apache.org/jira/browse/HADOOP-16748) | Migrate to Python 3 and upgrade Yetus to 0.13.0 | Major | . | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15890](https://issues.apache.org/jira/browse/HDFS-15890) | Improve the Logs for File Concat Operation | Minor | namenode | Bhavik Patel | Bhavik Patel | +| [HDFS-13975](https://issues.apache.org/jira/browse/HDFS-13975) | TestBalancer#testMaxIterationTime fails sporadically | Major | . | Jason Darrell Lowe | Toshihiko Uchida | +| [YARN-10688](https://issues.apache.org/jira/browse/YARN-10688) | ClusterMetrics should support GPU capacity related metrics. | Major | metrics, resourcemanager | Qi Zhu | Qi Zhu | +| [HDFS-15902](https://issues.apache.org/jira/browse/HDFS-15902) | Improve the log for HTTPFS server operation | Minor | httpfs | Bhavik Patel | Bhavik Patel | +| [HDFS-15940](https://issues.apache.org/jira/browse/HDFS-15940) | Some tests in TestBlockRecovery are consistently failing | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-10702](https://issues.apache.org/jira/browse/YARN-10702) | Add cluster metric for amount of CPU used by RM Event Processor | Minor | yarn | Jim Brennan | Jim Brennan | +| [HADOOP-17630](https://issues.apache.org/jira/browse/HADOOP-17630) | [JDK 15] TestPrintableString fails due to Unicode 13.0 support | Major | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10723](https://issues.apache.org/jira/browse/YARN-10723) | Change CS nodes page in UI to support custom resource. | Major | . 
| Qi Zhu | Qi Zhu | +| [HADOOP-17112](https://issues.apache.org/jira/browse/HADOOP-17112) | whitespace not allowed in paths when saving files to s3a via committer | Blocker | fs/s3 | Krzysztof Adamski | Krzysztof Adamski | +| [HADOOP-17661](https://issues.apache.org/jira/browse/HADOOP-17661) | mvn versions:set fails to parse pom.xml | Blocker | build | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-10642](https://issues.apache.org/jira/browse/YARN-10642) | Race condition: AsyncDispatcher can get stuck by the changes introduced in YARN-8995 | Critical | resourcemanager | zhengchenyu | zhengchenyu | +| [HDFS-15659](https://issues.apache.org/jira/browse/HDFS-15659) | Set dfs.namenode.redundancy.considerLoad to false in MiniDFSCluster | Major | test | Akira Ajisaka | Ahmed Hussein | +| [HADOOP-17840](https://issues.apache.org/jira/browse/HADOOP-17840) | Backport HADOOP-17837 to branch-3.2 | Minor | . | Bryan Beaudreault | Bryan Beaudreault | +| [HADOOP-17126](https://issues.apache.org/jira/browse/HADOOP-17126) | implement non-guava Precondition checkNotNull | Major | . | Ahmed Hussein | Ahmed Hussein | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15870](https://issues.apache.org/jira/browse/HDFS-15870) | Remove unused configuration dfs.namenode.stripe.min | Minor | . | tomscut | tomscut | +| [HDFS-15808](https://issues.apache.org/jira/browse/HDFS-15808) | Add metrics for FSNamesystem read/write lock hold long time | Major | hdfs | tomscut | tomscut | +| [HDFS-15873](https://issues.apache.org/jira/browse/HDFS-15873) | Add namenode address in logs for block report | Minor | datanode, hdfs | tomscut | tomscut | +| [HDFS-15906](https://issues.apache.org/jira/browse/HDFS-15906) | Close FSImage and FSNamesystem after formatting is complete | Minor | . 
| tomscut | tomscut | +| [HDFS-15892](https://issues.apache.org/jira/browse/HDFS-15892) | Add metric for editPendingQ in FSEditLogAsync | Minor | . | tomscut | tomscut | +| [HDFS-16078](https://issues.apache.org/jira/browse/HDFS-16078) | Remove unused parameters for DatanodeManager.handleLifeline() | Minor | . | tomscut | tomscut | +| [YARN-10278](https://issues.apache.org/jira/browse/YARN-10278) | CapacityScheduler test framework ProportionalCapacityPreemptionPolicyMockFramework need some review | Major | . | Gergely Pollák | Szilard Nemeth | +| [HDFS-15731](https://issues.apache.org/jira/browse/HDFS-15731) | Reduce threadCount for unit tests to reduce the memory usage | Major | build, test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17571](https://issues.apache.org/jira/browse/HADOOP-17571) | Upgrade com.fasterxml.woodstox:woodstox-core for security reasons | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-15895](https://issues.apache.org/jira/browse/HDFS-15895) | DFSAdmin#printOpenFiles has redundant String#format usage | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17614](https://issues.apache.org/jira/browse/HADOOP-17614) | Bump netty to the latest 4.1.61 | Blocker | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-17627](https://issues.apache.org/jira/browse/HADOOP-17627) | Backport to branch-3.2 HADOOP-17371, HADOOP-17621, HADOOP-17625 to update Jetty to 9.4.39 | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15989](https://issues.apache.org/jira/browse/HDFS-15989) | Split TestBalancer into two classes | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17808](https://issues.apache.org/jira/browse/HADOOP-17808) | ipc.Client not setting interrupt flag after catching InterruptedException | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17834](https://issues.apache.org/jira/browse/HADOOP-17834) | Bump aliyun-sdk-oss to 3.13.0 | Major | . 
| Siyao Meng | Siyao Meng | +| [HADOOP-17955](https://issues.apache.org/jira/browse/HADOOP-17955) | Bump netty to the latest 4.1.68 | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-18061](https://issues.apache.org/jira/browse/HADOOP-18061) | Update the year to 2022 | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-18125](https://issues.apache.org/jira/browse/HADOOP-18125) | Utility to identify git commit / Jira fixVersion discrepancies for RC preparation | Major | . | Viraj Jasani | Viraj Jasani | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.3/RELEASENOTES.3.2.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.3/RELEASENOTES.3.2.3.md new file mode 100644 index 0000000000000..5c53bb4cb876b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.3/RELEASENOTES.3.2.3.md @@ -0,0 +1,71 @@ + + +# Apache Hadoop 3.2.3 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [YARN-10036](https://issues.apache.org/jira/browse/YARN-10036) | *Major* | **Install yarnpkg and upgrade nodejs in Dockerfile** + +In the Dockerfile, nodejs is upgraded to 8.17.0 and yarn 1.12.1 is installed. + + +--- + +* [HADOOP-16054](https://issues.apache.org/jira/browse/HADOOP-16054) | *Major* | **Update Dockerfile to use Bionic** + +The build image has been upgraded to Bionic. + + +--- + +* [HDFS-15719](https://issues.apache.org/jira/browse/HDFS-15719) | *Critical* | **[Hadoop 3] Both NameNodes can crash simultaneously due to the short JN socket timeout** + +The default value of the configuration hadoop.http.idle\_timeout.ms (how long Jetty waits before disconnecting an idle connection) is changed from 10000 to 60000. +This property is inlined during compile time, so an application that references this property must be recompiled in order for it to take effect. 
+ + +--- + +* [HADOOP-16748](https://issues.apache.org/jira/browse/HADOOP-16748) | *Major* | **Migrate to Python 3 and upgrade Yetus to 0.13.0** + + +- Upgraded Yetus to 0.13.0. +- Removed determine-flaky-tests-hadoop.py. +- Temporarily disabled shelldocs check in the Jenkins jobs due to YETUS-1099. + + +--- + +* [HADOOP-16870](https://issues.apache.org/jira/browse/HADOOP-16870) | *Major* | **Use spotbugs-maven-plugin instead of findbugs-maven-plugin** + +Removed findbugs from the hadoop build images and added spotbugs instead. +Upgraded SpotBugs to 4.2.2 and spotbugs-maven-plugin to 4.2.0. + + +--- + +* [HDFS-15942](https://issues.apache.org/jira/browse/HDFS-15942) | *Major* | **Increase Quota initialization threads** + +The default quota initialization thread count during the NameNode startup process (dfs.namenode.quota.init-threads) is increased from 4 to 12. + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/CHANGELOG.3.2.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/CHANGELOG.3.2.4.md new file mode 100644 index 0000000000000..fc0079d1c9bd8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/CHANGELOG.3.2.4.md @@ -0,0 +1,213 @@ + + +# Apache Hadoop Changelog + +## Release 3.2.4 - 2022-07-12 + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-16337](https://issues.apache.org/jira/browse/HDFS-16337) | Show start time of Datanode on Web | Minor | . | Tao Li | Tao Li | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15075](https://issues.apache.org/jira/browse/HDFS-15075) | Remove process command timing from BPServiceActor | Major | . 
| Íñigo Goiri | Xiaoqiao He | +| [HDFS-15150](https://issues.apache.org/jira/browse/HDFS-15150) | Introduce read write lock to Datanode | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16175](https://issues.apache.org/jira/browse/HDFS-16175) | Improve the configurable value of Server #PURGE\_INTERVAL\_NANOS | Major | ipc | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16173](https://issues.apache.org/jira/browse/HDFS-16173) | Improve CopyCommands#Put#executor queue configurability | Major | fs | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17897](https://issues.apache.org/jira/browse/HADOOP-17897) | Allow nested blocks in switch case in checkstyle settings | Minor | build | Masatake Iwasaki | Masatake Iwasaki | +| [HADOOP-17857](https://issues.apache.org/jira/browse/HADOOP-17857) | Check real user ACLs in addition to proxied user ACLs | Major | . | Eric Payne | Eric Payne | +| [HDFS-14997](https://issues.apache.org/jira/browse/HDFS-14997) | BPServiceActor processes commands from NameNode asynchronously | Major | datanode | Xiaoqiao He | Xiaoqiao He | +| [HADOOP-17926](https://issues.apache.org/jira/browse/HADOOP-17926) | Maven-eclipse-plugin is no longer needed since Eclipse can import Maven projects by itself. | Minor | documentation | Rintaro Ikeda | Rintaro Ikeda | +| [YARN-10935](https://issues.apache.org/jira/browse/YARN-10935) | AM Total Queue Limit goes below per-user AM Limit if parent is full. | Major | capacity scheduler, capacityscheduler | Eric Payne | Eric Payne | +| [HDFS-16241](https://issues.apache.org/jira/browse/HDFS-16241) | Standby close reconstruction thread | Major | . | zhanghuazong | zhanghuazong | +| [YARN-1115](https://issues.apache.org/jira/browse/YARN-1115) | Provide optional means for a scheduler to check real user ACLs | Major | capacity scheduler, scheduler | Eric Payne | | +| [HDFS-16279](https://issues.apache.org/jira/browse/HDFS-16279) | Print detail datanode info when process first storage report | Minor | . 
| Tao Li | Tao Li | +| [HDFS-16294](https://issues.apache.org/jira/browse/HDFS-16294) | Remove invalid DataNode#CONFIG\_PROPERTY\_SIMULATED | Major | datanode | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16299](https://issues.apache.org/jira/browse/HDFS-16299) | Fix bug for TestDataNodeVolumeMetrics#verifyDataNodeVolumeMetrics | Minor | . | Tao Li | Tao Li | +| [HDFS-16301](https://issues.apache.org/jira/browse/HDFS-16301) | Improve BenchmarkThroughput#SIZE naming standardization | Minor | benchmarks, test | JiangHua Zhu | JiangHua Zhu | +| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori | +| [HDFS-16315](https://issues.apache.org/jira/browse/HDFS-16315) | Add metrics related to Transfer and NativeCopy for DataNode | Major | . | Tao Li | Tao Li | +| [HADOOP-17998](https://issues.apache.org/jira/browse/HADOOP-17998) | Allow get command to run with multi threads. | Major | fs | Chengwei Wang | Chengwei Wang | +| [HDFS-16345](https://issues.apache.org/jira/browse/HDFS-16345) | Fix test cases fail in TestBlockStoragePolicy | Major | build | guophilipse | guophilipse | +| [HADOOP-18035](https://issues.apache.org/jira/browse/HADOOP-18035) | Skip unit test failures to run all the unit tests | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-18040](https://issues.apache.org/jira/browse/HADOOP-18040) | Use maven.test.failure.ignore instead of ignoreTestFailure | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . 
| qinyuren | qinyuren | +| [HDFS-16386](https://issues.apache.org/jira/browse/HDFS-16386) | Reduce DataNode load when FsDatasetAsyncDiskService is working | Major | datanode | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16391](https://issues.apache.org/jira/browse/HDFS-16391) | Avoid evaluation of LOG.debug statement in NameNodeHeartbeatService | Trivial | . | wangzhaohui | wangzhaohui | +| [YARN-8234](https://issues.apache.org/jira/browse/YARN-8234) | Improve RM system metrics publisher's performance by pushing events to timeline server in batch | Critical | resourcemanager, timelineserver | Hu Ziqian | Ashutosh Gupta | +| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin | +| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin | +| [HADOOP-18136](https://issues.apache.org/jira/browse/HADOOP-18136) | Verify FileUtils.unTar() handling of missing .tar files | Minor | test, util | Steve Loughran | Steve Loughran | +| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | wangzhaohui | wangzhaohui | +| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li | +| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [MAPREDUCE-7373](https://issues.apache.org/jira/browse/MAPREDUCE-7373) | Building MapReduce 
NativeTask fails on Fedora 34+ | Major | build, nativetask | Kengo Seki | Kengo Seki | +| [HDFS-16355](https://issues.apache.org/jira/browse/HDFS-16355) | Improve the description of dfs.block.scanner.volume.bytes.per.second | Minor | documentation, hdfs | guophilipse | guophilipse | +| [HADOOP-18088](https://issues.apache.org/jira/browse/HADOOP-18088) | Replace log4j 1.x with reload4j | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16501](https://issues.apache.org/jira/browse/HDFS-16501) | Print the exception when reporting a bad block | Major | datanode | qinyuren | qinyuren | +| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles | +| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta | +| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . | Wei-Chiu Chuang | Ashutosh Gupta | +| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13983](https://issues.apache.org/jira/browse/HDFS-13983) | TestOfflineImageViewer crashes in windows | Major | . 
| Vinayakumar B | Vinayakumar B | +| [YARN-9744](https://issues.apache.org/jira/browse/YARN-9744) | RollingLevelDBTimelineStore.getEntityByTime fails with NPE | Major | timelineserver | Prabhu Joseph | Prabhu Joseph | +| [HDFS-15113](https://issues.apache.org/jira/browse/HDFS-15113) | Missing IBR when NameNode restart if open processCommand async feature | Blocker | datanode | Xiaoqiao He | Xiaoqiao He | +| [HADOOP-16985](https://issues.apache.org/jira/browse/HADOOP-16985) | Handle release package related issues | Major | . | Vinayakumar B | Vinayakumar B | +| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru | +| [HDFS-15651](https://issues.apache.org/jira/browse/HDFS-15651) | Client could not obtain block when DN CommandProcessingThread exit | Major | . | Yiqun Lin | Mingxiang Li | +| [HDFS-15963](https://issues.apache.org/jira/browse/HDFS-15963) | Unreleased volume references cause an infinite loop | Critical | datanode | Shuyan Zhang | Shuyan Zhang | +| [HDFS-14575](https://issues.apache.org/jira/browse/HDFS-14575) | LeaseRenewer#daemon threads leak in DFSClient | Major | . | Tao Yang | Renukaprasad C | +| [HADOOP-17796](https://issues.apache.org/jira/browse/HADOOP-17796) | Upgrade jetty version to 9.4.43 | Major | . | Wei-Chiu Chuang | Renukaprasad C | +| [HDFS-15175](https://issues.apache.org/jira/browse/HDFS-15175) | Multiple CloseOp shared block instance causes the standby namenode to crash when rolling editlog | Critical | . | Yicong Cai | Wan Chang | +| [HDFS-16177](https://issues.apache.org/jira/browse/HDFS-16177) | Bug fix for Util#receiveFile | Minor | . | Tao Li | Tao Li | +| [YARN-10814](https://issues.apache.org/jira/browse/YARN-10814) | YARN shouldn't start with empty hadoop.http.authentication.signature.secret.file | Major | . 
| Benjamin Teke | Tamas Domok | +| [HADOOP-17874](https://issues.apache.org/jira/browse/HADOOP-17874) | ExceptionsHandler to add terse/suppressed Exceptions in thread-safe manner | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-15129](https://issues.apache.org/jira/browse/HADOOP-15129) | Datanode caches namenode DNS lookup failure and cannot startup | Minor | ipc | Karthik Palaniappan | Chris Nauroth | +| [YARN-10901](https://issues.apache.org/jira/browse/YARN-10901) | Permission checking error on an existing directory in LogAggregationFileController#verifyAndCreateRemoteLogDir | Major | nodemanager | Tamas Domok | Tamas Domok | +| [HDFS-16207](https://issues.apache.org/jira/browse/HDFS-16207) | Remove NN logs stack trace for non-existent xattr query | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-16187](https://issues.apache.org/jira/browse/HDFS-16187) | SnapshotDiff behaviour with Xattrs and Acls is not consistent across NN restarts with checkpointing | Major | snapshots | Srinivasu Majeti | Shashikant Banerjee | +| [HDFS-16198](https://issues.apache.org/jira/browse/HDFS-16198) | Short circuit read leaks Slot objects when InvalidToken exception is thrown | Major | . | Eungsop Yoo | Eungsop Yoo | +| [YARN-10870](https://issues.apache.org/jira/browse/YARN-10870) | Missing user filtering check -\> yarn.webapp.filter-entity-list-by-user for RM Scheduler page | Major | yarn | Siddharth Ahuja | Gergely Pollák | +| [HADOOP-17919](https://issues.apache.org/jira/browse/HADOOP-17919) | Fix command line example in Hadoop Cluster Setup documentation | Minor | documentation | Rintaro Ikeda | Rintaro Ikeda | +| [HDFS-16235](https://issues.apache.org/jira/browse/HDFS-16235) | Deadlock in LeaseRenewer for static remove method | Major | hdfs | angerszhu | angerszhu | +| [HDFS-16181](https://issues.apache.org/jira/browse/HDFS-16181) | [SBN Read] Fix metric of RpcRequestCacheMissAmount can't display when tailEditLog form JN | Critical | . 
| wangzhaohui | wangzhaohui | +| [HADOOP-17925](https://issues.apache.org/jira/browse/HADOOP-17925) | BUILDING.txt should not encourage to activate docs profile on building binary artifacts | Minor | documentation | Rintaro Ikeda | Masatake Iwasaki | +| [HADOOP-16532](https://issues.apache.org/jira/browse/HADOOP-16532) | Fix TestViewFsTrash to use the correct homeDir. | Minor | test, viewfs | Steve Loughran | Xing Lin | +| [HDFS-16268](https://issues.apache.org/jira/browse/HDFS-16268) | Balancer stuck when moving striped blocks due to NPE | Major | balancer & mover, erasure-coding | Leon Gao | Leon Gao | +| [HDFS-7612](https://issues.apache.org/jira/browse/HDFS-7612) | TestOfflineEditsViewer.testStored() uses incorrect default value for cacheDir | Major | test | Konstantin Shvachko | Michael Kuchenbecker | +| [HDFS-16311](https://issues.apache.org/jira/browse/HDFS-16311) | Metric metadataOperationRate calculation error in DataNodeVolumeMetrics | Major | . | Tao Li | Tao Li | +| [HDFS-16182](https://issues.apache.org/jira/browse/HDFS-16182) | numOfReplicas is given the wrong value in BlockPlacementPolicyDefault$chooseTarget can cause DataStreamer to fail with Heterogeneous Storage | Major | namanode | Max Xie | Max Xie | +| [HADOOP-17999](https://issues.apache.org/jira/browse/HADOOP-17999) | No-op implementation of setWriteChecksum and setVerifyChecksum in ViewFileSystem | Major | . | Abhishek Das | Abhishek Das | +| [HDFS-16329](https://issues.apache.org/jira/browse/HDFS-16329) | Fix log format for BlockManager | Minor | . | Tao Li | Tao Li | +| [HDFS-16330](https://issues.apache.org/jira/browse/HDFS-16330) | Fix incorrect placeholder for Exception logs in DiskBalancer | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-16328](https://issues.apache.org/jira/browse/HDFS-16328) | Correct disk balancer param desc | Minor | documentation, hdfs | guophilipse | guophilipse | +| [HDFS-16343](https://issues.apache.org/jira/browse/HDFS-16343) | Add some debug logs when the dfsUsed are not used during Datanode startup | Major | datanode | Mukul Kumar Singh | Mukul Kumar Singh | +| [YARN-10991](https://issues.apache.org/jira/browse/YARN-10991) | Fix to ignore the grouping "[]" for resourcesStr in parseResourcesString method | Minor | distributed-shell | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-17975](https://issues.apache.org/jira/browse/HADOOP-17975) | Fallback to simple auth does not work for a secondary DistributedFileSystem instance | Major | ipc | István Fajth | István Fajth | +| [HDFS-16350](https://issues.apache.org/jira/browse/HDFS-16350) | Datanode start time should be set after RPC server starts successfully | Minor | . | Viraj Jasani | Viraj Jasani | +| [YARN-11007](https://issues.apache.org/jira/browse/YARN-11007) | Correct words in YARN documents | Minor | documentation | guophilipse | guophilipse | +| [HDFS-16332](https://issues.apache.org/jira/browse/HDFS-16332) | Expired block token causes slow read due to missing handling in sasl handshake | Major | datanode, dfs, dfsclient | Shinya Yoshida | Shinya Yoshida | +| [YARN-9063](https://issues.apache.org/jira/browse/YARN-9063) | ATS 1.5 fails to start if RollingLevelDb files are corrupt or missing | Major | timelineserver, timelineservice | Tarun Parimi | Ashutosh Gupta | +| [HDFS-16333](https://issues.apache.org/jira/browse/HDFS-16333) | fix balancer bug when transfer an EC block | Major | balancer & mover, erasure-coding | qinyuren | qinyuren | +| [HDFS-16373](https://issues.apache.org/jira/browse/HDFS-16373) | Fix MiniDFSCluster restart in case of multiple namenodes | Major | . 
| Ayush Saxena | Ayush Saxena | +| [HDFS-16377](https://issues.apache.org/jira/browse/HDFS-16377) | Should CheckNotNull before access FsDatasetSpi | Major | . | Tao Li | Tao Li | +| [YARN-6862](https://issues.apache.org/jira/browse/YARN-6862) | Nodemanager resource usage metrics sometimes are negative | Major | nodemanager | YunFan Zhou | Benjamin Teke | +| [YARN-10178](https://issues.apache.org/jira/browse/YARN-10178) | Global Scheduler async thread crash caused by 'Comparison method violates its general contract | Major | capacity scheduler | tuyu | Andras Gyori | +| [HDFS-16395](https://issues.apache.org/jira/browse/HDFS-16395) | Remove useless NNThroughputBenchmark#dummyActionNoSynch() | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18063](https://issues.apache.org/jira/browse/HADOOP-18063) | Remove unused import AbstractJavaKeyStoreProvider in Shell class | Minor | . | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16409](https://issues.apache.org/jira/browse/HDFS-16409) | Fix typo: testHasExeceptionsReturnsCorrectValue -\> testHasExceptionsReturnsCorrectValue | Trivial | . | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16408](https://issues.apache.org/jira/browse/HDFS-16408) | Ensure LeaseRecheckIntervalMs is greater than zero | Major | namenode | Jingxuan Fu | Jingxuan Fu | +| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov | +| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . 
| Kevin Wikant | Kevin Wikant | +| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant | +| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse | +| [HADOOP-18192](https://issues.apache.org/jira/browse/HADOOP-18192) | Fix multiple\_bindings warning about slf4j-reload4j | Major | . | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma | +| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin | +| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only on dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang | +| [HDFS-16437](https://issues.apache.org/jira/browse/HDFS-16437) | ReverseXML processor doesn't accept XML files without the SnapshotDiffSection. | Critical | hdfs | yanbin.zhang | yanbin.zhang | +| [HDFS-16507](https://issues.apache.org/jira/browse/HDFS-16507) | [SBN read] Avoid purging edit log which is in progress | Critical | . | Tao Li | Tao Li | +| [YARN-10720](https://issues.apache.org/jira/browse/YARN-10720) | YARN WebAppProxyServlet should support connection timeout to prevent proxy server from hanging | Critical | . 
| Qi Zhu | Qi Zhu | +| [HDFS-16428](https://issues.apache.org/jira/browse/HDFS-16428) | Source path with storagePolicy cause wrong typeConsumed while rename | Major | hdfs, namenode | lei w | lei w | +| [YARN-11014](https://issues.apache.org/jira/browse/YARN-11014) | YARN incorrectly validates maximum capacity resources on the validation API | Major | . | Benjamin Teke | Benjamin Teke | +| [YARN-11075](https://issues.apache.org/jira/browse/YARN-11075) | Explicitly declare serialVersionUID in LogMutation class | Major | . | Benjamin Teke | Benjamin Teke | +| [HDFS-11041](https://issues.apache.org/jira/browse/HDFS-11041) | Unable to unregister FsDatasetState MBean if DataNode is shutdown twice | Trivial | datanode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren | +| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren | +| [HDFS-16546](https://issues.apache.org/jira/browse/HDFS-16546) | Fix UT TestOfflineImageViewer#testReverseXmlWithoutSnapshotDiffSection to branch branch-3.2 | Major | test | daimin | daimin | +| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta | +| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. 
| Minor | documentation | N Sanketh Reddy | Ashutosh Gupta | +| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16579](https://issues.apache.org/jira/browse/HDFS-16579) | Fix build failure for TestBlockManager on branch-3.2 | Major | . | Tao Li | Tao Li | +| [YARN-11092](https://issues.apache.org/jira/browse/YARN-11092) | Upgrade jquery ui to 1.13.1 | Major | . | D M Murali Krishna Reddy | Ashutosh Gupta | +| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu | +| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke | +| [YARN-11126](https://issues.apache.org/jira/browse/YARN-11126) | ZKConfigurationStore Java deserialisation vulnerability | Major | yarn | Tamas Domok | Tamas Domok | +| [YARN-11162](https://issues.apache.org/jira/browse/YARN-11162) | Set the zk acl for nodes created by ZKConfigurationStore. 
| Major | resourcemanager | Owen O'Malley | Owen O'Malley | +| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack | +| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu | +| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant | +| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das | +| [HADOOP-18334](https://issues.apache.org/jira/browse/HADOOP-18334) | Fix create-release to address removal of GPG\_AGENT\_INFO in branch-3.2 | Major | build | Masatake Iwasaki | Masatake Iwasaki | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-7342](https://issues.apache.org/jira/browse/MAPREDUCE-7342) | Stop RMService in TestClientRedirect.testRedirect() | Minor | . | Zhengxi Li | Zhengxi Li | +| [MAPREDUCE-7311](https://issues.apache.org/jira/browse/MAPREDUCE-7311) | Fix non-idempotent test in TestTaskProgressReporter | Minor | . 
| Zhengxi Li | Zhengxi Li | +| [HDFS-15862](https://issues.apache.org/jira/browse/HDFS-15862) | Make TestViewfsWithNfs3.testNfsRenameSingleNN() idempotent | Minor | nfs | Zhengxi Li | Zhengxi Li | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15457](https://issues.apache.org/jira/browse/HDFS-15457) | TestFsDatasetImpl fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15818](https://issues.apache.org/jira/browse/HDFS-15818) | Fix TestFsDatasetImpl.testReadLockCanBeDisabledByConfig | Minor | test | Leon Gao | Leon Gao | +| [YARN-10503](https://issues.apache.org/jira/browse/YARN-10503) | Support queue capacity in terms of absolute resources with custom resourceType. | Critical | . | Qi Zhu | Qi Zhu | +| [HADOOP-17126](https://issues.apache.org/jira/browse/HADOOP-17126) | implement non-guava Precondition checkNotNull | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17929](https://issues.apache.org/jira/browse/HADOOP-17929) | implement non-guava Precondition checkArgument | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17947](https://issues.apache.org/jira/browse/HADOOP-17947) | Provide alternative to Guava VisibleForTesting | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17930](https://issues.apache.org/jira/browse/HADOOP-17930) | implement non-guava Precondition checkState | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17374](https://issues.apache.org/jira/browse/HADOOP-17374) | AliyunOSS: support ListObjectsV2 | Major | fs/oss | wujinhu | wujinhu | +| [HDFS-16336](https://issues.apache.org/jira/browse/HDFS-16336) | De-flake TestRollingUpgrade#testRollback | Minor | hdfs, test | Kevin Wikant | Viraj Jasani | +| [HDFS-16171](https://issues.apache.org/jira/browse/HDFS-16171) | De-flake testDecommissionStatus | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | secfree | +| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . | qinyuren | qinyuren | +| [HADOOP-16663](https://issues.apache.org/jira/browse/HADOOP-16663) | Backport "HADOOP-16560 [YARN] use protobuf-maven-plugin to generate protobuf classes" to all active branches | Major | . | Duo Zhang | Duo Zhang | +| [HADOOP-16664](https://issues.apache.org/jira/browse/HADOOP-16664) | Backport "HADOOP-16561 [MAPREDUCE] use protobuf-maven-plugin to generate protobuf classes" to all active branches | Major | . | Duo Zhang | Duo Zhang | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-16298](https://issues.apache.org/jira/browse/HDFS-16298) | Improve error msg for BlockMissingException | Minor | . | Tao Li | Tao Li | +| [HDFS-16312](https://issues.apache.org/jira/browse/HDFS-16312) | Fix typo for DataNodeVolumeMetrics and ProfilingFileIoEvents | Minor | . | Tao Li | Tao Li | +| [HDFS-16326](https://issues.apache.org/jira/browse/HDFS-16326) | Simplify the code for DiskBalancer | Minor | . | Tao Li | Tao Li | +| [HDFS-16339](https://issues.apache.org/jira/browse/HDFS-16339) | Show the threshold when mover threads quota is exceeded | Minor | . | Tao Li | Tao Li | +| [YARN-10820](https://issues.apache.org/jira/browse/YARN-10820) | Make GetClusterNodesRequestPBImpl thread safe | Major | client | Prabhu Joseph | SwathiChandrashekar | +| [HADOOP-13464](https://issues.apache.org/jira/browse/HADOOP-13464) | update GSON to 2.7+ | Minor | build | Sean Busbey | Igor Dvorzhak | +| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update Gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/RELEASENOTES.3.2.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/RELEASENOTES.3.2.4.md new file mode 100644 index 0000000000000..fac976d655da1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/RELEASENOTES.3.2.4.md @@ -0,0 +1,55 @@ + + +# Apache Hadoop 3.2.4 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [YARN-10820](https://issues.apache.org/jira/browse/YARN-10820) | *Major* | **Make GetClusterNodesRequestPBImpl thread safe** + +Added synchronization so that the "yarn node list" command does not fail intermittently. + + +--- + +* [YARN-8234](https://issues.apache.org/jira/browse/YARN-8234) | *Critical* | **Improve RM system metrics publisher's performance by pushing events to timeline server in batch** + +When Timeline Service V1 or V1.5 is used, if "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.enable-batch" is set to true, ResourceManager sends timeline events in batch. The default value is false. If this functionality is enabled, the maximum number of events published in a batch is configured by "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.batch-size". The default value is 1000. 
The interval of publishing events can be configured by "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.interval-seconds". By default, it is set to 60 seconds. + + +--- + +* [HADOOP-18088](https://issues.apache.org/jira/browse/HADOOP-18088) | *Major* | **Replace log4j 1.x with reload4j** + +log4j 1 was replaced with reload4j, which is a fork of log4j 1.2.17 with the goal of fixing pressing security issues. + +If your build depends on the hadoop artifacts and was explicitly excluding log4j artifacts, and you now want to exclude the reload4j files, you will need to update your exclusion lists: +\<exclusion\> + \<groupId\>org.slf4j\</groupId\> + \<artifactId\>slf4j-reload4j\</artifactId\> +\</exclusion\> +\<exclusion\> + \<groupId\>ch.qos.reload4j\</groupId\> + \<artifactId\>reload4j\</artifactId\> +\</exclusion\> + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.1/CHANGELOG.3.3.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.1/CHANGELOG.3.3.1.md new file mode 100644 index 0000000000000..210d7f0c320bc --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.1/CHANGELOG.3.3.1.md @@ -0,0 +1,750 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.1 - 2021-06-13 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17338](https://issues.apache.org/jira/browse/HADOOP-17338) | Intermittent S3AInputStream failures: Premature end of Content-Length delimited message body etc | Major | fs/s3 | Yongjun Zhang | Yongjun Zhang | +| [HDFS-15380](https://issues.apache.org/jira/browse/HDFS-15380) | RBF: Could not fetch real remote IP in RouterWebHdfsMethods | Major | webhdfs | tomscut | tomscut | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-16916](https://issues.apache.org/jira/browse/HADOOP-16916) | ABFS: Delegation SAS generator for integration with Ranger | Minor | fs/azure | Thomas Marqardt | Thomas Marqardt | +| 
[HDFS-13183](https://issues.apache.org/jira/browse/HDFS-13183) | Standby NameNode process getBlocks request to reduce Active load | Major | balancer & mover, namenode | Xiaoqiao He | Xiaoqiao He | +| [HADOOP-17076](https://issues.apache.org/jira/browse/HADOOP-17076) | ABFS: Delegation SAS Generator Updates | Minor | fs/azure | Thomas Marqardt | Thomas Marqardt | +| [HADOOP-15891](https://issues.apache.org/jira/browse/HADOOP-15891) | Provide Regex Based Mount Point In Inode Tree | Major | viewfs | zhenzhao wang | zhenzhao wang | +| [HADOOP-17125](https://issues.apache.org/jira/browse/HADOOP-17125) | Using snappy-java in SnappyCodec | Major | common | DB Tsai | L. C. Hsieh | +| [HADOOP-17292](https://issues.apache.org/jira/browse/HADOOP-17292) | Using lz4-java in Lz4Codec | Major | common | L. C. Hsieh | L. C. Hsieh | +| [HDFS-15711](https://issues.apache.org/jira/browse/HDFS-15711) | Add Metrics to HttpFS Server | Major | httpfs | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7315](https://issues.apache.org/jira/browse/MAPREDUCE-7315) | LocatedFileStatusFetcher to collect/publish IOStatistics | Minor | client | Steve Loughran | Steve Loughran | +| [HADOOP-16830](https://issues.apache.org/jira/browse/HADOOP-16830) | Add Public IOStatistics API | Major | fs, fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-15759](https://issues.apache.org/jira/browse/HDFS-15759) | EC: Verify EC reconstruction correctness on DataNode | Major | datanode, ec, erasure-coding | Toshihiko Uchida | Toshihiko Uchida | +| [HADOOP-16829](https://issues.apache.org/jira/browse/HADOOP-16829) | Über-jira: S3A Hadoop 3.3.1 features | Major | fs/s3 | Steve Loughran | Steve Loughran | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15245](https://issues.apache.org/jira/browse/HDFS-15245) | Improve JournalNode web UI | Major | journal-node, ui | Jianfei Jiang | Jianfei Jiang | +| 
[HADOOP-16952](https://issues.apache.org/jira/browse/HADOOP-16952) | Add .diff to gitignore | Minor | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-16954](https://issues.apache.org/jira/browse/HADOOP-16954) | Add -S option in "Count" command to show only Snapshot Counts | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15247](https://issues.apache.org/jira/browse/HDFS-15247) | RBF: Provide Non DFS Used per DataNode in DataNode UI | Major | . | Ayush Saxena | Lisheng Sun | +| [MAPREDUCE-7199](https://issues.apache.org/jira/browse/MAPREDUCE-7199) | HsJobsBlock reuse JobACLsManager for checkAccess | Minor | . | Bibin Chundatt | Bilwa S T | +| [HDFS-15295](https://issues.apache.org/jira/browse/HDFS-15295) | AvailableSpaceBlockPlacementPolicy should use chooseRandomWithStorageTypeTwoTrial() for better performance. | Minor | . | Jinglun | Jinglun | +| [HADOOP-16054](https://issues.apache.org/jira/browse/HADOOP-16054) | Update Dockerfile to use Bionic | Major | build, test | Akira Ajisaka | Akira Ajisaka | +| [YARN-10237](https://issues.apache.org/jira/browse/YARN-10237) | Add isAbsoluteResource config for queue in scheduler response | Minor | scheduler | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-16886](https://issues.apache.org/jira/browse/HADOOP-16886) | Add hadoop.http.idle\_timeout.ms to core-default.xml | Major | . | Wei-Chiu Chuang | Lisheng Sun | +| [HDFS-14283](https://issues.apache.org/jira/browse/HDFS-14283) | DFSInputStream to prefer cached replica | Major | . | Wei-Chiu Chuang | Lisheng Sun | +| [HDFS-15338](https://issues.apache.org/jira/browse/HDFS-15338) | listOpenFiles() should throw InvalidPathException in case of invalid paths | Minor | . | Jinglun | Jinglun | +| [YARN-10160](https://issues.apache.org/jira/browse/YARN-10160) | Add auto queue creation related configs to RMWebService#CapacitySchedulerQueueInfo | Major | . 
| Prabhu Joseph | Prabhu Joseph | +| [HDFS-15255](https://issues.apache.org/jira/browse/HDFS-15255) | Consider StorageType when DatanodeManager#sortLocatedBlock() | Major | . | Lisheng Sun | Lisheng Sun | +| [YARN-10260](https://issues.apache.org/jira/browse/YARN-10260) | Allow transitioning queue from DRAINING to RUNNING state | Major | . | Jonathan Hung | Bilwa S T | +| [HADOOP-17036](https://issues.apache.org/jira/browse/HADOOP-17036) | TestFTPFileSystem failing as ftp server dir already exists | Minor | fs, test | Steve Loughran | Mikhail Pryakhin | +| [HDFS-15356](https://issues.apache.org/jira/browse/HDFS-15356) | Unify configuration \`dfs.ha.allow.stale.reads\` to DFSConfigKeys | Major | hdfs | Xiaoqiao He | Xiaoqiao He | +| [HDFS-15358](https://issues.apache.org/jira/browse/HDFS-15358) | RBF: Unify router datanode UI with namenode datanode UI | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-17042](https://issues.apache.org/jira/browse/HADOOP-17042) | Hadoop distcp throws "ERROR: Tools helper ///usr/lib/hadoop/libexec/tools/hadoop-distcp.sh was not found" | Minor | tools/distcp | Aki Tanaka | Aki Tanaka | +| [HDFS-15202](https://issues.apache.org/jira/browse/HDFS-15202) | HDFS-client: boost ShortCircuit Cache | Minor | dfsclient | Danil Lipovoy | Danil Lipovoy | +| [HDFS-15207](https://issues.apache.org/jira/browse/HDFS-15207) | VolumeScanner skip to scan blocks accessed during recent scan peroid | Minor | datanode | Yang Yun | Yang Yun | +| [HDFS-14999](https://issues.apache.org/jira/browse/HDFS-14999) | Avoid Potential Infinite Loop in DFSNetworkTopology | Major | . 
| Ayush Saxena | Ayush Saxena | +| [HDFS-13639](https://issues.apache.org/jira/browse/HDFS-13639) | SlotReleaser is not fast enough | Major | hdfs-client | Gang Xie | Lisheng Sun | +| [HDFS-15369](https://issues.apache.org/jira/browse/HDFS-15369) | Refactor method VolumeScanner#runLoop() | Minor | datanode | Yang Yun | Yang Yun | +| [HADOOP-14698](https://issues.apache.org/jira/browse/HADOOP-14698) | Make copyFromLocal's -t option available for put as well | Major | . | Andras Bokor | Andras Bokor | +| [HDFS-10792](https://issues.apache.org/jira/browse/HDFS-10792) | RedundantEditLogInputStream should log caught exceptions | Minor | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-6492](https://issues.apache.org/jira/browse/YARN-6492) | Generate queue metrics for each partition | Major | capacity scheduler | Jonathan Hung | Manikandan R | +| [HADOOP-17016](https://issues.apache.org/jira/browse/HADOOP-17016) | Adding Common Counters in ABFS | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-16828](https://issues.apache.org/jira/browse/HADOOP-16828) | Zookeeper Delegation Token Manager fetch sequence number by batch | Major | . | Fengnan Li | Fengnan Li | +| [HADOOP-14566](https://issues.apache.org/jira/browse/HADOOP-14566) | Add seek support for SFTP FileSystem | Minor | fs | Azhagu Selvan SP | Mikhail Pryakhin | +| [HADOOP-17047](https://issues.apache.org/jira/browse/HADOOP-17047) | TODO comments exist in trunk while the related issues are already fixed. | Trivial | . | Rungroj Maipradit | Rungroj Maipradit | +| [HADOOP-17020](https://issues.apache.org/jira/browse/HADOOP-17020) | Improve RawFileSystem Performance | Minor | fs | Rajesh Balamohan | Mehakmeet Singh | +| [HDFS-15406](https://issues.apache.org/jira/browse/HDFS-15406) | Improve the speed of Datanode Block Scan | Major | . 
| Hemanth Boyina | Hemanth Boyina | +| [HADOOP-17090](https://issues.apache.org/jira/browse/HADOOP-17090) | Increase precommit job timeout from 5 hours to 20 hours | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17084](https://issues.apache.org/jira/browse/HADOOP-17084) | Update Dockerfile\_aarch64 to use Bionic | Major | build, test | RuiChen | zhaorenhai | +| [YARN-8047](https://issues.apache.org/jira/browse/YARN-8047) | RMWebApp make external class pluggable | Minor | . | Bibin Chundatt | Bilwa S T | +| [YARN-10297](https://issues.apache.org/jira/browse/YARN-10297) | TestContinuousScheduling#testFairSchedulerContinuousSchedulingInitTime fails intermittently | Major | . | Jonathan Hung | Jim Brennan | +| [HADOOP-17127](https://issues.apache.org/jira/browse/HADOOP-17127) | Use RpcMetrics.TIMEUNIT to initialize rpc queueTime and processingTime | Minor | common | Jim Brennan | Jim Brennan | +| [HDFS-15404](https://issues.apache.org/jira/browse/HDFS-15404) | ShellCommandFencer should expose info about source | Major | . | Chen Liang | Chen Liang | +| [HADOOP-17147](https://issues.apache.org/jira/browse/HADOOP-17147) | Dead link in hadoop-kms/index.md.vm | Minor | documentation, kms | Akira Ajisaka | Xieming Li | +| [HADOOP-17113](https://issues.apache.org/jira/browse/HADOOP-17113) | Adding ReadAhead Counters in ABFS | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [YARN-10343](https://issues.apache.org/jira/browse/YARN-10343) | Legacy RM UI should include labeled metrics for allocated, total, and reserved resources. | Major | . | Eric Payne | Eric Payne | +| [YARN-1529](https://issues.apache.org/jira/browse/YARN-1529) | Add Localization overhead metrics to NM | Major | nodemanager | Gera Shegalov | Jim Brennan | +| [YARN-10361](https://issues.apache.org/jira/browse/YARN-10361) | Make custom DAO classes configurable into RMWebApp#JAXBContextResolver | Major | . 
| Prabhu Joseph | Bilwa S T | +| [YARN-10251](https://issues.apache.org/jira/browse/YARN-10251) | Show extended resources on legacy RM UI. | Major | . | Eric Payne | Eric Payne | +| [HDFS-15493](https://issues.apache.org/jira/browse/HDFS-15493) | Update block map and name cache in parallel while loading fsimage. | Major | namenode | Chengwei Wang | Chengwei Wang | +| [HADOOP-17057](https://issues.apache.org/jira/browse/HADOOP-17057) | ABFS driver enhancement - Allow customizable translation from AAD SPNs and security groups to Linux user and group | Major | fs/azure | Karthik Amarnath | Karthik Amarnath | +| [HADOOP-17194](https://issues.apache.org/jira/browse/HADOOP-17194) | Adding Context class for AbfsClient to pass AbfsConfigurations to limit number of parameters | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17065](https://issues.apache.org/jira/browse/HADOOP-17065) | Adding Network Counters in ABFS | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17159](https://issues.apache.org/jira/browse/HADOOP-17159) | Make UGI support forceful relogin from keytab ignoring the last login time | Major | security | Sandeep Guggilam | Sandeep Guggilam | +| [YARN-10407](https://issues.apache.org/jira/browse/YARN-10407) | Add phantomjsdriver.log to gitignore | Minor | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [YARN-10353](https://issues.apache.org/jira/browse/YARN-10353) | Log vcores used and cumulative cpu in containers monitor | Minor | yarn | Jim Brennan | Jim Brennan | +| [YARN-10369](https://issues.apache.org/jira/browse/YARN-10369) | Make NMTokenSecretManagerInRM sending NMToken for nodeId DEBUG | Minor | yarn | Jim Brennan | Jim Brennan | +| [YARN-10390](https://issues.apache.org/jira/browse/YARN-10390) | LeafQueue: retain user limits cache across assignContainers() calls | Major | capacity scheduler, capacityscheduler | Muhammad Samir Khan | Muhammad Samir Khan | +| [HDFS-15574](https://issues.apache.org/jira/browse/HDFS-15574) | Remove unnecessary sort of block list in DirectoryScanner | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17270](https://issues.apache.org/jira/browse/HADOOP-17270) | Fix testCompressorDecompressorWithExeedBufferLimit to cover the intended scenario | Minor | test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15581](https://issues.apache.org/jira/browse/HDFS-15581) | Access Controlled HTTPFS Proxy | Minor | httpfs | Richard | Richard | +| [HADOOP-17283](https://issues.apache.org/jira/browse/HADOOP-17283) | Hadoop - Upgrade to JQuery 3.5.1 | Major | . | Aryan Gupta | Aryan Gupta | +| [HADOOP-17267](https://issues.apache.org/jira/browse/HADOOP-17267) | Add debug-level logs in Filesystem#close | Minor | fs | Karen Coppage | Karen Coppage | +| [HADOOP-17284](https://issues.apache.org/jira/browse/HADOOP-17284) | Support BCFKS keystores for Hadoop Credential Provider | Major | . | Xiaoyu Yao | Xiaoyu Yao | +| [HDFS-15415](https://issues.apache.org/jira/browse/HDFS-15415) | Reduce locking in Datanode DirectoryScanner | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [YARN-10451](https://issues.apache.org/jira/browse/YARN-10451) | RM (v1) UI NodesPage can NPE when yarn.io/gpu resource type is defined. | Major | . 
| Eric Payne | Eric Payne | +| [HADOOP-17021](https://issues.apache.org/jira/browse/HADOOP-17021) | Add concat fs command | Minor | fs | Jinglun | Jinglun | +| [MAPREDUCE-7301](https://issues.apache.org/jira/browse/MAPREDUCE-7301) | Expose Mini MR Cluster attribute for testing | Minor | test | Swaroopa Kadam | Swaroopa Kadam | +| [HDFS-15567](https://issues.apache.org/jira/browse/HDFS-15567) | [SBN Read] HDFS should expose msync() API to allow downstream applications call it explicitly. | Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-15633](https://issues.apache.org/jira/browse/HDFS-15633) | Avoid redundant RPC calls for getDiskStatus | Major | dfsclient | Ayush Saxena | Ayush Saxena | +| [YARN-10450](https://issues.apache.org/jira/browse/YARN-10450) | Add cpu and memory utilization per node and cluster-wide metrics | Minor | yarn | Jim Brennan | Jim Brennan | +| [HADOOP-17302](https://issues.apache.org/jira/browse/HADOOP-17302) | Upgrade to jQuery 3.5.1 in hadoop-sls | Major | . 
| Aryan Gupta | Aryan Gupta | +| [HDFS-15652](https://issues.apache.org/jira/browse/HDFS-15652) | Make block size from NNThroughputBenchmark configurable | Minor | benchmarks | Hui Fei | Hui Fei | +| [YARN-10475](https://issues.apache.org/jira/browse/YARN-10475) | Scale RM-NM heartbeat interval based on node utilization | Minor | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15665](https://issues.apache.org/jira/browse/HDFS-15665) | Balancer logging improvement | Major | balancer & mover | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-17342](https://issues.apache.org/jira/browse/HADOOP-17342) | Creating a token identifier should not do kerberos name resolution | Major | common | Jim Brennan | Jim Brennan | +| [YARN-10479](https://issues.apache.org/jira/browse/YARN-10479) | RMProxy should retry on SocketTimeout Exceptions | Major | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15623](https://issues.apache.org/jira/browse/HDFS-15623) | Respect configured values of rpc.engine | Major | hdfs | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HADOOP-17369](https://issues.apache.org/jira/browse/HADOOP-17369) | Bump up snappy-java to 1.1.8.1 | Minor | . | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10480](https://issues.apache.org/jira/browse/YARN-10480) | replace href tags with ng-href | Trivial | . 
| Gabriel Medeiros Coelho | Gabriel Medeiros Coelho | +| [HADOOP-17367](https://issues.apache.org/jira/browse/HADOOP-17367) | Add InetAddress api to ProxyUsers.authorize | Major | performance, security | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7304](https://issues.apache.org/jira/browse/MAPREDUCE-7304) | Enhance the map-reduce Job end notifier to be able to notify the given URL via a custom class | Major | mrv2 | Daniel Fritsi | Zoltán Erdmann | +| [MAPREDUCE-7309](https://issues.apache.org/jira/browse/MAPREDUCE-7309) | Improve performance of reading resource request for mapper/reducers from config | Major | applicationmaster | Wangda Tan | Peter Bacsko | +| [HDFS-15694](https://issues.apache.org/jira/browse/HDFS-15694) | Avoid calling UpdateHeartBeatState inside DataNodeDescriptor | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15703](https://issues.apache.org/jira/browse/HDFS-15703) | Don't generate edits for set operations that are no-op | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17392](https://issues.apache.org/jira/browse/HADOOP-17392) | Remote exception messages should not include the exception class | Major | ipc | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15706](https://issues.apache.org/jira/browse/HDFS-15706) | HttpFS: Log more information on request failures | Major | httpfs | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17389](https://issues.apache.org/jira/browse/HADOOP-17389) | KMS should log full UGI principal | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17425](https://issues.apache.org/jira/browse/HADOOP-17425) | Bump up snappy-java to 1.1.8.2 | Minor | . | L. C. Hsieh | L. C. 
Hsieh | +| [HDFS-15720](https://issues.apache.org/jira/browse/HDFS-15720) | namenode audit async logger should add some log4j config | Minor | hdfs | Max Xie | | +| [HDFS-15717](https://issues.apache.org/jira/browse/HDFS-15717) | Improve fsck logging | Major | logging, namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15704](https://issues.apache.org/jira/browse/HDFS-15704) | Mitigate lease monitor's rapid infinite loop | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15569](https://issues.apache.org/jira/browse/HDFS-15569) | Speed up the Storage#doRecover during datanode rolling upgrade | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15751](https://issues.apache.org/jira/browse/HDFS-15751) | Add documentation for msync() API to filesystem.md | Major | documentation | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-17454](https://issues.apache.org/jira/browse/HADOOP-17454) | [s3a] Disable bucket existence check - set fs.s3a.bucket.probe to 0 | Major | . | Gabor Bota | Gabor Bota | +| [YARN-10538](https://issues.apache.org/jira/browse/YARN-10538) | Add recommissioning nodes to the list of updated nodes returned to the AM | Major | . | Srinivas S T | Srinivas S T | +| [YARN-10541](https://issues.apache.org/jira/browse/YARN-10541) | capture the performance metrics of ZKRMStateStore | Minor | resourcemanager | Max Xie | Max Xie | +| [HADOOP-17408](https://issues.apache.org/jira/browse/HADOOP-17408) | Optimize NetworkTopology while sorting of block locations | Major | common, net | Ahmed Hussein | Ahmed Hussein | +| [YARN-4589](https://issues.apache.org/jira/browse/YARN-4589) | Diagnostics for localization timeouts is lacking | Major | . 
| Chang Li | Chang Li | +| [YARN-10562](https://issues.apache.org/jira/browse/YARN-10562) | Follow up changes for YARN-9833 | Major | yarn | Jim Brennan | Jim Brennan | +| [HDFS-15783](https://issues.apache.org/jira/browse/HDFS-15783) | Speed up BlockPlacementPolicyRackFaultTolerant#verifyBlockPlacement | Major | block placement | Akira Ajisaka | Akira Ajisaka | +| [YARN-10519](https://issues.apache.org/jira/browse/YARN-10519) | Refactor QueueMetricsForCustomResources class to move to yarn-common package | Major | . | Minni Mittal | Minni Mittal | +| [HADOOP-17484](https://issues.apache.org/jira/browse/HADOOP-17484) | Typo in hadop-aws index.md | Trivial | documentation, fs/s3 | Maksim | Maksim | +| [HADOOP-17478](https://issues.apache.org/jira/browse/HADOOP-17478) | Improve the description of hadoop.http.authentication.signature.secret.file | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [MAPREDUCE-7317](https://issues.apache.org/jira/browse/MAPREDUCE-7317) | Add latency information in FileOutputCommitter.mergePaths | Minor | client | Jungtaek Lim | Jungtaek Lim | +| [HDFS-15789](https://issues.apache.org/jira/browse/HDFS-15789) | Lease renewal does not require namesystem lock | Major | hdfs | Jim Brennan | Jim Brennan | +| [HADOOP-17501](https://issues.apache.org/jira/browse/HADOOP-17501) | Fix logging typo in ShutdownHookManager | Major | common | Konstantin Shvachko | Fengnan Li | +| [HADOOP-17354](https://issues.apache.org/jira/browse/HADOOP-17354) | Move Jenkinsfile outside of the root directory | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17508](https://issues.apache.org/jira/browse/HADOOP-17508) | Simplify dependency installation instructions | Trivial | documentation | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-17509](https://issues.apache.org/jira/browse/HADOOP-17509) | Parallelize building of dependencies | Minor | build | Gautham Banasandra | Gautham Banasandra | +| 
[HDFS-15799](https://issues.apache.org/jira/browse/HDFS-15799) | Make DisallowedDatanodeException terse | Minor | hdfs | Richard | Richard | +| [HDFS-15813](https://issues.apache.org/jira/browse/HDFS-15813) | DataStreamer: keep sending heartbeat packets while streaming | Major | hdfs | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7319](https://issues.apache.org/jira/browse/MAPREDUCE-7319) | Log list of mappers at trace level in ShuffleHandler audit log | Minor | yarn | Jim Brennan | Jim Brennan | +| [HADOOP-14402](https://issues.apache.org/jira/browse/HADOOP-14402) | roll out StreamCapabilities across output streams of all filesystems | Major | fs, fs/adl, fs/azure, fs/oss, fs/s3, fs/swift | Steve Loughran | Steve Loughran | +| [HDFS-15821](https://issues.apache.org/jira/browse/HDFS-15821) | Add metrics for in-service datanodes | Minor | . | Zehao Chen | Zehao Chen | +| [YARN-10626](https://issues.apache.org/jira/browse/YARN-10626) | Log resource allocation in NM log at container start time | Major | . | Eric Badger | Eric Badger | +| [HDFS-15815](https://issues.apache.org/jira/browse/HDFS-15815) | if required storageType are unavailable, log the failed reason during choosing Datanode | Minor | block placement | Yang Yun | Yang Yun | +| [HDFS-15830](https://issues.apache.org/jira/browse/HDFS-15830) | Support to make dfs.image.parallel.load reconfigurable | Major | namenode | Hui Fei | Hui Fei | +| [HDFS-15835](https://issues.apache.org/jira/browse/HDFS-15835) | Erasure coding: Add/remove logs for the better readability/debugging | Minor | erasure-coding, hdfs | Bhavik Patel | Bhavik Patel | +| [HDFS-15826](https://issues.apache.org/jira/browse/HDFS-15826) | Solve the problem of incorrect progress of delegation tokens when loading FsImage | Major | . 
| JiangHua Zhu | JiangHua Zhu | +| [HDFS-15734](https://issues.apache.org/jira/browse/HDFS-15734) | [READ] DirectoryScanner#scan need not check StorageType.PROVIDED | Minor | datanode | Yuxuan Wang | Yuxuan Wang | +| [HADOOP-17538](https://issues.apache.org/jira/browse/HADOOP-17538) | Add kms-default.xml and httpfs-default.xml to site index | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10613](https://issues.apache.org/jira/browse/YARN-10613) | Config to allow Intra- and Inter-queue preemption to enable/disable conservativeDRF | Minor | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne | +| [YARN-10653](https://issues.apache.org/jira/browse/YARN-10653) | Fixed the findbugs issues introduced by YARN-10647. | Major | . | Qi Zhu | Qi Zhu | +| [MAPREDUCE-7324](https://issues.apache.org/jira/browse/MAPREDUCE-7324) | ClientHSSecurityInfo class is in wrong META-INF file | Major | . | Eric Badger | Eric Badger | +| [HADOOP-17546](https://issues.apache.org/jira/browse/HADOOP-17546) | Update Description of hadoop-http-auth-signature-secret in HttpAuthentication.md | Minor | . | Ravuri Sushma sree | Ravuri Sushma sree | +| [YARN-10664](https://issues.apache.org/jira/browse/YARN-10664) | Allow parameter expansion in NM\_ADMIN\_USER\_ENV | Major | yarn | Jim Brennan | Jim Brennan | +| [HADOOP-17570](https://issues.apache.org/jira/browse/HADOOP-17570) | Apply YETUS-1102 to re-enable GitHub comments | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17594](https://issues.apache.org/jira/browse/HADOOP-17594) | DistCp: Expose the JobId for applications executing through run method | Major | . 
| Ayush Saxena | Ayush Saxena | +| [HDFS-15907](https://issues.apache.org/jira/browse/HDFS-15907) | Reduce Memory Overhead of AclFeature by avoiding AtomicInteger | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15911](https://issues.apache.org/jira/browse/HDFS-15911) | Provide blocks moved count in Balancer iteration result | Major | balancer & mover | Viraj Jasani | Viraj Jasani | +| [HDFS-15919](https://issues.apache.org/jira/browse/HDFS-15919) | BlockPoolManager should log stack trace if unable to get Namenode addresses | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17531](https://issues.apache.org/jira/browse/HADOOP-17531) | DistCp: Reduce memory usage on copying huge directories | Critical | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15879](https://issues.apache.org/jira/browse/HDFS-15879) | Exclude slow nodes when choose targets for blocks | Major | . | tomscut | tomscut | +| [HADOOP-16870](https://issues.apache.org/jira/browse/HADOOP-16870) | Use spotbugs-maven-plugin instead of findbugs-maven-plugin | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17222](https://issues.apache.org/jira/browse/HADOOP-17222) | Create socket address leveraging URI cache | Major | common, hdfs-client | fanrui | fanrui | +| [HDFS-15932](https://issues.apache.org/jira/browse/HDFS-15932) | Improve the balancer error message when process exits abnormally. | Major | . | Renukaprasad C | Renukaprasad C | +| [HADOOP-16524](https://issues.apache.org/jira/browse/HADOOP-16524) | Automatic keystore reloading for HttpServer2 | Major | . | Kihwal Lee | Borislav Iordanov | +| [HDFS-15931](https://issues.apache.org/jira/browse/HDFS-15931) | Fix non-static inner classes for better memory management | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17371](https://issues.apache.org/jira/browse/HADOOP-17371) | Bump Jetty to the latest version 9.4.35 | Major | . 
| Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15942](https://issues.apache.org/jira/browse/HDFS-15942) | Increase Quota initialization threads | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17613](https://issues.apache.org/jira/browse/HADOOP-17613) | Log not flushed fully when daemon shutdown | Major | common | Renukaprasad C | Renukaprasad C | +| [HDFS-15937](https://issues.apache.org/jira/browse/HDFS-15937) | Reduce memory used during datanode layout upgrade | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15160](https://issues.apache.org/jira/browse/HDFS-15160) | ReplicaMap, Disk Balancer, Directory Scanner and various FsDatasetImpl methods should use datanode readlock | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17569](https://issues.apache.org/jira/browse/HADOOP-17569) | Building native code fails on Fedora 33 | Major | build, common | Kengo Seki | Masatake Iwasaki | +| [HADOOP-17633](https://issues.apache.org/jira/browse/HADOOP-17633) | Please upgrade json-smart dependency to the latest version | Major | auth, build | helen huang | Viraj Jasani | +| [HADOOP-17620](https://issues.apache.org/jira/browse/HADOOP-17620) | DistCp: Use Iterator for listing target directory as well | Major | . | Ayush Saxena | Ayush Saxena | +| [YARN-10743](https://issues.apache.org/jira/browse/YARN-10743) | Add a policy for not aggregating for containers which are killed because exceeding container log size limit. | Major | . | Qi Zhu | Qi Zhu | +| [HDFS-15967](https://issues.apache.org/jira/browse/HDFS-15967) | Improve the log for Short Circuit Local Reads | Minor | . 
| Bhavik Patel | Bhavik Patel | +| [HADOOP-17675](https://issues.apache.org/jira/browse/HADOOP-17675) | LdapGroupsMapping$LdapSslSocketFactory ClassNotFoundException | Major | common | Tamas Mate | István Fajth | +| [HADOOP-11616](https://issues.apache.org/jira/browse/HADOOP-11616) | Remove workaround for Curator's ChildReaper requiring Guava 15+ | Major | . | Robert Kanter | Viraj Jasani | +| [HDFS-16003](https://issues.apache.org/jira/browse/HDFS-16003) | ProcessReport print invalidatedBlocks should judge debug level at first | Minor | namenode | lei w | lei w | +| [HDFS-16007](https://issues.apache.org/jira/browse/HDFS-16007) | Deserialization of ReplicaState should avoid throwing ArrayIndexOutOfBoundsException | Major | . | junwen yang | Viraj Jasani | +| [HADOOP-17615](https://issues.apache.org/jira/browse/HADOOP-17615) | ADLFS: Update SDK version from 2.3.6 to 2.3.9 | Minor | fs/adl | Bilahari T H | Bilahari T H | +| [HADOOP-16822](https://issues.apache.org/jira/browse/HADOOP-16822) | Provide source artifacts for hadoop-client-api | Major | . | Karel Kolman | Karel Kolman | +| [HADOOP-17680](https://issues.apache.org/jira/browse/HADOOP-17680) | Allow ProtobufRpcEngine to be extensible | Major | common | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [YARN-10258](https://issues.apache.org/jira/browse/YARN-10258) | Add metrics for 'ApplicationsRunning' in NodeManager | Minor | nodemanager | ANANDA G B | ANANDA G B | +| [HDFS-15790](https://issues.apache.org/jira/browse/HDFS-15790) | Make ProtobufRpcEngineProtos and ProtobufRpcEngineProtos2 Co-Exist | Critical | . | David Mollitor | Vinayakumar B | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15196](https://issues.apache.org/jira/browse/HDFS-15196) | RBF: RouterRpcServer getListing cannot list large dirs correctly | Critical | . 
| Fengnan Li | Fengnan Li | +| [HDFS-15252](https://issues.apache.org/jira/browse/HDFS-15252) | HttpFS: setWorkingDirectory should not accept invalid paths | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15249](https://issues.apache.org/jira/browse/HDFS-15249) | ThrottledAsyncChecker is not thread-safe. | Major | federation | Toshihiro Suzuki | Toshihiro Suzuki | +| [HDFS-15266](https://issues.apache.org/jira/browse/HDFS-15266) | Add missing DFSOps Statistics in WebHDFS | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15275](https://issues.apache.org/jira/browse/HDFS-15275) | HttpFS: Response of Create was not correct with noredirect and data are true | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15281](https://issues.apache.org/jira/browse/HDFS-15281) | ZKFC ignores dfs.namenode.rpc-bind-host and uses dfs.namenode.rpc-address to bind to host address | Major | ha, namenode | Dhiraj Hegde | Dhiraj Hegde | +| [HDFS-15297](https://issues.apache.org/jira/browse/HDFS-15297) | TestNNHandlesBlockReportPerStorage::blockReport\_02 fails intermittently in trunk | Major | datanode, test | Mingliang Liu | Ayush Saxena | +| [HDFS-15210](https://issues.apache.org/jira/browse/HDFS-15210) | EC : File write hanged when DN is shutdown by admin command. | Major | ec | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-15285](https://issues.apache.org/jira/browse/HDFS-15285) | The same distance and load nodes don't shuffle when consider DataNode load | Major | . | Lisheng Sun | Lisheng Sun | +| [HDFS-15265](https://issues.apache.org/jira/browse/HDFS-15265) | HttpFS: validate content-type in HttpFSUtils | Major | . 
| Hemanth Boyina | Hemanth Boyina | +| [HDFS-15320](https://issues.apache.org/jira/browse/HDFS-15320) | StringIndexOutOfBoundsException in HostRestrictingAuthorizationFilter | Major | webhdfs | Akira Ajisaka | Akira Ajisaka | +| [YARN-10256](https://issues.apache.org/jira/browse/YARN-10256) | Refactor TestContainerSchedulerQueuing.testContainerUpdateExecTypeGuaranteedToOpportunistic | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15270](https://issues.apache.org/jira/browse/HDFS-15270) | Account for \*env == NULL in hdfsThreadDestructor | Major | . | Babneet Singh | Babneet Singh | +| [HDFS-15331](https://issues.apache.org/jira/browse/HDFS-15331) | Remove invalid exclusions that minicluster dependency on HDFS | Major | . | Wanqiang Ji | Wanqiang Ji | +| [YARN-8959](https://issues.apache.org/jira/browse/YARN-8959) | TestContainerResizing fails randomly | Minor | . | Bibin Chundatt | Ahmed Hussein | +| [HDFS-15332](https://issues.apache.org/jira/browse/HDFS-15332) | Quota Space consumed was wrong in truncate with Snapshots | Major | . | Hemanth Boyina | Hemanth Boyina | +| [YARN-9017](https://issues.apache.org/jira/browse/YARN-9017) | PlacementRule order is not maintained in CS | Major | . | Bibin Chundatt | Bilwa S T | +| [HADOOP-17025](https://issues.apache.org/jira/browse/HADOOP-17025) | Fix invalid metastore configuration in S3GuardTool tests | Minor | fs/s3, test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15339](https://issues.apache.org/jira/browse/HDFS-15339) | TestHDFSCLI fails for user names with the dot/dash character | Major | test | Yan Xiaole | Yan Xiaole | +| [HDFS-15250](https://issues.apache.org/jira/browse/HDFS-15250) | Setting \`dfs.client.use.datanode.hostname\` to true can crash the system because of unhandled UnresolvedAddressException | Major | . 
| Ctest | Ctest | +| [HADOOP-16768](https://issues.apache.org/jira/browse/HADOOP-16768) | SnappyCompressor test cases wrongly assume that the compressed data is always smaller than the input data | Major | io, test | zhao bo | Akira Ajisaka | +| [HDFS-1820](https://issues.apache.org/jira/browse/HDFS-1820) | FTPFileSystem attempts to close the outputstream even when it is not initialised | Major | hdfs-client | Sudharsan Sampath | Mikhail Pryakhin | +| [HDFS-15243](https://issues.apache.org/jira/browse/HDFS-15243) | Add an option to prevent sub-directories of protected directories from deletion | Major | 3.1.1 | liuyanyu | liuyanyu | +| [HDFS-14367](https://issues.apache.org/jira/browse/HDFS-14367) | EC: Parameter maxPoolSize in striped reconstruct thread pool isn't affecting number of threads | Major | ec | Guo Lei | Guo Lei | +| [YARN-9301](https://issues.apache.org/jira/browse/YARN-9301) | Too many InvalidStateTransitionException with SLS | Major | . | Bibin Chundatt | Bilwa S T | +| [HDFS-15300](https://issues.apache.org/jira/browse/HDFS-15300) | RBF: updateActiveNamenode() is invalid when RPC address is IP | Major | . | xuzq | xuzq | +| [HDFS-15316](https://issues.apache.org/jira/browse/HDFS-15316) | Deletion failure should not remove directory from snapshottables | Major | . | Hemanth Boyina | Hemanth Boyina | +| [YARN-8942](https://issues.apache.org/jira/browse/YARN-8942) | PriorityBasedRouterPolicy throws exception if all sub-cluster weights have negative value | Minor | . | Akshay Agarwal | Bilwa S T | +| [HADOOP-17044](https://issues.apache.org/jira/browse/HADOOP-17044) | Revert "HADOOP-8143. Change distcp to have -pb on by default" | Major | tools/distcp | Steve Loughran | Steve Loughran | +| [HADOOP-17024](https://issues.apache.org/jira/browse/HADOOP-17024) | ListStatus on ViewFS root (ls "/") should list the linkFallBack root (configured target root). 
| Major | fs, viewfs | Uma Maheswara Rao G | Abhishek Das | +| [MAPREDUCE-6826](https://issues.apache.org/jira/browse/MAPREDUCE-6826) | Job fails with InvalidStateTransitonException: Invalid event: JOB\_TASK\_COMPLETED at SUCCEEDED/COMMITTING | Major | . | Varun Saxena | Bilwa S T | +| [HADOOP-16900](https://issues.apache.org/jira/browse/HADOOP-16900) | Very large files can be truncated when written through S3AFileSystem | Major | fs/s3 | Andrew Olson | Mukund Thakur | +| [HADOOP-17049](https://issues.apache.org/jira/browse/HADOOP-17049) | javax.activation-api and jakarta.activation-api define overlapping classes | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17040](https://issues.apache.org/jira/browse/HADOOP-17040) | Fix intermittent failure of ITestBlockingThreadPoolExecutorService | Minor | fs/s3, test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15363](https://issues.apache.org/jira/browse/HDFS-15363) | BlockPlacementPolicyWithNodeGroup should validate if it is initialized by NetworkTopologyWithNodeGroup | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15093](https://issues.apache.org/jira/browse/HDFS-15093) | RENAME.TO\_TRASH is ignored When RENAME.OVERWRITE is specified | Major | . | Harshakiran Reddy | Ayush Saxena | +| [HDFS-12288](https://issues.apache.org/jira/browse/HDFS-12288) | Fix DataNode's xceiver count calculation | Major | datanode, hdfs | Lukas Majercak | Lisheng Sun | +| [HDFS-15362](https://issues.apache.org/jira/browse/HDFS-15362) | FileWithSnapshotFeature#updateQuotaAndCollectBlocks should collect all distinct blocks | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HADOOP-7002](https://issues.apache.org/jira/browse/HADOOP-7002) | Wrong description of copyFromLocal and copyToLocal in documentation | Minor | . 
| Jingguo Yao | Andras Bokor | +| [HADOOP-17052](https://issues.apache.org/jira/browse/HADOOP-17052) | NetUtils.connect() throws unchecked exception (UnresolvedAddressException) causing clients to abort | Major | net | Dhiraj Hegde | Dhiraj Hegde | +| [HADOOP-17018](https://issues.apache.org/jira/browse/HADOOP-17018) | Intermittent failing of ITestAbfsStreamStatistics in ABFS | Minor | fs/azure, test | Mehakmeet Singh | Mehakmeet Singh | +| [YARN-10254](https://issues.apache.org/jira/browse/YARN-10254) | CapacityScheduler incorrect User Group Mapping after leaf queue change | Major | . | Gergely Pollák | Gergely Pollák | +| [HADOOP-17062](https://issues.apache.org/jira/browse/HADOOP-17062) | Fix shelldocs path in Jenkinsfile | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17056](https://issues.apache.org/jira/browse/HADOOP-17056) | shelldoc fails in hadoop-common | Major | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10286](https://issues.apache.org/jira/browse/YARN-10286) | PendingContainers bugs in the scheduler outputs | Critical | . | Adam Antal | Andras Gyori | +| [HDFS-15396](https://issues.apache.org/jira/browse/HDFS-15396) | Fix TestViewFileSystemOverloadSchemeHdfsFileSystemContract#testListStatusRootDir | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15386](https://issues.apache.org/jira/browse/HDFS-15386) | ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories | Major | . | Toshihiro Suzuki | Toshihiro Suzuki | +| [HDFS-15398](https://issues.apache.org/jira/browse/HDFS-15398) | EC: hdfs client hangs due to exception during addBlock | Critical | ec, hdfs-client | Hongbing Wang | Hongbing Wang | +| [YARN-10300](https://issues.apache.org/jira/browse/YARN-10300) | appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat | Major | . 
| Eric Badger | Eric Badger | +| [HADOOP-17059](https://issues.apache.org/jira/browse/HADOOP-17059) | ArrayIndexOutOfBoundsException in ViewFileSystem#listStatus | Major | viewfs | Hemanth Boyina | Hemanth Boyina | +| [YARN-10296](https://issues.apache.org/jira/browse/YARN-10296) | Make ContainerPBImpl#getId/setId synchronized | Minor | . | Benjamin Teke | Benjamin Teke | +| [HADOOP-17060](https://issues.apache.org/jira/browse/HADOOP-17060) | listStatus and getFileStatus behave inconsistent in the case of ViewFs implementation for isDirectory | Major | viewfs | Srinivasu Majeti | Uma Maheswara Rao G | +| [YARN-10312](https://issues.apache.org/jira/browse/YARN-10312) | Add support for yarn logs -logFile to retain backward compatibility | Major | client | Jim Brennan | Jim Brennan | +| [HDFS-15351](https://issues.apache.org/jira/browse/HDFS-15351) | Blocks scheduled count was wrong on truncate | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15403](https://issues.apache.org/jira/browse/HDFS-15403) | NPE in FileIoProvider#transferToSocketFully | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15372](https://issues.apache.org/jira/browse/HDFS-15372) | Files in snapshots no longer see attribute provider permissions | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [MAPREDUCE-7281](https://issues.apache.org/jira/browse/MAPREDUCE-7281) | Fix NoClassDefFoundError on 'mapred minicluster' | Major | . | Masatake Iwasaki | Masatake Iwasaki | +| [HADOOP-17029](https://issues.apache.org/jira/browse/HADOOP-17029) | ViewFS does not return correct user/group and ACL | Major | fs, viewfs | Abhishek Das | Abhishek Das | +| [HDFS-14546](https://issues.apache.org/jira/browse/HDFS-14546) | Document block placement policies | Major | . 
| Íñigo Goiri | Amithsha | +| [HADOOP-17068](https://issues.apache.org/jira/browse/HADOOP-17068) | client fails forever when namenode ipaddr changed | Major | hdfs-client | Sean Chow | Sean Chow | +| [HADOOP-17089](https://issues.apache.org/jira/browse/HADOOP-17089) | WASB: Update azure-storage-java SDK | Critical | fs/azure | Thomas Marqardt | Thomas Marqardt | +| [HDFS-15378](https://issues.apache.org/jira/browse/HDFS-15378) | TestReconstructStripedFile#testErasureCodingWorkerXmitsWeight is failing on trunk | Major | . | Hemanth Boyina | Hemanth Boyina | +| [YARN-9903](https://issues.apache.org/jira/browse/YARN-9903) | Support reservations continue looking for Node Labels | Major | . | Tarun Parimi | Jim Brennan | +| [YARN-10331](https://issues.apache.org/jira/browse/YARN-10331) | Upgrade node.js to 10.21.0 | Critical | build, yarn-ui-v2 | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17032](https://issues.apache.org/jira/browse/HADOOP-17032) | Handle an internal dir in viewfs having multiple children mount points pointing to different filesystems | Major | fs, viewfs | Abhishek Das | Abhishek Das | +| [YARN-10318](https://issues.apache.org/jira/browse/YARN-10318) | ApplicationHistory Web UI incorrect column indexing | Minor | yarn | Andras Gyori | Andras Gyori | +| [YARN-10330](https://issues.apache.org/jira/browse/YARN-10330) | Add missing test scenarios to TestUserGroupMappingPlacementRule and TestAppNameMappingPlacementRule | Major | capacity scheduler, capacityscheduler, test | Peter Bacsko | Peter Bacsko | +| [HDFS-15446](https://issues.apache.org/jira/browse/HDFS-15446) | CreateSnapshotOp fails during edit log loading for /.reserved/raw/path with error java.io.FileNotFoundException: Directory does not exist: /.reserved/raw/path | Major | hdfs | Srinivasu Majeti | Stephen O'Donnell | +| [HADOOP-17081](https://issues.apache.org/jira/browse/HADOOP-17081) | MetricsSystem doesn't start the sink adapters on restart | Minor | metrics | Madhusoodan | Madhusoodan | +| 
[HDFS-15451](https://issues.apache.org/jira/browse/HDFS-15451) | Restarting name node stuck in safe mode when using provided storage | Major | namenode | shanyu zhao | shanyu zhao | +| [HADOOP-17117](https://issues.apache.org/jira/browse/HADOOP-17117) | Fix typos in hadoop-aws documentation | Trivial | documentation, fs/s3 | Sebastian Nagel | Sebastian Nagel | +| [HADOOP-17120](https://issues.apache.org/jira/browse/HADOOP-17120) | Fix failure of docker image creation due to pip2 install error | Major | . | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10344](https://issues.apache.org/jira/browse/YARN-10344) | Sync netty versions in hadoop-yarn-csi | Major | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10341](https://issues.apache.org/jira/browse/YARN-10341) | Yarn Service Container Completed event doesn't get processed | Critical | . | Bilwa S T | Bilwa S T | +| [HADOOP-16998](https://issues.apache.org/jira/browse/HADOOP-16998) | WASB : NativeAzureFsOutputStream#close() throwing IllegalArgumentException | Major | fs/azure | Anoop Sam John | Anoop Sam John | +| [YARN-10348](https://issues.apache.org/jira/browse/YARN-10348) | Allow RM to always cancel tokens after app completes | Major | yarn | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7284](https://issues.apache.org/jira/browse/MAPREDUCE-7284) | TestCombineFileInputFormat#testMissingBlocks fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-14498](https://issues.apache.org/jira/browse/HDFS-14498) | LeaseManager can loop forever on the file for which create has failed | Major | namenode | Sergey Shelukhin | Stephen O'Donnell | +| [HADOOP-17130](https://issues.apache.org/jira/browse/HADOOP-17130) | Configuration.getValByRegex() shouldn't update the results while fetching. 
| Major | common | Mukund Thakur | Mukund Thakur | +| [HDFS-15198](https://issues.apache.org/jira/browse/HDFS-15198) | RBF: Add test for MountTableRefresherService failed to refresh other router MountTableEntries in secure mode | Major | rbf | zhengchenyu | zhengchenyu | +| [HADOOP-17119](https://issues.apache.org/jira/browse/HADOOP-17119) | Jetty upgrade to 9.4.x causes MR app fail with IOException | Major | . | Bilwa S T | Bilwa S T | +| [HDFS-15246](https://issues.apache.org/jira/browse/HDFS-15246) | ArrayIndexOfboundsException in BlockManager CreateLocatedBlock | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HADOOP-17138](https://issues.apache.org/jira/browse/HADOOP-17138) | Fix spotbugs warnings surfaced after upgrade to 4.0.6 | Minor | . | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-4771](https://issues.apache.org/jira/browse/YARN-4771) | Some containers can be skipped during log aggregation after NM restart | Major | nodemanager | Jason Darrell Lowe | Jim Brennan | +| [YARN-10367](https://issues.apache.org/jira/browse/YARN-10367) | Failed to get nodejs 10.21.0 when building docker image | Blocker | build, webapp | Akira Ajisaka | Akira Ajisaka | +| [MAPREDUCE-7051](https://issues.apache.org/jira/browse/MAPREDUCE-7051) | Fix typo in MultipleOutputFormat | Trivial | . | ywheel | ywheel | +| [HDFS-15313](https://issues.apache.org/jira/browse/HDFS-15313) | Ensure inodes in active filesystem are not deleted during snapshot delete | Major | snapshots | Shashikant Banerjee | Shashikant Banerjee | +| [HDFS-14950](https://issues.apache.org/jira/browse/HDFS-14950) | missing libhdfspp libs in dist-package | Major | build, libhdfs++ | Yuan Zhou | Yuan Zhou | +| [YARN-10359](https://issues.apache.org/jira/browse/YARN-10359) | Log container report only if list is not empty | Minor | . 
| Bilwa S T | Bilwa S T | +| [YARN-10229](https://issues.apache.org/jira/browse/YARN-10229) | [Federation] Client should be able to submit application to RM directly using normal client conf | Major | amrmproxy, federation | JohnsonGuo | Bilwa S T | +| [HDFS-15503](https://issues.apache.org/jira/browse/HDFS-15503) | File and directory permissions are not able to be modified from WebUI | Major | . | Hemanth Boyina | Hemanth Boyina | +| [HADOOP-17184](https://issues.apache.org/jira/browse/HADOOP-17184) | Add --mvn-custom-repos parameter to yetus calls | Major | build | Mingliang Liu | Mingliang Liu | +| [HDFS-15499](https://issues.apache.org/jira/browse/HDFS-15499) | Clean up httpfs/pom.xml to remove aws-java-sdk-s3 exclusion | Major | httpfs | Mingliang Liu | Mingliang Liu | +| [HADOOP-17186](https://issues.apache.org/jira/browse/HADOOP-17186) | Fixing javadoc in ListingOperationCallbacks | Major | build, documentation | Akira Ajisaka | Mukund Thakur | +| [HADOOP-17164](https://issues.apache.org/jira/browse/HADOOP-17164) | UGI loginUserFromKeytab doesn't set the last login time | Major | security | Sandeep Guggilam | Sandeep Guggilam | +| [YARN-4575](https://issues.apache.org/jira/browse/YARN-4575) | ApplicationResourceUsageReport should return ALL reserved resource | Major | . | Bibin Chundatt | Bibin Chundatt | +| [YARN-10388](https://issues.apache.org/jira/browse/YARN-10388) | RMNode updatedCapability flag not set while RecommissionNodeTransition | Major | resourcemanager | Pranjal Protim Borah | Pranjal Protim Borah | +| [HDFS-15443](https://issues.apache.org/jira/browse/HDFS-15443) | Setting dfs.datanode.max.transfer.threads to a very small value can cause strange failure. 
| Major | datanode | AMC-team | AMC-team | +| [HDFS-15508](https://issues.apache.org/jira/browse/HDFS-15508) | [JDK 11] Fix javadoc errors in hadoop-hdfs-rbf module | Major | documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15506](https://issues.apache.org/jira/browse/HDFS-15506) | [JDK 11] Fix javadoc errors in hadoop-hdfs module | Major | documentation | Akira Ajisaka | Xieming Li | +| [HDFS-15507](https://issues.apache.org/jira/browse/HDFS-15507) | [JDK 11] Fix javadoc errors in hadoop-hdfs-client module | Major | documentation | Akira Ajisaka | Xieming Li | +| [HADOOP-17196](https://issues.apache.org/jira/browse/HADOOP-17196) | Fix C/C++ standard warnings | Major | build | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-17204](https://issues.apache.org/jira/browse/HADOOP-17204) | Fix typo in Hadoop KMS document | Trivial | documentation, kms | Akira Ajisaka | Xieming Li | +| [HADOOP-17192](https://issues.apache.org/jira/browse/HADOOP-17192) | ITestS3AHugeFilesSSECDiskBlock failing because of bucket overrides | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [YARN-10336](https://issues.apache.org/jira/browse/YARN-10336) | RM page should throw exception when command injected in RM REST API to get applications | Major | . | Rajshree Mishra | Bilwa S T | +| [HDFS-15439](https://issues.apache.org/jira/browse/HDFS-15439) | Setting dfs.mover.retry.max.attempts to negative value will retry forever. | Major | balancer & mover | AMC-team | AMC-team | +| [YARN-10391](https://issues.apache.org/jira/browse/YARN-10391) | --module-gpu functionality is broken in container-executor | Major | nodemanager | Eric Badger | Eric Badger | +| [HADOOP-17122](https://issues.apache.org/jira/browse/HADOOP-17122) | Bug in preserving Directory Attributes in DistCp with Atomic Copy | Major | tools/distcp | Swaminathan Balachandran | | +| [HDFS-14504](https://issues.apache.org/jira/browse/HDFS-14504) | Rename with Snapshots does not honor quota limit | Major | . 
| Shashikant Banerjee | Hemanth Boyina | +| [HADOOP-17209](https://issues.apache.org/jira/browse/HADOOP-17209) | Erasure Coding: Native library memory leak | Major | native | Sean Chow | Sean Chow | +| [HADOOP-16925](https://issues.apache.org/jira/browse/HADOOP-16925) | MetricsConfig incorrectly loads the configuration whose value is String list in the properties file | Major | metrics | Jiayi Liu | Jiayi Liu | +| [HADOOP-17220](https://issues.apache.org/jira/browse/HADOOP-17220) | Upgrade slf4j to 1.7.30 ( To Address: CVE-2018-8088) | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-14852](https://issues.apache.org/jira/browse/HDFS-14852) | Removing from LowRedundancyBlocks does not remove the block from all queues | Major | namenode | Hui Fei | Hui Fei | +| [HDFS-15536](https://issues.apache.org/jira/browse/HDFS-15536) | RBF: Clear Quota in Router was not consistent | Critical | . | Hemanth Boyina | Hemanth Boyina | +| [HDFS-15510](https://issues.apache.org/jira/browse/HDFS-15510) | RBF: Quota and Content Summary was not correct in Multiple Destinations | Critical | . 
| Hemanth Boyina | Hemanth Boyina | +| [HDFS-15540](https://issues.apache.org/jira/browse/HDFS-15540) | Directories protected from delete can still be moved to the trash | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17129](https://issues.apache.org/jira/browse/HADOOP-17129) | Validating storage keys in ABFS correctly | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HDFS-15471](https://issues.apache.org/jira/browse/HDFS-15471) | TestHDFSContractMultipartUploader fails on trunk | Major | test | Ahmed Hussein | Steve Loughran | +| [HDFS-15290](https://issues.apache.org/jira/browse/HDFS-15290) | NPE in HttpServer during NameNode startup | Major | namenode | Konstantin Shvachko | Simbarashe Dzinamarira | +| [HADOOP-17158](https://issues.apache.org/jira/browse/HADOOP-17158) | Test timeout for ITestAbfsInputStreamStatistics#testReadAheadCounters | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17229](https://issues.apache.org/jira/browse/HADOOP-17229) | Test failure as failed request body counted in byte received metric - ITestAbfsNetworkStatistics#testAbfsHttpResponseStatistics | Major | fs/azure, test | Sneha Vijayarajan | Mehakmeet Singh | +| [YARN-10397](https://issues.apache.org/jira/browse/YARN-10397) | SchedulerRequest should be forwarded to scheduler if custom scheduler supports placement constraints | Minor | . | Bilwa S T | Bilwa S T | +| [HDFS-15573](https://issues.apache.org/jira/browse/HDFS-15573) | Only log warning if considerLoad and considerStorageType are both true | Major | . 
| Stephen O'Donnell | Stephen O'Donnell | +| [YARN-10430](https://issues.apache.org/jira/browse/YARN-10430) | Log improvements in NodeStatusUpdaterImpl | Minor | nodemanager | Bilwa S T | Bilwa S T | +| [HADOOP-17246](https://issues.apache.org/jira/browse/HADOOP-17246) | Fix build the hadoop-build Docker image failed | Major | build | Wanqiang Ji | Wanqiang Ji | +| [HDFS-15438](https://issues.apache.org/jira/browse/HDFS-15438) | Setting dfs.disk.balancer.max.disk.errors = 0 will fail the block copy | Major | balancer & mover | AMC-team | AMC-team | +| [HADOOP-15136](https://issues.apache.org/jira/browse/HADOOP-15136) | Typo in rename spec pseudocode | Major | documentation | Rae Marks | | +| [HADOOP-17088](https://issues.apache.org/jira/browse/HADOOP-17088) | Failed to load XInclude files with relative path. | Minor | conf | Yushi Hayasaka | Yushi Hayasaka | +| [MAPREDUCE-7294](https://issues.apache.org/jira/browse/MAPREDUCE-7294) | Only application master should upload resource to Yarn Shared Cache | Major | mrv2 | zhenzhao wang | zhenzhao wang | +| [HADOOP-17277](https://issues.apache.org/jira/browse/HADOOP-17277) | Correct spelling errors for separator | Trivial | common | Hui Fei | Hui Fei | +| [HADOOP-17286](https://issues.apache.org/jira/browse/HADOOP-17286) | Upgrade to jQuery 3.5.1 in hadoop-yarn-common | Major | . | Wei-Chiu Chuang | Aryan Gupta | +| [HDFS-15591](https://issues.apache.org/jira/browse/HDFS-15591) | RBF: Fix webHdfs file display error | Major | . | wangzhaohui | wangzhaohui | +| [MAPREDUCE-7289](https://issues.apache.org/jira/browse/MAPREDUCE-7289) | Fix wrong comment in LongLong.java | Trivial | documentation, examples | Akira Ajisaka | Wanqiang Ji | +| [YARN-9809](https://issues.apache.org/jira/browse/YARN-9809) | NMs should supply a health status when registering with RM | Major | . 
| Eric Badger | Eric Badger | +| [HADOOP-17300](https://issues.apache.org/jira/browse/HADOOP-17300) | FileSystem.DirListingIterator.next() call should return NoSuchElementException | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [YARN-10393](https://issues.apache.org/jira/browse/YARN-10393) | MR job live lock caused by completed state container leak in heartbeat between node manager and RM | Major | nodemanager, yarn | zhenzhao wang | Jim Brennan | +| [HDFS-15253](https://issues.apache.org/jira/browse/HDFS-15253) | Set default throttle value on dfs.image.transfer.bandwidthPerSec | Major | namenode | Karthik Palanisamy | Karthik Palanisamy | +| [HDFS-15610](https://issues.apache.org/jira/browse/HDFS-15610) | Reduce datanode upgrade/hardlink thread | Major | datanode | Karthik Palanisamy | Karthik Palanisamy | +| [YARN-10455](https://issues.apache.org/jira/browse/YARN-10455) | TestNMProxy.testNMProxyRPCRetry is not consistent | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15456](https://issues.apache.org/jira/browse/HDFS-15456) | TestExternalStoragePolicySatisfier fails intermittently | Major | . | Ahmed Hussein | Leon Gao | +| [HADOOP-17223](https://issues.apache.org/jira/browse/HADOOP-17223) | update org.apache.httpcomponents:httpclient to 4.5.13 and httpcore to 4.4.13 | Blocker | . 
| Pranav Bheda | Pranav Bheda | +| [HDFS-15628](https://issues.apache.org/jira/browse/HDFS-15628) | HttpFS server throws NPE if a file is a symlink | Major | fs, httpfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15627](https://issues.apache.org/jira/browse/HDFS-15627) | Audit log deletes before collecting blocks | Major | logging, namenode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17309](https://issues.apache.org/jira/browse/HADOOP-17309) | Javadoc warnings and errors are ignored in the precommit jobs | Major | build, documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-14383](https://issues.apache.org/jira/browse/HDFS-14383) | Compute datanode load based on StoragePolicy | Major | hdfs, namenode | Karthik Palanisamy | Ayush Saxena | +| [HADOOP-17310](https://issues.apache.org/jira/browse/HADOOP-17310) | Touch command with -c option is broken | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15626](https://issues.apache.org/jira/browse/HDFS-15626) | TestWebHDFS.testLargeDirectory failing | Major | test, webhdfs | Mukund Thakur | Mukund Thakur | +| [HADOOP-17298](https://issues.apache.org/jira/browse/HADOOP-17298) | Backslash in username causes build failure in the environment started by start-build-env.sh. | Minor | build | Takeru Kuramoto | Takeru Kuramoto | +| [HDFS-15639](https://issues.apache.org/jira/browse/HDFS-15639) | [JDK 11] Fix Javadoc errors in hadoop-hdfs-client | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-15622](https://issues.apache.org/jira/browse/HDFS-15622) | Deleted blocks linger in the replications queue | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17308](https://issues.apache.org/jira/browse/HADOOP-17308) | WASB : PageBlobOutputStream succeeding hflush even when underlying flush to storage failed | Critical | . | Anoop Sam John | Anoop Sam John | +| [HDFS-15641](https://issues.apache.org/jira/browse/HDFS-15641) | DataNode could meet deadlock if invoke refreshNameNode | Critical | . 
| Hongbing Wang | Hongbing Wang | +| [HADOOP-17328](https://issues.apache.org/jira/browse/HADOOP-17328) | LazyPersist Overwrite fails in direct write mode | Major | . | Ayush Saxena | Ayush Saxena | +| [MAPREDUCE-7302](https://issues.apache.org/jira/browse/MAPREDUCE-7302) | Upgrading to JUnit 4.13 causes testcase TestFetcher.testCorruptedIFile() to fail | Major | test | Peter Bacsko | Peter Bacsko | +| [HDFS-15644](https://issues.apache.org/jira/browse/HDFS-15644) | Failed volumes can cause DNs to stop block reporting | Major | block placement, datanode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17236](https://issues.apache.org/jira/browse/HADOOP-17236) | Bump up snakeyaml to 1.26 to mitigate CVE-2017-18640 | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [YARN-10467](https://issues.apache.org/jira/browse/YARN-10467) | ContainerIdPBImpl objects can be leaked in RMNodeImpl.completedContainers | Major | resourcemanager | Haibo Chen | Haibo Chen | +| [HADOOP-17329](https://issues.apache.org/jira/browse/HADOOP-17329) | mvn site commands fails due to MetricsSystemImpl changes | Major | . | Xiaoqiao He | Xiaoqiao He | +| [HDFS-15651](https://issues.apache.org/jira/browse/HDFS-15651) | Client could not obtain block when DN CommandProcessingThread exit | Major | . 
| Yiqun Lin | Aiphago | +| [HADOOP-17340](https://issues.apache.org/jira/browse/HADOOP-17340) | TestLdapGroupsMapping failing -string mismatch in exception validation | Major | test | Steve Loughran | Steve Loughran | +| [HDFS-15667](https://issues.apache.org/jira/browse/HDFS-15667) | Audit log record the unexpected allowed result when delete called | Major | hdfs | Baolong Mao | Baolong Mao | +| [HADOOP-17352](https://issues.apache.org/jira/browse/HADOOP-17352) | Update PATCH\_NAMING\_RULE in the personality file | Minor | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10458](https://issues.apache.org/jira/browse/YARN-10458) | Hive On Tez queries fails upon submission to dynamically created pools | Major | resourcemanager | Anand Srinivasan | Peter Bacsko | +| [HDFS-15485](https://issues.apache.org/jira/browse/HDFS-15485) | Fix outdated properties of JournalNode when performing rollback | Minor | . | Deegue | Deegue | +| [HADOOP-17096](https://issues.apache.org/jira/browse/HADOOP-17096) | ZStandardCompressor throws java.lang.InternalError: Error (generic) | Major | io | Stephen Jung (Stripe) | Stephen Jung (Stripe) | +| [HADOOP-17327](https://issues.apache.org/jira/browse/HADOOP-17327) | NPE when starting MiniYARNCluster from hadoop-client-minicluster | Critical | . | Chao Sun | | +| [HADOOP-17324](https://issues.apache.org/jira/browse/HADOOP-17324) | Don't relocate org.bouncycastle in shaded client jars | Critical | . | Chao Sun | Chao Sun | +| [HADOOP-17373](https://issues.apache.org/jira/browse/HADOOP-17373) | hadoop-client-integration-tests doesn't work when building with skipShade | Major | . 
| Chao Sun | Chao Sun | +| [HADOOP-17365](https://issues.apache.org/jira/browse/HADOOP-17365) | Contract test for renaming over existing file is too lenient | Minor | test | Attila Doroszlai | Attila Doroszlai | +| [HADOOP-17358](https://issues.apache.org/jira/browse/HADOOP-17358) | Improve excessive reloading of Configurations | Major | conf | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15538](https://issues.apache.org/jira/browse/HDFS-15538) | Fix the documentation for dfs.namenode.replication.max-streams in hdfs-default.xml | Major | . | Xieming Li | Xieming Li | +| [HADOOP-17362](https://issues.apache.org/jira/browse/HADOOP-17362) | Doing hadoop ls on Har file triggers too many RPC calls | Major | fs | Ahmed Hussein | Ahmed Hussein | +| [YARN-10485](https://issues.apache.org/jira/browse/YARN-10485) | TimelineConnector swallows InterruptedException | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17360](https://issues.apache.org/jira/browse/HADOOP-17360) | Log the remote address for authentication success | Minor | ipc | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15685](https://issues.apache.org/jira/browse/HDFS-15685) | [JDK 14] TestConfiguredFailoverProxyProvider#testResolveDomainNameUsingDNS fails | Major | . 
| Akira Ajisaka | Akira Ajisaka | +| [MAPREDUCE-7305](https://issues.apache.org/jira/browse/MAPREDUCE-7305) | [JDK 11] TestMRJobsWithProfiler fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [YARN-10396](https://issues.apache.org/jira/browse/YARN-10396) | Max applications calculation per queue disregards queue level settings in absolute mode | Major | capacity scheduler | Benjamin Teke | Benjamin Teke | +| [HADOOP-17390](https://issues.apache.org/jira/browse/HADOOP-17390) | Skip license check on lz4 code files | Major | build | Zhihua Deng | Zhihua Deng | +| [MAPREDUCE-7307](https://issues.apache.org/jira/browse/MAPREDUCE-7307) | Potential thread leak in LocatedFileStatusFetcher | Major | job submission | Zhihua Deng | Zhihua Deng | +| [HADOOP-17346](https://issues.apache.org/jira/browse/HADOOP-17346) | Fair call queue is defeated by abusive service principals | Major | common, ipc | Ahmed Hussein | Ahmed Hussein | +| [YARN-10470](https://issues.apache.org/jira/browse/YARN-10470) | When building new web ui with root user, the bower install should support it. 
| Major | build, yarn-ui-v2 | Qi Zhu | Qi Zhu | +| [HADOOP-17398](https://issues.apache.org/jira/browse/HADOOP-17398) | Skipping network I/O in S3A getFileStatus(/) breaks some tests | Major | fs/s3, test | Mukund Thakur | Mukund Thakur | +| [HDFS-15698](https://issues.apache.org/jira/browse/HDFS-15698) | Fix the typo of dfshealth.html after HDFS-15358 | Trivial | namenode | Hui Fei | Hui Fei | +| [YARN-10498](https://issues.apache.org/jira/browse/YARN-10498) | Fix Yarn CapacityScheduler Markdown document | Trivial | documentation | zhaoshengjie | zhaoshengjie | +| [HADOOP-17399](https://issues.apache.org/jira/browse/HADOOP-17399) | lz4 sources missing for native Visual Studio project | Major | native | Gautham Banasandra | Gautham Banasandra | +| [HDFS-15695](https://issues.apache.org/jira/browse/HDFS-15695) | NN should not let the balancer run in safemode | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [YARN-10511](https://issues.apache.org/jira/browse/YARN-10511) | Update yarn.nodemanager.env-whitelist value in docs | Minor | documentation | Andrea Scarpino | Andrea Scarpino | +| [HADOOP-16080](https://issues.apache.org/jira/browse/HADOOP-16080) | hadoop-aws does not work with hadoop-client-api | Major | fs/s3 | Keith Turner | Chao Sun | +| [HDFS-15660](https://issues.apache.org/jira/browse/HDFS-15660) | StorageTypeProto is not compatible between 3.x and 2.6 | Major | . 
| Ryan Wu | Ryan Wu | +| [HDFS-15707](https://issues.apache.org/jira/browse/HDFS-15707) | NNTop counts don't add up as expected | Major | hdfs, metrics, namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15709](https://issues.apache.org/jira/browse/HDFS-15709) | EC: Socket file descriptor leak in StripedBlockChecksumReconstructor | Major | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [YARN-10495](https://issues.apache.org/jira/browse/YARN-10495) | make the rpath of container-executor configurable | Major | yarn | angerszhu | angerszhu | +| [HDFS-15240](https://issues.apache.org/jira/browse/HDFS-15240) | Erasure Coding: dirty buffer causes reconstruction block error | Blocker | datanode, erasure-coding | HuangTao | HuangTao | +| [YARN-10491](https://issues.apache.org/jira/browse/YARN-10491) | Fix deprecation warnings in SLSWebApp.java | Minor | build | Akira Ajisaka | Ankit Kumar | +| [HADOOP-13571](https://issues.apache.org/jira/browse/HADOOP-13571) | ServerSocketUtil.getPort() should use loopback address, not 0.0.0.0 | Major | . | Eric Badger | Eric Badger | +| [HDFS-15725](https://issues.apache.org/jira/browse/HDFS-15725) | Lease Recovery never completes for a committed block which the DNs never finalize | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15170](https://issues.apache.org/jira/browse/HDFS-15170) | EC: Block gets marked as CORRUPT in case of failover and pipeline recovery | Critical | erasure-coding | Ayush Saxena | Ayush Saxena | +| [YARN-10536](https://issues.apache.org/jira/browse/YARN-10536) | Client in distributedShell swallows interrupt exceptions | Major | client, distributed-shell | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15743](https://issues.apache.org/jira/browse/HDFS-15743) | Fix -Pdist build failure of hadoop-hdfs-native-client | Major | . 
| Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10334](https://issues.apache.org/jira/browse/YARN-10334) | TestDistributedShell leaks resources on timeout/failure | Major | distributed-shell, test, yarn | Ahmed Hussein | Ahmed Hussein | +| [YARN-10558](https://issues.apache.org/jira/browse/YARN-10558) | Fix failure of TestDistributedShell#testDSShellWithOpportunisticContainers | Minor | test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-15719](https://issues.apache.org/jira/browse/HDFS-15719) | [Hadoop 3] Both NameNodes can crash simultaneously due to the short JN socket timeout | Critical | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-10560](https://issues.apache.org/jira/browse/YARN-10560) | Upgrade node.js to 10.23.1 and yarn to 1.22.5 in Web UI v2 | Major | webapp, yarn-ui-v2 | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17444](https://issues.apache.org/jira/browse/HADOOP-17444) | ADLFS: Update SDK version from 2.3.6 to 2.3.9 | Minor | fs/adl | Bilahari T H | Bilahari T H | +| [YARN-10528](https://issues.apache.org/jira/browse/YARN-10528) | maxAMShare should only be accepted for leaf queues, not parent queues | Major | . | Siddharth Ahuja | Siddharth Ahuja | +| [HADOOP-17438](https://issues.apache.org/jira/browse/HADOOP-17438) | Increase docker memory limit in Jenkins | Major | build, scripts, test, yetus | Ahmed Hussein | Ahmed Hussein | +| [MAPREDUCE-7310](https://issues.apache.org/jira/browse/MAPREDUCE-7310) | Clear the fileMap in JHEventHandlerForSigtermTest | Minor | test | Zhengxi Li | Zhengxi Li | +| [HADOOP-16947](https://issues.apache.org/jira/browse/HADOOP-16947) | Stale record should be removed when MutableRollingAverages generates aggregate data. | Major | . 
| Haibin Huang | Haibin Huang | +| [YARN-10515](https://issues.apache.org/jira/browse/YARN-10515) | Fix flaky test TestCapacitySchedulerAutoQueueCreation.testDynamicAutoQueueCreationWithTags | Major | test | Peter Bacsko | Peter Bacsko | +| [HADOOP-17224](https://issues.apache.org/jira/browse/HADOOP-17224) | Install Intel ISA-L library in Dockerfile | Blocker | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-15632](https://issues.apache.org/jira/browse/HDFS-15632) | AbstractContractDeleteTest should set recursive parameter to true for recursive test cases. | Major | . | Konstantin Shvachko | Anton Kutuzov | +| [HADOOP-17258](https://issues.apache.org/jira/browse/HADOOP-17258) | MagicS3GuardCommitter fails with \`pendingset\` already exists | Major | fs/s3 | Dongjoon Hyun | Dongjoon Hyun | +| [HDFS-15661](https://issues.apache.org/jira/browse/HDFS-15661) | The DeadNodeDetector shouldn't be shared by different DFSClients. | Major | . | Jinglun | Jinglun | +| [HDFS-10498](https://issues.apache.org/jira/browse/HDFS-10498) | Intermittent test failure org.apache.hadoop.hdfs.server.namenode.snapshot.TestSnapshotFileLength.testSnapshotfileLength | Major | hdfs, snapshots | Hanisha Koneru | Jim Brennan | +| [HADOOP-17506](https://issues.apache.org/jira/browse/HADOOP-17506) | Fix typo in BUILDING.txt | Trivial | documentation | Gautham Banasandra | Gautham Banasandra | +| [HDFS-15791](https://issues.apache.org/jira/browse/HDFS-15791) | Possible Resource Leak in FSImageFormatProtobuf | Major | namenode | Narges Shadab | Narges Shadab | +| [HDFS-15795](https://issues.apache.org/jira/browse/HDFS-15795) | EC: Wrong checksum when reconstruction was failed by exception | Major | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [HDFS-15779](https://issues.apache.org/jira/browse/HDFS-15779) | EC: fix NPE caused by StripedWriter.clearBuffers during reconstruct block | Major | . 
| Hongbing Wang | Hongbing Wang | +| [HADOOP-17217](https://issues.apache.org/jira/browse/HADOOP-17217) | S3A FileSystem does not correctly delete directories with fake entries | Major | fs/s3 | Kaya Kupferschmidt | | +| [HDFS-15798](https://issues.apache.org/jira/browse/HDFS-15798) | EC: Reconstruct task failed, and It would be XmitsInProgress of DN has negative number | Major | . | Haiyang Hu | Haiyang Hu | +| [YARN-10607](https://issues.apache.org/jira/browse/YARN-10607) | User environment is unable to prepend PATH when mapreduce.admin.user.env also sets PATH | Major | . | Eric Badger | Eric Badger | +| [HDFS-15792](https://issues.apache.org/jira/browse/HDFS-15792) | ClasscastException while loading FSImage | Major | nn | Renukaprasad C | Renukaprasad C | +| [HADOOP-17516](https://issues.apache.org/jira/browse/HADOOP-17516) | Upgrade ant to 1.10.9 | Major | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10500](https://issues.apache.org/jira/browse/YARN-10500) | TestDelegationTokenRenewer fails intermittently | Major | test | Akira Ajisaka | Masatake Iwasaki | +| [HDFS-15806](https://issues.apache.org/jira/browse/HDFS-15806) | DeadNodeDetector should close all the threads when it is closed. | Major | . | Jinglun | Jinglun | +| [HADOOP-17534](https://issues.apache.org/jira/browse/HADOOP-17534) | Upgrade Jackson databind to 2.10.5.1 | Major | build | Adam Roberts | Akira Ajisaka | +| [MAPREDUCE-7323](https://issues.apache.org/jira/browse/MAPREDUCE-7323) | Remove job\_history\_summary.py | Major | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10647](https://issues.apache.org/jira/browse/YARN-10647) | Fix TestRMNodeLabelsManager failed after YARN-10501. | Major | . | Qi Zhu | Qi Zhu | +| [HADOOP-17528](https://issues.apache.org/jira/browse/HADOOP-17528) | Not closing an SFTP File System instance prevents JVM from exiting. | Major | . 
| Mikhail Pryakhin | Mikhail Pryakhin | +| [HADOOP-17510](https://issues.apache.org/jira/browse/HADOOP-17510) | Hadoop prints sensitive Cookie information. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-15422](https://issues.apache.org/jira/browse/HDFS-15422) | Reported IBR is partially replaced with stored info when queuing. | Critical | namenode | Kihwal Lee | Stephen O'Donnell | +| [YARN-10651](https://issues.apache.org/jira/browse/YARN-10651) | CapacityScheduler crashed with NPE in AbstractYarnScheduler.updateNodeResource() | Major | . | Haibo Chen | Haibo Chen | +| [MAPREDUCE-7320](https://issues.apache.org/jira/browse/MAPREDUCE-7320) | ClusterMapReduceTestCase does not clean directories | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-14013](https://issues.apache.org/jira/browse/HDFS-14013) | Skip any credentials stored in HDFS when starting ZKFC | Major | hdfs | Krzysztof Adamski | Stephen O'Donnell | +| [HDFS-15849](https://issues.apache.org/jira/browse/HDFS-15849) | ExpiredHeartbeats metric should be of Type.COUNTER | Major | metrics | Konstantin Shvachko | Qi Zhu | +| [YARN-10649](https://issues.apache.org/jira/browse/YARN-10649) | Fix RMNodeImpl.updateExistContainers leak | Major | resourcemanager | Max Xie | Max Xie | +| [YARN-10672](https://issues.apache.org/jira/browse/YARN-10672) | All testcases in TestReservations are flaky | Major | . | Szilard Nemeth | Szilard Nemeth | +| [HADOOP-17557](https://issues.apache.org/jira/browse/HADOOP-17557) | skip-dir option is not processed by Yetus | Major | build, precommit, yetus | Ahmed Hussein | Ahmed Hussein | +| [YARN-10671](https://issues.apache.org/jira/browse/YARN-10671) | Fix Typo in TestSchedulingRequestContainerAllocation | Minor | . | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [HDFS-15875](https://issues.apache.org/jira/browse/HDFS-15875) | Check whether file is being truncated before truncate | Major | . 
| Hui Fei | Hui Fei | +| [HADOOP-17582](https://issues.apache.org/jira/browse/HADOOP-17582) | Replace GitHub App Token with GitHub OAuth token | Major | build | Akira Ajisaka | Akira Ajisaka | +| [YARN-10687](https://issues.apache.org/jira/browse/YARN-10687) | Add option to disable/enable free disk space checking and percentage checking for full and not-full disks | Major | nodemanager | Qi Zhu | Qi Zhu | +| [HADOOP-17586](https://issues.apache.org/jira/browse/HADOOP-17586) | Upgrade org.codehaus.woodstox:stax2-api to 4.2.1 | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-17585](https://issues.apache.org/jira/browse/HADOOP-17585) | Correct timestamp format in the docs for the touch command | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-15809](https://issues.apache.org/jira/browse/HDFS-15809) | DeadNodeDetector doesn't remove live nodes from dead node set. | Major | . | Jinglun | Jinglun | +| [HADOOP-17532](https://issues.apache.org/jira/browse/HADOOP-17532) | Yarn Job execution get failed when LZ4 Compression Codec is used | Major | common | Bhavik Patel | Bhavik Patel | +| [YARN-10588](https://issues.apache.org/jira/browse/YARN-10588) | Percentage of queue and cluster is zero in WebUI | Major | . | Bilwa S T | Bilwa S T | +| [MAPREDUCE-7322](https://issues.apache.org/jira/browse/MAPREDUCE-7322) | revisiting TestMRIntermediateDataEncryption | Major | job submission, security, test | Ahmed Hussein | Ahmed Hussein | +| [YARN-10703](https://issues.apache.org/jira/browse/YARN-10703) | Fix potential null pointer error of gpuNodeResourceUpdateHandler in NodeResourceMonitorImpl. | Major | . | Qi Zhu | Qi Zhu | +| [HDFS-15868](https://issues.apache.org/jira/browse/HDFS-15868) | Possible Resource Leak in EditLogFileOutputStream | Major | . 
| Narges Shadab | Narges Shadab | +| [HADOOP-17592](https://issues.apache.org/jira/browse/HADOOP-17592) | Fix the wrong CIDR range example in Proxy User documentation | Minor | documentation | Kwangsun Noh | Kwangsun Noh | +| [YARN-10706](https://issues.apache.org/jira/browse/YARN-10706) | Upgrade com.github.eirslett:frontend-maven-plugin to 1.11.2 | Major | build | Mingliang Liu | Mingliang Liu | +| [MAPREDUCE-7325](https://issues.apache.org/jira/browse/MAPREDUCE-7325) | Intermediate data encryption is broken in LocalJobRunner | Major | job submission, security | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15908](https://issues.apache.org/jira/browse/HDFS-15908) | Possible Resource Leak in org.apache.hadoop.hdfs.qjournal.server.Journal | Major | . | Narges Shadab | Narges Shadab | +| [HDFS-15910](https://issues.apache.org/jira/browse/HDFS-15910) | Replace bzero with explicit\_bzero for better safety | Critical | libhdfs++ | Gautham Banasandra | Gautham Banasandra | +| [YARN-10697](https://issues.apache.org/jira/browse/YARN-10697) | Resources are displayed in bytes in UI for schedulers other than capacity | Major | . 
| Bilwa S T | Bilwa S T | +| [HADOOP-17602](https://issues.apache.org/jira/browse/HADOOP-17602) | Upgrade JUnit to 4.13.1 | Major | build, security, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15900](https://issues.apache.org/jira/browse/HDFS-15900) | RBF: empty blockpool id on dfsrouter caused by UNAVAILABLE NameNode | Major | rbf | Harunobu Daikoku | Harunobu Daikoku | +| [YARN-10501](https://issues.apache.org/jira/browse/YARN-10501) | Can't remove all node labels after add node label without nodemanager port | Critical | yarn | caozhiqiang | caozhiqiang | +| [YARN-10437](https://issues.apache.org/jira/browse/YARN-10437) | Destroy yarn service if any YarnException occurs during submitApp | Minor | yarn-native-services | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [YARN-10439](https://issues.apache.org/jira/browse/YARN-10439) | Yarn Service AM listens on all IP's on the machine | Minor | security, yarn-native-services | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [YARN-10441](https://issues.apache.org/jira/browse/YARN-10441) | Add support for hadoop.http.rmwebapp.scheduler.page.class | Major | scheduler | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [YARN-10466](https://issues.apache.org/jira/browse/YARN-10466) | Fix NullPointerException in yarn-services Component.java | Minor | . | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [YARN-10716](https://issues.apache.org/jira/browse/YARN-10716) | Fix typo in ContainerRuntime | Trivial | documentation | Wanqiang Ji | xishuhai | +| [HDFS-15494](https://issues.apache.org/jira/browse/HDFS-15494) | TestReplicaCachingGetSpaceUsed#testReplicaCachingGetSpaceUsedByRBWReplica Fails on Windows | Major | . | Ravuri Sushma sree | Ravuri Sushma sree | +| [HADOOP-17610](https://issues.apache.org/jira/browse/HADOOP-17610) | DelegationTokenAuthenticator prints token information | Major | . 
| Ravuri Sushma sree | Ravuri Sushma sree | +| [HADOOP-17587](https://issues.apache.org/jira/browse/HADOOP-17587) | Kinit with keytab should not display the keytab file's full path in any logs | Major | . | Ravuri Sushma sree | Ravuri Sushma sree | +| [HDFS-15950](https://issues.apache.org/jira/browse/HDFS-15950) | Remove unused hdfs.proto import | Major | hdfs-client | Gautham Banasandra | Gautham Banasandra | +| [HDFS-15940](https://issues.apache.org/jira/browse/HDFS-15940) | Some tests in TestBlockRecovery are consistently failing | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-15949](https://issues.apache.org/jira/browse/HDFS-15949) | Fix integer overflow | Major | libhdfs++ | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-17621](https://issues.apache.org/jira/browse/HADOOP-17621) | hadoop-auth to remove jetty-server dependency | Major | auth | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15948](https://issues.apache.org/jira/browse/HDFS-15948) | Fix test4tests for libhdfspp | Critical | build, libhdfs++ | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-17617](https://issues.apache.org/jira/browse/HADOOP-17617) | Incorrect representation of RESPONSE for Get Key Version in KMS index.md.vm file | Major | . 
| Ravuri Sushma sree | Ravuri Sushma sree | +| [MAPREDUCE-7329](https://issues.apache.org/jira/browse/MAPREDUCE-7329) | HadoopPipes task may fail when linux kernel version change from 3.x to 4.x | Major | pipes | chaoli | chaoli | +| [HADOOP-17608](https://issues.apache.org/jira/browse/HADOOP-17608) | Fix TestKMS failure | Major | kms | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15963](https://issues.apache.org/jira/browse/HDFS-15963) | Unreleased volume references cause an infinite loop | Major | datanode | Shuyan Zhang | Shuyan Zhang | +| [YARN-10460](https://issues.apache.org/jira/browse/YARN-10460) | Upgrading to JUnit 4.13 causes tests in TestNodeStatusUpdater to fail | Major | nodemanager, test | Peter Bacsko | Peter Bacsko | +| [HADOOP-17641](https://issues.apache.org/jira/browse/HADOOP-17641) | ITestWasbUriAndConfiguration.testCanonicalServiceName() failing now mockaccount exists | Minor | fs/azure, test | Steve Loughran | Steve Loughran | +| [HDFS-15974](https://issues.apache.org/jira/browse/HDFS-15974) | RBF: Unable to display the datanode UI of the router | Major | rbf, ui | Xiangyi Zhu | Xiangyi Zhu | +| [HADOOP-17655](https://issues.apache.org/jira/browse/HADOOP-17655) | Upgrade Jetty to 9.4.40 | Blocker | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-10749](https://issues.apache.org/jira/browse/YARN-10749) | Can't remove all node labels after add node label without nodemanager port, broken by YARN-10647 | Major | . | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [HDFS-15566](https://issues.apache.org/jira/browse/HDFS-15566) | NN restart fails after RollingUpgrade from 3.1.3/3.2.1 to 3.3.0 | Blocker | . 
| Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-15621](https://issues.apache.org/jira/browse/HDFS-15621) | Datanode DirectoryScanner uses excessive memory | Major | datanode | Stephen O'Donnell | Stephen O'Donnell | +| [YARN-10752](https://issues.apache.org/jira/browse/YARN-10752) | Shaded guava not found when compiling with profile hbase2.0 | Blocker | timelineserver | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15865](https://issues.apache.org/jira/browse/HDFS-15865) | Interrupt DataStreamer thread | Minor | datanode | Karthik Palanisamy | | +| [HDFS-15810](https://issues.apache.org/jira/browse/HDFS-15810) | RBF: RBFMetrics's TotalCapacity out of bounds | Major | . | Xiaoxing Wei | Fengnan Li | +| [HADOOP-17657](https://issues.apache.org/jira/browse/HADOOP-17657) | SequenceFile.Writer should implement StreamCapabilities | Major | . | Kishen Das | Kishen Das | +| [YARN-10756](https://issues.apache.org/jira/browse/YARN-10756) | Remove additional junit 4.11 dependency from javadoc | Major | build, test, timelineservice | ANANDA G B | Akira Ajisaka | +| [HADOOP-17375](https://issues.apache.org/jira/browse/HADOOP-17375) | Fix the error of TestDynamometerInfra | Major | test | Akira Ajisaka | Takanobu Asanuma | +| [HDFS-16001](https://issues.apache.org/jira/browse/HDFS-16001) | TestOfflineEditsViewer.testStored() fails reading negative value of FSEditLogOpCodes | Blocker | hdfs | Konstantin Shvachko | Akira Ajisaka | +| [HADOOP-17142](https://issues.apache.org/jira/browse/HADOOP-17142) | Fix outdated properties of journal node when perform rollback | Minor | . 
| Deegue | | +| [HADOOP-17107](https://issues.apache.org/jira/browse/HADOOP-17107) | hadoop-azure parallel tests not working on recent JDKs | Major | build, fs/azure | Steve Loughran | Steve Loughran | +| [YARN-10555](https://issues.apache.org/jira/browse/YARN-10555) | Missing access check before getAppAttempts | Critical | webapp | lujie | lujie | +| [HADOOP-17703](https://issues.apache.org/jira/browse/HADOOP-17703) | checkcompatibility.py errors out when specifying annotations | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16027](https://issues.apache.org/jira/browse/HDFS-16027) | HDFS-15245 breaks source code compatibility between 3.3.0 and 3.3.1. | Blocker | journal-node, ui | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-10725](https://issues.apache.org/jira/browse/YARN-10725) | Backport YARN-10120 to branch-3.3 | Major | . | Bilwa S T | Bilwa S T | +| [YARN-10701](https://issues.apache.org/jira/browse/YARN-10701) | The yarn.resource-types should support multi types without trimmed. | Major | . | Qi Zhu | Qi Zhu | +| [HADOOP-17718](https://issues.apache.org/jira/browse/HADOOP-17718) | Explicitly set locale in the Dockerfile | Blocker | build | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [MAPREDUCE-7348](https://issues.apache.org/jira/browse/MAPREDUCE-7348) | TestFrameworkUploader#testNativeIO fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17723](https://issues.apache.org/jira/browse/HADOOP-17723) | [build] fix the Dockerfile for ARM | Blocker | build | Wei-Chiu Chuang | Wei-Chiu Chuang | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-7288](https://issues.apache.org/jira/browse/MAPREDUCE-7288) | Fix TestLongLong#testRightShift | Minor | . 
| Wanqiang Ji | Wanqiang Ji | +| [HDFS-15514](https://issues.apache.org/jira/browse/HDFS-15514) | Remove useless dfs.webhdfs.enabled | Minor | test | Hui Fei | Hui Fei | +| [HADOOP-17205](https://issues.apache.org/jira/browse/HADOOP-17205) | Move personality file from Yetus to Hadoop repository | Major | test, yetus | Chao Sun | Chao Sun | +| [HDFS-15564](https://issues.apache.org/jira/browse/HDFS-15564) | Add Test annotation for TestPersistBlocks#testRestartDfsWithSync | Minor | hdfs | Hui Fei | Hui Fei | +| [YARN-9333](https://issues.apache.org/jira/browse/YARN-9333) | TestFairSchedulerPreemption.testRelaxLocalityPreemptionWithNoLessAMInRemainingNodes fails intermittently | Major | yarn | Prabhu Joseph | Peter Bacsko | +| [HDFS-15690](https://issues.apache.org/jira/browse/HDFS-15690) | Add lz4-java as hadoop-hdfs test dependency | Major | . | L. C. Hsieh | L. C. Hsieh | +| [HADOOP-17459](https://issues.apache.org/jira/browse/HADOOP-17459) | ADL Gen1: Fix the test case failures which are failing after the contract test update in hadoop-common | Minor | fs/adl | Bilahari T H | Bilahari T H | +| [HDFS-15898](https://issues.apache.org/jira/browse/HDFS-15898) | Test case TestOfflineImageViewer fails | Minor | . 
| Hui Fei | Hui Fei | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-16857](https://issues.apache.org/jira/browse/HADOOP-16857) | ABFS: Optimize HttpRequest retry triggers | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17002](https://issues.apache.org/jira/browse/HADOOP-17002) | ABFS: Avoid storage calls to check if the account is HNS enabled or not | Minor | fs/azure | Bilahari T H | Bilahari T H | +| [YARN-10215](https://issues.apache.org/jira/browse/YARN-10215) | Endpoint for obtaining direct URL for the logs | Major | yarn | Adam Antal | Andras Gyori | +| [HDFS-14353](https://issues.apache.org/jira/browse/HDFS-14353) | Erasure Coding: metrics xmitsInProgress become to negative. | Major | datanode, erasure-coding | Baolong Mao | Baolong Mao | +| [HDFS-15305](https://issues.apache.org/jira/browse/HDFS-15305) | Extend ViewFS and provide ViewFSOverloadScheme implementation with scheme configurable. | Major | fs, hadoop-client, hdfs-client, viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [YARN-10259](https://issues.apache.org/jira/browse/YARN-10259) | Reserved Containers not allocated from available space of other nodes in CandidateNodeSet in MultiNodePlacement | Major | capacityscheduler | Prabhu Joseph | Prabhu Joseph | +| [HDFS-15306](https://issues.apache.org/jira/browse/HDFS-15306) | Make mount-table to read from central place ( Let's say from HDFS) | Major | configuration, hadoop-client | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-16756](https://issues.apache.org/jira/browse/HADOOP-16756) | distcp -update to S3A; abfs, etc always overwrites due to block size mismatch | Major | fs/s3, tools/distcp | Daisuke Kobayashi | Steve Loughran | +| [HDFS-15322](https://issues.apache.org/jira/browse/HDFS-15322) | Make NflyFS to work when ViewFsOverloadScheme's scheme and target uris schemes are same. 
| Major | fs, nflyFs, viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [YARN-10108](https://issues.apache.org/jira/browse/YARN-10108) | FS-CS converter: nestedUserQueue with default rule results in invalid queue mapping | Major | . | Prabhu Joseph | Gergely Pollák | +| [HADOOP-16852](https://issues.apache.org/jira/browse/HADOOP-16852) | ABFS: Send error back to client for Read Ahead request failure | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17053](https://issues.apache.org/jira/browse/HADOOP-17053) | ABFS: FS initialize fails for incompatible account-agnostic Token Provider setting | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HDFS-15321](https://issues.apache.org/jira/browse/HDFS-15321) | Make DFSAdmin tool to work with ViewFSOverloadScheme | Major | dfsadmin, fs, viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-16568](https://issues.apache.org/jira/browse/HADOOP-16568) | S3A FullCredentialsTokenBinding fails if local credentials are unset | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-10284](https://issues.apache.org/jira/browse/YARN-10284) | Add lazy initialization of LogAggregationFileControllerFactory in LogServlet | Major | log-aggregation, yarn | Adam Antal | Adam Antal | +| [HDFS-15330](https://issues.apache.org/jira/browse/HDFS-15330) | Document the ViewFSOverloadScheme details in ViewFS guide | Major | viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15389](https://issues.apache.org/jira/browse/HDFS-15389) | DFSAdmin should close filesystem and dfsadmin -setBalancerBandwidth should work with ViewFSOverloadScheme | Major | dfsadmin, viewfsOverloadScheme | Ayush Saxena | Ayush Saxena | +| [HDFS-15394](https://issues.apache.org/jira/browse/HDFS-15394) | Add all available fs.viewfs.overload.scheme.target.\<scheme\>.impl classes in core-default.xml by default. 
| Major | configuration, viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15387](https://issues.apache.org/jira/browse/HDFS-15387) | FSUsage$DF should consider ViewFSOverloadScheme in processPath | Minor | viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [YARN-10292](https://issues.apache.org/jira/browse/YARN-10292) | FS-CS converter: add an option to enable asynchronous scheduling in CapacityScheduler | Major | fairscheduler | Benjamin Teke | Benjamin Teke | +| [HADOOP-17004](https://issues.apache.org/jira/browse/HADOOP-17004) | ABFS: Improve the ABFS driver documentation | Minor | fs/azure | Bilahari T H | Bilahari T H | +| [HDFS-15418](https://issues.apache.org/jira/browse/HDFS-15418) | ViewFileSystemOverloadScheme should represent mount links as non symlinks | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [YARN-9930](https://issues.apache.org/jira/browse/YARN-9930) | Support max running app logic for CapacityScheduler | Major | capacity scheduler, capacityscheduler | zhoukang | Peter Bacsko | +| [HDFS-15427](https://issues.apache.org/jira/browse/HDFS-15427) | Merged ListStatus with Fallback target filesystem and InternalDirViewFS. | Major | viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [YARN-10316](https://issues.apache.org/jira/browse/YARN-10316) | FS-CS converter: convert maxAppsDefault, maxRunningApps settings | Major | . 
| Peter Bacsko | Peter Bacsko | +| [HADOOP-17054](https://issues.apache.org/jira/browse/HADOOP-17054) | ABFS: Fix idempotency test failures when SharedKey is set as AuthType | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17050](https://issues.apache.org/jira/browse/HADOOP-17050) | S3A to support additional token issuers | Minor | fs/s3 | Gabor Bota | Steve Loughran | +| [HADOOP-17015](https://issues.apache.org/jira/browse/HADOOP-17015) | ABFS: Make PUT and POST operations idempotent | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HDFS-15429](https://issues.apache.org/jira/browse/HDFS-15429) | mkdirs should work when parent dir is internalDir and fallback configured. | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15436](https://issues.apache.org/jira/browse/HDFS-15436) | Default mount table name used by ViewFileSystem should be configurable | Major | viewfs, viewfsOverloadScheme | Virajith Jalaparti | Virajith Jalaparti | +| [HADOOP-16798](https://issues.apache.org/jira/browse/HADOOP-16798) | job commit failure in S3A MR magic committer test | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-10325](https://issues.apache.org/jira/browse/YARN-10325) | Document max-parallel-apps for Capacity Scheduler | Major | capacity scheduler, capacityscheduler | Peter Bacsko | Peter Bacsko | +| [HADOOP-16961](https://issues.apache.org/jira/browse/HADOOP-16961) | ABFS: Adding metrics to AbfsInputStream (AbfsInputStreamStatistics) | Major | fs/azure | Gabor Bota | Mehakmeet Singh | +| [HADOOP-17086](https://issues.apache.org/jira/browse/HADOOP-17086) | ABFS: Fix the parsing errors in ABFS Driver with creation Time (being returned in ListPath) | Major | fs/azure | Ishani | Bilahari T H | +| [HDFS-15430](https://issues.apache.org/jira/browse/HDFS-15430) | create should work when parent dir is internalDir and fallback configured. | Major | . 
| Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15450](https://issues.apache.org/jira/browse/HDFS-15450) | Fix NN trash emptier to work if ViewFSOverloadScheme enabled | Major | namenode, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17111](https://issues.apache.org/jira/browse/HADOOP-17111) | Replace Guava Optional with Java8+ Optional | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15449](https://issues.apache.org/jira/browse/HDFS-15449) | Optionally ignore port number in mount-table name when picking from initialized uri | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17058](https://issues.apache.org/jira/browse/HADOOP-17058) | Support for Appendblob in abfs driver | Major | fs/azure | Ishani | Ishani | +| [HDFS-15462](https://issues.apache.org/jira/browse/HDFS-15462) | Add fs.viewfs.overload.scheme.target.ofs.impl to core-default.xml | Major | configuration, viewfs, viewfsOverloadScheme | Siyao Meng | Siyao Meng | +| [HDFS-15464](https://issues.apache.org/jira/browse/HDFS-15464) | ViewFsOverloadScheme should work when -fs option pointing to remote cluster without mount links | Major | viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17105](https://issues.apache.org/jira/browse/HADOOP-17105) | S3AFS globStatus attempts to resolve symlinks | Minor | fs/s3 | Jimmy Zuber | Jimmy Zuber | +| [HADOOP-17022](https://issues.apache.org/jira/browse/HADOOP-17022) | Tune S3A listFiles() api. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17101](https://issues.apache.org/jira/browse/HADOOP-17101) | Replace Guava Function with Java8+ Function | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17099](https://issues.apache.org/jira/browse/HADOOP-17099) | Replace Guava Predicate with Java8+ Predicate | Minor | . 
| Ahmed Hussein | Ahmed Hussein | +| [HADOOP-16682](https://issues.apache.org/jira/browse/HADOOP-16682) | Remove unnecessary ABFS toString() invocations | Minor | fs/azure | Jeetesh Mangwani | Bilahari T H | +| [HADOOP-17136](https://issues.apache.org/jira/browse/HADOOP-17136) | ITestS3ADirectoryPerformance.testListOperations failing | Minor | fs/s3, test | Mukund Thakur | Mukund Thakur | +| [HDFS-15478](https://issues.apache.org/jira/browse/HDFS-15478) | When Empty mount points, we are assigning fallback link to self. But it should not use full URI for target fs. | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17100](https://issues.apache.org/jira/browse/HADOOP-17100) | Replace Guava Supplier with Java8+ Supplier in Hadoop | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17132](https://issues.apache.org/jira/browse/HADOOP-17132) | ABFS: Fix For Idempotency code | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17092](https://issues.apache.org/jira/browse/HADOOP-17092) | ABFS: Long waits and unintended retries when multiple threads try to fetch token using ClientCreds | Major | fs/azure | Sneha Vijayarajan | Bilahari T H | +| [HADOOP-17131](https://issues.apache.org/jira/browse/HADOOP-17131) | Refactor S3A Listing code for better isolation | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17137](https://issues.apache.org/jira/browse/HADOOP-17137) | ABFS: Tests ITestAbfsNetworkStatistics need to be config setting agnostic | Minor | fs/azure, test | Sneha Vijayarajan | Bilahari T H | +| [HADOOP-17149](https://issues.apache.org/jira/browse/HADOOP-17149) | ABFS: Test failure: testFailedRequestWhenCredentialsNotCorrect fails when run with SharedKey | Minor | fs/azure | Sneha Vijayarajan | Bilahari T H | +| [HADOOP-17163](https://issues.apache.org/jira/browse/HADOOP-17163) | ABFS: Add debug log for rename failures | Major | fs/azure | Bilahari T H | Bilahari T H | +| 
[HDFS-15515](https://issues.apache.org/jira/browse/HDFS-15515) | mkdirs on fallback should throw IOE out instead of suppressing and returning false | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-13230](https://issues.apache.org/jira/browse/HADOOP-13230) | S3A to optionally retain directory markers | Major | fs/s3 | Aaron Fabbri | Steve Loughran | +| [HADOOP-14124](https://issues.apache.org/jira/browse/HADOOP-14124) | S3AFileSystem silently deletes "fake" directories when writing a file. | Minor | fs, fs/s3 | Joel Baranick | | +| [HADOOP-16966](https://issues.apache.org/jira/browse/HADOOP-16966) | ABFS: Upgrade Store REST API Version to 2019-12-12 | Major | fs/azure | Ishani | Sneha Vijayarajan | +| [HDFS-15533](https://issues.apache.org/jira/browse/HDFS-15533) | Provide DFS API compatible class(ViewDistributedFileSystem), but use ViewFileSystemOverloadScheme inside | Major | dfs, viewfs | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17074](https://issues.apache.org/jira/browse/HADOOP-17074) | Optimise s3a Listing to be fully asynchronous. 
| Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HDFS-15529](https://issues.apache.org/jira/browse/HDFS-15529) | getChildFilesystems should include fallback fs as well | Critical | viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17167](https://issues.apache.org/jira/browse/HADOOP-17167) | ITestS3AEncryptionWithDefaultS3Settings fails if default bucket encryption != KMS | Minor | fs/s3 | Steve Loughran | Mukund Thakur | +| [HADOOP-17227](https://issues.apache.org/jira/browse/HADOOP-17227) | improve s3guard markers command line tool | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-10332](https://issues.apache.org/jira/browse/YARN-10332) | RESOURCE\_UPDATE event was repeatedly registered in DECOMMISSIONING state | Minor | resourcemanager | yehuanhuan | yehuanhuan | +| [HDFS-15558](https://issues.apache.org/jira/browse/HDFS-15558) | ViewDistributedFileSystem#recoverLease should call super.recoverLease when there are no mounts configured | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17181](https://issues.apache.org/jira/browse/HADOOP-17181) | Handle transient stream read failures in FileSystem contract tests | Minor | fs/s3 | Steve Loughran | | +| [HDFS-15551](https://issues.apache.org/jira/browse/HDFS-15551) | Tiny Improve for DeadNode detector | Minor | hdfs-client | dark\_num | imbajin | +| [HDFS-15555](https://issues.apache.org/jira/browse/HDFS-15555) | RBF: Refresh cacheNS when SocketException occurs | Major | rbf | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15532](https://issues.apache.org/jira/browse/HDFS-15532) | listFiles on root/InternalDir will fail if fallback root has file | Major | . 
| Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15578](https://issues.apache.org/jira/browse/HDFS-15578) | Fix the rename issues with fallback fs enabled | Major | viewfs, viewfsOverloadScheme | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-15585](https://issues.apache.org/jira/browse/HDFS-15585) | ViewDFS#getDelegationToken should not throw UnsupportedOperationException. | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17215](https://issues.apache.org/jira/browse/HADOOP-17215) | ABFS: Support for conditional overwrite | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HDFS-14811](https://issues.apache.org/jira/browse/HDFS-14811) | RBF: TestRouterRpc#testErasureCoding is flaky | Major | rbf | Chen Zhang | Chen Zhang | +| [HADOOP-17023](https://issues.apache.org/jira/browse/HADOOP-17023) | Tune listStatus() api of s3a. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17279](https://issues.apache.org/jira/browse/HADOOP-17279) | ABFS: Test testNegativeScenariosForCreateOverwriteDisabled fails for non-HNS account | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17183](https://issues.apache.org/jira/browse/HADOOP-17183) | ABFS: Enable checkaccess API | Major | fs/azure | Bilahari T H | Bilahari T H | +| [HDFS-15613](https://issues.apache.org/jira/browse/HDFS-15613) | RBF: Router FSCK fails after HDFS-14442 | Major | rbf | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17281](https://issues.apache.org/jira/browse/HADOOP-17281) | Implement FileSystem.listStatusIterator() in S3AFileSystem | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17293](https://issues.apache.org/jira/browse/HADOOP-17293) | S3A to always probe S3 in S3A getFileStatus on non-auth paths | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-15620](https://issues.apache.org/jira/browse/HDFS-15620) | RBF: Fix test failures after HADOOP-17281 | Major | rbf, test | Akira Ajisaka | Akira Ajisaka | +| 
[HADOOP-17261](https://issues.apache.org/jira/browse/HADOOP-17261) | s3a rename() now requires s3:deleteObjectVersion permission | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-16915](https://issues.apache.org/jira/browse/HADOOP-16915) | ABFS: Test failure ITestAzureBlobFileSystemRandomRead.testRandomReadPerformance | Major | . | Bilahari T H | Bilahari T H | +| [HADOOP-17166](https://issues.apache.org/jira/browse/HADOOP-17166) | ABFS: configure output stream thread pool | Minor | fs/azure | Bilahari T H | Bilahari T H | +| [HADOOP-17301](https://issues.apache.org/jira/browse/HADOOP-17301) | ABFS: read-ahead error reporting breaks buffer management | Critical | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17288](https://issues.apache.org/jira/browse/HADOOP-17288) | Use shaded guava from thirdparty | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-15459](https://issues.apache.org/jira/browse/HDFS-15459) | TestBlockTokenWithDFSStriped fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15461](https://issues.apache.org/jira/browse/HDFS-15461) | TestDFSClientRetries#testGetFileChecksum fails intermittently | Major | dfsclient, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-9776](https://issues.apache.org/jira/browse/HDFS-9776) | TestHAAppend#testMultipleAppendsDuringCatchupTailing is flaky | Major | . | Vinayakumar B | Ahmed Hussein | +| [HDFS-15657](https://issues.apache.org/jira/browse/HDFS-15657) | RBF: TestRouter#testNamenodeHeartBeatEnableDefault fails by BindException | Major | rbf, test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17305](https://issues.apache.org/jira/browse/HADOOP-17305) | ITestCustomSigner fails with gcs s3 compatible endpoint. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HDFS-15643](https://issues.apache.org/jira/browse/HDFS-15643) | EC: Fix checksum computation in case of native encoders | Blocker | . 
| Ahmed Hussein | Ayush Saxena | +| [HADOOP-17344](https://issues.apache.org/jira/browse/HADOOP-17344) | Harmonize guava version and shade guava in yarn-csi | Major | . | Wei-Chiu Chuang | Akira Ajisaka | +| [HADOOP-17376](https://issues.apache.org/jira/browse/HADOOP-17376) | ITestS3AContractRename failing against stricter tests | Major | fs/s3, test | Steve Loughran | Attila Doroszlai | +| [HADOOP-17379](https://issues.apache.org/jira/browse/HADOOP-17379) | AbstractS3ATokenIdentifier to set issue date == now | Major | fs/s3 | Steve Loughran | Jungtaek Lim | +| [HADOOP-17244](https://issues.apache.org/jira/browse/HADOOP-17244) | HADOOP-17244. S3A directory delete tombstones dir markers prematurely. | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17388](https://issues.apache.org/jira/browse/HADOOP-17388) | AbstractS3ATokenIdentifier to issue date in UTC | Major | . | Steve Loughran | Jungtaek Lim | +| [HADOOP-17343](https://issues.apache.org/jira/browse/HADOOP-17343) | Upgrade aws-java-sdk to 1.11.901 | Minor | build, fs/s3 | Dongjoon Hyun | Steve Loughran | +| [HADOOP-17325](https://issues.apache.org/jira/browse/HADOOP-17325) | WASB: Test failures | Major | fs/azure, test | Sneha Vijayarajan | Steve Loughran | +| [HADOOP-17323](https://issues.apache.org/jira/browse/HADOOP-17323) | s3a getFileStatus("/") to skip IO | Minor | fs/s3 | Steve Loughran | Mukund Thakur | +| [HADOOP-17311](https://issues.apache.org/jira/browse/HADOOP-17311) | ABFS: Logs should redact SAS signature | Major | fs/azure, security | Sneha Vijayarajan | Bilahari T H | +| [HADOOP-17313](https://issues.apache.org/jira/browse/HADOOP-17313) | FileSystem.get to support slow-to-instantiate FS clients | Major | fs, fs/azure, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17394](https://issues.apache.org/jira/browse/HADOOP-17394) | [JDK 11] mvn package -Pdocs fails | Major | build, documentation | Akira Ajisaka | Akira Ajisaka | +| 
[HADOOP-17396](https://issues.apache.org/jira/browse/HADOOP-17396) | ABFS: testRenameFileOverExistingFile Fails after Contract test update | Major | fs/azure, test | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17318](https://issues.apache.org/jira/browse/HADOOP-17318) | S3A committer to support concurrent jobs with same app attempt ID & dest dir | Minor | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17385](https://issues.apache.org/jira/browse/HADOOP-17385) | ITestS3ADeleteCost.testDirMarkersFileCreation failure | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-15844](https://issues.apache.org/jira/browse/HADOOP-15844) | tag S3GuardTool entry points as limitedPrivate("management-tools")/evolving | Minor | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17332](https://issues.apache.org/jira/browse/HADOOP-17332) | S3A marker tool mixes up -min and -max | Trivial | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17397](https://issues.apache.org/jira/browse/HADOOP-17397) | ABFS: SAS Test updates for version and permission update | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HDFS-15708](https://issues.apache.org/jira/browse/HDFS-15708) | TestURLConnectionFactory fails by NoClassDefFoundError in branch-3.3 and branch-3.2 | Blocker | test | Akira Ajisaka | Chao Sun | +| [HDFS-15716](https://issues.apache.org/jira/browse/HDFS-15716) | TestUpgradeDomainBlockPlacementPolicy flaky | Major | namenode, test | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17422](https://issues.apache.org/jira/browse/HADOOP-17422) | ABFS: Set default ListMaxResults to max server limit | Major | fs/azure | Sumangala Patki | Thomas Marqardt | +| [HADOOP-17450](https://issues.apache.org/jira/browse/HADOOP-17450) | hadoop-common to add IOStatistics API | Major | fs | Steve Loughran | Steve Loughran | +| [HADOOP-17271](https://issues.apache.org/jira/browse/HADOOP-17271) | S3A statistics to support IOStatistics | Major | fs/s3 | Steve Loughran | 
Steve Loughran | +| [HADOOP-17347](https://issues.apache.org/jira/browse/HADOOP-17347) | ABFS: Optimise read for small files/tails of files | Major | fs/azure | Bilahari T H | Bilahari T H | +| [HADOOP-17272](https://issues.apache.org/jira/browse/HADOOP-17272) | ABFS Streams to support IOStatistics API | Major | fs/azure | Steve Loughran | Mehakmeet Singh | +| [HADOOP-17451](https://issues.apache.org/jira/browse/HADOOP-17451) | Intermittent failure of S3A tests which make assertions on statistics/IOStatistics | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-15762](https://issues.apache.org/jira/browse/HDFS-15762) | TestMultipleNNPortQOP#testMultipleNNPortOverwriteDownStream fails intermittently | Minor | . | Toshihiko Uchida | Toshihiko Uchida | +| [HDFS-15672](https://issues.apache.org/jira/browse/HDFS-15672) | TestBalancerWithMultipleNameNodes#testBalancingBlockpoolsWithBlockPoolPolicy fails on trunk | Major | . | Ahmed Hussein | Masatake Iwasaki | +| [HADOOP-13845](https://issues.apache.org/jira/browse/HADOOP-13845) | s3a to instrument duration of HTTP calls | Minor | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17456](https://issues.apache.org/jira/browse/HADOOP-17456) | S3A ITestPartialRenamesDeletes.testPartialDirDelete[bulk-delete=true] failure | Minor | fs/s3, test | Steve Loughran | Steve Loughran | +| [HADOOP-17455](https://issues.apache.org/jira/browse/HADOOP-17455) | [s3a] Intermittent failure of ITestS3ADeleteCost.testDeleteSingleFileInDir | Major | fs/s3, test | Gabor Bota | Steve Loughran | +| [HADOOP-17433](https://issues.apache.org/jira/browse/HADOOP-17433) | Skipping network I/O in S3A getFileStatus(/) breaks ITestAssumeRole | Minor | fs/s3, test | Steve Loughran | Steve Loughran | +| [HADOOP-17296](https://issues.apache.org/jira/browse/HADOOP-17296) | ABFS: Allow Random Reads to be of Buffer Size | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17413](https://issues.apache.org/jira/browse/HADOOP-17413) | 
ABFS: Release Elastic ByteBuffer pool memory at outputStream close | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17407](https://issues.apache.org/jira/browse/HADOOP-17407) | ABFS: Delete Idempotency handling can lead to NPE | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17404](https://issues.apache.org/jira/browse/HADOOP-17404) | ABFS: Piggyback flush on Append calls for short writes | Major | fs/azure | Sneha Vijayarajan | Sneha Vijayarajan | +| [HADOOP-17480](https://issues.apache.org/jira/browse/HADOOP-17480) | S3A docs to state s3 is consistent, deprecate S3Guard | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17414](https://issues.apache.org/jira/browse/HADOOP-17414) | Magic committer files don't have the count of bytes written collected by spark | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17493](https://issues.apache.org/jira/browse/HADOOP-17493) | renaming S3A Statistic DELEGATION\_TOKENS\_ISSUED to DELEGATION\_TOKEN\_ISSUED broke tests downstream | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17483](https://issues.apache.org/jira/browse/HADOOP-17483) | magic committer to be enabled for all S3 buckets | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17337](https://issues.apache.org/jira/browse/HADOOP-17337) | S3A NetworkBinding has a runtime class dependency on a third-party shaded class | Blocker | fs/s3 | Chris Wensel | Steve Loughran | +| [HADOOP-17475](https://issues.apache.org/jira/browse/HADOOP-17475) | ABFS : add high performance listStatusIterator | Major | fs/azure | Bilahari T H | Bilahari T H | +| [HADOOP-17432](https://issues.apache.org/jira/browse/HADOOP-17432) | [JDK 16] KerberosUtil#getOidInstance is broken by JEP 396 | Major | auth | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-13327](https://issues.apache.org/jira/browse/HADOOP-13327) | Add OutputStream + Syncable to the Filesystem Specification | Major | fs | Steve Loughran | 
Steve Loughran | +| [HDFS-15836](https://issues.apache.org/jira/browse/HDFS-15836) | RBF: Fix contract tests after HADOOP-13327 | Major | rbf | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17038](https://issues.apache.org/jira/browse/HADOOP-17038) | Support disabling buffered reads in ABFS positional reads | Major | . | Anoop Sam John | Anoop Sam John | +| [HADOOP-15710](https://issues.apache.org/jira/browse/HADOOP-15710) | ABFS checkException to map 403 to AccessDeniedException | Blocker | fs/azure | Steve Loughran | Steve Loughran | +| [HDFS-15847](https://issues.apache.org/jira/browse/HDFS-15847) | create client protocol: add ecPolicyName & storagePolicy param to debug statement string | Minor | . | Bhavik Patel | Bhavik Patel | +| [HADOOP-16748](https://issues.apache.org/jira/browse/HADOOP-16748) | Migrate to Python 3 and upgrade Yetus to 0.13.0 | Major | . | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-16906](https://issues.apache.org/jira/browse/HADOOP-16906) | Add some Abortable.abort() interface for streams etc which can be terminated | Blocker | fs, fs/azure, fs/s3 | Steve Loughran | Jungtaek Lim | +| [HADOOP-17567](https://issues.apache.org/jira/browse/HADOOP-17567) | typo in MagicCommitTracker | Trivial | fs/s3 | Pierrick HYMBERT | Pierrick HYMBERT | +| [HADOOP-17191](https://issues.apache.org/jira/browse/HADOOP-17191) | ABFS: Run the integration tests with various combinations of configurations and publish consolidated results | Minor | fs/azure, test | Bilahari T H | Bilahari T H | +| [HADOOP-16721](https://issues.apache.org/jira/browse/HADOOP-16721) | Improve S3A rename resilience | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17548](https://issues.apache.org/jira/browse/HADOOP-17548) | ABFS: Toggle Store Mkdirs request overwrite parameter | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17537](https://issues.apache.org/jira/browse/HADOOP-17537) | Correct abfs test assertion reversed in HADOOP-13327 | Major | fs/azure, 
test | Sumangala Patki | Sumangala Patki | +| [HDFS-15890](https://issues.apache.org/jira/browse/HDFS-15890) | Improve the Logs for File Concat Operation | Minor | namenode | Bhavik Patel | Bhavik Patel | +| [HDFS-13975](https://issues.apache.org/jira/browse/HDFS-13975) | TestBalancer#testMaxIterationTime fails sporadically | Major | . | Jason Darrell Lowe | Toshihiko Uchida | +| [YARN-10688](https://issues.apache.org/jira/browse/YARN-10688) | ClusterMetrics should support GPU capacity related metrics. | Major | metrics, resourcemanager | Qi Zhu | Qi Zhu | +| [YARN-10692](https://issues.apache.org/jira/browse/YARN-10692) | Add Node GPU Utilization and apply to NodeMetrics. | Major | . | Qi Zhu | Qi Zhu | +| [HDFS-15902](https://issues.apache.org/jira/browse/HDFS-15902) | Improve the log for HTTPFS server operation | Minor | httpfs | Bhavik Patel | Bhavik Patel | +| [HADOOP-17476](https://issues.apache.org/jira/browse/HADOOP-17476) | ITestAssumeRole.testAssumeRoleBadInnerAuth failure | Major | fs/s3, test | Steve Loughran | Steve Loughran | +| [HADOOP-13551](https://issues.apache.org/jira/browse/HADOOP-13551) | Collect AwsSdkMetrics in S3A FileSystem IOStatistics | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-10713](https://issues.apache.org/jira/browse/YARN-10713) | ClusterMetrics should support custom resource capacity related metrics. | Major | . | Qi Zhu | Qi Zhu | +| [HDFS-15921](https://issues.apache.org/jira/browse/HDFS-15921) | Improve the log for the Storage Policy Operations | Minor | namenode | Bhavik Patel | Bhavik Patel | +| [YARN-10702](https://issues.apache.org/jira/browse/YARN-10702) | Add cluster metric for amount of CPU used by RM Event Processor | Minor | yarn | Jim Brennan | Jim Brennan | +| [YARN-10503](https://issues.apache.org/jira/browse/YARN-10503) | Support queue capacity in terms of absolute resources with custom resourceType. | Critical | . 
| Qi Zhu | Qi Zhu | +| [HADOOP-17630](https://issues.apache.org/jira/browse/HADOOP-17630) | [JDK 15] TestPrintableString fails due to Unicode 13.0 support | Major | . | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17576](https://issues.apache.org/jira/browse/HADOOP-17576) | ABFS: Disable throttling update for auth failures | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [YARN-10723](https://issues.apache.org/jira/browse/YARN-10723) | Change CS nodes page in UI to support custom resource. | Major | . | Qi Zhu | Qi Zhu | +| [HADOOP-16948](https://issues.apache.org/jira/browse/HADOOP-16948) | ABFS: Support infinite lease dirs | Minor | . | Billie Rinaldi | Billie Rinaldi | +| [HADOOP-17471](https://issues.apache.org/jira/browse/HADOOP-17471) | ABFS to collect IOStatistics | Major | fs/azure | Steve Loughran | Mehakmeet Singh | +| [HADOOP-17535](https://issues.apache.org/jira/browse/HADOOP-17535) | ABFS: ITestAzureBlobFileSystemCheckAccess test failure if test doesn't have oauth keys | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17112](https://issues.apache.org/jira/browse/HADOOP-17112) | whitespace not allowed in paths when saving files to s3a via committer | Blocker | fs/s3 | Krzysztof Adamski | Krzysztof Adamski | +| [HADOOP-17597](https://issues.apache.org/jira/browse/HADOOP-17597) | Add option to downgrade S3A rejection of Syncable to warning | Minor | . | Steve Loughran | Steve Loughran | +| [HADOOP-17661](https://issues.apache.org/jira/browse/HADOOP-17661) | mvn versions:set fails to parse pom.xml | Blocker | build | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-17536](https://issues.apache.org/jira/browse/HADOOP-17536) | ABFS: Suport for customer provided encryption key | Minor | fs/azure | Bilahari T H | Bilahari T H | +| [YARN-10707](https://issues.apache.org/jira/browse/YARN-10707) | Support custom resources in ResourceUtilization, and update Node GPU Utilization to use. 
| Major | yarn | Qi Zhu | Qi Zhu | +| [HADOOP-17653](https://issues.apache.org/jira/browse/HADOOP-17653) | Do not use guava's Files.createTempDir() | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15952](https://issues.apache.org/jira/browse/HDFS-15952) | TestRouterRpcMultiDestination#testProxyGetTransactionID and testProxyVersionRequest are flaky | Major | rbf | Harunobu Daikoku | Akira Ajisaka | +| [HADOOP-16742](https://issues.apache.org/jira/browse/HADOOP-16742) | Possible NPE in S3A MultiObjectDeleteSupport error handling | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17644](https://issues.apache.org/jira/browse/HADOOP-17644) | Add back the exceptions removed by HADOOP-17432 for compatibility | Blocker | bulid | Akira Ajisaka | Quan Li | +| [YARN-10642](https://issues.apache.org/jira/browse/YARN-10642) | Race condition: AsyncDispatcher can get stuck by the changes introduced in YARN-8995 | Critical | resourcemanager | zhengchenyu | zhengchenyu | +| [YARN-9615](https://issues.apache.org/jira/browse/YARN-9615) | Add dispatcher metrics to RM | Major | . | Jonathan Hung | Qi Zhu | +| [HDFS-13934](https://issues.apache.org/jira/browse/HDFS-13934) | Multipart uploaders to be created through API call to FileSystem/FileContext, not service loader | Major | fs, fs/s3, hdfs | Steve Loughran | Steve Loughran | +| [HADOOP-17665](https://issues.apache.org/jira/browse/HADOOP-17665) | Ignore missing keystore configuration in reloading mechanism | Major | . | Borislav Iordanov | Borislav Iordanov | +| [HADOOP-17663](https://issues.apache.org/jira/browse/HADOOP-17663) | Remove useless property hadoop.assemblies.version in pom file | Trivial | build | Wei-Chiu Chuang | Akira Ajisaka | +| [HADOOP-17666](https://issues.apache.org/jira/browse/HADOOP-17666) | Update LICENSE for 3.3.1 | Blocker | . 
| Wei-Chiu Chuang | Wei-Chiu Chuang | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17430](https://issues.apache.org/jira/browse/HADOOP-17430) | Restore ability to set Text to empty byte array | Minor | common | gaozhan ding | gaozhan ding | +| [HDFS-15870](https://issues.apache.org/jira/browse/HDFS-15870) | Remove unused configuration dfs.namenode.stripe.min | Minor | . | tomscut | tomscut | +| [HDFS-15808](https://issues.apache.org/jira/browse/HDFS-15808) | Add metrics for FSNamesystem read/write lock hold long time | Major | hdfs | tomscut | tomscut | +| [HDFS-15873](https://issues.apache.org/jira/browse/HDFS-15873) | Add namenode address in logs for block report | Minor | datanode, hdfs | tomscut | tomscut | +| [HDFS-15906](https://issues.apache.org/jira/browse/HDFS-15906) | Close FSImage and FSNamesystem after formatting is complete | Minor | . | tomscut | tomscut | +| [HDFS-15892](https://issues.apache.org/jira/browse/HDFS-15892) | Add metric for editPendingQ in FSEditLogAsync | Minor | . | tomscut | tomscut | +| [HDFS-15951](https://issues.apache.org/jira/browse/HDFS-15951) | Remove unused parameters in NameNodeProxiesClient | Minor | . | tomscut | tomscut | +| [HDFS-15975](https://issues.apache.org/jira/browse/HDFS-15975) | Use LongAdder instead of AtomicLong | Minor | . | tomscut | tomscut | +| [HDFS-15970](https://issues.apache.org/jira/browse/HDFS-15970) | Print network topology on the web | Minor | . | tomscut | tomscut | +| [HDFS-15991](https://issues.apache.org/jira/browse/HDFS-15991) | Add location into datanode info for NameNodeMXBean | Minor | . | tomscut | tomscut | +| [HADOOP-17055](https://issues.apache.org/jira/browse/HADOOP-17055) | Remove residual code of Ozone | Major | . 
| Wanqiang Ji | Wanqiang Ji | +| [YARN-10274](https://issues.apache.org/jira/browse/YARN-10274) | Merge QueueMapping and QueueMappingEntity | Major | yarn | Gergely Pollák | Gergely Pollák | +| [YARN-10281](https://issues.apache.org/jira/browse/YARN-10281) | Redundant QueuePath usage in UserGroupMappingPlacementRule and AppNameMappingPlacementRule | Major | . | Gergely Pollák | Gergely Pollák | +| [YARN-10279](https://issues.apache.org/jira/browse/YARN-10279) | Avoid unnecessary QueueMappingEntity creations | Minor | . | Gergely Pollák | Hudáky Márton Gyula | +| [YARN-10277](https://issues.apache.org/jira/browse/YARN-10277) | CapacityScheduler test TestUserGroupMappingPlacementRule should build proper hierarchy | Major | . | Gergely Pollák | Szilard Nemeth | +| [HADOOP-16990](https://issues.apache.org/jira/browse/HADOOP-16990) | Update Mockserver | Major | . | Wei-Chiu Chuang | Attila Doroszlai | +| [YARN-10278](https://issues.apache.org/jira/browse/YARN-10278) | CapacityScheduler test framework ProportionalCapacityPreemptionPolicyMockFramework need some review | Major | . | Gergely Pollák | Szilard Nemeth | +| [YARN-10540](https://issues.apache.org/jira/browse/YARN-10540) | Node page is broken in YARN UI1 and UI2 including RMWebService api for nodes | Critical | webapp | Sunil G | Jim Brennan | +| [HADOOP-17445](https://issues.apache.org/jira/browse/HADOOP-17445) | Update the year to 2021 | Major | . | Xiaoqiao He | Xiaoqiao He | +| [HDFS-15731](https://issues.apache.org/jira/browse/HDFS-15731) | Reduce threadCount for unit tests to reduce the memory usage | Major | build, test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17571](https://issues.apache.org/jira/browse/HADOOP-17571) | Upgrade com.fasterxml.woodstox:woodstox-core for security reasons | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-15895](https://issues.apache.org/jira/browse/HDFS-15895) | DFSAdmin#printOpenFiles has redundant String#format usage | Minor | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-15926](https://issues.apache.org/jira/browse/HDFS-15926) | Removed duplicate dependency of hadoop-annotations | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17614](https://issues.apache.org/jira/browse/HADOOP-17614) | Bump netty to the latest 4.1.61 | Blocker | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-17622](https://issues.apache.org/jira/browse/HADOOP-17622) | Avoid usage of deprecated IOUtils#cleanup API | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17624](https://issues.apache.org/jira/browse/HADOOP-17624) | Remove any rocksdb exclusion code | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-17625](https://issues.apache.org/jira/browse/HADOOP-17625) | Update to Jetty 9.4.39 | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-15989](https://issues.apache.org/jira/browse/HDFS-15989) | Split TestBalancer into two classes | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17676](https://issues.apache.org/jira/browse/HADOOP-17676) | Restrict imports from org.apache.curator.shaded | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17683](https://issues.apache.org/jira/browse/HADOOP-17683) | Update commons-io to 2.8.0 | Major | . | Wei-Chiu Chuang | Akira Ajisaka | +| [HADOOP-17426](https://issues.apache.org/jira/browse/HADOOP-17426) | Upgrade to hadoop-thirdparty-1.1.0 | Major | . | Ayush Saxena | Wei-Chiu Chuang | +| [HADOOP-17739](https://issues.apache.org/jira/browse/HADOOP-17739) | Use hadoop-thirdparty 1.1.1 | Major | . 
| Wei-Chiu Chuang | Wei-Chiu Chuang | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.1/RELEASENOTES.3.3.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.1/RELEASENOTES.3.3.1.md new file mode 100644 index 0000000000000..238dd5764693b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.1/RELEASENOTES.3.3.1.md @@ -0,0 +1,293 @@ + + +# Apache Hadoop 3.3.1 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-16054](https://issues.apache.org/jira/browse/HADOOP-16054) | *Major* | **Update Dockerfile to use Bionic** + +The build image has been upgraded to Bionic. + + +--- + +* [HDFS-15281](https://issues.apache.org/jira/browse/HDFS-15281) | *Major* | **ZKFC ignores dfs.namenode.rpc-bind-host and uses dfs.namenode.rpc-address to bind to host address** + +ZKFC binds host address to "dfs.namenode.servicerpc-bind-host", if configured. Otherwise, it binds to "dfs.namenode.rpc-bind-host". If neither of those is configured, ZKFC binds itself to NameNode RPC server address (effectively "dfs.namenode.rpc-address"). + + +--- + +* [HADOOP-16916](https://issues.apache.org/jira/browse/HADOOP-16916) | *Minor* | **ABFS: Delegation SAS generator for integration with Ranger** + +Azure ABFS support for Shared Access Signatures (SAS) + + +--- + +* [HADOOP-17044](https://issues.apache.org/jira/browse/HADOOP-17044) | *Major* | **Revert "HADOOP-8143. Change distcp to have -pb on by default"** + +Distcp block size is not preserved by default, unless -pb is specified. This restores the behavior prior to Hadoop 3. + + +--- + +* [HADOOP-17024](https://issues.apache.org/jira/browse/HADOOP-17024) | *Major* | **ListStatus on ViewFS root (ls "/") should list the linkFallBack root (configured target root).** + +ViewFS#listStatus on root("/") considers listing from fallbackLink if available. 
If the same directory name is present in configured mount path as well as in fallback link, then only the configured mount path will be listed in the returned result. + + +--- + +* [HDFS-13183](https://issues.apache.org/jira/browse/HDFS-13183) | *Major* | **Standby NameNode process getBlocks request to reduce Active load** + +Enable balancer to redirect getBlocks request to a Standby Namenode, thus reducing the performance impact of balancer to the Active NameNode. + +The feature is disabled by default. To enable it, configure the hdfs-site.xml of balancer: +dfs.ha.allow.stale.reads = true. + + +--- + +* [HADOOP-17076](https://issues.apache.org/jira/browse/HADOOP-17076) | *Minor* | **ABFS: Delegation SAS Generator Updates** + +Azure Blob File System (ABFS) SAS Generator Update + + +--- + +* [HADOOP-17089](https://issues.apache.org/jira/browse/HADOOP-17089) | *Critical* | **WASB: Update azure-storage-java SDK** + +Azure WASB bug fix that can cause list results to appear empty. + + +--- + +* [HADOOP-17105](https://issues.apache.org/jira/browse/HADOOP-17105) | *Minor* | **S3AFS globStatus attempts to resolve symlinks** + +Remove unnecessary symlink resolution in S3AFileSystem globStatus + + +--- + +* [HADOOP-13230](https://issues.apache.org/jira/browse/HADOOP-13230) | *Major* | **S3A to optionally retain directory markers** + +The S3A connector now has an option to stop deleting directory markers as files are written. This eliminates the IO throttling the operations can cause, and avoids creating tombstone markers on versioned S3 buckets. + +This feature is incompatible with all versions of Hadoop which lack the HADOOP-17199 change to list and getFileStatus calls. + +Consult the S3A documentation for further details + + +--- + +* [HADOOP-17215](https://issues.apache.org/jira/browse/HADOOP-17215) | *Major* | **ABFS: Support for conditional overwrite** + +ABFS: Support for conditional overwrite. 
+ + +--- + +* [YARN-9809](https://issues.apache.org/jira/browse/YARN-9809) | *Major* | **NMs should supply a health status when registering with RM** + +Improved node registration with node health status. + + +--- + +* [HADOOP-17125](https://issues.apache.org/jira/browse/HADOOP-17125) | *Major* | **Using snappy-java in SnappyCodec** + +The SnappyCodec uses the snappy-java compression library, rather than explicitly referencing native binaries. It contains the native libraries for many operating systems and instruction sets, falling back to a pure java implementation. It does require that snappy-java.jar is on the classpath. It can be found in hadoop-common/lib, and has already been present as part of the avro dependencies. + + +--- + +* [HDFS-15253](https://issues.apache.org/jira/browse/HDFS-15253) | *Major* | **Set default throttle value on dfs.image.transfer.bandwidthPerSec** + +The configuration dfs.image.transfer.bandwidthPerSec which defines the maximum bandwidth available for fsimage transfer is changed from 0 (meaning no throttle at all) to 50MB/s. + + +--- + +* [HADOOP-17021](https://issues.apache.org/jira/browse/HADOOP-17021) | *Minor* | **Add concat fs command** + +"hadoop fs" has a concat command. Available on all filesystems which support the concat API including HDFS and WebHDFS + + +--- + +* [HADOOP-17292](https://issues.apache.org/jira/browse/HADOOP-17292) | *Major* | **Using lz4-java in Lz4Codec** + +Hadoop's LZ4 compression codec now depends on lz4-java. The native LZ4 is performed by the encapsulated JNI and it is no longer necessary to install and configure the lz4 system package. + +The lz4-java is declared in provided scope. Applications that wish to use lz4 codec must declare dependency on lz4-java explicitly. 
+ + +--- + +* [HADOOP-17313](https://issues.apache.org/jira/browse/HADOOP-17313) | *Major* | **FileSystem.get to support slow-to-instantiate FS clients** + +The option "fs.creation.parallel.count" sets a semaphore to throttle the number of FileSystem instances which +can be created simultaneously. + +This is designed to reduce the impact of many threads in an application calling +FileSystem.get() on a filesystem which takes time to instantiate -for example +to an object store where HTTPS connections are set up during initialization. +Many threads trying to do this may create spurious delays by conflicting +for access to synchronized blocks, when simply limiting the parallelism +diminishes the conflict, so speeds up all threads trying to access +the store. + +The default value, 64, is larger than is likely to deliver any speedup -but +it does mean that there should be no adverse effects from the change. + +If a service appears to be blocking on all threads initializing connections to +abfs, s3a or another store, try a smaller (possibly significantly smaller) value. + + +--- + +* [HADOOP-17338](https://issues.apache.org/jira/browse/HADOOP-17338) | *Major* | **Intermittent S3AInputStream failures: Premature end of Content-Length delimited message body etc** + +**WARNING: No release note provided for this change.** + + +--- + +* [HDFS-15380](https://issues.apache.org/jira/browse/HDFS-15380) | *Major* | **RBF: Could not fetch real remote IP in RouterWebHdfsMethods** + +**WARNING: No release note provided for this change.** + + +--- + +* [HADOOP-17422](https://issues.apache.org/jira/browse/HADOOP-17422) | *Major* | **ABFS: Set default ListMaxResults to max server limit** + +ABFS: The default value for "fs.azure.list.max.results" was changed from 500 to 5000. 
+ + +--- + +* [HDFS-15719](https://issues.apache.org/jira/browse/HDFS-15719) | *Critical* | **[Hadoop 3] Both NameNodes can crash simultaneously due to the short JN socket timeout** + +The default value of the configuration hadoop.http.idle\_timeout.ms (how long Jetty waits before disconnecting an idle connection) is changed from 10000 to 60000. +This property is inlined during compile time, so an application that references this property must be recompiled in order for it to take effect. + + +--- + +* [HADOOP-17454](https://issues.apache.org/jira/browse/HADOOP-17454) | *Major* | **[s3a] Disable bucket existence check - set fs.s3a.bucket.probe to 0** + +S3A bucket existence check is disabled (fs.s3a.bucket.probe is 0), so there will be no existence check on the bucket during the S3AFileSystem initialization. The first operation which attempts to interact with the bucket will fail if the bucket does not exist. + + +--- + +* [HADOOP-17337](https://issues.apache.org/jira/browse/HADOOP-17337) | *Blocker* | **S3A NetworkBinding has a runtime class dependency on a third-party shaded class** + +The s3a filesystem will link against the unshaded AWS s3 SDK. Making an application's dependencies consistent with that SDK is left as an exercise. Note: native openssl is not supported as a socket factory in unshaded deployments. + + +--- + +* [HADOOP-16748](https://issues.apache.org/jira/browse/HADOOP-16748) | *Major* | **Migrate to Python 3 and upgrade Yetus to 0.13.0** + + +- Upgraded Yetus to 0.13.0. +- Removed determine-flaky-tests-hadoop.py. +- Temporarily disabled shelldocs check in the Jenkins jobs due to YETUS-1099. + + +--- + +* [HADOOP-16721](https://issues.apache.org/jira/browse/HADOOP-16721) | *Blocker* | **Improve S3A rename resilience** + +The S3A connector's rename() operation now raises FileNotFoundException if the source doesn't exist; FileAlreadyExistsException if the destination is unsuitable. 
It no longer checks for a parent directory existing -instead it simply verifies that there is no file immediately above the destination path. + + +--- + +* [HADOOP-17531](https://issues.apache.org/jira/browse/HADOOP-17531) | *Critical* | **DistCp: Reduce memory usage on copying huge directories** + +Added a -useiterator option in distcp which uses listStatusIterator for building the listing. Primarily to reduce memory usage at client for building listing. + + +--- + +* [HADOOP-16870](https://issues.apache.org/jira/browse/HADOOP-16870) | *Major* | **Use spotbugs-maven-plugin instead of findbugs-maven-plugin** + +Removed findbugs from the hadoop build images and added spotbugs instead. +Upgraded SpotBugs to 4.2.2 and spotbugs-maven-plugin to 4.2.0. + + +--- + +* [HADOOP-17222](https://issues.apache.org/jira/browse/HADOOP-17222) | *Major* | **Create socket address leveraging URI cache** + +DFS client can use the newly added URI cache when creating socket address for read operations. By default it is disabled. When enabled, creating socket address will use cached URI object based on host:port to reduce the frequency of URI object creation. + +To enable it, set the following config key to true: +\<property\> + \<name\>dfs.client.read.uri.cache.enabled\</name\> + \<value\>true\</value\> +\</property\> + + +--- + +* [HADOOP-16524](https://issues.apache.org/jira/browse/HADOOP-16524) | *Major* | **Automatic keystore reloading for HttpServer2** + +Adds auto-reload of keystore. + +Adds below new config (default 10 seconds): + + ssl.{0}.stores.reload.interval + +The refresh interval used to check if either of the truststore or keystore certificate file has changed. + + +--- + +* [HDFS-15942](https://issues.apache.org/jira/browse/HDFS-15942) | *Major* | **Increase Quota initialization threads** + +The default quota initialization thread count during the NameNode startup process (dfs.namenode.quota.init-threads) is increased from 4 to 12. 
+ + +--- + +* [HDFS-15975](https://issues.apache.org/jira/browse/HDFS-15975) | *Minor* | **Use LongAdder instead of AtomicLong** + +This JIRA changes public fields in DFSHedgedReadMetrics. If you are using the public member variables of DFSHedgedReadMetrics, you need to use them through the public API. + + +--- + +* [HADOOP-17597](https://issues.apache.org/jira/browse/HADOOP-17597) | *Minor* | **Add option to downgrade S3A rejection of Syncable to warning** + +The S3A output streams now raise UnsupportedOperationException on calls to Syncable.hsync() or Syncable.hflush(). This is to make absolutely clear to programs trying to use the syncable API that the stream doesn't save any data at all until close. Programs which use this to flush their write ahead logs will fail immediately, rather than appear to succeed but without saving any data. + +To downgrade the API calls to simply printing a warning, set "fs.s3a.downgrade.syncable.exceptions" to true. This will not change the other behaviour: no data is saved. + +Object stores are not filesystems. 
+ + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.2/CHANGELOG.3.3.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.2/CHANGELOG.3.3.2.md new file mode 100644 index 0000000000000..162f9928489ee --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.2/CHANGELOG.3.3.2.md @@ -0,0 +1,350 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.2 - 2022-02-21 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15814](https://issues.apache.org/jira/browse/HDFS-15814) | Make some parameters configurable for DataNodeDiskMetrics | Major | hdfs | tomscut | tomscut | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15288](https://issues.apache.org/jira/browse/HDFS-15288) | Add Available Space Rack Fault Tolerant BPP | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-16048](https://issues.apache.org/jira/browse/HDFS-16048) | RBF: Print network topology on the router web | Minor | . | tomscut | tomscut | +| [HDFS-16337](https://issues.apache.org/jira/browse/HDFS-16337) | Show start time of Datanode on Web | Minor | . 
| tomscut | tomscut | +| [HADOOP-17979](https://issues.apache.org/jira/browse/HADOOP-17979) | Interface EtagSource to allow FileStatus subclasses to provide etags | Major | fs, fs/azure, fs/s3 | Steve Loughran | Steve Loughran | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-10123](https://issues.apache.org/jira/browse/YARN-10123) | Error message around yarn app -stop/start can be improved to highlight that an implementation at framework level is needed for the stop/start functionality to work | Minor | client, documentation | Siddharth Ahuja | Siddharth Ahuja | +| [HADOOP-17756](https://issues.apache.org/jira/browse/HADOOP-17756) | Increase precommit job timeout from 20 hours to 24 hours. | Major | build | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-16073](https://issues.apache.org/jira/browse/HDFS-16073) | Remove redundant RPC requests for getFileLinkInfo in ClientNamenodeProtocolTranslatorPB | Minor | . | lei w | lei w | +| [HDFS-16074](https://issues.apache.org/jira/browse/HDFS-16074) | Remove an expensive debug string concatenation | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16080](https://issues.apache.org/jira/browse/HDFS-16080) | RBF: Invoking method in all locations should break the loop after successful result | Minor | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16075](https://issues.apache.org/jira/browse/HDFS-16075) | Use empty array constants present in StorageType and DatanodeInfo to avoid creating redundant objects | Major | . | Viraj Jasani | Viraj Jasani | +| [MAPREDUCE-7354](https://issues.apache.org/jira/browse/MAPREDUCE-7354) | Use empty array constants present in TaskCompletionEvent to avoid creating redundant objects | Minor | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-16082](https://issues.apache.org/jira/browse/HDFS-16082) | Avoid non-atomic operations on exceptionsSinceLastBalance and failedTimesSinceLastSuccessfulBalance in Balancer | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16076](https://issues.apache.org/jira/browse/HDFS-16076) | Avoid using slow DataNodes for reading by sorting locations | Major | hdfs | tomscut | tomscut | +| [HDFS-16085](https://issues.apache.org/jira/browse/HDFS-16085) | Move the getPermissionChecker out of the read lock | Minor | . | tomscut | tomscut | +| [YARN-10834](https://issues.apache.org/jira/browse/YARN-10834) | Intra-queue preemption: apps that don't use defined custom resource won't be preempted. | Major | . | Eric Payne | Eric Payne | +| [HADOOP-17777](https://issues.apache.org/jira/browse/HADOOP-17777) | Update clover-maven-plugin version from 3.3.0 to 4.4.1 | Major | . | Wanqiang Ji | Wanqiang Ji | +| [HDFS-16090](https://issues.apache.org/jira/browse/HDFS-16090) | Fine grained locking for datanodeNetworkCounts | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17749](https://issues.apache.org/jira/browse/HADOOP-17749) | Remove lock contention in SelectorPool of SocketIOWithTimeout | Major | common | Xuesen Liang | Xuesen Liang | +| [HADOOP-17775](https://issues.apache.org/jira/browse/HADOOP-17775) | Remove JavaScript package from Docker environment | Major | build | Masatake Iwasaki | Masatake Iwasaki | +| [HADOOP-17402](https://issues.apache.org/jira/browse/HADOOP-17402) | Add GCS FS impl reference to core-default.xml | Major | fs | Rafal Wojdyla | Rafal Wojdyla | +| [HADOOP-17794](https://issues.apache.org/jira/browse/HADOOP-17794) | Add a sample configuration to use ZKDelegationTokenSecretManager in Hadoop KMS | Major | documentation, kms, security | Akira Ajisaka | Akira Ajisaka | +| [HDFS-16122](https://issues.apache.org/jira/browse/HDFS-16122) | Fix DistCpContext#toString() | Minor | . 
| tomscut | tomscut | +| [HADOOP-12665](https://issues.apache.org/jira/browse/HADOOP-12665) | Document hadoop.security.token.service.use\_ip | Major | documentation | Arpit Agarwal | Akira Ajisaka | +| [YARN-10456](https://issues.apache.org/jira/browse/YARN-10456) | RM PartitionQueueMetrics records are named QueueMetrics in Simon metrics registry | Major | resourcemanager | Eric Payne | Eric Payne | +| [HDFS-15650](https://issues.apache.org/jira/browse/HDFS-15650) | Make the socket timeout for computing checksum of striped blocks configurable | Minor | datanode, ec, erasure-coding | Yushi Hayasaka | Yushi Hayasaka | +| [YARN-10858](https://issues.apache.org/jira/browse/YARN-10858) | [UI2] YARN-10826 breaks Queue view | Major | yarn-ui-v2 | Andras Gyori | Masatake Iwasaki | +| [HADOOP-16290](https://issues.apache.org/jira/browse/HADOOP-16290) | Enable RpcMetrics units to be configurable | Major | ipc, metrics | Erik Krogen | Viraj Jasani | +| [YARN-10860](https://issues.apache.org/jira/browse/YARN-10860) | Make max container per heartbeat configs refreshable | Major | . | Eric Badger | Eric Badger | +| [HADOOP-17813](https://issues.apache.org/jira/browse/HADOOP-17813) | Checkstyle - Allow line length: 100 | Major | . | Akira Ajisaka | Viraj Jasani | +| [HADOOP-17811](https://issues.apache.org/jira/browse/HADOOP-17811) | ABFS ExponentialRetryPolicy doesn't pick up configuration values | Minor | documentation, fs/azure | Brian Frank Loss | Brian Frank Loss | +| [HADOOP-17819](https://issues.apache.org/jira/browse/HADOOP-17819) | Add extensions to ProtobufRpcEngine RequestHeaderProto | Major | common | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-15936](https://issues.apache.org/jira/browse/HDFS-15936) | Solve BlockSender#sendPacket() does not record SocketTimeout exception | Minor | . 
| JiangHua Zhu | JiangHua Zhu | +| [HDFS-16153](https://issues.apache.org/jira/browse/HDFS-16153) | Avoid evaluation of LOG.debug statement in QuorumJournalManager | Trivial | . | wangzhaohui | wangzhaohui | +| [HDFS-16154](https://issues.apache.org/jira/browse/HDFS-16154) | TestMiniJournalCluster failing intermittently because of not reseting UserGroupInformation completely | Minor | . | wangzhaohui | wangzhaohui | +| [HADOOP-17837](https://issues.apache.org/jira/browse/HADOOP-17837) | Make it easier to debug UnknownHostExceptions from NetUtils.connect | Minor | . | Bryan Beaudreault | Bryan Beaudreault | +| [HDFS-16175](https://issues.apache.org/jira/browse/HDFS-16175) | Improve the configurable value of Server #PURGE\_INTERVAL\_NANOS | Major | ipc | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16173](https://issues.apache.org/jira/browse/HDFS-16173) | Improve CopyCommands#Put#executor queue configurability | Major | fs | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17897](https://issues.apache.org/jira/browse/HADOOP-17897) | Allow nested blocks in switch case in checkstyle settings | Minor | build | Masatake Iwasaki | Masatake Iwasaki | +| [HADOOP-17857](https://issues.apache.org/jira/browse/HADOOP-17857) | Check real user ACLs in addition to proxied user ACLs | Major | . | Eric Payne | Eric Payne | +| [HDFS-16210](https://issues.apache.org/jira/browse/HDFS-16210) | RBF: Add the option of refreshCallQueue to RouterAdmin | Major | . | Janus Chow | Janus Chow | +| [HDFS-16221](https://issues.apache.org/jira/browse/HDFS-16221) | RBF: Add usage of refreshCallQueue for Router | Major | . | Janus Chow | Janus Chow | +| [HDFS-16223](https://issues.apache.org/jira/browse/HDFS-16223) | AvailableSpaceRackFaultTolerantBlockPlacementPolicy should use chooseRandomWithStorageTypeTwoTrial() for better performance. | Major | . 
| Ayush Saxena | Ayush Saxena | +| [HADOOP-17893](https://issues.apache.org/jira/browse/HADOOP-17893) | Improve PrometheusSink for Namenode TopMetrics | Major | metrics | Max Xie | Max Xie | +| [HADOOP-17926](https://issues.apache.org/jira/browse/HADOOP-17926) | Maven-eclipse-plugin is no longer needed since Eclipse can import Maven projects by itself. | Minor | documentation | Rintaro Ikeda | Rintaro Ikeda | +| [YARN-10935](https://issues.apache.org/jira/browse/YARN-10935) | AM Total Queue Limit goes below per-user AM Limit if parent is full. | Major | capacity scheduler, capacityscheduler | Eric Payne | Eric Payne | +| [HADOOP-17939](https://issues.apache.org/jira/browse/HADOOP-17939) | Support building on Apple Silicon | Major | build, common | Dongjoon Hyun | Dongjoon Hyun | +| [HADOOP-17941](https://issues.apache.org/jira/browse/HADOOP-17941) | Update xerces to 2.12.1 | Minor | . | Zhongwei Zhu | Zhongwei Zhu | +| [HDFS-16246](https://issues.apache.org/jira/browse/HDFS-16246) | Print lockWarningThreshold in InstrumentedLock#logWarning and InstrumentedLock#logWaitWarning | Minor | . | tomscut | tomscut | +| [HDFS-16252](https://issues.apache.org/jira/browse/HDFS-16252) | Correct docs for dfs.http.client.retry.policy.spec | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16241](https://issues.apache.org/jira/browse/HDFS-16241) | Standby close reconstruction thread | Major | . | zhanghuazong | zhanghuazong | +| [HADOOP-17974](https://issues.apache.org/jira/browse/HADOOP-17974) | Fix the import statements in hadoop-aws module | Minor | build, fs/azure | Tamas Domok | | +| [HDFS-16277](https://issues.apache.org/jira/browse/HDFS-16277) | Improve decision in AvailableSpaceBlockPlacementPolicy | Major | block placement | guophilipse | guophilipse | +| [HADOOP-17770](https://issues.apache.org/jira/browse/HADOOP-17770) | WASB : Support disabling buffered reads in positional reads | Major | . 
| Anoop Sam John | Anoop Sam John | +| [HDFS-16282](https://issues.apache.org/jira/browse/HDFS-16282) | Duplicate generic usage information to hdfs debug command | Minor | tools | daimin | daimin | +| [YARN-1115](https://issues.apache.org/jira/browse/YARN-1115) | Provide optional means for a scheduler to check real user ACLs | Major | capacity scheduler, scheduler | Eric Payne | | +| [HDFS-16279](https://issues.apache.org/jira/browse/HDFS-16279) | Print detail datanode info when process first storage report | Minor | . | tomscut | tomscut | +| [HDFS-16286](https://issues.apache.org/jira/browse/HDFS-16286) | Debug tool to verify the correctness of erasure coding on file | Minor | erasure-coding, tools | daimin | daimin | +| [HDFS-16294](https://issues.apache.org/jira/browse/HDFS-16294) | Remove invalid DataNode#CONFIG\_PROPERTY\_SIMULATED | Major | datanode | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16299](https://issues.apache.org/jira/browse/HDFS-16299) | Fix bug for TestDataNodeVolumeMetrics#verifyDataNodeVolumeMetrics | Minor | . | tomscut | tomscut | +| [HDFS-16301](https://issues.apache.org/jira/browse/HDFS-16301) | Improve BenchmarkThroughput#SIZE naming standardization | Minor | benchmarks, test | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16287](https://issues.apache.org/jira/browse/HDFS-16287) | Support to make dfs.namenode.avoid.read.slow.datanode reconfigurable | Major | . | Haiyang Hu | Haiyang Hu | +| [HDFS-16321](https://issues.apache.org/jira/browse/HDFS-16321) | Fix invalid config in TestAvailableSpaceRackFaultTolerantBPP | Minor | test | guophilipse | guophilipse | +| [HDFS-16315](https://issues.apache.org/jira/browse/HDFS-16315) | Add metrics related to Transfer and NativeCopy for DataNode | Major | . | tomscut | tomscut | +| [HADOOP-17998](https://issues.apache.org/jira/browse/HADOOP-17998) | Allow get command to run with multi threads. 
| Major | fs | Chengwei Wang | Chengwei Wang | +| [HDFS-16344](https://issues.apache.org/jira/browse/HDFS-16344) | Improve DirectoryScanner.Stats#toString | Major | . | tomscut | tomscut | +| [HADOOP-18023](https://issues.apache.org/jira/browse/HADOOP-18023) | Allow cp command to run with multi threads. | Major | fs | Chengwei Wang | Chengwei Wang | +| [HDFS-16314](https://issues.apache.org/jira/browse/HDFS-16314) | Support to make dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled reconfigurable | Major | . | Haiyang Hu | Haiyang Hu | +| [HADOOP-18026](https://issues.apache.org/jira/browse/HADOOP-18026) | Fix default value of Magic committer | Minor | common | guophilipse | guophilipse | +| [HDFS-16345](https://issues.apache.org/jira/browse/HDFS-16345) | Fix test cases fail in TestBlockStoragePolicy | Major | build | guophilipse | guophilipse | +| [HADOOP-18040](https://issues.apache.org/jira/browse/HADOOP-18040) | Use maven.test.failure.ignore instead of ignoreTestFailure | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17643](https://issues.apache.org/jira/browse/HADOOP-17643) | WASB : Make metadata checks case insensitive | Major | . | Anoop Sam John | Anoop Sam John | +| [HADOOP-18033](https://issues.apache.org/jira/browse/HADOOP-18033) | Upgrade fasterxml Jackson to 2.13.0 | Major | build | Akira Ajisaka | Viraj Jasani | +| [HDFS-16327](https://issues.apache.org/jira/browse/HDFS-16327) | Make dfs.namenode.max.slowpeer.collect.nodes reconfigurable | Major | . | tomscut | tomscut | +| [HDFS-16375](https://issues.apache.org/jira/browse/HDFS-16375) | The FBR lease ID should be exposed to the log | Major | . 
| tomscut | tomscut | +| [HDFS-16386](https://issues.apache.org/jira/browse/HDFS-16386) | Reduce DataNode load when FsDatasetAsyncDiskService is working | Major | datanode | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16391](https://issues.apache.org/jira/browse/HDFS-16391) | Avoid evaluation of LOG.debug statement in NameNodeHeartbeatService | Trivial | . | wangzhaohui | wangzhaohui | +| [YARN-8234](https://issues.apache.org/jira/browse/YARN-8234) | Improve RM system metrics publisher's performance by pushing events to timeline server in batch | Critical | resourcemanager, timelineserver | Hu Ziqian | Ashutosh Gupta | +| [HADOOP-18052](https://issues.apache.org/jira/browse/HADOOP-18052) | Support Apple Silicon in start-build-env.sh | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-18056](https://issues.apache.org/jira/browse/HADOOP-18056) | DistCp: Filter duplicates in the source paths | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-18065](https://issues.apache.org/jira/browse/HADOOP-18065) | ExecutorHelper.logThrowableFromAfterExecute() is too noisy. | Minor | . | Mukund Thakur | Mukund Thakur | +| [HDFS-16043](https://issues.apache.org/jira/browse/HDFS-16043) | Add markedDeleteBlockScrubberThread to delete blocks asynchronously | Major | hdfs, namanode | Xiangyi Zhu | Xiangyi Zhu | +| [HADOOP-18094](https://issues.apache.org/jira/browse/HADOOP-18094) | Disable S3A auditing by default. 
| Blocker | fs/s3 | Steve Loughran | Steve Loughran | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-10438](https://issues.apache.org/jira/browse/YARN-10438) | Handle null containerId in ClientRMService#getContainerReport() | Major | resourcemanager | Raghvendra Singh | Shubham Gupta | +| [YARN-10428](https://issues.apache.org/jira/browse/YARN-10428) | Zombie applications in the YARN queue using FAIR + sizebasedweight | Critical | capacityscheduler | Guang Yang | Andras Gyori | +| [HDFS-15916](https://issues.apache.org/jira/browse/HDFS-15916) | DistCp: Backward compatibility: Distcp fails from Hadoop 3 to Hadoop 2 for snapshotdiff | Major | distcp | Srinivasu Majeti | Ayush Saxena | +| [HDFS-15977](https://issues.apache.org/jira/browse/HDFS-15977) | Call explicit\_bzero only if it is available | Major | libhdfs++ | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-14922](https://issues.apache.org/jira/browse/HADOOP-14922) | Build of Mapreduce Native Task module fails with unknown opcode "bswap" | Major | . | Anup Halarnkar | Anup Halarnkar | +| [HADOOP-17700](https://issues.apache.org/jira/browse/HADOOP-17700) | ExitUtil#halt info log should log HaltException | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-10770](https://issues.apache.org/jira/browse/YARN-10770) | container-executor permission is wrong in SecureContainer.md | Major | documentation | Akira Ajisaka | Siddharth Ahuja | +| [YARN-10691](https://issues.apache.org/jira/browse/YARN-10691) | DominantResourceCalculator isInvalidDivisor should consider only countable resource types | Major | . | Bilwa S T | Bilwa S T | +| [HDFS-16031](https://issues.apache.org/jira/browse/HDFS-16031) | Possible Resource Leak in org.apache.hadoop.hdfs.server.aliasmap#InMemoryAliasMap | Major | . 
| Narges Shadab | Narges Shadab | +| [MAPREDUCE-7348](https://issues.apache.org/jira/browse/MAPREDUCE-7348) | TestFrameworkUploader#testNativeIO fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-15915](https://issues.apache.org/jira/browse/HDFS-15915) | Race condition with async edits logging due to updating txId outside of the namesystem log | Major | hdfs, namenode | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-16040](https://issues.apache.org/jira/browse/HDFS-16040) | RpcQueueTime metric counts requeued calls as unique events. | Major | hdfs | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [MAPREDUCE-7287](https://issues.apache.org/jira/browse/MAPREDUCE-7287) | Distcp will delete existing file , If we use "-delete and -update" options and distcp file. | Major | distcp | zhengchenyu | zhengchenyu | +| [HDFS-15998](https://issues.apache.org/jira/browse/HDFS-15998) | Fix NullPointException In listOpenFiles | Major | . | Haiyang Hu | Haiyang Hu | +| [HDFS-16050](https://issues.apache.org/jira/browse/HDFS-16050) | Some dynamometer tests fail | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-17631](https://issues.apache.org/jira/browse/HADOOP-17631) | Configuration ${env.VAR:-FALLBACK} should eval FALLBACK when restrictSystemProps=true | Minor | common | Steve Loughran | Steve Loughran | +| [YARN-10809](https://issues.apache.org/jira/browse/YARN-10809) | testWithHbaseConfAtHdfsFileSystem consistently failing | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-10803](https://issues.apache.org/jira/browse/YARN-10803) | [JDK 11] TestRMFailoverProxyProvider and TestNoHaRMFailoverProxyProvider fails by ClassCastException | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-16057](https://issues.apache.org/jira/browse/HDFS-16057) | Make sure the order for location in ENTERING\_MAINTENANCE state | Minor | . 
| tomscut | tomscut | +| [HDFS-16055](https://issues.apache.org/jira/browse/HDFS-16055) | Quota is not preserved in snapshot INode | Major | hdfs | Siyao Meng | Siyao Meng | +| [HDFS-16068](https://issues.apache.org/jira/browse/HDFS-16068) | WebHdfsFileSystem has a possible connection leak in connection with HttpFS | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-10767](https://issues.apache.org/jira/browse/YARN-10767) | Yarn Logs Command retrying on Standby RM for 30 times | Major | . | D M Murali Krishna Reddy | D M Murali Krishna Reddy | +| [HADOOP-17760](https://issues.apache.org/jira/browse/HADOOP-17760) | Delete hadoop.ssl.enabled and dfs.https.enable from docs and core-default.xml | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13671](https://issues.apache.org/jira/browse/HDFS-13671) | Namenode deletes large dir slowly caused by FoldedTreeSet#removeAndGet | Major | . | Yiqun Lin | Haibin Huang | +| [HDFS-16061](https://issues.apache.org/jira/browse/HDFS-16061) | DFTestUtil.waitReplication can produce false positives | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HDFS-14575](https://issues.apache.org/jira/browse/HDFS-14575) | LeaseRenewer#daemon threads leak in DFSClient | Major | . | Tao Yang | Renukaprasad C | +| [YARN-10826](https://issues.apache.org/jira/browse/YARN-10826) | [UI2] Upgrade Node.js to at least v12.22.1 | Major | yarn-ui-v2 | Akira Ajisaka | Masatake Iwasaki | +| [HADOOP-17769](https://issues.apache.org/jira/browse/HADOOP-17769) | Upgrade JUnit to 4.13.2 | Major | . | Ahmed Hussein | Ahmed Hussein | +| [YARN-10824](https://issues.apache.org/jira/browse/YARN-10824) | Title not set for JHS and NM webpages | Major | . | Rajshree Mishra | Bilwa S T | +| [HDFS-16092](https://issues.apache.org/jira/browse/HDFS-16092) | Avoid creating LayoutFlags redundant objects | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HADOOP-17764](https://issues.apache.org/jira/browse/HADOOP-17764) | S3AInputStream read does not re-open the input stream on the second read retry attempt | Major | fs/s3 | Zamil Majdy | Zamil Majdy | +| [HDFS-16109](https://issues.apache.org/jira/browse/HDFS-16109) | Fix flaky some unit tests since they offen timeout | Minor | test | tomscut | tomscut | +| [HDFS-16108](https://issues.apache.org/jira/browse/HDFS-16108) | Incorrect log placeholders used in JournalNodeSyncer | Minor | . | Viraj Jasani | Viraj Jasani | +| [MAPREDUCE-7353](https://issues.apache.org/jira/browse/MAPREDUCE-7353) | Mapreduce job fails when NM is stopped | Major | . | Bilwa S T | Bilwa S T | +| [HDFS-16121](https://issues.apache.org/jira/browse/HDFS-16121) | Iterative snapshot diff report can generate duplicate records for creates, deletes and Renames | Major | snapshots | Srinivasu Majeti | Shashikant Banerjee | +| [HDFS-15796](https://issues.apache.org/jira/browse/HDFS-15796) | ConcurrentModificationException error happens on NameNode occasionally | Critical | hdfs | Daniel Ma | Daniel Ma | +| [HADOOP-17793](https://issues.apache.org/jira/browse/HADOOP-17793) | Better token validation | Major | . | Artem Smotrakov | Artem Smotrakov | +| [HDFS-16042](https://issues.apache.org/jira/browse/HDFS-16042) | DatanodeAdminMonitor scan should be delay based | Major | datanode | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17803](https://issues.apache.org/jira/browse/HADOOP-17803) | Remove WARN logging from LoggingAuditor when executing a request outside an audit span | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HDFS-16127](https://issues.apache.org/jira/browse/HDFS-16127) | Improper pipeline close recovery causes a permanent write failure or data loss. | Major | . 
| Kihwal Lee | Kihwal Lee | +| [HADOOP-17028](https://issues.apache.org/jira/browse/HADOOP-17028) | ViewFS should initialize target filesystems lazily | Major | client-mounts, fs, viewfs | Uma Maheswara Rao G | Abhishek Das | +| [HADOOP-17801](https://issues.apache.org/jira/browse/HADOOP-17801) | No error message reported when bucket doesn't exist in S3AFS | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17796](https://issues.apache.org/jira/browse/HADOOP-17796) | Upgrade jetty version to 9.4.43 | Major | . | Wei-Chiu Chuang | Renukaprasad C | +| [HDFS-12920](https://issues.apache.org/jira/browse/HDFS-12920) | HDFS default value change (with adding time unit) breaks old version MR tarball work with Hadoop 3.x | Critical | configuration, hdfs | Junping Du | Akira Ajisaka | +| [HDFS-16145](https://issues.apache.org/jira/browse/HDFS-16145) | CopyListing fails with FNF exception with snapshot diff | Major | distcp | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-10813](https://issues.apache.org/jira/browse/YARN-10813) | Set default capacity of root for node labels | Major | . | Andras Gyori | Andras Gyori | +| [HDFS-16144](https://issues.apache.org/jira/browse/HDFS-16144) | Revert HDFS-15372 (Files in snapshots no longer see attribute provider permissions) | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17817](https://issues.apache.org/jira/browse/HADOOP-17817) | HADOOP-17817. S3A to raise IOE if both S3-CSE and S3Guard enabled | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [YARN-9551](https://issues.apache.org/jira/browse/YARN-9551) | TestTimelineClientV2Impl.testSyncCall fails intermittently | Minor | ATSv2, test | Prabhu Joseph | Andras Gyori | +| [HDFS-15175](https://issues.apache.org/jira/browse/HDFS-15175) | Multiple CloseOp shared block instance causes the standby namenode to crash when rolling editlog | Critical | . 
| Yicong Cai | Wan Chang | +| [YARN-10869](https://issues.apache.org/jira/browse/YARN-10869) | CS considers only the default maximum-allocation-mb/vcore property as a maximum when it creates dynamic queues | Major | capacity scheduler | Benjamin Teke | Benjamin Teke | +| [YARN-10789](https://issues.apache.org/jira/browse/YARN-10789) | RM HA startup can fail due to race conditions in ZKConfigurationStore | Major | . | Tarun Parimi | Tarun Parimi | +| [HADOOP-17812](https://issues.apache.org/jira/browse/HADOOP-17812) | NPE in S3AInputStream read() after failure to reconnect to store | Major | fs/s3 | Bobby Wang | Bobby Wang | +| [YARN-6221](https://issues.apache.org/jira/browse/YARN-6221) | Entities missing from ATS when summary log file info got returned to the ATS before the domain log | Critical | yarn | Sushmitha Sreenivasan | Xiaomin Zhang | +| [MAPREDUCE-7258](https://issues.apache.org/jira/browse/MAPREDUCE-7258) | HistoryServerRest.html#Task\_Counters\_API, modify the jobTaskCounters's itemName from "taskcounterGroup" to "taskCounterGroup". | Minor | documentation | jenny | jenny | +| [HADOOP-17370](https://issues.apache.org/jira/browse/HADOOP-17370) | Upgrade commons-compress to 1.21 | Major | common | Dongjoon Hyun | Akira Ajisaka | +| [HDFS-16151](https://issues.apache.org/jira/browse/HDFS-16151) | Improve the parameter comments related to ProtobufRpcEngine2#Server() | Minor | documentation | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17844](https://issues.apache.org/jira/browse/HADOOP-17844) | Upgrade JSON smart to 2.4.7 | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-16177](https://issues.apache.org/jira/browse/HDFS-16177) | Bug fix for Util#receiveFile | Minor | . | tomscut | tomscut | +| [YARN-10814](https://issues.apache.org/jira/browse/YARN-10814) | YARN shouldn't start with empty hadoop.http.authentication.signature.secret.file | Major | . 
| Benjamin Teke | Tamas Domok | +| [HADOOP-17858](https://issues.apache.org/jira/browse/HADOOP-17858) | Avoid possible class loading deadlock with VerifierNone initialization | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17869](https://issues.apache.org/jira/browse/HADOOP-17869) | fs.s3a.connection.maximum should be bigger than fs.s3a.threads.max | Major | common | Dongjoon Hyun | Dongjoon Hyun | +| [HADOOP-17886](https://issues.apache.org/jira/browse/HADOOP-17886) | Upgrade ant to 1.10.11 | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17874](https://issues.apache.org/jira/browse/HADOOP-17874) | ExceptionsHandler to add terse/suppressed Exceptions in thread-safe manner | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-15129](https://issues.apache.org/jira/browse/HADOOP-15129) | Datanode caches namenode DNS lookup failure and cannot startup | Minor | ipc | Karthik Palaniappan | Chris Nauroth | +| [HADOOP-17870](https://issues.apache.org/jira/browse/HADOOP-17870) | HTTP Filesystem to qualify paths in open()/getFileStatus() | Minor | fs | VinothKumar Raman | VinothKumar Raman | +| [HADOOP-17899](https://issues.apache.org/jira/browse/HADOOP-17899) | Avoid using implicit dependency on junit-jupiter-api | Major | test | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-10901](https://issues.apache.org/jira/browse/YARN-10901) | Permission checking error on an existing directory in LogAggregationFileController#verifyAndCreateRemoteLogDir | Major | nodemanager | Tamas Domok | Tamas Domok | +| [HADOOP-17804](https://issues.apache.org/jira/browse/HADOOP-17804) | Prometheus metrics only include the last set of labels | Major | common | Adam Binford | Adam Binford | +| [HDFS-16207](https://issues.apache.org/jira/browse/HDFS-16207) | Remove NN logs stack trace for non-existent xattr query | Major | namenode | Ahmed Hussein | Ahmed Hussein | +| [HDFS-16187](https://issues.apache.org/jira/browse/HDFS-16187) | SnapshotDiff behaviour with Xattrs and Acls is not 
consistent across NN restarts with checkpointing | Major | snapshots | Srinivasu Majeti | Shashikant Banerjee | +| [HDFS-16198](https://issues.apache.org/jira/browse/HDFS-16198) | Short circuit read leaks Slot objects when InvalidToken exception is thrown | Major | . | Eungsop Yoo | Eungsop Yoo | +| [YARN-10870](https://issues.apache.org/jira/browse/YARN-10870) | Missing user filtering check -\> yarn.webapp.filter-entity-list-by-user for RM Scheduler page | Major | yarn | Siddharth Ahuja | Gergely Pollák | +| [HADOOP-17891](https://issues.apache.org/jira/browse/HADOOP-17891) | lz4-java and snappy-java should be excluded from relocation in shaded Hadoop libraries | Major | . | L. C. Hsieh | L. C. Hsieh | +| [HADOOP-17919](https://issues.apache.org/jira/browse/HADOOP-17919) | Fix command line example in Hadoop Cluster Setup documentation | Minor | documentation | Rintaro Ikeda | Rintaro Ikeda | +| [YARN-9606](https://issues.apache.org/jira/browse/YARN-9606) | Set sslfactory for AuthenticatedURL() while creating LogsCLI#webServiceClient | Major | . | Bilwa S T | Bilwa S T | +| [HDFS-16233](https://issues.apache.org/jira/browse/HDFS-16233) | Do not use exception handler to implement copy-on-write for EnumCounters | Major | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16235](https://issues.apache.org/jira/browse/HDFS-16235) | Deadlock in LeaseRenewer for static remove method | Major | hdfs | angerszhu | angerszhu | +| [HADOOP-17940](https://issues.apache.org/jira/browse/HADOOP-17940) | Upgrade Kafka to 2.8.1 | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [YARN-10970](https://issues.apache.org/jira/browse/YARN-10970) | Standby RM should expose prom endpoint | Major | resourcemanager | Max Xie | Max Xie | +| [HADOOP-17934](https://issues.apache.org/jira/browse/HADOOP-17934) | NullPointerException when no HTTP response set on AbfsRestOperation | Major | fs/azure | Josh Elser | Josh Elser | +| [HDFS-16181](https://issues.apache.org/jira/browse/HDFS-16181) | [SBN Read] Fix metric of RpcRequestCacheMissAmount can't display when tailEditLog form JN | Critical | . | wangzhaohui | wangzhaohui | +| [HADOOP-17922](https://issues.apache.org/jira/browse/HADOOP-17922) | Lookup old S3 encryption configs for JCEKS | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17925](https://issues.apache.org/jira/browse/HADOOP-17925) | BUILDING.txt should not encourage to activate docs profile on building binary artifacts | Minor | documentation | Rintaro Ikeda | Masatake Iwasaki | +| [HADOOP-16532](https://issues.apache.org/jira/browse/HADOOP-16532) | Fix TestViewFsTrash to use the correct homeDir. | Minor | test, viewfs | Steve Loughran | Xing Lin | +| [HDFS-16268](https://issues.apache.org/jira/browse/HDFS-16268) | Balancer stuck when moving striped blocks due to NPE | Major | balancer & mover, erasure-coding | Leon Gao | Leon Gao | +| [HDFS-16271](https://issues.apache.org/jira/browse/HDFS-16271) | RBF: NullPointerException when setQuota through routers with quota disabled | Major | . | Chengwei Wang | Chengwei Wang | +| [YARN-10976](https://issues.apache.org/jira/browse/YARN-10976) | Fix resource leak due to Files.walk | Minor | . 
| lujie | lujie | +| [HADOOP-17932](https://issues.apache.org/jira/browse/HADOOP-17932) | Distcp file length comparison have no effect | Major | common, tools, tools/distcp | yinan zhan | yinan zhan | +| [HDFS-16272](https://issues.apache.org/jira/browse/HDFS-16272) | Int overflow in computing safe length during EC block recovery | Critical | 3.1.1 | daimin | daimin | +| [HADOOP-17953](https://issues.apache.org/jira/browse/HADOOP-17953) | S3A: ITestS3AFileContextStatistics test to lookup global or per-bucket configuration for encryption algorithm | Minor | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17971](https://issues.apache.org/jira/browse/HADOOP-17971) | Exclude IBM Java security classes from being shaded/relocated | Major | build | Nicholas Marion | Nicholas Marion | +| [HDFS-7612](https://issues.apache.org/jira/browse/HDFS-7612) | TestOfflineEditsViewer.testStored() uses incorrect default value for cacheDir | Major | test | Konstantin Shvachko | Michael Kuchenbecker | +| [HDFS-16269](https://issues.apache.org/jira/browse/HDFS-16269) | [Fix] Improve NNThroughputBenchmark#blockReport operation | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17945](https://issues.apache.org/jira/browse/HADOOP-17945) | JsonSerialization raises EOFException reading JSON data stored on google GCS | Major | fs | Steve Loughran | Steve Loughran | +| [HDFS-16259](https://issues.apache.org/jira/browse/HDFS-16259) | Catch and re-throw sub-classes of AccessControlException thrown by any permission provider plugins (eg Ranger) | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HADOOP-17988](https://issues.apache.org/jira/browse/HADOOP-17988) | Disable JIRA plugin for YETUS on Hadoop | Critical | build | Gautham Banasandra | Gautham Banasandra | +| [HDFS-16311](https://issues.apache.org/jira/browse/HDFS-16311) | Metric metadataOperationRate calculation error in DataNodeVolumeMetrics | Major | . 
| tomscut | tomscut | +| [HADOOP-18002](https://issues.apache.org/jira/browse/HADOOP-18002) | abfs rename idempotency broken -remove recovery | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HDFS-16182](https://issues.apache.org/jira/browse/HDFS-16182) | numOfReplicas is given the wrong value in BlockPlacementPolicyDefault$chooseTarget can cause DataStreamer to fail with Heterogeneous Storage | Major | namanode | Max Xie | Max Xie | +| [HADOOP-17999](https://issues.apache.org/jira/browse/HADOOP-17999) | No-op implementation of setWriteChecksum and setVerifyChecksum in ViewFileSystem | Major | . | Abhishek Das | Abhishek Das | +| [HDFS-16329](https://issues.apache.org/jira/browse/HDFS-16329) | Fix log format for BlockManager | Minor | . | tomscut | tomscut | +| [HDFS-16330](https://issues.apache.org/jira/browse/HDFS-16330) | Fix incorrect placeholder for Exception logs in DiskBalancer | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16328](https://issues.apache.org/jira/browse/HDFS-16328) | Correct disk balancer param desc | Minor | documentation, hdfs | guophilipse | guophilipse | +| [HDFS-16334](https://issues.apache.org/jira/browse/HDFS-16334) | Correct NameNode ACL description | Minor | documentation | guophilipse | guophilipse | +| [HDFS-16343](https://issues.apache.org/jira/browse/HDFS-16343) | Add some debug logs when the dfsUsed are not used during Datanode startup | Major | datanode | Mukul Kumar Singh | Mukul Kumar Singh | +| [YARN-10991](https://issues.apache.org/jira/browse/YARN-10991) | Fix to ignore the grouping "[]" for resourcesStr in parseResourcesString method | Minor | distributed-shell | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-17975](https://issues.apache.org/jira/browse/HADOOP-17975) | Fallback to simple auth does not work for a secondary DistributedFileSystem instance | Major | ipc | István Fajth | István Fajth | +| [HDFS-16350](https://issues.apache.org/jira/browse/HDFS-16350) | Datanode start time should be set after RPC 
server starts successfully | Minor | . | Viraj Jasani | Viraj Jasani | +| [YARN-11007](https://issues.apache.org/jira/browse/YARN-11007) | Correct words in YARN documents | Minor | documentation | guophilipse | guophilipse | +| [YARN-10975](https://issues.apache.org/jira/browse/YARN-10975) | EntityGroupFSTimelineStore#ActiveLogParser parses already processed files | Major | timelineserver | Prabhu Joseph | Ravuri Sushma sree | +| [HDFS-16332](https://issues.apache.org/jira/browse/HDFS-16332) | Expired block token causes slow read due to missing handling in sasl handshake | Major | datanode, dfs, dfsclient | Shinya Yoshida | Shinya Yoshida | +| [HDFS-16293](https://issues.apache.org/jira/browse/HDFS-16293) | Client sleeps and holds 'dataQueue' when DataNodes are congested | Major | hdfs-client | Yuanxin Zhu | Yuanxin Zhu | +| [YARN-9063](https://issues.apache.org/jira/browse/YARN-9063) | ATS 1.5 fails to start if RollingLevelDb files are corrupt or missing | Major | timelineserver, timelineservice | Tarun Parimi | Ashutosh Gupta | +| [HDFS-16333](https://issues.apache.org/jira/browse/HDFS-16333) | fix balancer bug when transfer an EC block | Major | balancer & mover, erasure-coding | qinyuren | qinyuren | +| [YARN-11020](https://issues.apache.org/jira/browse/YARN-11020) | [UI2] No container is found for an application attempt with a single AM container | Major | yarn-ui-v2 | Andras Gyori | Andras Gyori | +| [HDFS-16373](https://issues.apache.org/jira/browse/HDFS-16373) | Fix MiniDFSCluster restart in case of multiple namenodes | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-18048](https://issues.apache.org/jira/browse/HADOOP-18048) | [branch-3.3] Dockerfile\_aarch64 build fails with fatal error: Python.h: No such file or directory | Major | . | Siyao Meng | Siyao Meng | +| [HDFS-16377](https://issues.apache.org/jira/browse/HDFS-16377) | Should CheckNotNull before access FsDatasetSpi | Major | . 
| tomscut | tomscut | +| [YARN-6862](https://issues.apache.org/jira/browse/YARN-6862) | Nodemanager resource usage metrics sometimes are negative | Major | nodemanager | YunFan Zhou | Benjamin Teke | +| [HADOOP-13500](https://issues.apache.org/jira/browse/HADOOP-13500) | Synchronizing iteration of Configuration properties object | Major | conf | Jason Darrell Lowe | Dhananjay Badaya | +| [YARN-10178](https://issues.apache.org/jira/browse/YARN-10178) | Global Scheduler async thread crash caused by 'Comparison method violates its general contract | Major | capacity scheduler | tuyu | Andras Gyori | +| [YARN-11053](https://issues.apache.org/jira/browse/YARN-11053) | AuxService should not use class name as default system classes | Major | auxservices | Cheng Pan | Cheng Pan | +| [HDFS-16395](https://issues.apache.org/jira/browse/HDFS-16395) | Remove useless NNThroughputBenchmark#dummyActionNoSynch() | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18045](https://issues.apache.org/jira/browse/HADOOP-18045) | Disable TestDynamometerInfra | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-14099](https://issues.apache.org/jira/browse/HDFS-14099) | Unknown frame descriptor when decompressing multiple frames in ZStandardDecompressor | Major | . | xuzq | xuzq | +| [HADOOP-18063](https://issues.apache.org/jira/browse/HADOOP-18063) | Remove unused import AbstractJavaKeyStoreProvider in Shell class | Minor | . | JiangHua Zhu | JiangHua Zhu | +| [HDFS-16409](https://issues.apache.org/jira/browse/HDFS-16409) | Fix typo: testHasExeceptionsReturnsCorrectValue -\> testHasExceptionsReturnsCorrectValue | Trivial | . | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16408](https://issues.apache.org/jira/browse/HDFS-16408) | Ensure LeaseRecheckIntervalMs is greater than zero | Major | namenode | Jingxuan Fu | Jingxuan Fu | +| [HDFS-16410](https://issues.apache.org/jira/browse/HDFS-16410) | Insecure Xml parsing in OfflineEditsXmlLoader | Minor | . 
| Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16420](https://issues.apache.org/jira/browse/HDFS-16420) | Avoid deleting unique data blocks when deleting redundancy striped blocks | Critical | ec, erasure-coding | qinyuren | Jackson Wang | +| [YARN-10561](https://issues.apache.org/jira/browse/YARN-10561) | Upgrade node.js to 12.22.1 and yarn to 1.22.5 in YARN application catalog webapp | Critical | webapp | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-18096](https://issues.apache.org/jira/browse/HADOOP-18096) | Distcp: Sync moves filtered file to home directory rather than deleting | Critical | . | Ayush Saxena | Ayush Saxena | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-7342](https://issues.apache.org/jira/browse/MAPREDUCE-7342) | Stop RMService in TestClientRedirect.testRedirect() | Minor | . | Zhengxi Li | Zhengxi Li | +| [MAPREDUCE-7311](https://issues.apache.org/jira/browse/MAPREDUCE-7311) | Fix non-idempotent test in TestTaskProgressReporter | Minor | . | Zhengxi Li | Zhengxi Li | +| [HADOOP-17936](https://issues.apache.org/jira/browse/HADOOP-17936) | TestLocalFSCopyFromLocal.testDestinationFileIsToParentDirectory failure after reverting HADOOP-16878 | Major | . 
| Chao Sun | Chao Sun | +| [HDFS-15862](https://issues.apache.org/jira/browse/HDFS-15862) | Make TestViewfsWithNfs3.testNfsRenameSingleNN() idempotent | Minor | nfs | Zhengxi Li | Zhengxi Li | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-10337](https://issues.apache.org/jira/browse/YARN-10337) | TestRMHATimelineCollectors fails on hadoop trunk | Major | test, yarn | Ahmed Hussein | Bilwa S T | +| [HDFS-15457](https://issues.apache.org/jira/browse/HDFS-15457) | TestFsDatasetImpl fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17424](https://issues.apache.org/jira/browse/HADOOP-17424) | Replace HTrace with No-Op tracer | Major | . | Siyao Meng | Siyao Meng | +| [HADOOP-17705](https://issues.apache.org/jira/browse/HADOOP-17705) | S3A to add option fs.s3a.endpoint.region to set AWS region | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17670](https://issues.apache.org/jira/browse/HADOOP-17670) | S3AFS and ABFS to log IOStats at DEBUG mode or optionally at INFO level in close() | Minor | fs/azure, fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17511](https://issues.apache.org/jira/browse/HADOOP-17511) | Add an Audit plugin point for S3A auditing/context | Major | . 
| Steve Loughran | Steve Loughran | +| [HADOOP-17470](https://issues.apache.org/jira/browse/HADOOP-17470) | Collect more S3A IOStatistics | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17735](https://issues.apache.org/jira/browse/HADOOP-17735) | Upgrade aws-java-sdk to 1.11.1026 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17547](https://issues.apache.org/jira/browse/HADOOP-17547) | Magic committer to downgrade abort in cleanup if list uploads fails with access denied | Major | fs/s3 | Steve Loughran | Bogdan Stolojan | +| [HADOOP-17771](https://issues.apache.org/jira/browse/HADOOP-17771) | S3AFS creation fails "Unable to find a region via the region provider chain." | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-15659](https://issues.apache.org/jira/browse/HDFS-15659) | Set dfs.namenode.redundancy.considerLoad to false in MiniDFSCluster | Major | test | Akira Ajisaka | Ahmed Hussein | +| [HADOOP-17774](https://issues.apache.org/jira/browse/HADOOP-17774) | bytesRead FS statistic showing twice the correct value in S3A | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17290](https://issues.apache.org/jira/browse/HADOOP-17290) | ABFS: Add Identifiers to Client Request Header | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17250](https://issues.apache.org/jira/browse/HADOOP-17250) | ABFS: Random read perf improvement | Major | fs/azure | Sneha Vijayarajan | Mukund Thakur | +| [HADOOP-17596](https://issues.apache.org/jira/browse/HADOOP-17596) | ABFS: Change default Readahead Queue Depth from num(processors) to const | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17715](https://issues.apache.org/jira/browse/HADOOP-17715) | ABFS: Append blob tests with non HNS accounts fail | Minor | . 
| Sneha Varma | Sneha Varma | +| [HADOOP-17714](https://issues.apache.org/jira/browse/HADOOP-17714) | ABFS: testBlobBackCompatibility, testRandomRead & WasbAbfsCompatibility tests fail when triggered with default configs | Minor | test | Sneha Varma | Sneha Varma | +| [HDFS-16140](https://issues.apache.org/jira/browse/HDFS-16140) | TestBootstrapAliasmap fails by BindException | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-13887](https://issues.apache.org/jira/browse/HADOOP-13887) | Encrypt S3A data client-side with AWS SDK (S3-CSE) | Minor | fs/s3 | Jeeyoung Kim | Mehakmeet Singh | +| [HADOOP-17458](https://issues.apache.org/jira/browse/HADOOP-17458) | S3A to treat "SdkClientException: Data read has a different length than the expected" as EOFException | Minor | fs/s3 | Steve Loughran | Bogdan Stolojan | +| [HADOOP-17628](https://issues.apache.org/jira/browse/HADOOP-17628) | Distcp contract test is really slow with ABFS and S3A; timing out | Minor | fs/azure, fs/s3, test, tools/distcp | Bilahari T H | Steve Loughran | +| [HADOOP-17822](https://issues.apache.org/jira/browse/HADOOP-17822) | fs.s3a.acl.default not working after S3A Audit feature added | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17139](https://issues.apache.org/jira/browse/HADOOP-17139) | Re-enable optimized copyFromLocal implementation in S3AFileSystem | Minor | fs/s3 | Sahil Takiar | Bogdan Stolojan | +| [HADOOP-17823](https://issues.apache.org/jira/browse/HADOOP-17823) | S3A Tests to skip if S3Guard and S3-CSE are enabled. | Major | build, fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HDFS-16184](https://issues.apache.org/jira/browse/HDFS-16184) | De-flake TestBlockScanner#testSkipRecentAccessFile | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HADOOP-17677](https://issues.apache.org/jira/browse/HADOOP-17677) | Distcp is unable to determine region with S3 PrivateLink endpoints | Major | fs/s3, tools/distcp | KJ | | +| [HDFS-16192](https://issues.apache.org/jira/browse/HDFS-16192) | ViewDistributedFileSystem#rename wrongly using src in the place of dst. | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HADOOP-17156](https://issues.apache.org/jira/browse/HADOOP-17156) | Clear abfs readahead requests on stream close | Major | fs/azure | Rajesh Balamohan | Mukund Thakur | +| [HADOOP-17618](https://issues.apache.org/jira/browse/HADOOP-17618) | ABFS: Partially obfuscate SAS object IDs in Logs | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17894](https://issues.apache.org/jira/browse/HADOOP-17894) | CredentialProviderFactory.getProviders() recursion loading JCEKS file from s3a | Major | conf, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-17126](https://issues.apache.org/jira/browse/HADOOP-17126) | implement non-guava Precondition checkNotNull | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17195](https://issues.apache.org/jira/browse/HADOOP-17195) | Intermittent OutOfMemory error while performing hdfs CopyFromLocal to abfs | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17929](https://issues.apache.org/jira/browse/HADOOP-17929) | implement non-guava Precondition checkArgument | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17198](https://issues.apache.org/jira/browse/HADOOP-17198) | Support S3 Access Points | Major | fs/s3 | Steve Loughran | Bogdan Stolojan | +| [HADOOP-17871](https://issues.apache.org/jira/browse/HADOOP-17871) | S3A CSE: minor tuning | Minor | fs/s3 | Steve Loughran | Mehakmeet Singh | +| [HADOOP-17947](https://issues.apache.org/jira/browse/HADOOP-17947) | Provide alternative to Guava VisibleForTesting | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HADOOP-17930](https://issues.apache.org/jira/browse/HADOOP-17930) | implement non-guava Precondition checkState | Major | . | Ahmed Hussein | Ahmed Hussein | +| [HADOOP-17374](https://issues.apache.org/jira/browse/HADOOP-17374) | AliyunOSS: support ListObjectsV2 | Major | fs/oss | wujinhu | wujinhu | +| [HADOOP-17863](https://issues.apache.org/jira/browse/HADOOP-17863) | ABFS: Fix compiler deprecation warning in TextFileBasedIdentityHandler | Minor | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17928](https://issues.apache.org/jira/browse/HADOOP-17928) | s3a: set fs.s3a.downgrade.syncable.exceptions = true by default | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-16336](https://issues.apache.org/jira/browse/HDFS-16336) | De-flake TestRollingUpgrade#testRollback | Minor | hdfs, test | Kevin Wikant | Viraj Jasani | +| [HDFS-16171](https://issues.apache.org/jira/browse/HDFS-16171) | De-flake testDecommissionStatus | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17226](https://issues.apache.org/jira/browse/HADOOP-17226) | Failure of ITestAssumeRole.testRestrictedCommitActions | Minor | fs/s3, test | Steve Loughran | Steve Loughran | +| [HADOOP-14334](https://issues.apache.org/jira/browse/HADOOP-14334) | S3 SSEC tests to downgrade when running against a mandatory encryption object store | Minor | fs/s3, test | Steve Loughran | Monthon Klongklaew | +| [HADOOP-16223](https://issues.apache.org/jira/browse/HADOOP-16223) | remove misleading fs.s3a.delegation.tokens.enabled prompt | Minor | fs/s3 | Steve Loughran | | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-16078](https://issues.apache.org/jira/browse/HDFS-16078) | Remove unused parameters for DatanodeManager.handleLifeline() | Minor | . 
| tomscut | tomscut | +| [HDFS-16079](https://issues.apache.org/jira/browse/HDFS-16079) | Improve the block state change log | Minor | . | tomscut | tomscut | +| [HDFS-16089](https://issues.apache.org/jira/browse/HDFS-16089) | EC: Add metric EcReconstructionValidateTimeMillis for StripedBlockReconstructor | Minor | . | tomscut | tomscut | +| [HDFS-16298](https://issues.apache.org/jira/browse/HDFS-16298) | Improve error msg for BlockMissingException | Minor | . | tomscut | tomscut | +| [HDFS-16312](https://issues.apache.org/jira/browse/HDFS-16312) | Fix typo for DataNodeVolumeMetrics and ProfilingFileIoEvents | Minor | . | tomscut | tomscut | +| [HADOOP-18005](https://issues.apache.org/jira/browse/HADOOP-18005) | Correct log format for LdapGroupsMapping | Minor | . | tomscut | tomscut | +| [HDFS-16319](https://issues.apache.org/jira/browse/HDFS-16319) | Add metrics doc for ReadLockLongHoldCount and WriteLockLongHoldCount | Minor | . | tomscut | tomscut | +| [HDFS-16326](https://issues.apache.org/jira/browse/HDFS-16326) | Simplify the code for DiskBalancer | Minor | . | tomscut | tomscut | +| [HDFS-16335](https://issues.apache.org/jira/browse/HDFS-16335) | Fix HDFSCommands.md | Minor | . | tomscut | tomscut | +| [HDFS-16339](https://issues.apache.org/jira/browse/HDFS-16339) | Show the threshold when mover threads quota is exceeded | Minor | . | tomscut | tomscut | +| [YARN-10820](https://issues.apache.org/jira/browse/YARN-10820) | Make GetClusterNodesRequestPBImpl thread safe | Major | client | Prabhu Joseph | SwathiChandrashekar | +| [HADOOP-17808](https://issues.apache.org/jira/browse/HADOOP-17808) | ipc.Client not setting interrupt flag after catching InterruptedException | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17834](https://issues.apache.org/jira/browse/HADOOP-17834) | Bump aliyun-sdk-oss to 3.13.0 | Major | . 
| Siyao Meng | Siyao Meng | +| [HADOOP-17950](https://issues.apache.org/jira/browse/HADOOP-17950) | Provide replacement for deprecated APIs of commons-io IOUtils | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-17955](https://issues.apache.org/jira/browse/HADOOP-17955) | Bump netty to the latest 4.1.68 | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-17946](https://issues.apache.org/jira/browse/HADOOP-17946) | Update commons-lang to latest 3.x | Minor | . | Sean Busbey | Renukaprasad C | +| [HDFS-16323](https://issues.apache.org/jira/browse/HDFS-16323) | DatanodeHttpServer doesn't require handler state map while retrieving filter handlers | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-13464](https://issues.apache.org/jira/browse/HADOOP-13464) | update GSON to 2.7+ | Minor | build | Sean Busbey | Igor Dvorzhak | +| [HADOOP-18061](https://issues.apache.org/jira/browse/HADOOP-18061) | Update the year to 2022 | Major | . | Ayush Saxena | Ayush Saxena | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.2/RELEASENOTES.3.3.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.2/RELEASENOTES.3.3.2.md new file mode 100644 index 0000000000000..9948d8ff3222c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.2/RELEASENOTES.3.3.2.md @@ -0,0 +1,93 @@ + + +# Apache Hadoop 3.3.2 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HDFS-15288](https://issues.apache.org/jira/browse/HDFS-15288) | *Major* | **Add Available Space Rack Fault Tolerant BPP** + +Added a new BlockPlacementPolicy: "AvailableSpaceRackFaultTolerantBlockPlacementPolicy" which uses the same optimization logic as the AvailableSpaceBlockPlacementPolicy along with spreading the replicas across maximum number of racks, similar to BlockPlacementPolicyRackFaultTolerant. 
+The BPP can be configured by setting the blockplacement policy class as org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceRackFaultTolerantBlockPlacementPolicy + + +--- + +* [HADOOP-17424](https://issues.apache.org/jira/browse/HADOOP-17424) | *Major* | **Replace HTrace with No-Op tracer** + +Dependency on HTrace and TraceAdmin protocol/utility were removed. Tracing functionality is no-op until alternative tracer implementation is added. + + +--- + +* [HDFS-15814](https://issues.apache.org/jira/browse/HDFS-15814) | *Major* | **Make some parameters configurable for DataNodeDiskMetrics** + +**WARNING: No release note provided for this change.** + + +--- + +* [YARN-10820](https://issues.apache.org/jira/browse/YARN-10820) | *Major* | **Make GetClusterNodesRequestPBImpl thread safe** + +Added synchronization so that the "yarn node list" command does not fail intermittently + + +--- + +* [HADOOP-13887](https://issues.apache.org/jira/browse/HADOOP-13887) | *Minor* | **Encrypt S3A data client-side with AWS SDK (S3-CSE)** + +Adds support for client side encryption in AWS S3, +with keys managed by AWS-KMS. + +Read the documentation in encryption.md very, very carefully before +use and consider it unstable. + +S3-CSE is enabled in the existing configuration option +"fs.s3a.server-side-encryption-algorithm": + +fs.s3a.server-side-encryption-algorithm=CSE-KMS +fs.s3a.server-side-encryption.key=\<KMS\_KEY\_ID\> + +You cannot enable CSE and SSE in the same client, although +you can still enable a default SSE option in the S3 console. + +\* Not compatible with S3Guard. +\* Filesystem list/get status operations subtract 16 bytes from the length + of all files \>= 16 bytes long to compensate for the padding which CSE + adds. +\* The SDK always warns about the specific algorithm chosen being + deprecated. It is critical to use this algorithm for ranged + GET requests to work (i.e. random IO). Ignore. +\* Unencrypted files CANNOT BE READ. + The entire bucket SHOULD be encrypted with S3-CSE. 
+\* Uploading files may be a bit slower as blocks are now + written sequentially. +\* The Multipart Upload API is disabled when S3-CSE is active. + + +--- + +* [YARN-8234](https://issues.apache.org/jira/browse/YARN-8234) | *Critical* | **Improve RM system metrics publisher's performance by pushing events to timeline server in batch** + +When Timeline Service V1 or V1.5 is used, if "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.enable-batch" is set to true, ResourceManager sends timeline events in batch. The default value is false. If this functionality is enabled, the maximum number that events published in batch is configured by "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.batch-size". The default value is 1000. The interval of publishing events can be configured by "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.interval-seconds". By default, it is set to 60 seconds. + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.3/CHANGELOG.3.3.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.3/CHANGELOG.3.3.3.md new file mode 100644 index 0000000000000..bf9498db08172 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.3/CHANGELOG.3.3.3.md @@ -0,0 +1,65 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.3 - 2022-05-09 + + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-7373](https://issues.apache.org/jira/browse/MAPREDUCE-7373) | Building MapReduce NativeTask fails on Fedora 34+ | Major | build, nativetask | Kengo Seki | Kengo Seki | +| [HDFS-16355](https://issues.apache.org/jira/browse/HDFS-16355) | Improve the description of dfs.block.scanner.volume.bytes.per.second | Minor | documentation, hdfs | guophilipse | guophilipse | +| [HADOOP-18155](https://issues.apache.org/jira/browse/HADOOP-18155) | Refactor tests in 
TestFileUtil | Trivial | common | Gautham Banasandra | Gautham Banasandra | +| [HADOOP-18088](https://issues.apache.org/jira/browse/HADOOP-18088) | Replace log4j 1.x with reload4j | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-16501](https://issues.apache.org/jira/browse/HDFS-16501) | Print the exception when reporting a bad block | Major | datanode | qinyuren | qinyuren | +| [HADOOP-18214](https://issues.apache.org/jira/browse/HADOOP-18214) | Update BUILDING.txt | Minor | build, documentation | Steve Loughran | | +| [HDFS-16556](https://issues.apache.org/jira/browse/HDFS-16556) | Fix typos in distcp | Minor | documentation | guophilipse | guophilipse | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17341](https://issues.apache.org/jira/browse/HADOOP-17341) | Upgrade commons-codec to 1.15 | Minor | . | Dongjoon Hyun | Dongjoon Hyun | +| [HADOOP-17650](https://issues.apache.org/jira/browse/HADOOP-17650) | Fails to build using Maven 3.8.1 | Major | build | Wei-Chiu Chuang | Viraj Jasani | +| [HADOOP-18178](https://issues.apache.org/jira/browse/HADOOP-18178) | Upgrade jackson to 2.13.2 and jackson-databind to 2.13.2.2 | Major | . 
| PJ Fanning | PJ Fanning | +| [HDFS-16535](https://issues.apache.org/jira/browse/HDFS-16535) | SlotReleaser should reuse the domain socket based on socket paths | Major | hdfs-client | Quanlong Huang | | +| [HADOOP-18109](https://issues.apache.org/jira/browse/HADOOP-18109) | Ensure that default permissions of directories under internal ViewFS directories are the same as directories on target filesystems | Major | viewfs | Chentao Yu | Chentao Yu | +| [HDFS-16422](https://issues.apache.org/jira/browse/HDFS-16422) | Fix thread safety of EC decoding during concurrent preads | Critical | dfsclient, ec, erasure-coding | daimin | daimin | +| [HDFS-16437](https://issues.apache.org/jira/browse/HDFS-16437) | ReverseXML processor doesn't accept XML files without the SnapshotDiffSection. | Critical | hdfs | yanbin.zhang | yanbin.zhang | +| [HDFS-16507](https://issues.apache.org/jira/browse/HDFS-16507) | [SBN read] Avoid purging edit log which is in progress | Critical | . | Tao Li | Tao Li | +| [YARN-10720](https://issues.apache.org/jira/browse/YARN-10720) | YARN WebAppProxyServlet should support connection timeout to prevent proxy server from hanging | Critical | . | Qi Zhu | Qi Zhu | +| [HDFS-16428](https://issues.apache.org/jira/browse/HDFS-16428) | Source path with storagePolicy cause wrong typeConsumed while rename | Major | hdfs, namenode | lei w | lei w | +| [YARN-11014](https://issues.apache.org/jira/browse/YARN-11014) | YARN incorrectly validates maximum capacity resources on the validation API | Major | . | Benjamin Teke | Benjamin Teke | +| [YARN-11075](https://issues.apache.org/jira/browse/YARN-11075) | Explicitly declare serialVersionUID in LogMutation class | Major | . 
| Benjamin Teke | Benjamin Teke | +| [HDFS-11041](https://issues.apache.org/jira/browse/HDFS-11041) | Unable to unregister FsDatasetState MBean if DataNode is shutdown twice | Trivial | datanode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-18160](https://issues.apache.org/jira/browse/HADOOP-18160) | \`org.wildfly.openssl\` should not be shaded by Hadoop build | Major | build | André F. | André F. | +| [HADOOP-18202](https://issues.apache.org/jira/browse/HADOOP-18202) | create-release fails fatal: unsafe repository ('/build/source' is owned by someone else) | Major | build | Steve Loughran | Steve Loughran | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18125](https://issues.apache.org/jira/browse/HADOOP-18125) | Utility to identify git commit / Jira fixVersion discrepancies for RC preparation | Major | . | Viraj Jasani | Viraj Jasani | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.3/RELEASENOTES.3.3.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.3/RELEASENOTES.3.3.3.md new file mode 100644 index 0000000000000..bd9ea8a2e49d6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.3/RELEASENOTES.3.3.3.md @@ -0,0 +1,48 @@ + + +# Apache Hadoop 3.3.3 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-17650](https://issues.apache.org/jira/browse/HADOOP-17650) | *Major* | **Fails to build using Maven 3.8.1** + +In order to resolve build issues with Maven 3.8.1, we have to bump SolrJ to latest version 8.8.2 as of now. Solr is used by YARN application catalog. Hence, we would recommend upgrading Solr cluster accordingly before upgrading entire Hadoop cluster to 3.4.0 if the YARN application catalog service is used. 
+ + +--- + +* [HADOOP-18088](https://issues.apache.org/jira/browse/HADOOP-18088) | *Major* | **Replace log4j 1.x with reload4j** + +log4j 1 was replaced with reload4j, which is a fork of log4j 1.2.17 with the goal of fixing pressing security issues. + +If you depend on the hadoop artifacts in your build and were explicitly excluding log4j artifacts, and now want to exclude the reload4j files, you will need to update your exclusion lists: +\<exclusion\> + \<groupId\>org.slf4j\</groupId\> + \<artifactId\>slf4j-reload4j\</artifactId\> +\</exclusion\> +\<exclusion\> + \<groupId\>ch.qos.reload4j\</groupId\> + \<artifactId\>reload4j\</artifactId\> +\</exclusion\> + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/CHANGELOG.3.3.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/CHANGELOG.3.3.4.md new file mode 100644 index 0000000000000..78b805240c78e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/CHANGELOG.3.3.4.md @@ -0,0 +1,56 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.4 - 2022-07-29 + + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18044](https://issues.apache.org/jira/browse/HADOOP-18044) | Hadoop - Upgrade to JQuery 3.6.0 | Major | . | Yuan Luo | Yuan Luo | +| [YARN-11195](https://issues.apache.org/jira/browse/YARN-11195) | Document how to configure NUMA in YARN | Major | documentation | Prabhu Joseph | Samrat Deb | +| [HADOOP-18332](https://issues.apache.org/jira/browse/HADOOP-18332) | Remove rs-api dependency by downgrading jackson to 2.12.7 | Major | build | PJ Fanning | PJ Fanning | +| [HADOOP-18354](https://issues.apache.org/jira/browse/HADOOP-18354) | Upgrade reload4j to 1.2.22 due to XXE vulnerability | Major | . 
| PJ Fanning | PJ Fanning | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18085](https://issues.apache.org/jira/browse/HADOOP-18085) | S3 SDK Upgrade causes AccessPoint ARN endpoint mistranslation | Major | fs/s3, test | Bogdan Stolojan | Bogdan Stolojan | +| [YARN-11092](https://issues.apache.org/jira/browse/YARN-11092) | Upgrade jquery ui to 1.13.1 | Major | . | D M Murali Krishna Reddy | groot | +| [HDFS-16453](https://issues.apache.org/jira/browse/HDFS-16453) | Upgrade okhttp from 2.7.5 to 4.9.3 | Major | hdfs-client | Ivan Viaznikov | groot | +| [YARN-10974](https://issues.apache.org/jira/browse/YARN-10974) | CS UI: queue filter and openQueues param do not work as expected | Major | capacity scheduler | Chengbing Liu | Chengbing Liu | +| [HADOOP-18237](https://issues.apache.org/jira/browse/HADOOP-18237) | Upgrade Apache Xerces Java to 2.12.2 | Major | build | groot | groot | +| [HADOOP-18074](https://issues.apache.org/jira/browse/HADOOP-18074) | Partial/Incomplete groups list can be returned in LDAP groups lookup | Major | security | Philippe Lanoe | Larry McCay | +| [HADOOP-18079](https://issues.apache.org/jira/browse/HADOOP-18079) | Upgrade Netty to 4.1.77.Final | Major | build | Renukaprasad C | Wei-Chiu Chuang | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18068](https://issues.apache.org/jira/browse/HADOOP-18068) | Upgrade AWS SDK to 1.12.132 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18307](https://issues.apache.org/jira/browse/HADOOP-18307) | remove hadoop-cos as a dependency of hadoop-cloud-storage | Major | bulid, fs | Steve Loughran | Steve Loughran | +| [HADOOP-18344](https://issues.apache.org/jira/browse/HADOOP-18344) | AWS SDK update to 1.12.262 to address jackson CVE-2018-7489 | Major | fs/s3 | Steve Loughran | Steve 
Loughran | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/RELEASENOTES.3.3.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/RELEASENOTES.3.3.4.md new file mode 100644 index 0000000000000..79573880423d6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/RELEASENOTES.3.3.4.md @@ -0,0 +1,66 @@ + + +# Apache Hadoop 3.3.4 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HDFS-16453](https://issues.apache.org/jira/browse/HDFS-16453) | *Major* | **Upgrade okhttp from 2.7.5 to 4.9.3** + +okhttp has been updated to address CVE-2021-0341 + + +--- + +* [HADOOP-18237](https://issues.apache.org/jira/browse/HADOOP-18237) | *Major* | **Upgrade Apache Xerces Java to 2.12.2** + +Apache Xerces has been updated to 2.12.2 to fix CVE-2022-23437 + + +--- + +* [HADOOP-18307](https://issues.apache.org/jira/browse/HADOOP-18307) | *Major* | **remove hadoop-cos as a dependency of hadoop-cloud-storage** + +We have recently become aware that libraries which include a shaded Apache httpclient library (hadoop-client-runtime.jar, aws-java-sdk-bundle.jar, gcs-connector-shaded.jar, cos\_api-bundle-5.6.19.jar) all load and use the unshaded resource mozilla/public-suffix-list.txt. If an out-of-date version of this is found on the classpath first, attempts to negotiate TLS connections may fail with the error "Certificate doesn't match any of the subject alternative names". This release does not declare the hadoop-cos library to be a dependency of the hadoop-cloud-storage POM, so applications depending on that module are no longer exposed to this issue. If an application requires use of the hadoop-cos module, please declare an explicit dependency. 
+ + +--- + +* [HADOOP-18332](https://issues.apache.org/jira/browse/HADOOP-18332) | *Major* | **Remove rs-api dependency by downgrading jackson to 2.12.7** + +Downgrades Jackson from 2.13.2 to 2.12.7 to fix class conflicts in downstream projects. This version of jackson does contain the fix for CVE-2020-36518. + + +--- + +* [HADOOP-18079](https://issues.apache.org/jira/browse/HADOOP-18079) | *Major* | **Upgrade Netty to 4.1.77.Final** + +Netty has been updated to address CVE-2019-20444, CVE-2019-20445 and CVE-2022-24823 + + +--- + +* [HADOOP-18344](https://issues.apache.org/jira/browse/HADOOP-18344) | *Major* | **AWS SDK update to 1.12.262 to address jackson CVE-2018-7489** + +The AWS SDK has been updated to 1.12.262 to address jackson CVE-2018-7489 + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md new file mode 100644 index 0000000000000..0bdd1844b6e47 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md @@ -0,0 +1,359 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.5 - 2023-03-14 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | Replace all default Charset usage with UTF-8 | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | CryptoOutputStream::close leak when encrypted zones + quota exceptions | Critical | fs | Colm Dougan | Colm Dougan | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18003](https://issues.apache.org/jira/browse/HADOOP-18003) | Add a method appendIfAbsent for CallerContext | Minor | . 
| Tao Li | Tao Li | +| [HDFS-16331](https://issues.apache.org/jira/browse/HDFS-16331) | Make dfs.blockreport.intervalMsec reconfigurable | Major | . | Tao Li | Tao Li | +| [HDFS-16371](https://issues.apache.org/jira/browse/HDFS-16371) | Exclude slow disks when choosing volume | Major | . | Tao Li | Tao Li | +| [HDFS-16400](https://issues.apache.org/jira/browse/HDFS-16400) | Reconfig DataXceiver parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16399](https://issues.apache.org/jira/browse/HDFS-16399) | Reconfig cache report parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16398](https://issues.apache.org/jira/browse/HDFS-16398) | Reconfig block report parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16396](https://issues.apache.org/jira/browse/HDFS-16396) | Reconfig slow peer parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16397](https://issues.apache.org/jira/browse/HDFS-16397) | Reconfig slow disk parameters for datanode | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7341](https://issues.apache.org/jira/browse/MAPREDUCE-7341) | Add a task-manifest output committer for Azure and GCS | Major | client | Steve Loughran | Steve Loughran | +| [HADOOP-18163](https://issues.apache.org/jira/browse/HADOOP-18163) | hadoop-azure support for the Manifest Committer of MAPREDUCE-7341 | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HDFS-16413](https://issues.apache.org/jira/browse/HDFS-16413) | Reconfig dfs usage parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16521](https://issues.apache.org/jira/browse/HDFS-16521) | DFS API to retrieve slow datanodes | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16568](https://issues.apache.org/jira/browse/HDFS-16568) | dfsadmin -reconfig option to start/query reconfig on all live datanodes | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HDFS-16582](https://issues.apache.org/jira/browse/HDFS-16582) | Expose aggregate latency of slow node as perceived by the reporting node | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | Slow peer metrics - add median, mad and upper latency limits | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-11241](https://issues.apache.org/jira/browse/YARN-11241) | Add uncleaning option for local app log file with log-aggregation enabled | Major | log-aggregation | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103) | High performance vectored read API in Hadoop | Major | common, fs, fs/adl, fs/s3 | Mukund Thakur | Mukund Thakur | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17276](https://issues.apache.org/jira/browse/HADOOP-17276) | Extend CallerContext to make it include many items | Major | . | Hui Fei | Hui Fei | +| [HDFS-15745](https://issues.apache.org/jira/browse/HDFS-15745) | Make DataNodePeerMetrics#LOW\_THRESHOLD\_MS and MIN\_OUTLIER\_DETECTION\_NODES configurable | Major | . | Haibin Huang | Haibin Huang | +| [HDFS-16266](https://issues.apache.org/jira/browse/HDFS-16266) | Add remote port information to HDFS audit log | Major | . | Tao Li | Tao Li | +| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori | +| [HDFS-16310](https://issues.apache.org/jira/browse/HDFS-16310) | RBF: Add client port to CallerContext for Router | Major | . | Tao Li | Tao Li | +| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . 
| qinyuren | qinyuren | +| [HDFS-16426](https://issues.apache.org/jira/browse/HDFS-16426) | fix nextBlockReportTime when trigger full block report force | Major | . | qinyuren | qinyuren | +| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin | +| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin | +| [HDFS-16262](https://issues.apache.org/jira/browse/HDFS-16262) | Async refresh of cached locations in DFSInputStream | Major | . | Bryan Beaudreault | Bryan Beaudreault | +| [HADOOP-18093](https://issues.apache.org/jira/browse/HADOOP-18093) | Better exception handling for testFileStatusOnMountLink() in ViewFsBaseTest.java | Trivial | . | Xing Lin | Xing Lin | +| [HDFS-16423](https://issues.apache.org/jira/browse/HDFS-16423) | balancer should not get blocks on stale storages | Major | balancer & mover | qinyuren | qinyuren | +| [HADOOP-18139](https://issues.apache.org/jira/browse/HADOOP-18139) | Allow configuration of zookeeper server principal | Major | auth | Owen O'Malley | Owen O'Malley | +| [YARN-11076](https://issues.apache.org/jira/browse/YARN-11076) | Upgrade jQuery version in Yarn UI2 | Major | yarn-ui-v2 | Tamas Domok | Tamas Domok | +| [HDFS-16495](https://issues.apache.org/jira/browse/HDFS-16495) | RBF should prepend the client ip rather than append it. | Major | . 
| Owen O'Malley | Owen O'Malley | +| [HADOOP-18144](https://issues.apache.org/jira/browse/HADOOP-18144) | getTrashRoot/s in ViewFileSystem should return viewFS path, not targetFS path | Major | common | Xing Lin | Xing Lin | +| [HADOOP-18162](https://issues.apache.org/jira/browse/HADOOP-18162) | hadoop-common enhancements for the Manifest Committer of MAPREDUCE-7341 | Major | fs | Steve Loughran | Steve Loughran | +| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | Zhaohui Wang | Zhaohui Wang | +| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16457](https://issues.apache.org/jira/browse/HDFS-16457) | Make fs.getspaceused.classname reconfigurable | Major | namenode | yanbin.zhang | yanbin.zhang | +| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li | +| [HDFS-16497](https://issues.apache.org/jira/browse/HDFS-16497) | EC: Add param comment for liveBusyBlockIndices with HDFS-14768 | Minor | erasure-coding, namanode | caozhiqiang | caozhiqiang | +| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17551](https://issues.apache.org/jira/browse/HADOOP-17551) | Upgrade maven-site-plugin to 3.11.0 | Major | . 
| Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16519](https://issues.apache.org/jira/browse/HDFS-16519) | Add throttler to EC reconstruction | Minor | datanode, ec | daimin | daimin | +| [HDFS-14478](https://issues.apache.org/jira/browse/HDFS-14478) | Add libhdfs APIs for openFile | Major | hdfs-client, libhdfs, native | Sahil Takiar | Sahil Takiar | +| [HADOOP-16202](https://issues.apache.org/jira/browse/HADOOP-16202) | Enhance openFile() for better read performance against object stores | Major | fs, fs/s3, tools/distcp | Steve Loughran | Steve Loughran | +| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles | +| [HDFS-16520](https://issues.apache.org/jira/browse/HDFS-16520) | Improve EC pread: avoid potential reading whole block | Major | dfsclient, ec, erasure-coding | daimin | daimin | +| [HADOOP-18167](https://issues.apache.org/jira/browse/HADOOP-18167) | Add metrics to track delegation token secret manager operations | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta | +| [HADOOP-18172](https://issues.apache.org/jira/browse/HADOOP-18172) | Change scope of getRootFallbackLink for InodeTree to make them accessible from outside package | Minor | . | Xing Lin | Xing Lin | +| [HDFS-16588](https://issues.apache.org/jira/browse/HDFS-16588) | Backport HDFS-16584 to branch-3.3. | Major | balancer & mover, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . 
| Wei-Chiu Chuang | Ashutosh Gupta | +| [HADOOP-18244](https://issues.apache.org/jira/browse/HADOOP-18244) | Fix Hadoop-Common JavaDoc Error on branch-3.3 | Major | common | Shilun Fan | Shilun Fan | +| [HADOOP-18269](https://issues.apache.org/jira/browse/HADOOP-18269) | Misleading method name in DistCpOptions | Minor | tools/distcp | guophilipse | guophilipse | +| [HADOOP-18275](https://issues.apache.org/jira/browse/HADOOP-18275) | update os-maven-plugin to 1.7.0 | Minor | build | Steve Loughran | Steve Loughran | +| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16576](https://issues.apache.org/jira/browse/HDFS-16576) | Remove unused imports in HDFS project | Minor | . | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16629](https://issues.apache.org/jira/browse/HDFS-16629) | [JDK 11] Fix javadoc warnings in hadoop-hdfs module | Minor | hdfs | Shilun Fan | Shilun Fan | +| [YARN-11172](https://issues.apache.org/jira/browse/YARN-11172) | Fix testDelegationToken | Major | test | zhengchenyu | zhengchenyu | +| [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | Improve Magic Committer Performance | Minor | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18288](https://issues.apache.org/jira/browse/HADOOP-18288) | Total requests and total requests per sec served by RPC servers | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18336](https://issues.apache.org/jira/browse/HADOOP-18336) | tag FSDataInputStream.getWrappedStream() @Public/@Stable | Minor | fs | Steve Loughran | Ashutosh Gupta | +| [HADOOP-13144](https://issues.apache.org/jira/browse/HADOOP-13144) | Enhancing IPC client throughput via multiple connections per user | Minor | ipc | Jason Kace | Íñigo Goiri | +| [HDFS-16712](https://issues.apache.org/jira/browse/HDFS-16712) | Fix incorrect placeholder in DataNode.java | Major | . 
| ZanderXu | ZanderXu | +| [HDFS-16702](https://issues.apache.org/jira/browse/HDFS-16702) | MiniDFSCluster should report cause of exception in assertion error | Minor | hdfs | Steve Vaughan | Steve Vaughan | +| [HADOOP-18365](https://issues.apache.org/jira/browse/HADOOP-18365) | Updated addresses are still accessed using the old IP address | Major | common | Steve Vaughan | Steve Vaughan | +| [HDFS-16687](https://issues.apache.org/jira/browse/HDFS-16687) | RouterFsckServlet replicates code from DfsServlet base class | Major | federation | Steve Vaughan | Steve Vaughan | +| [HADOOP-18333](https://issues.apache.org/jira/browse/HADOOP-18333) | hadoop-client-runtime impact by CVE-2022-2047 CVE-2022-2048 due to shaded jetty | Major | build | phoebe chen | Ashutosh Gupta | +| [HADOOP-18406](https://issues.apache.org/jira/browse/HADOOP-18406) | Adds alignment context to call path for creating RPC proxy with multiple connections per user. | Major | ipc | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [HDFS-16684](https://issues.apache.org/jira/browse/HDFS-16684) | Exclude self from JournalNodeSyncer when using a bind host | Major | journal-node | Steve Vaughan | Steve Vaughan | +| [HDFS-16686](https://issues.apache.org/jira/browse/HDFS-16686) | GetJournalEditServlet fails to authorize valid Kerberos request | Major | journal-node | Steve Vaughan | Steve Vaughan | +| [YARN-11303](https://issues.apache.org/jira/browse/YARN-11303) | Upgrade jquery ui to 1.13.2 | Major | security | D M Murali Krishna Reddy | Ashutosh Gupta | +| [HADOOP-16769](https://issues.apache.org/jira/browse/HADOOP-16769) | LocalDirAllocator to provide diagnostics when file creation fails | Minor | util | Ramesh Kumar Thangarajan | Ashutosh Gupta | +| [HADOOP-18341](https://issues.apache.org/jira/browse/HADOOP-18341) | upgrade commons-configuration2 to 2.8.0 and commons-text to 1.9 | Major | . 
| PJ Fanning | PJ Fanning | +| [HDFS-16776](https://issues.apache.org/jira/browse/HDFS-16776) | Erasure Coding: The length of targets should be checked when DN gets a reconstruction task | Major | . | Kidd5368 | Kidd5368 | +| [HADOOP-18469](https://issues.apache.org/jira/browse/HADOOP-18469) | Add XMLUtils methods to centralise code that creates secure XML parsers | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | Remove the hadoop-openstack module | Major | build, fs, fs/swift | Steve Loughran | Steve Loughran | +| [HADOOP-18468](https://issues.apache.org/jira/browse/HADOOP-18468) | upgrade jettison json jar due to fix CVE-2022-40149 | Major | build | PJ Fanning | PJ Fanning | +| [HADOOP-17779](https://issues.apache.org/jira/browse/HADOOP-17779) | Lock File System Creator Semaphore Uninterruptibly | Minor | fs | David Mollitor | David Mollitor | +| [HADOOP-18360](https://issues.apache.org/jira/browse/HADOOP-18360) | Update commons-csv from 1.0 to 1.9.0. | Minor | common | Shilun Fan | Shilun Fan | +| [HADOOP-18493](https://issues.apache.org/jira/browse/HADOOP-18493) | update jackson-databind 2.12.7.1 due to CVE fixes | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | Update Bouncy Castle to 1.68 or later | Major | build | Takanobu Asanuma | PJ Fanning | +| [HADOOP-18497](https://issues.apache.org/jira/browse/HADOOP-18497) | Upgrade commons-text version to fix CVE-2022-42889 | Major | build | Xiaoqiao He | PJ Fanning | +| [HDFS-16795](https://issues.apache.org/jira/browse/HDFS-16795) | Use secure XML parser utils in hdfs classes | Major | . | PJ Fanning | PJ Fanning | +| [YARN-11330](https://issues.apache.org/jira/browse/YARN-11330) | Use secure XML parser utils in YARN | Major | . | PJ Fanning | PJ Fanning | +| [MAPREDUCE-7411](https://issues.apache.org/jira/browse/MAPREDUCE-7411) | Use secure XML parser utils in MapReduce | Major | . 
| PJ Fanning | PJ Fanning | +| [HADOOP-18512](https://issues.apache.org/jira/browse/HADOOP-18512) | upgrade woodstox-core to 5.4.0 for security fix | Major | common | phoebe chen | PJ Fanning | +| [YARN-11363](https://issues.apache.org/jira/browse/YARN-11363) | Remove unused TimelineVersionWatcher and TimelineVersion from hadoop-yarn-server-tests | Major | test, yarn | Ashutosh Gupta | Ashutosh Gupta | +| [YARN-11364](https://issues.apache.org/jira/browse/YARN-11364) | Docker Container to accept docker Image name with sha256 digest | Major | yarn | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18517](https://issues.apache.org/jira/browse/HADOOP-18517) | ABFS: Add fs.azure.enable.readahead option to disable readahead | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-18484](https://issues.apache.org/jira/browse/HADOOP-18484) | upgrade hsqldb to v2.7.1 due to CVE | Major | . | PJ Fanning | Ashutosh Gupta | +| [HDFS-16844](https://issues.apache.org/jira/browse/HDFS-16844) | [RBF] The routers should be resiliant against exceptions from StateStore | Major | rbf | Owen O'Malley | Owen O'Malley | +| [HADOOP-18573](https://issues.apache.org/jira/browse/HADOOP-18573) | Improve error reporting on non-standard kerberos names | Blocker | security | Steve Loughran | Steve Loughran | +| [HADOOP-18561](https://issues.apache.org/jira/browse/HADOOP-18561) | CVE-2021-37533 on commons-net is included in hadoop common and hadoop-client-runtime | Blocker | build | phoebe chen | Steve Loughran | +| [HADOOP-18067](https://issues.apache.org/jira/browse/HADOOP-18067) | Über-jira: S3A Hadoop 3.3.5 features | Major | fs/s3 | Steve Loughran | Mukund Thakur | +| [YARN-10444](https://issues.apache.org/jira/browse/YARN-10444) | Node Manager to use openFile() with whole-file read policy for localizing files. 
| Minor | nodemanager | Steve Loughran | Steve Loughran | +| [HADOOP-18661](https://issues.apache.org/jira/browse/HADOOP-18661) | Fix bin/hadoop usage script terminology | Blocker | scripts | Steve Loughran | Steve Loughran | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru | +| [YARN-10553](https://issues.apache.org/jira/browse/YARN-10553) | Refactor TestDistributedShell | Major | distributed-shell, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15839](https://issues.apache.org/jira/browse/HDFS-15839) | RBF: Cannot get method setBalancerBandwidth on Router Client | Major | rbf | Yang Yun | Yang Yun | +| [HADOOP-17588](https://issues.apache.org/jira/browse/HADOOP-17588) | CryptoInputStream#close() should be synchronized | Major | . | Renukaprasad C | Renukaprasad C | +| [HADOOP-17836](https://issues.apache.org/jira/browse/HADOOP-17836) | Improve logging on ABFS error reporting | Minor | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17989](https://issues.apache.org/jira/browse/HADOOP-17989) | ITestAzureBlobFileSystemDelete failing "Operations has null HTTP response" | Major | fs/azure, test | Steve Loughran | Steve Loughran | +| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov | +| [YARN-11065](https://issues.apache.org/jira/browse/YARN-11065) | Bump follow-redirects from 1.13.3 to 1.14.7 in hadoop-yarn-ui | Major | yarn-ui-v2 | Akira Ajisaka | | +| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . 
| Kevin Wikant | Kevin Wikant | +| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant | +| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse | +| [YARN-10788](https://issues.apache.org/jira/browse/YARN-10788) | TestCsiClient fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-18126](https://issues.apache.org/jira/browse/HADOOP-18126) | Update junit 5 version due to build issues | Major | bulid | PJ Fanning | PJ Fanning | +| [YARN-11033](https://issues.apache.org/jira/browse/YARN-11033) | isAbsoluteResource is not correct for dynamically created queues | Minor | yarn | Tamas Domok | Tamas Domok | +| [YARN-10894](https://issues.apache.org/jira/browse/YARN-10894) | Follow up YARN-10237: fix the new test case in TestRMWebServicesCapacitySched | Major | . | Tamas Domok | Tamas Domok | +| [YARN-11022](https://issues.apache.org/jira/browse/YARN-11022) | Fix the documentation for max-parallel-apps in CS | Major | capacity scheduler | Tamas Domok | Tamas Domok | +| [HADOOP-18150](https://issues.apache.org/jira/browse/HADOOP-18150) | Fix ITestAuditManagerDisabled after S3A audit logging was enabled in HADOOP-18091 | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17976](https://issues.apache.org/jira/browse/HADOOP-17976) | abfs etag extraction inconsistent between LIST and HEAD calls | Minor | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-18129](https://issues.apache.org/jira/browse/HADOOP-18129) | Change URI[] in INodeLink to String[] to reduce memory footprint of ViewFileSystem | Major | . 
| Abhishek Das | Abhishek Das | +| [HADOOP-18145](https://issues.apache.org/jira/browse/HADOOP-18145) | Fileutil's unzip method causes unzipped files to lose their original permissions | Major | common | jingxiong zhong | jingxiong zhong | +| [HDFS-16518](https://issues.apache.org/jira/browse/HDFS-16518) | KeyProviderCache close cached KeyProvider with Hadoop ShutdownHookManager | Major | hdfs | Lei Yang | Lei Yang | +| [HADOOP-18169](https://issues.apache.org/jira/browse/HADOOP-18169) | getDelegationTokens in ViewFs should also fetch the token from the fallback FS | Major | . | Xing Lin | Xing Lin | +| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma | +| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin | +| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only on dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang | +| [HADOOP-18201](https://issues.apache.org/jira/browse/HADOOP-18201) | Remove base and bucket overrides for endpoint in ITestS3ARequesterPays.java | Major | fs/s3 | Mehakmeet Singh | Daniel Carl Jones | +| [HDFS-16536](https://issues.apache.org/jira/browse/HDFS-16536) | TestOfflineImageViewer fails on branch-3.3 | Major | test | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren | +| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren | +| [HADOOP-17564](https://issues.apache.org/jira/browse/HADOOP-17564) | Fix 
typo in UnixShellGuide.html | Trivial | . | Takanobu Asanuma | Ashutosh Gupta | +| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta | +| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. | Minor | documentation | N Sanketh Reddy | Ashutosh Gupta | +| [HADOOP-16515](https://issues.apache.org/jira/browse/HADOOP-16515) | Update the link to compatibility guide | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-18222](https://issues.apache.org/jira/browse/HADOOP-18222) | Prevent DelegationTokenSecretManagerMetrics from registering multiple times | Major | . 
| Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16540](https://issues.apache.org/jira/browse/HDFS-16540) | Data locality is lost when DataNode pod restarts in kubernetes | Major | namenode | Huaxiang Sun | Huaxiang Sun | +| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu | +| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke | +| [YARN-11141](https://issues.apache.org/jira/browse/YARN-11141) | Capacity Scheduler does not support ambiguous queue names when moving application across queues | Major | capacity scheduler | András Győri | András Győri | +| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack | +| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-11128](https://issues.apache.org/jira/browse/YARN-11128) | Fix comments in TestProportionalCapacityPreemptionPolicy\* | Minor | capacityscheduler, documentation | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18234](https://issues.apache.org/jira/browse/HADOOP-18234) | s3a access point xml examples are wrong | Minor | documentation, fs/s3 | Steve Loughran | Ashutosh Gupta | +| [HADOOP-18238](https://issues.apache.org/jira/browse/HADOOP-18238) | Fix reentrancy check in SFTPFileSystem.close() | Major | common | yi liu | Ashutosh Gupta | +| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | 
Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16608](https://issues.apache.org/jira/browse/HDFS-16608) | Fix the link in TestClientProtocolForPipelineRecovery | Minor | documentation | Samrat Deb | Samrat Deb | +| [HDFS-16563](https://issues.apache.org/jira/browse/HDFS-16563) | Namenode WebUI prints sensitive information on Token Expiry | Major | namanode, security, webhdfs | Renukaprasad C | Renukaprasad C | +| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu | +| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant | +| [HADOOP-18255](https://issues.apache.org/jira/browse/HADOOP-18255) | fsdatainputstreambuilder.md refers to hadoop 3.3.3, when it shouldn't | Minor | documentation | Steve Loughran | Ashutosh Gupta | +| [MAPREDUCE-7387](https://issues.apache.org/jira/browse/MAPREDUCE-7387) | Fix TestJHSSecurity#testDelegationToken AssertionError due to HDFS-16563 | Major | . | Shilun Fan | Shilun Fan | +| [MAPREDUCE-7369](https://issues.apache.org/jira/browse/MAPREDUCE-7369) | MapReduce tasks timing out when spends more time on MultipleOutputs#close | Major | . 
| Prabhu Joseph | Ashutosh Gupta | +| [MAPREDUCE-7391](https://issues.apache.org/jira/browse/MAPREDUCE-7391) | TestLocalDistributedCacheManager failing after HADOOP-16202 | Major | test | Steve Loughran | Steve Loughran | +| [HDFS-16591](https://issues.apache.org/jira/browse/HDFS-16591) | StateStoreZooKeeper fails to initialize | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HADOOP-18321](https://issues.apache.org/jira/browse/HADOOP-18321) | Fix when to read an additional record from a BZip2 text file split | Critical | io | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das | +| [HADOOP-18217](https://issues.apache.org/jira/browse/HADOOP-18217) | shutdownhookmanager should not be multithreaded (deadlock possible) | Minor | util | Catherinot Remi | | +| [MAPREDUCE-7372](https://issues.apache.org/jira/browse/MAPREDUCE-7372) | MapReduce set permission too late in copyJar method | Major | mrv2 | Zhang Dongsheng | | +| [HADOOP-18330](https://issues.apache.org/jira/browse/HADOOP-18330) | S3AFileSystem removes Path when calling createS3Client | Minor | fs/s3 | Ashutosh Pant | Ashutosh Pant | +| [HADOOP-18390](https://issues.apache.org/jira/browse/HADOOP-18390) | Fix out of sync import for HADOOP-18321 | Minor | . 
| Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18340](https://issues.apache.org/jira/browse/HADOOP-18340) | deleteOnExit does not work with S3AFileSystem | Minor | fs/s3 | Huaxiang Sun | Huaxiang Sun | +| [HADOOP-18383](https://issues.apache.org/jira/browse/HADOOP-18383) | Codecs with @DoNotPool annotation are not closed causing memory leak | Major | common | Kevin Sewell | Kevin Sewell | +| [HDFS-16729](https://issues.apache.org/jira/browse/HDFS-16729) | RBF: fix some unreasonably annotated docs | Major | documentation, rbf | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18398](https://issues.apache.org/jira/browse/HADOOP-18398) | Prevent AvroRecord\*.class from being included non-test jar | Major | common | YUBI LEE | YUBI LEE | +| [HDFS-4043](https://issues.apache.org/jira/browse/HDFS-4043) | Namenode Kerberos Login does not use proper hostname for host qualified hdfs principal name. | Major | security | Ahad Rana | Steve Vaughan | +| [MAPREDUCE-7403](https://issues.apache.org/jira/browse/MAPREDUCE-7403) | Support spark dynamic partitioning in the Manifest Committer | Major | mrv2 | Steve Loughran | Steve Loughran | +| [HDFS-16732](https://issues.apache.org/jira/browse/HDFS-16732) | [SBN READ] Avoid get location from observer when the block report is delayed. | Critical | hdfs | zhengchenyu | zhengchenyu | +| [HADOOP-18375](https://issues.apache.org/jira/browse/HADOOP-18375) | Fix failure of shelltest for hadoop\_add\_ldlibpath | Minor | test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-16755](https://issues.apache.org/jira/browse/HDFS-16755) | TestQJMWithFaults.testUnresolvableHostName() can fail due to unexpected host resolution | Minor | test | Steve Vaughan | Steve Vaughan | +| [HADOOP-18400](https://issues.apache.org/jira/browse/HADOOP-18400) | Fix file split duplicating records from a succeeding split when reading BZip2 text files | Critical | . 
| Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18242](https://issues.apache.org/jira/browse/HADOOP-18242) | ABFS Rename Failure when tracking metadata is in incomplete state | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18456](https://issues.apache.org/jira/browse/HADOOP-18456) | NullPointerException in ObjectListingIterator's constructor | Blocker | fs/s3 | Quanlong Huang | Steve Loughran | +| [HADOOP-18444](https://issues.apache.org/jira/browse/HADOOP-18444) | Add Support for localized trash for ViewFileSystem in Trash.moveToAppropriateTrash | Major | . | Xing Lin | Xing Lin | +| [HADOOP-18443](https://issues.apache.org/jira/browse/HADOOP-18443) | Upgrade snakeyaml to 1.32 | Major | security | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16766](https://issues.apache.org/jira/browse/HDFS-16766) | hdfs ec command loads (administrator provided) erasure code policy files without disabling xml entity expansion | Major | security | Jing | Ashutosh Gupta | +| [HDFS-13369](https://issues.apache.org/jira/browse/HDFS-13369) | FSCK Report broken with RequestHedgingProxyProvider | Major | hdfs | Harshakiran Reddy | Ranith Sardar | +| [YARN-11039](https://issues.apache.org/jira/browse/YARN-11039) | LogAggregationFileControllerFactory::getFileControllerForRead can leak threads | Blocker | log-aggregation | Rajesh Balamohan | Steve Loughran | +| [HADOOP-18499](https://issues.apache.org/jira/browse/HADOOP-18499) | S3A to support HTTPS web proxies | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18233](https://issues.apache.org/jira/browse/HADOOP-18233) | Possible race condition with TemporaryAWSCredentialsProvider | Major | auth, fs/s3 | Jason Sleight | Jimmy Wong | +| [MAPREDUCE-7425](https://issues.apache.org/jira/browse/MAPREDUCE-7425) | Document Fix for yarn.app.mapreduce.client-am.ipc.max-retries | Major | yarn | teng wang | teng wang | +| [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | Disable abfs prefetching by 
default | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HDFS-16836](https://issues.apache.org/jira/browse/HDFS-16836) | StandbyCheckpointer can still trigger rollback fs image after RU is finalized | Major | hdfs | Lei Yang | Lei Yang | +| [HADOOP-18324](https://issues.apache.org/jira/browse/HADOOP-18324) | Interrupting RPC Client calls can lead to thread exhaustion | Critical | ipc | Owen O'Malley | Owen O'Malley | +| [HDFS-16832](https://issues.apache.org/jira/browse/HDFS-16832) | [SBN READ] Fix NPE when check the block location of empty directory | Major | . | zhengchenyu | zhengchenyu | +| [HADOOP-18498](https://issues.apache.org/jira/browse/HADOOP-18498) | [ABFS]: Error introduced when SAS Token containing '?' prefix is passed | Minor | fs/azure | Sree Bhattacharyya | Sree Bhattacharyya | +| [HDFS-16847](https://issues.apache.org/jira/browse/HDFS-16847) | RBF: StateStore writer should not commit tmp fail if there was an error in writing the file. | Critical | hdfs, rbf | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [HADOOP-18401](https://issues.apache.org/jira/browse/HADOOP-18401) | No ARM binaries in branch-3.3.x releases | Minor | build | Ling Xu | | +| [HADOOP-18408](https://issues.apache.org/jira/browse/HADOOP-18408) | [ABFS]: ITestAbfsManifestCommitProtocol fails on nonHNS configuration | Minor | fs/azure, test | Pranav Saxena | Sree Bhattacharyya | +| [HADOOP-18402](https://issues.apache.org/jira/browse/HADOOP-18402) | S3A committer NPE in spark job abort | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18569](https://issues.apache.org/jira/browse/HADOOP-18569) | NFS Gateway may release buffer too early | Blocker | nfs | Attila Doroszlai | Attila Doroszlai | +| [HADOOP-18574](https://issues.apache.org/jira/browse/HADOOP-18574) | Changing log level of IOStatistics increment to make the DEBUG logs less noisy | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| 
[HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521) | ABFS ReadBufferManager buffer sharing across concurrent HTTP requests | Critical | fs/azure | Steve Loughran | Steve Loughran | +| [MAPREDUCE-7375](https://issues.apache.org/jira/browse/MAPREDUCE-7375) | JobSubmissionFiles don't set right permission after mkdirs | Major | mrv2 | Zhang Dongsheng | | +| [HADOOP-17717](https://issues.apache.org/jira/browse/HADOOP-17717) | Update wildfly openssl to 1.1.3.Final | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-18598](https://issues.apache.org/jira/browse/HADOOP-18598) | maven site generation doesn't include javadocs | Blocker | site | Steve Loughran | Steve Loughran | +| [HDFS-16895](https://issues.apache.org/jira/browse/HDFS-16895) | NamenodeHeartbeatService should use credentials of logged in user | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16853](https://issues.apache.org/jira/browse/HDFS-16853) | The UT TestLeaseRecovery2#testHardLeaseRecoveryAfterNameNodeRestart failed because HADOOP-18324 | Blocker | . | ZanderXu | ZanderXu | +| [HADOOP-18641](https://issues.apache.org/jira/browse/HADOOP-18641) | cyclonedx maven plugin breaks builds on recent maven releases (3.9.0) | Major | build | Steve Loughran | Steve Loughran | +| [HDFS-16923](https://issues.apache.org/jira/browse/HDFS-16923) | The getListing RPC will throw NPE if the path does not exist | Critical | . 
| ZanderXu | ZanderXu | +| [HDFS-16896](https://issues.apache.org/jira/browse/HDFS-16896) | HDFS Client hedged read has increased failure rate than without hedged read | Major | hdfs-client | Tom McCormick | Tom McCormick | +| [YARN-11383](https://issues.apache.org/jira/browse/YARN-11383) | Workflow priority mappings is case sensitive | Major | yarn | Aparajita Choudhary | Aparajita Choudhary | +| [HDFS-16939](https://issues.apache.org/jira/browse/HDFS-16939) | Fix the thread safety bug in LowRedundancyBlocks | Major | namanode | Shuyan Zhang | Shuyan Zhang | +| [HDFS-16934](https://issues.apache.org/jira/browse/HDFS-16934) | org.apache.hadoop.hdfs.tools.TestDFSAdmin#testAllDatanodesReconfig regression | Minor | dfsadmin, test | Steve Loughran | Shilun Fan | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-16573](https://issues.apache.org/jira/browse/HDFS-16573) | Fix test TestDFSStripedInputStreamWithRandomECPolicy | Minor | test | daimin | daimin | +| [HDFS-16637](https://issues.apache.org/jira/browse/HDFS-16637) | TestHDFSCLI#testAll consistently failing | Major | . 
| Viraj Jasani | Viraj Jasani | +| [YARN-11248](https://issues.apache.org/jira/browse/YARN-11248) | Add unit test for FINISHED\_CONTAINERS\_PULLED\_BY\_AM event on DECOMMISSIONING | Major | test | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16625](https://issues.apache.org/jira/browse/HDFS-16625) | Unit tests aren't checking for PMDK availability | Major | test | Steve Vaughan | Steve Vaughan | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13293](https://issues.apache.org/jira/browse/HDFS-13293) | RBF: The RouterRPCServer should transfer client IP via CallerContext to NamenodeRpcServer | Major | rbf | Baolong Mao | Hui Fei | +| [HDFS-15630](https://issues.apache.org/jira/browse/HDFS-15630) | RBF: Fix wrong client IP info in CallerContext when requests mount points with multi-destinations. | Major | rbf | Chengwei Wang | Chengwei Wang | +| [HADOOP-17152](https://issues.apache.org/jira/browse/HADOOP-17152) | Implement wrapper for guava newArrayList and newLinkedList | Major | common | Ahmed Hussein | Viraj Jasani | +| [HADOOP-17851](https://issues.apache.org/jira/browse/HADOOP-17851) | S3A to support user-specified content encoding | Minor | fs/s3 | Holden Karau | Holden Karau | +| [HADOOP-17492](https://issues.apache.org/jira/browse/HADOOP-17492) | abfs listLocatedStatus to support incremental/async page fetching | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17409](https://issues.apache.org/jira/browse/HADOOP-17409) | Remove S3Guard - no longer needed | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18084](https://issues.apache.org/jira/browse/HADOOP-18084) | ABFS: Add testfilePath while verifying test contents are read correctly | Minor | fs/azure, test | Anmol Asrani | Anmol Asrani | +| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | 
secfree | +| [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) | S3A auditing leaks memory through ThreadLocal references | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18071](https://issues.apache.org/jira/browse/HADOOP-18071) | ABFS: Set driver global timeout for ITestAzureBlobFileSystemBasics | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17765](https://issues.apache.org/jira/browse/HADOOP-17765) | ABFS: Use Unique File Paths in Tests | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17862](https://issues.apache.org/jira/browse/HADOOP-17862) | ABFS: Fix unchecked cast compiler warning for AbfsListStatusRemoteIterator | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-18075](https://issues.apache.org/jira/browse/HADOOP-18075) | ABFS: Fix failure caused by listFiles() in ITestAbfsRestOperationException | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-18112](https://issues.apache.org/jira/browse/HADOOP-18112) | Implement paging during S3 multi object delete. | Critical | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-16204](https://issues.apache.org/jira/browse/HADOOP-16204) | ABFS tests to include terasort | Minor | fs/azure, test | Steve Loughran | Steve Loughran | +| [HDFS-13248](https://issues.apache.org/jira/browse/HDFS-13248) | RBF: Namenode need to choose block location for the client | Major | . | Wu Weiwei | Owen O'Malley | +| [HADOOP-13704](https://issues.apache.org/jira/browse/HADOOP-13704) | S3A getContentSummary() to move to listFiles(recursive) to count children; instrument use | Minor | fs/s3 | Steve Loughran | Ahmar Suhail | +| [HADOOP-14661](https://issues.apache.org/jira/browse/HADOOP-14661) | S3A to support Requester Pays Buckets | Minor | common, util | Mandus Momberg | Daniel Carl Jones | +| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . 
| qinyuren | qinyuren | +| [HADOOP-17682](https://issues.apache.org/jira/browse/HADOOP-17682) | ABFS: Support FileStatus input to OpenFileWithOptions() via OpenFileParameters | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | Use jersey-json that is built to use jackson2 | Major | build | Akira Ajisaka | PJ Fanning | +| [HADOOP-18104](https://issues.apache.org/jira/browse/HADOOP-18104) | Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [HADOOP-18168](https://issues.apache.org/jira/browse/HADOOP-18168) | ITestMarkerTool.testRunLimitedLandsatAudit failing due to most of bucket content purged | Minor | fs/s3, test | Steve Loughran | Daniel Carl Jones | +| [HADOOP-12020](https://issues.apache.org/jira/browse/HADOOP-12020) | Support configuration of different S3 storage classes | Major | fs/s3 | Yann Landrin-Schweitzer | Monthon Klongklaew | +| [HADOOP-18105](https://issues.apache.org/jira/browse/HADOOP-18105) | Implement a variant of ElasticByteBufferPool which uses weak references for garbage collection. | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [HADOOP-18107](https://issues.apache.org/jira/browse/HADOOP-18107) | Vectored IO support for large S3 files. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18106](https://issues.apache.org/jira/browse/HADOOP-18106) | Handle memory fragmentation in S3 Vectored IO implementation. 
| Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17461](https://issues.apache.org/jira/browse/HADOOP-17461) | Add thread-level IOStatistics Context | Major | fs, fs/azure, fs/s3 | Steve Loughran | Mehakmeet Singh | +| [HADOOP-18372](https://issues.apache.org/jira/browse/HADOOP-18372) | ILoadTestS3ABulkDeleteThrottling failing | Minor | fs/s3, test | Steve Loughran | Ahmar Suhail | +| [HADOOP-18368](https://issues.apache.org/jira/browse/HADOOP-18368) | ITestCustomSigner fails when access point name has '-' | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-15964](https://issues.apache.org/jira/browse/HADOOP-15964) | Add S3A support for Async Scatter/Gather IO | Major | fs/s3 | Steve Loughran | Mukund Thakur | +| [HADOOP-18366](https://issues.apache.org/jira/browse/HADOOP-18366) | ITestS3Select.testSelectSeekFullLandsat is timing out | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-18373](https://issues.apache.org/jira/browse/HADOOP-18373) | IOStatisticsContext tuning | Minor | fs/s3, test | Steve Loughran | Viraj Jasani | +| [HADOOP-18227](https://issues.apache.org/jira/browse/HADOOP-18227) | Add input stream IOstats for vectored IO api in S3A. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18392](https://issues.apache.org/jira/browse/HADOOP-18392) | Propagate vectored s3a input stream stats to file system stats. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18355](https://issues.apache.org/jira/browse/HADOOP-18355) | Update previous index properly while validating overlapping ranges. 
| Major | common, fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18371](https://issues.apache.org/jira/browse/HADOOP-18371) | s3a FS init logs at warn if fs.s3a.create.storage.class is unset | Blocker | fs/s3 | Steve Loughran | Viraj Jasani | +| [HADOOP-18385](https://issues.apache.org/jira/browse/HADOOP-18385) | ITestS3ACannedACLs failure; not in a span | Major | fs/s3, test | Steve Loughran | Ashutosh Gupta | +| [HADOOP-18403](https://issues.apache.org/jira/browse/HADOOP-18403) | Fix FileSystem leak in ITestS3AAWSCredentialsProvider | Minor | fs/s3 | Viraj Jasani | Viraj Jasani | +| [HADOOP-17882](https://issues.apache.org/jira/browse/HADOOP-17882) | distcp to use openFile() with sequential IO; ranges of reads | Major | tools/distcp | Steve Loughran | Steve Loughran | +| [HADOOP-18391](https://issues.apache.org/jira/browse/HADOOP-18391) | Improve VectoredReadUtils#readVectored() for direct buffers | Major | fs | Steve Loughran | Mukund Thakur | +| [HADOOP-18407](https://issues.apache.org/jira/browse/HADOOP-18407) | Improve vectored IO api spec. | Minor | fs, fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18339](https://issues.apache.org/jira/browse/HADOOP-18339) | S3A storage class option only picked up when buffering writes to disk | Major | fs/s3 | Steve Loughran | Monthon Klongklaew | +| [HADOOP-18410](https://issues.apache.org/jira/browse/HADOOP-18410) | S3AInputStream.unbuffer() async drain not releasing http connections | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18439](https://issues.apache.org/jira/browse/HADOOP-18439) | Fix VectoredIO for LocalFileSystem when checksum is enabled. 
| Major | common | Mukund Thakur | Mukund Thakur | +| [HADOOP-18416](https://issues.apache.org/jira/browse/HADOOP-18416) | ITestS3AIOStatisticsContext failure | Major | fs/s3, test | Steve Loughran | Mehakmeet Singh | +| [HADOOP-18347](https://issues.apache.org/jira/browse/HADOOP-18347) | Restrict vectoredIO threadpool to reduce memory pressure | Major | common, fs, fs/adl, fs/s3 | Rajesh Balamohan | Mukund Thakur | +| [HADOOP-18463](https://issues.apache.org/jira/browse/HADOOP-18463) | Add an integration test to process data asynchronously during vectored read. | Major | . | Mukund Thakur | Mukund Thakur | +| [HADOOP-15460](https://issues.apache.org/jira/browse/HADOOP-15460) | S3A FS to add "fs.s3a.create.performance" to the builder file creation option set | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | Upgrade AWS SDK to V2 - Prerequisites | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-18480](https://issues.apache.org/jira/browse/HADOOP-18480) | upgrade AWS SDK to 1.12.316 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18460](https://issues.apache.org/jira/browse/HADOOP-18460) | ITestS3AContractVectoredRead.testStopVectoredIoOperationsUnbuffer failing | Minor | fs/s3, test | Steve Loughran | Mukund Thakur | +| [HADOOP-18488](https://issues.apache.org/jira/browse/HADOOP-18488) | Cherrypick HADOOP-11245 to branch-3.3 | Major | . 
| Wei-Chiu Chuang | Ashutosh Gupta | +| [HADOOP-18481](https://issues.apache.org/jira/browse/HADOOP-18481) | AWS v2 SDK upgrade log to not warn of use standard AWS Credential Providers | Major | fs/s3 | Steve Loughran | Ahmar Suhail | +| [HADOOP-18476](https://issues.apache.org/jira/browse/HADOOP-18476) | Abfs and S3A FileContext bindings to close wrapped filesystems in finalizer | Blocker | fs/azure, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18304](https://issues.apache.org/jira/browse/HADOOP-18304) | Improve S3A committers documentation clarity | Trivial | documentation | Daniel Carl Jones | Daniel Carl Jones | +| [HADOOP-18465](https://issues.apache.org/jira/browse/HADOOP-18465) | S3A server-side encryption tests fail before checking encryption tests should skip | Minor | fs/s3, test | Daniel Carl Jones | Daniel Carl Jones | +| [HADOOP-18530](https://issues.apache.org/jira/browse/HADOOP-18530) | ChecksumFileSystem::readVectored might return byte buffers not positioned at 0 | Blocker | fs | Harshit Gupta | Harshit Gupta | +| [HADOOP-18457](https://issues.apache.org/jira/browse/HADOOP-18457) | ABFS: Support for account level throttling | Major | . 
| Anmol Asrani | Anmol Asrani | +| [HADOOP-18560](https://issues.apache.org/jira/browse/HADOOP-18560) | AvroFSInput opens a stream twice and discards the second one without closing | Blocker | fs | Steve Loughran | Steve Loughran | +| [HADOOP-18526](https://issues.apache.org/jira/browse/HADOOP-18526) | Leak of S3AInstrumentation instances via hadoop Metrics references | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546) | disable purging list of in progress reads in abfs stream closed | Blocker | fs/azure | Steve Loughran | Pranav Saxena | +| [HADOOP-18577](https://issues.apache.org/jira/browse/HADOOP-18577) | ABFS: add probes of readahead fix | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-11867](https://issues.apache.org/jira/browse/HADOOP-11867) | Add a high-performance vectored read API. | Major | fs, fs/azure, fs/s3, hdfs-client | Gopal Vijayaraghavan | Mukund Thakur | +| [HADOOP-18507](https://issues.apache.org/jira/browse/HADOOP-18507) | VectorIO FileRange type to support a "reference" field | Major | fs | Steve Loughran | Steve Loughran | +| [HADOOP-18627](https://issues.apache.org/jira/browse/HADOOP-18627) | site intro docs to make clear Kerberos is mandatory for secure clusters | Major | site | Steve Loughran | Arnout Engelen | +| [HADOOP-17584](https://issues.apache.org/jira/browse/HADOOP-17584) | s3a magic committer may commit more data | Major | fs/s3 | yinan zhan | Steve Loughran | +| [HADOOP-18642](https://issues.apache.org/jira/browse/HADOOP-18642) | Cut excess dependencies from hadoop-azure, hadoop-aliyun transitive imports; fix LICENSE-binary | Blocker | build, fs/azure, fs/oss | Steve Loughran | Steve Loughran | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15854](https://issues.apache.org/jira/browse/HDFS-15854) | Make some parameters configurable for 
SlowDiskTracker and SlowPeerTracker | Major | . | Tao Li | Tao Li | +| [YARN-10747](https://issues.apache.org/jira/browse/YARN-10747) | Bump YARN CSI protobuf version to 3.7.1 | Major | . | Siyao Meng | Siyao Meng | +| [HDFS-16139](https://issues.apache.org/jira/browse/HDFS-16139) | Update BPServiceActor Scheduler's nextBlockReportTime atomically | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18014](https://issues.apache.org/jira/browse/HADOOP-18014) | CallerContext should not include some characters | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [MAPREDUCE-7371](https://issues.apache.org/jira/browse/MAPREDUCE-7371) | DistributedCache alternative APIs should not use DistributedCache APIs internally | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18114](https://issues.apache.org/jira/browse/HADOOP-18114) | Documentation Syntax Error Fix \> AWS Assumed Roles | Trivial | documentation, fs/s3 | Joey Krabacher | Joey Krabacher | +| [HDFS-16481](https://issues.apache.org/jira/browse/HDFS-16481) | Provide support to set Http and Rpc ports in MiniJournalCluster | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16502](https://issues.apache.org/jira/browse/HDFS-16502) | Reconfigure Block Invalidate limit | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16522](https://issues.apache.org/jira/browse/HDFS-16522) | Set Http and Ipc ports for Datanodes in MiniDFSCluster | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . 
| Viraj Jasani | Viraj Jasani | +| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update google-gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak | +| [HADOOP-18397](https://issues.apache.org/jira/browse/HADOOP-18397) | Shutdown AWSSecurityTokenService when its resources are no longer in use | Major | fs/s3 | Viraj Jasani | Viraj Jasani | +| [HADOOP-18575](https://issues.apache.org/jira/browse/HADOOP-18575) | Make XML transformer factory more lenient | Major | common | PJ Fanning | PJ Fanning | +| [HADOOP-18586](https://issues.apache.org/jira/browse/HADOOP-18586) | Update the year to 2023 | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-18587](https://issues.apache.org/jira/browse/HADOOP-18587) | upgrade to jettison 1.5.3 to fix CVE-2022-40150 | Major | common | PJ Fanning | PJ Fanning | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md new file mode 100644 index 0000000000000..b2357e827d2fd --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md @@ -0,0 +1,89 @@ + + +# Apache Hadoop 3.3.5 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | *Major* | **Replace all default Charset usage with UTF-8** + +All of the default charset usages have been replaced to UTF-8. If the default charset of your environment is not UTF-8, the behavior can be different. + + +--- + +* [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | *Major* | **Use jersey-json that is built to use jackson2** + +Use modified jersey-json 1.20 in https://github.com/pjfanning/jersey-1.x/tree/v1.20 that uses Jackson 2.x. 
With this change, the Jackson 1.x dependency has been removed from Hadoop. +Downstream applications which explicitly exclude jersey from transitive dependencies must now exclude com.github.pjfanning:jersey-json. + + +--- + +* [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | *Major* | **Slow peer metrics - add median, mad and upper latency limits** + +Namenode metrics that represent Slownode Json now include three important factors (median, median absolute deviation, upper latency limit) that can help users determine how urgently a given slownode requires manual intervention. + + +--- + +* [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | *Minor* | **Improve Magic Committer Performance** + +S3A filesystem's createFile() operation supports an option to disable all safety checks when creating a file. Consult the documentation and use with care. + + +--- + +* [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | *Minor* | **Upgrade AWS SDK to V2 - Prerequisites** + +In preparation for an (incompatible but necessary) move to the AWS SDK v2, some uses of internal/deprecated AWS classes/interfaces are logged as warnings, though only once during the life of a JVM. Set the log "org.apache.hadoop.fs.s3a.SDKV2Upgrade" to only log at INFO to hide these. + + +--- + +* [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | *Major* | **Remove the hadoop-openstack module** + +The swift:// connector for openstack support has been removed. It had fundamental problems (swift's handling of files \> 4GB). A subset of the S3 protocol is now exported by almost all object store services; please use that through the s3a connector instead. The hadoop-openstack jar remains, only now it is empty of code. This is to ensure that projects which declare the JAR as a dependency will still have successful builds.
+ + +--- + +* [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | *Major* | **Update Bouncy Castle to 1.68 or later** + +Bouncy Castle 1.68+ is a multi-release JAR containing Java classes compiled for different target JREs. Older versions of asm.jar and the Maven Shade plugin may have problems with these. Fix: upgrade the dependencies. + + +--- + +* [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | *Major* | **Disable abfs prefetching by default** + +ABFS block prefetching has been disabled to avoid HADOOP-18521 and buffer sharing on multithreaded processes (Hive, Spark, etc.). This will have little/no performance impact on queries against Parquet or ORC data, but can slow down sequential stream processing, including CSV files; however, the read data will be correct. +It may slow down distcp downloads, where the race condition does not arise. For maximum distcp performance re-enable the readahead by setting fs.abfs.enable.readahead to true. + + +--- + +* [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | *Critical* | **CryptoOutputStream::close leak when encrypted zones + quota exceptions** + +**WARNING: No release note provided for this change.** + + + diff --git a/hadoop-common-project/hadoop-common/src/test/arm-java/org/apache/hadoop/ipc/protobuf/TestProtosLegacy.java b/hadoop-common-project/hadoop-common/src/test/arm-java/org/apache/hadoop/ipc/protobuf/TestProtosLegacy.java new file mode 100644 index 0000000000000..0a7c1f917bae4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/arm-java/org/apache/hadoop/ipc/protobuf/TestProtosLegacy.java @@ -0,0 +1,9892 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// This class is added to source because a protoc 2.5.0 executable is not +// available on ARM to generate the same code. +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: test_legacy.proto + +package org.apache.hadoop.ipc.protobuf; + +public final class TestProtosLegacy { + private TestProtosLegacy() {} + public static void registerAllExtensions( + com.google.protobuf.ExtensionRegistry registry) { + } + public interface EmptyRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + } + /** + * Protobuf type {@code hadoop.common.EmptyRequestProto} + */ + public static final class EmptyRequestProto extends + com.google.protobuf.GeneratedMessage + implements EmptyRequestProtoOrBuilder { + // Use EmptyRequestProto.newBuilder() to construct.
+ private EmptyRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private EmptyRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final EmptyRequestProto defaultInstance; + public static EmptyRequestProto getDefaultInstance() { + return defaultInstance; + } + + public EmptyRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private EmptyRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public EmptyRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new EmptyRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private void initFields() { + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)) { + return super.equals(obj); + } + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto) obj; + + boolean result = true; + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite 
extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.EmptyRequestProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto buildPartial() { + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto(this); + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance()) return this; + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.EmptyRequestProto) + } + + static { + defaultInstance = new EmptyRequestProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.EmptyRequestProto) + } + + public interface EmptyResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + } + /** + * Protobuf type {@code hadoop.common.EmptyResponseProto} + */ + public static final class EmptyResponseProto extends + 
com.google.protobuf.GeneratedMessage + implements EmptyResponseProtoOrBuilder { + // Use EmptyResponseProto.newBuilder() to construct. + private EmptyResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private EmptyResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final EmptyResponseProto defaultInstance; + public static EmptyResponseProto getDefaultInstance() { + return defaultInstance; + } + + public EmptyResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private EmptyResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyResponseProto_descriptor; 
+ } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public EmptyResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new EmptyResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private void initFields() { + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) obj; + + boolean result = true; + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.EmptyResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EmptyResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw 
newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto(this); + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()) return this; + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.EmptyResponseProto) + } + + static { + defaultInstance = new EmptyResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.EmptyResponseProto) + } + + public interface EchoRequestProtoOrBuilder + extends 
com.google.protobuf.MessageOrBuilder { + + // required string message = 1; + /** + * required string message = 1; + */ + boolean hasMessage(); + /** + * required string message = 1; + */ + java.lang.String getMessage(); + /** + * required string message = 1; + */ + com.google.protobuf.ByteString + getMessageBytes(); + } + /** + * Protobuf type {@code hadoop.common.EchoRequestProto} + */ + public static final class EchoRequestProto extends + com.google.protobuf.GeneratedMessage + implements EchoRequestProtoOrBuilder { + // Use EchoRequestProto.newBuilder() to construct. + private EchoRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private EchoRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final EchoRequestProto defaultInstance; + public static EchoRequestProto getDefaultInstance() { + return defaultInstance; + } + + public EchoRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private EchoRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + message_ = input.readBytes(); + break; + } + } + } + } 
catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public EchoRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new EchoRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required string message = 1; + public static final int MESSAGE_FIELD_NUMBER = 1; + private java.lang.Object message_; + /** + * required string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + 
(com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + message_ = s; + } + return s; + } + } + /** + * required string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + message_ = ""; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasMessage()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getMessageBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getMessageBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)) { + return super.equals(obj); 
+ } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto) obj; + + boolean result = true; + result = result && (hasMessage() == other.hasMessage()); + if (hasMessage()) { + result = result && getMessage() + .equals(other.getMessage()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasMessage()) { + hash = (37 * hash) + MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessage().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto 
parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.EchoRequestProto} + */ + public static final 
class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + message_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto build() { + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.message_ = message_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance()) return this; + if (other.hasMessage()) { + bitField0_ |= 0x00000001; + message_ = other.message_; + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasMessage()) { + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto) 
e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required string message = 1; + private java.lang.Object message_ = ""; + /** + * required string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + message_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * required string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * required string message = 1; + */ + public Builder setMessage( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + /** + * required string message = 1; + */ + public Builder clearMessage() { + bitField0_ = (bitField0_ & ~0x00000001); + message_ = getDefaultInstance().getMessage(); + onChanged(); + return this; + } + /** + * required string message = 1; + */ + public Builder setMessageBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.EchoRequestProto) + } + + static { + defaultInstance = new EchoRequestProto(true); + defaultInstance.initFields(); + } + + // 
@@protoc_insertion_point(class_scope:hadoop.common.EchoRequestProto) + } + + public interface EchoResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required string message = 1; + /** + * required string message = 1; + */ + boolean hasMessage(); + /** + * required string message = 1; + */ + java.lang.String getMessage(); + /** + * required string message = 1; + */ + com.google.protobuf.ByteString + getMessageBytes(); + } + /** + * Protobuf type {@code hadoop.common.EchoResponseProto} + */ + public static final class EchoResponseProto extends + com.google.protobuf.GeneratedMessage + implements EchoResponseProtoOrBuilder { + // Use EchoResponseProto.newBuilder() to construct. + private EchoResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private EchoResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final EchoResponseProto defaultInstance; + public static EchoResponseProto getDefaultInstance() { + return defaultInstance; + } + + public EchoResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private EchoResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, 
tag)) { + done = true; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + message_ = input.readBytes(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public EchoResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new EchoResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required string message = 1; + public static final int MESSAGE_FIELD_NUMBER = 1; + private java.lang.Object message_; + /** + * required string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = 
message_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + message_ = s; + } + return s; + } + } + /** + * required string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + message_ = ""; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasMessage()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getMessageBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getMessageBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + 
return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto) obj; + + boolean result = true; + result = result && (hasMessage() == other.hasMessage()); + if (hasMessage()) { + result = result && getMessage() + .equals(other.getMessage()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasMessage()) { + hash = (37 * hash) + MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessage().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return 
PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = 
new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.EchoResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + message_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto getDefaultInstanceForType() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.message_ = message_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance()) return this; + if (other.hasMessage()) { + bitField0_ |= 0x00000001; + message_ = other.message_; + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasMessage()) { + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto parsedMessage = null; + try { + parsedMessage = 
PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required string message = 1; + private java.lang.Object message_ = ""; + /** + * required string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + message_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * required string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * required string message = 1; + */ + public Builder setMessage( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + /** + * required string message = 1; + */ + public Builder clearMessage() { + bitField0_ = (bitField0_ & ~0x00000001); + message_ = getDefaultInstance().getMessage(); + onChanged(); + return this; + } + /** + * required string message = 1; + */ + public Builder setMessageBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + + // 
@@protoc_insertion_point(builder_scope:hadoop.common.EchoResponseProto) + } + + static { + defaultInstance = new EchoResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.EchoResponseProto) + } + + public interface OptRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional string message = 1; + /** + * optional string message = 1; + */ + boolean hasMessage(); + /** + * optional string message = 1; + */ + java.lang.String getMessage(); + /** + * optional string message = 1; + */ + com.google.protobuf.ByteString + getMessageBytes(); + } + /** + * Protobuf type {@code hadoop.common.OptRequestProto} + */ + public static final class OptRequestProto extends + com.google.protobuf.GeneratedMessage + implements OptRequestProtoOrBuilder { + // Use OptRequestProto.newBuilder() to construct. + private OptRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private OptRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final OptRequestProto defaultInstance; + public static OptRequestProto getDefaultInstance() { + return defaultInstance; + } + + public OptRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private OptRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { 
+ int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + message_ = input.readBytes(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public OptRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new OptRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // optional string message = 1; + public static final int MESSAGE_FIELD_NUMBER = 1; + private java.lang.Object message_; + /** + * optional string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 
0x00000001) == 0x00000001); + } + /** + * optional string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + message_ = s; + } + return s; + } + } + /** + * optional string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + message_ = ""; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getMessageBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getMessageBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public 
boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto) obj; + + boolean result = true; + result = result && (hasMessage() == other.hasMessage()); + if (hasMessage()) { + result = result && getMessage() + .equals(other.getMessage()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasMessage()) { + hash = (37 * hash) + MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessage().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws 
com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + 
com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.OptRequestProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + message_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto getDefaultInstanceForType() { 
+ return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.message_ = message_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.getDefaultInstance()) return this; + if (other.hasMessage()) { + bitField0_ |= 0x00000001; + message_ = other.message_; + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch 
(com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // optional string message = 1; + private java.lang.Object message_ = ""; + /** + * optional string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + message_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * optional string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * optional string message = 1; + */ + public Builder setMessage( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + /** + * optional string message = 1; + */ + public Builder clearMessage() { + bitField0_ = (bitField0_ & ~0x00000001); + message_ = getDefaultInstance().getMessage(); + onChanged(); + return this; + } + /** + * optional string message = 1; + */ + public Builder setMessageBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + + // 
@@protoc_insertion_point(builder_scope:hadoop.common.OptRequestProto) + } + + static { + defaultInstance = new OptRequestProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.OptRequestProto) + } + + public interface OptResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional string message = 1; + /** + * optional string message = 1; + */ + boolean hasMessage(); + /** + * optional string message = 1; + */ + java.lang.String getMessage(); + /** + * optional string message = 1; + */ + com.google.protobuf.ByteString + getMessageBytes(); + } + /** + * Protobuf type {@code hadoop.common.OptResponseProto} + */ + public static final class OptResponseProto extends + com.google.protobuf.GeneratedMessage + implements OptResponseProtoOrBuilder { + // Use OptResponseProto.newBuilder() to construct. + private OptResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private OptResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final OptResponseProto defaultInstance; + public static OptResponseProto getDefaultInstance() { + return defaultInstance; + } + + public OptResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private OptResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while 
(!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + message_ = input.readBytes(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public OptResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new OptResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // optional string message = 1; + public static final int MESSAGE_FIELD_NUMBER = 1; + private java.lang.Object message_; + /** + * optional string message = 1; + */ + public boolean hasMessage() { + return 
((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + message_ = s; + } + return s; + } + } + /** + * optional string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + message_ = ""; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getMessageBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getMessageBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + 
@java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto) obj; + + boolean result = true; + result = result && (hasMessage() == other.hasMessage()); + if (hasMessage()) { + result = result && getMessage() + .equals(other.getMessage()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasMessage()) { + hash = (37 * hash) + MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessage().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite 
extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( 
+ com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.OptResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + message_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_OptResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto 
getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.message_ = message_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance()) return this; + if (other.hasMessage()) { + bitField0_ |= 0x00000001; + message_ = other.message_; + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, 
extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // optional string message = 1; + private java.lang.Object message_ = ""; + /** + * optional string message = 1; + */ + public boolean hasMessage() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional string message = 1; + */ + public java.lang.String getMessage() { + java.lang.Object ref = message_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + message_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * optional string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * optional string message = 1; + */ + public Builder setMessage( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + /** + * optional string message = 1; + */ + public Builder clearMessage() { + bitField0_ = (bitField0_ & ~0x00000001); + message_ = getDefaultInstance().getMessage(); + onChanged(); + return this; + } + /** + * optional string message = 1; + */ + public Builder setMessageBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + message_ = value; + onChanged(); + return this; + } + + // 
@@protoc_insertion_point(builder_scope:hadoop.common.OptResponseProto) + } + + static { + defaultInstance = new OptResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.OptResponseProto) + } + + public interface SleepRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required int32 milliSeconds = 1; + /** + * required int32 milliSeconds = 1; + */ + boolean hasMilliSeconds(); + /** + * required int32 milliSeconds = 1; + */ + int getMilliSeconds(); + } + /** + * Protobuf type {@code hadoop.common.SleepRequestProto} + */ + public static final class SleepRequestProto extends + com.google.protobuf.GeneratedMessage + implements SleepRequestProtoOrBuilder { + // Use SleepRequestProto.newBuilder() to construct. + private SleepRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private SleepRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final SleepRequestProto defaultInstance; + public static SleepRequestProto getDefaultInstance() { + return defaultInstance; + } + + public SleepRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private SleepRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; 
+ break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + milliSeconds_ = input.readInt32(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public SleepRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new SleepRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required int32 milliSeconds = 1; + public static final int MILLISECONDS_FIELD_NUMBER = 1; + private int milliSeconds_; + /** + * required int32 milliSeconds = 1; + */ + public boolean hasMilliSeconds() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * 
required int32 milliSeconds = 1; + */ + public int getMilliSeconds() { + return milliSeconds_; + } + + private void initFields() { + milliSeconds_ = 0; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasMilliSeconds()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeInt32(1, milliSeconds_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(1, milliSeconds_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto) obj; + + boolean result = true; + result = result && (hasMilliSeconds() == other.hasMilliSeconds()); + if (hasMilliSeconds()) { + result = result && (getMilliSeconds() + == other.getMilliSeconds()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + 
return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasMilliSeconds()) { + hash = (37 * hash) + MILLISECONDS_FIELD_NUMBER; + hash = (53 * hash) + getMilliSeconds(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.SleepRequestProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto_descriptor; + } + + protected 
com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + milliSeconds_ = 0; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto result = new 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.milliSeconds_ = milliSeconds_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance()) return this; + if (other.hasMilliSeconds()) { + setMilliSeconds(other.getMilliSeconds()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasMilliSeconds()) { + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required int32 milliSeconds = 1; + private int milliSeconds_ ; + /** + * required int32 milliSeconds = 1; + */ + public boolean hasMilliSeconds() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + 
/** + * required int32 milliSeconds = 1; + */ + public int getMilliSeconds() { + return milliSeconds_; + } + /** + * required int32 milliSeconds = 1; + */ + public Builder setMilliSeconds(int value) { + bitField0_ |= 0x00000001; + milliSeconds_ = value; + onChanged(); + return this; + } + /** + * required int32 milliSeconds = 1; + */ + public Builder clearMilliSeconds() { + bitField0_ = (bitField0_ & ~0x00000001); + milliSeconds_ = 0; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.SleepRequestProto) + } + + static { + defaultInstance = new SleepRequestProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.SleepRequestProto) + } + + public interface SleepResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + } + /** + * Protobuf type {@code hadoop.common.SleepResponseProto} + */ + public static final class SleepResponseProto extends + com.google.protobuf.GeneratedMessage + implements SleepResponseProtoOrBuilder { + // Use SleepResponseProto.newBuilder() to construct. 
+ private SleepResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private SleepResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final SleepResponseProto defaultInstance; + public static SleepResponseProto getDefaultInstance() { + return defaultInstance; + } + + public SleepResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private SleepResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public SleepResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new SleepResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private void initFields() { + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto)) { + return super.equals(obj); + } + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto) obj; + + boolean result = true; + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite 
extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.SleepResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto buildPartial() { + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto(this); + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance()) return this; + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.SleepResponseProto) + } + + static { + defaultInstance = new SleepResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.SleepResponseProto) + } + + public interface SlowPingRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required bool shouldSlow = 1; + /** + * required bool shouldSlow = 1; + */ + boolean hasShouldSlow(); + /** + * required bool shouldSlow = 
1; + */ + boolean getShouldSlow(); + } + /** + * Protobuf type {@code hadoop.common.SlowPingRequestProto} + */ + public static final class SlowPingRequestProto extends + com.google.protobuf.GeneratedMessage + implements SlowPingRequestProtoOrBuilder { + // Use SlowPingRequestProto.newBuilder() to construct. + private SlowPingRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private SlowPingRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final SlowPingRequestProto defaultInstance; + public static SlowPingRequestProto getDefaultInstance() { + return defaultInstance; + } + + public SlowPingRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private SlowPingRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + shouldSlow_ = input.readBool(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + 
e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SlowPingRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SlowPingRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public SlowPingRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new SlowPingRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required bool shouldSlow = 1; + public static final int SHOULDSLOW_FIELD_NUMBER = 1; + private boolean shouldSlow_; + /** + * required bool shouldSlow = 1; + */ + public boolean hasShouldSlow() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required bool shouldSlow = 1; + */ + public boolean getShouldSlow() { + return shouldSlow_; + } + + private void initFields() { + shouldSlow_ = false; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasShouldSlow()) { + memoizedIsInitialized = 0; + return false; + } + 
memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBool(1, shouldSlow_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBoolSize(1, shouldSlow_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto) obj; + + boolean result = true; + result = result && (hasShouldSlow() == other.hasShouldSlow()); + if (hasShouldSlow()) { + result = result && (getShouldSlow() + == other.getShouldSlow()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasShouldSlow()) { + hash = (37 * hash) + SHOULDSLOW_FIELD_NUMBER; + hash = (53 * hash) + hashBoolean(getShouldSlow()); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + 
memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { 
+ return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.SlowPingRequestProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SlowPingRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SlowPingRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.class, 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + shouldSlow_ = false; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SlowPingRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.shouldSlow_ = shouldSlow_; + result.bitField0_ = to_bitField0_; + 
onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.getDefaultInstance()) return this; + if (other.hasShouldSlow()) { + setShouldSlow(other.getShouldSlow()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasShouldSlow()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required bool shouldSlow = 1; + private boolean shouldSlow_ ; + /** + * required bool shouldSlow = 1; + */ + public boolean hasShouldSlow() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required bool shouldSlow = 1; + */ + public boolean getShouldSlow() { + return shouldSlow_; + } + /** + * required bool shouldSlow = 1; + */ + public Builder setShouldSlow(boolean value) { + bitField0_ |= 0x00000001; + shouldSlow_ = value; + onChanged(); + return this; + } + /** + * required 
bool shouldSlow = 1; + */ + public Builder clearShouldSlow() { + bitField0_ = (bitField0_ & ~0x00000001); + shouldSlow_ = false; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.SlowPingRequestProto) + } + + static { + defaultInstance = new SlowPingRequestProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.SlowPingRequestProto) + } + + public interface EchoRequestProto2OrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // repeated string message = 1; + /** + * repeated string message = 1; + */ + java.util.List + getMessageList(); + /** + * repeated string message = 1; + */ + int getMessageCount(); + /** + * repeated string message = 1; + */ + java.lang.String getMessage(int index); + /** + * repeated string message = 1; + */ + com.google.protobuf.ByteString + getMessageBytes(int index); + } + /** + * Protobuf type {@code hadoop.common.EchoRequestProto2} + */ + public static final class EchoRequestProto2 extends + com.google.protobuf.GeneratedMessage + implements EchoRequestProto2OrBuilder { + // Use EchoRequestProto2.newBuilder() to construct. 
+ private EchoRequestProto2(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private EchoRequestProto2(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final EchoRequestProto2 defaultInstance; + public static EchoRequestProto2 getDefaultInstance() { + return defaultInstance; + } + + public EchoRequestProto2 getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private EchoRequestProto2( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new com.google.protobuf.LazyStringArrayList(); + mutable_bitField0_ |= 0x00000001; + } + message_.add(input.readBytes()); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new com.google.protobuf.UnmodifiableLazyStringList(message_); + } + this.unknownFields = 
unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public EchoRequestProto2 parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new EchoRequestProto2(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + // repeated string message = 1; + public static final int MESSAGE_FIELD_NUMBER = 1; + private com.google.protobuf.LazyStringList message_; + /** + * repeated string message = 1; + */ + public java.util.List + getMessageList() { + return message_; + } + /** + * repeated string message = 1; + */ + public int getMessageCount() { + return message_.size(); + } + /** + * repeated string message = 1; + */ + public java.lang.String getMessage(int index) { + return message_.get(index); + } + /** + * repeated string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes(int index) { + return message_.getByteString(index); + } + + private void initFields() { + message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + } + private byte memoizedIsInitialized = -1; + public final boolean 
isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + for (int i = 0; i < message_.size(); i++) { + output.writeBytes(1, message_.getByteString(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + { + int dataSize = 0; + for (int i = 0; i < message_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeBytesSizeNoTag(message_.getByteString(i)); + } + size += dataSize; + size += 1 * getMessageList().size(); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2) obj; + + boolean result = true; + result = result && getMessageList() + .equals(other.getMessageList()); + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (getMessageCount() > 0) { + hash = (37 * hash) 
+ MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessageList().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseDelimitedFrom( + 
java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.EchoRequestProto2} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2OrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoRequestProto2_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2(this); + int from_bitField0_ = bitField0_; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new 
com.google.protobuf.UnmodifiableLazyStringList( + message_); + bitField0_ = (bitField0_ & ~0x00000001); + } + result.message_ = message_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.getDefaultInstance()) return this; + if (!other.message_.isEmpty()) { + if (message_.isEmpty()) { + message_ = other.message_; + bitField0_ = (bitField0_ & ~0x00000001); + } else { + ensureMessageIsMutable(); + message_.addAll(other.message_); + } + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // repeated string message = 1; + private com.google.protobuf.LazyStringList message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + private void ensureMessageIsMutable() { + if (!((bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new 
com.google.protobuf.LazyStringArrayList(message_); + bitField0_ |= 0x00000001; + } + } + /** + * repeated string message = 1; + */ + public java.util.List + getMessageList() { + return java.util.Collections.unmodifiableList(message_); + } + /** + * repeated string message = 1; + */ + public int getMessageCount() { + return message_.size(); + } + /** + * repeated string message = 1; + */ + public java.lang.String getMessage(int index) { + return message_.get(index); + } + /** + * repeated string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes(int index) { + return message_.getByteString(index); + } + /** + * repeated string message = 1; + */ + public Builder setMessage( + int index, java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessageIsMutable(); + message_.set(index, value); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder addMessage( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessageIsMutable(); + message_.add(value); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder addAllMessage( + java.lang.Iterable values) { + ensureMessageIsMutable(); + super.addAll(values, message_); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder clearMessage() { + message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + bitField0_ = (bitField0_ & ~0x00000001); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder addMessageBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessageIsMutable(); + message_.add(value); + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.EchoRequestProto2) + } + + static { + defaultInstance = new EchoRequestProto2(true); + 
defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.EchoRequestProto2) + } + + public interface EchoResponseProto2OrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // repeated string message = 1; + /** + * repeated string message = 1; + */ + java.util.List + getMessageList(); + /** + * repeated string message = 1; + */ + int getMessageCount(); + /** + * repeated string message = 1; + */ + java.lang.String getMessage(int index); + /** + * repeated string message = 1; + */ + com.google.protobuf.ByteString + getMessageBytes(int index); + } + /** + * Protobuf type {@code hadoop.common.EchoResponseProto2} + */ + public static final class EchoResponseProto2 extends + com.google.protobuf.GeneratedMessage + implements EchoResponseProto2OrBuilder { + // Use EchoResponseProto2.newBuilder() to construct. + private EchoResponseProto2(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private EchoResponseProto2(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final EchoResponseProto2 defaultInstance; + public static EchoResponseProto2 getDefaultInstance() { + return defaultInstance; + } + + public EchoResponseProto2 getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private EchoResponseProto2( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int 
tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new com.google.protobuf.LazyStringArrayList(); + mutable_bitField0_ |= 0x00000001; + } + message_.add(input.readBytes()); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new com.google.protobuf.UnmodifiableLazyStringList(message_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public EchoResponseProto2 parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new EchoResponseProto2(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser 
getParserForType() { + return PARSER; + } + + // repeated string message = 1; + public static final int MESSAGE_FIELD_NUMBER = 1; + private com.google.protobuf.LazyStringList message_; + /** + * repeated string message = 1; + */ + public java.util.List + getMessageList() { + return message_; + } + /** + * repeated string message = 1; + */ + public int getMessageCount() { + return message_.size(); + } + /** + * repeated string message = 1; + */ + public java.lang.String getMessage(int index) { + return message_.get(index); + } + /** + * repeated string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes(int index) { + return message_.getByteString(index); + } + + private void initFields() { + message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + for (int i = 0; i < message_.size(); i++) { + output.writeBytes(1, message_.getByteString(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + { + int dataSize = 0; + for (int i = 0; i < message_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeBytesSizeNoTag(message_.getByteString(i)); + } + size += dataSize; + size += 1 * getMessageList().size(); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + 
@java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2) obj; + + boolean result = true; + result = result && getMessageList() + .equals(other.getMessageList()); + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (getMessageCount() > 0) { + hash = (37 * hash) + MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessageList().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws 
com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + 
com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.EchoResponseProto2} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2OrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_EchoResponseProto2_descriptor; + } + + public 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2(this); + int from_bitField0_ = bitField0_; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new com.google.protobuf.UnmodifiableLazyStringList( + message_); + bitField0_ = (bitField0_ & ~0x00000001); + } + result.message_ = message_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance()) return this; + if (!other.message_.isEmpty()) { + if (message_.isEmpty()) { + message_ = other.message_; + bitField0_ = (bitField0_ & ~0x00000001); + } else { + ensureMessageIsMutable(); + message_.addAll(other.message_); + } + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // repeated string message = 1; + private com.google.protobuf.LazyStringList message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + private void ensureMessageIsMutable() { + if (!((bitField0_ & 0x00000001) == 0x00000001)) { + message_ = new com.google.protobuf.LazyStringArrayList(message_); + bitField0_ |= 0x00000001; + } + } + /** + * repeated string message = 1; + */ + public java.util.List + getMessageList() { + return java.util.Collections.unmodifiableList(message_); + } + /** + * repeated string message = 1; + */ + public int getMessageCount() { + return message_.size(); + } + /** + * repeated string message = 1; + */ + public java.lang.String getMessage(int index) { + return message_.get(index); + } + /** + * repeated string message = 1; + */ + public com.google.protobuf.ByteString + getMessageBytes(int index) { + return message_.getByteString(index); + } + /** + * repeated string message = 1; + */ + public Builder setMessage( + int index, java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessageIsMutable(); + message_.set(index, value); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder addMessage( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessageIsMutable(); + message_.add(value); + onChanged(); + return this; + } + /** + * repeated 
string message = 1; + */ + public Builder addAllMessage( + java.lang.Iterable values) { + ensureMessageIsMutable(); + super.addAll(values, message_); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder clearMessage() { + message_ = com.google.protobuf.LazyStringArrayList.EMPTY; + bitField0_ = (bitField0_ & ~0x00000001); + onChanged(); + return this; + } + /** + * repeated string message = 1; + */ + public Builder addMessageBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessageIsMutable(); + message_.add(value); + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.EchoResponseProto2) + } + + static { + defaultInstance = new EchoResponseProto2(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.EchoResponseProto2) + } + + public interface AddRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required int32 param1 = 1; + /** + * required int32 param1 = 1; + */ + boolean hasParam1(); + /** + * required int32 param1 = 1; + */ + int getParam1(); + + // required int32 param2 = 2; + /** + * required int32 param2 = 2; + */ + boolean hasParam2(); + /** + * required int32 param2 = 2; + */ + int getParam2(); + } + /** + * Protobuf type {@code hadoop.common.AddRequestProto} + */ + public static final class AddRequestProto extends + com.google.protobuf.GeneratedMessage + implements AddRequestProtoOrBuilder { + // Use AddRequestProto.newBuilder() to construct. 
+ private AddRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private AddRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final AddRequestProto defaultInstance; + public static AddRequestProto getDefaultInstance() { + return defaultInstance; + } + + public AddRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private AddRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + param1_ = input.readInt32(); + break; + } + case 16: { + bitField0_ |= 0x00000002; + param2_ = input.readInt32(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public AddRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new AddRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required int32 param1 = 1; + public static final int PARAM1_FIELD_NUMBER = 1; + private int param1_; + /** + * required int32 param1 = 1; + */ + public boolean hasParam1() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required int32 param1 = 1; + */ + public int getParam1() { + return param1_; + } + + // required int32 param2 = 2; + public static final int PARAM2_FIELD_NUMBER = 2; + private int param2_; + /** + * required int32 param2 = 2; + */ + public boolean hasParam2() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * required int32 param2 = 2; + */ + public int getParam2() { + return param2_; + } + + private void initFields() { + param1_ = 0; + param2_ = 0; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasParam1()) { + memoizedIsInitialized = 0; + 
return false; + } + if (!hasParam2()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeInt32(1, param1_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeInt32(2, param2_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(1, param1_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(2, param2_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto) obj; + + boolean result = true; + result = result && (hasParam1() == other.hasParam1()); + if (hasParam1()) { + result = result && (getParam1() + == other.getParam1()); + } + result = result && (hasParam2() == other.hasParam2()); + if (hasParam2()) { + result = result && (getParam2() + == other.getParam2()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + 
} + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasParam1()) { + hash = (37 * hash) + PARAM1_FIELD_NUMBER; + hash = (53 * hash) + getParam1(); + } + if (hasParam2()) { + hash = (37 * hash) + PARAM2_FIELD_NUMBER; + hash = (53 * hash) + getParam2(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, 
extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.AddRequestProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto_descriptor; + } + 
+ protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + param1_ = 0; + bitField0_ = (bitField0_ & ~0x00000001); + param2_ = 0; + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto buildPartial() { + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.param1_ = param1_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.param2_ = param2_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.getDefaultInstance()) return this; + if (other.hasParam1()) { + setParam1(other.getParam1()); + } + if (other.hasParam2()) { + setParam2(other.getParam2()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasParam1()) { + + return false; + } + if (!hasParam2()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + 
return this; + } + private int bitField0_; + + // required int32 param1 = 1; + private int param1_ ; + /** + * required int32 param1 = 1; + */ + public boolean hasParam1() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required int32 param1 = 1; + */ + public int getParam1() { + return param1_; + } + /** + * required int32 param1 = 1; + */ + public Builder setParam1(int value) { + bitField0_ |= 0x00000001; + param1_ = value; + onChanged(); + return this; + } + /** + * required int32 param1 = 1; + */ + public Builder clearParam1() { + bitField0_ = (bitField0_ & ~0x00000001); + param1_ = 0; + onChanged(); + return this; + } + + // required int32 param2 = 2; + private int param2_ ; + /** + * required int32 param2 = 2; + */ + public boolean hasParam2() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * required int32 param2 = 2; + */ + public int getParam2() { + return param2_; + } + /** + * required int32 param2 = 2; + */ + public Builder setParam2(int value) { + bitField0_ |= 0x00000002; + param2_ = value; + onChanged(); + return this; + } + /** + * required int32 param2 = 2; + */ + public Builder clearParam2() { + bitField0_ = (bitField0_ & ~0x00000002); + param2_ = 0; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.AddRequestProto) + } + + static { + defaultInstance = new AddRequestProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.AddRequestProto) + } + + public interface AddRequestProto2OrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // repeated int32 params = 1; + /** + * repeated int32 params = 1; + */ + java.util.List getParamsList(); + /** + * repeated int32 params = 1; + */ + int getParamsCount(); + /** + * repeated int32 params = 1; + */ + int getParams(int index); + } + /** + * Protobuf type {@code hadoop.common.AddRequestProto2} + */ + public static final class AddRequestProto2 extends + 
com.google.protobuf.GeneratedMessage + implements AddRequestProto2OrBuilder { + // Use AddRequestProto2.newBuilder() to construct. + private AddRequestProto2(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private AddRequestProto2(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final AddRequestProto2 defaultInstance; + public static AddRequestProto2 getDefaultInstance() { + return defaultInstance; + } + + public AddRequestProto2 getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private AddRequestProto2( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + params_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + params_.add(input.readInt32()); + break; + } + case 10: { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001) && input.getBytesUntilLimit() > 0) { + params_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + while (input.getBytesUntilLimit() > 0) { + params_.add(input.readInt32()); + } + 
input.popLimit(limit); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + params_ = java.util.Collections.unmodifiableList(params_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public AddRequestProto2 parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new AddRequestProto2(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + // repeated int32 params = 1; + public static final int PARAMS_FIELD_NUMBER = 1; + private java.util.List params_; + /** + * repeated int32 params = 1; + */ + public java.util.List + getParamsList() { + return params_; + } + /** + * repeated int32 params = 1; + */ + public int getParamsCount() { + return params_.size(); + } + /** + * repeated int32 params = 1; + */ 
+ public int getParams(int index) { + return params_.get(index); + } + + private void initFields() { + params_ = java.util.Collections.emptyList(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + for (int i = 0; i < params_.size(); i++) { + output.writeInt32(1, params_.get(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + { + int dataSize = 0; + for (int i = 0; i < params_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeInt32SizeNoTag(params_.get(i)); + } + size += dataSize; + size += 1 * getParamsList().size(); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2) obj; + + boolean result = true; + result = result && getParamsList() + .equals(other.getParamsList()); + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { 
+ if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (getParamsCount() > 0) { + hash = (37 * hash) + PARAMS_FIELD_NUMBER; + hash = (53 * hash) + getParamsList().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseDelimitedFrom(java.io.InputStream input) + throws 
java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.AddRequestProto2} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2OrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + params_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddRequestProto2_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2(this); + int from_bitField0_ 
= bitField0_; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + params_ = java.util.Collections.unmodifiableList(params_); + bitField0_ = (bitField0_ & ~0x00000001); + } + result.params_ = params_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.getDefaultInstance()) return this; + if (!other.params_.isEmpty()) { + if (params_.isEmpty()) { + params_ = other.params_; + bitField0_ = (bitField0_ & ~0x00000001); + } else { + ensureParamsIsMutable(); + params_.addAll(other.params_); + } + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // repeated int32 params = 1; + private java.util.List params_ = java.util.Collections.emptyList(); + private void ensureParamsIsMutable() { + if (!((bitField0_ & 0x00000001) == 0x00000001)) { + params_ = new 
java.util.ArrayList(params_); + bitField0_ |= 0x00000001; + } + } + /** + * repeated int32 params = 1; + */ + public java.util.List + getParamsList() { + return java.util.Collections.unmodifiableList(params_); + } + /** + * repeated int32 params = 1; + */ + public int getParamsCount() { + return params_.size(); + } + /** + * repeated int32 params = 1; + */ + public int getParams(int index) { + return params_.get(index); + } + /** + * repeated int32 params = 1; + */ + public Builder setParams( + int index, int value) { + ensureParamsIsMutable(); + params_.set(index, value); + onChanged(); + return this; + } + /** + * repeated int32 params = 1; + */ + public Builder addParams(int value) { + ensureParamsIsMutable(); + params_.add(value); + onChanged(); + return this; + } + /** + * repeated int32 params = 1; + */ + public Builder addAllParams( + java.lang.Iterable values) { + ensureParamsIsMutable(); + super.addAll(values, params_); + onChanged(); + return this; + } + /** + * repeated int32 params = 1; + */ + public Builder clearParams() { + params_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.AddRequestProto2) + } + + static { + defaultInstance = new AddRequestProto2(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.AddRequestProto2) + } + + public interface AddResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required int32 result = 1; + /** + * required int32 result = 1; + */ + boolean hasResult(); + /** + * required int32 result = 1; + */ + int getResult(); + } + /** + * Protobuf type {@code hadoop.common.AddResponseProto} + */ + public static final class AddResponseProto extends + com.google.protobuf.GeneratedMessage + implements AddResponseProtoOrBuilder { + // Use AddResponseProto.newBuilder() to construct. 
+ private AddResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private AddResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final AddResponseProto defaultInstance; + public static AddResponseProto getDefaultInstance() { + return defaultInstance; + } + + public AddResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private AddResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + result_ = input.readInt32(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddResponseProto_descriptor; + } + + protected 
com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public AddResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new AddResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required int32 result = 1; + public static final int RESULT_FIELD_NUMBER = 1; + private int result_; + /** + * required int32 result = 1; + */ + public boolean hasResult() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required int32 result = 1; + */ + public int getResult() { + return result_; + } + + private void initFields() { + result_ = 0; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasResult()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeInt32(1, result_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return 
size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(1, result_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto) obj; + + boolean result = true; + result = result && (hasResult() == other.hasResult()); + if (hasResult()) { + result = result && (getResult() + == other.getResult()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasResult()) { + hash = (37 * hash) + RESULT_FIELD_NUMBER; + hash = (53 * hash) + getResult(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws 
com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + 
return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.AddResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + 
super.clear(); + result_ = 0; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AddResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.result_ = result_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance()) return this; + if (other.hasResult()) { + setResult(other.getResult()); + } + 
this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasResult()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required int32 result = 1; + private int result_ ; + /** + * required int32 result = 1; + */ + public boolean hasResult() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required int32 result = 1; + */ + public int getResult() { + return result_; + } + /** + * required int32 result = 1; + */ + public Builder setResult(int value) { + bitField0_ |= 0x00000001; + result_ = value; + onChanged(); + return this; + } + /** + * required int32 result = 1; + */ + public Builder clearResult() { + bitField0_ = (bitField0_ & ~0x00000001); + result_ = 0; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.AddResponseProto) + } + + static { + defaultInstance = new AddResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.AddResponseProto) + } + + public interface ExchangeRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // repeated int32 values = 1; + /** + * repeated int32 values = 1; + */ + java.util.List getValuesList(); + /** + * repeated int32 values = 1; + */ + int getValuesCount(); + /** + * repeated int32 
values = 1; + */ + int getValues(int index); + } + /** + * Protobuf type {@code hadoop.common.ExchangeRequestProto} + */ + public static final class ExchangeRequestProto extends + com.google.protobuf.GeneratedMessage + implements ExchangeRequestProtoOrBuilder { + // Use ExchangeRequestProto.newBuilder() to construct. + private ExchangeRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private ExchangeRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final ExchangeRequestProto defaultInstance; + public static ExchangeRequestProto getDefaultInstance() { + return defaultInstance; + } + + public ExchangeRequestProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private ExchangeRequestProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + values_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + values_.add(input.readInt32()); + break; + } + case 10: { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000001) 
== 0x00000001) && input.getBytesUntilLimit() > 0) { + values_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + while (input.getBytesUntilLimit() > 0) { + values_.add(input.readInt32()); + } + input.popLimit(limit); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + values_ = java.util.Collections.unmodifiableList(values_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public ExchangeRequestProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new ExchangeRequestProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + // repeated int32 values = 1; + public static final int VALUES_FIELD_NUMBER = 1; + private java.util.List values_; + /** + * 
repeated int32 values = 1; + */ + public java.util.List + getValuesList() { + return values_; + } + /** + * repeated int32 values = 1; + */ + public int getValuesCount() { + return values_.size(); + } + /** + * repeated int32 values = 1; + */ + public int getValues(int index) { + return values_.get(index); + } + + private void initFields() { + values_ = java.util.Collections.emptyList(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + for (int i = 0; i < values_.size(); i++) { + output.writeInt32(1, values_.get(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + { + int dataSize = 0; + for (int i = 0; i < values_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeInt32SizeNoTag(values_.get(i)); + } + size += dataSize; + size += 1 * getValuesList().size(); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto) obj; + + boolean result = true; 
+ result = result && getValuesList() + .equals(other.getValuesList()); + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (getValuesCount() > 0) { + hash = (37 * hash) + VALUES_FIELD_NUMBER; + hash = (53 * hash) + getValuesList().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom( + java.io.InputStream input, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.ExchangeRequestProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProtoOrBuilder { + public static final 
com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeRequestProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeRequestProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + values_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeRequestProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto result = buildPartial(); + if (!result.isInitialized()) { + 
throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto(this); + int from_bitField0_ = bitField0_; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + values_ = java.util.Collections.unmodifiableList(values_); + bitField0_ = (bitField0_ & ~0x00000001); + } + result.values_ = values_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.getDefaultInstance()) return this; + if (!other.values_.isEmpty()) { + if (values_.isEmpty()) { + values_ = other.values_; + bitField0_ = (bitField0_ & ~0x00000001); + } else { + ensureValuesIsMutable(); + values_.addAll(other.values_); + } + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto) 
e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // repeated int32 values = 1; + private java.util.List values_ = java.util.Collections.emptyList(); + private void ensureValuesIsMutable() { + if (!((bitField0_ & 0x00000001) == 0x00000001)) { + values_ = new java.util.ArrayList(values_); + bitField0_ |= 0x00000001; + } + } + /** + * repeated int32 values = 1; + */ + public java.util.List + getValuesList() { + return java.util.Collections.unmodifiableList(values_); + } + /** + * repeated int32 values = 1; + */ + public int getValuesCount() { + return values_.size(); + } + /** + * repeated int32 values = 1; + */ + public int getValues(int index) { + return values_.get(index); + } + /** + * repeated int32 values = 1; + */ + public Builder setValues( + int index, int value) { + ensureValuesIsMutable(); + values_.set(index, value); + onChanged(); + return this; + } + /** + * repeated int32 values = 1; + */ + public Builder addValues(int value) { + ensureValuesIsMutable(); + values_.add(value); + onChanged(); + return this; + } + /** + * repeated int32 values = 1; + */ + public Builder addAllValues( + java.lang.Iterable values) { + ensureValuesIsMutable(); + super.addAll(values, values_); + onChanged(); + return this; + } + /** + * repeated int32 values = 1; + */ + public Builder clearValues() { + values_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.ExchangeRequestProto) + } + + static { + defaultInstance = new ExchangeRequestProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.ExchangeRequestProto) + } + + public interface ExchangeResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // repeated int32 values = 1; + /** + * repeated int32 values = 1; + */ 
+ java.util.List getValuesList(); + /** + * repeated int32 values = 1; + */ + int getValuesCount(); + /** + * repeated int32 values = 1; + */ + int getValues(int index); + } + /** + * Protobuf type {@code hadoop.common.ExchangeResponseProto} + */ + public static final class ExchangeResponseProto extends + com.google.protobuf.GeneratedMessage + implements ExchangeResponseProtoOrBuilder { + // Use ExchangeResponseProto.newBuilder() to construct. + private ExchangeResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private ExchangeResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final ExchangeResponseProto defaultInstance; + public static ExchangeResponseProto getDefaultInstance() { + return defaultInstance; + } + + public ExchangeResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private ExchangeResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + values_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + values_.add(input.readInt32()); + break; + } 
+ case 10: { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001) && input.getBytesUntilLimit() > 0) { + values_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000001; + } + while (input.getBytesUntilLimit() > 0) { + values_.add(input.readInt32()); + } + input.popLimit(limit); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + values_ = java.util.Collections.unmodifiableList(values_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public ExchangeResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new ExchangeResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return 
PARSER; + } + + // repeated int32 values = 1; + public static final int VALUES_FIELD_NUMBER = 1; + private java.util.List values_; + /** + * repeated int32 values = 1; + */ + public java.util.List + getValuesList() { + return values_; + } + /** + * repeated int32 values = 1; + */ + public int getValuesCount() { + return values_.size(); + } + /** + * repeated int32 values = 1; + */ + public int getValues(int index) { + return values_.get(index); + } + + private void initFields() { + values_ = java.util.Collections.emptyList(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + for (int i = 0; i < values_.size(); i++) { + output.writeInt32(1, values_.get(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + { + int dataSize = 0; + for (int i = 0; i < values_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeInt32SizeNoTag(values_.get(i)); + } + size += dataSize; + size += 1 * getValuesList().size(); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto)) { + return super.equals(obj); + } + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto) obj; + + boolean result = true; + result = result && getValuesList() + .equals(other.getValuesList()); + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (getValuesCount() > 0) { + hash = (37 * hash) + VALUES_FIELD_NUMBER; + hash = (53 * hash) + getValuesList().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom(java.io.InputStream input) + throws 
java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.ExchangeResponseProto} + */ + public static final class Builder 
extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + values_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_ExchangeResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance(); + } + + public 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto(this); + int from_bitField0_ = bitField0_; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + values_ = java.util.Collections.unmodifiableList(values_); + bitField0_ = (bitField0_ & ~0x00000001); + } + result.values_ = values_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance()) return this; + if (!other.values_.isEmpty()) { + if (values_.isEmpty()) { + values_ = other.values_; + bitField0_ = (bitField0_ & ~0x00000001); + } else { + ensureValuesIsMutable(); + values_.addAll(other.values_); + } + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto parsedMessage = null; + try { + parsedMessage = 
PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // repeated int32 values = 1; + private java.util.List values_ = java.util.Collections.emptyList(); + private void ensureValuesIsMutable() { + if (!((bitField0_ & 0x00000001) == 0x00000001)) { + values_ = new java.util.ArrayList(values_); + bitField0_ |= 0x00000001; + } + } + /** + * repeated int32 values = 1; + */ + public java.util.List + getValuesList() { + return java.util.Collections.unmodifiableList(values_); + } + /** + * repeated int32 values = 1; + */ + public int getValuesCount() { + return values_.size(); + } + /** + * repeated int32 values = 1; + */ + public int getValues(int index) { + return values_.get(index); + } + /** + * repeated int32 values = 1; + */ + public Builder setValues( + int index, int value) { + ensureValuesIsMutable(); + values_.set(index, value); + onChanged(); + return this; + } + /** + * repeated int32 values = 1; + */ + public Builder addValues(int value) { + ensureValuesIsMutable(); + values_.add(value); + onChanged(); + return this; + } + /** + * repeated int32 values = 1; + */ + public Builder addAllValues( + java.lang.Iterable values) { + ensureValuesIsMutable(); + super.addAll(values, values_); + onChanged(); + return this; + } + /** + * repeated int32 values = 1; + */ + public Builder clearValues() { + values_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000001); + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.ExchangeResponseProto) + } + + static { + defaultInstance = new ExchangeResponseProto(true); + defaultInstance.initFields(); + } + + // 
@@protoc_insertion_point(class_scope:hadoop.common.ExchangeResponseProto) + } + + public interface AuthMethodResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required int32 code = 1; + /** + * required int32 code = 1; + */ + boolean hasCode(); + /** + * required int32 code = 1; + */ + int getCode(); + + // required string mechanismName = 2; + /** + * required string mechanismName = 2; + */ + boolean hasMechanismName(); + /** + * required string mechanismName = 2; + */ + java.lang.String getMechanismName(); + /** + * required string mechanismName = 2; + */ + com.google.protobuf.ByteString + getMechanismNameBytes(); + } + /** + * Protobuf type {@code hadoop.common.AuthMethodResponseProto} + */ + public static final class AuthMethodResponseProto extends + com.google.protobuf.GeneratedMessage + implements AuthMethodResponseProtoOrBuilder { + // Use AuthMethodResponseProto.newBuilder() to construct. + private AuthMethodResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private AuthMethodResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final AuthMethodResponseProto defaultInstance; + public static AuthMethodResponseProto getDefaultInstance() { + return defaultInstance; + } + + public AuthMethodResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private AuthMethodResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = 
+ com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + code_ = input.readInt32(); + break; + } + case 18: { + bitField0_ |= 0x00000002; + mechanismName_ = input.readBytes(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AuthMethodResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AuthMethodResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public AuthMethodResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new AuthMethodResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private 
int bitField0_; + // required int32 code = 1; + public static final int CODE_FIELD_NUMBER = 1; + private int code_; + /** + * required int32 code = 1; + */ + public boolean hasCode() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required int32 code = 1; + */ + public int getCode() { + return code_; + } + + // required string mechanismName = 2; + public static final int MECHANISMNAME_FIELD_NUMBER = 2; + private java.lang.Object mechanismName_; + /** + * required string mechanismName = 2; + */ + public boolean hasMechanismName() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * required string mechanismName = 2; + */ + public java.lang.String getMechanismName() { + java.lang.Object ref = mechanismName_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + mechanismName_ = s; + } + return s; + } + } + /** + * required string mechanismName = 2; + */ + public com.google.protobuf.ByteString + getMechanismNameBytes() { + java.lang.Object ref = mechanismName_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + mechanismName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + code_ = 0; + mechanismName_ = ""; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasCode()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasMechanismName()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + 
getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeInt32(1, code_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBytes(2, getMechanismNameBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(1, code_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(2, getMechanismNameBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto) obj; + + boolean result = true; + result = result && (hasCode() == other.hasCode()); + if (hasCode()) { + result = result && (getCode() + == other.getCode()); + } + result = result && (hasMechanismName() == other.hasMechanismName()); + if (hasMechanismName()) { + result = result && getMechanismName() + .equals(other.getMechanismName()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int 
hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasCode()) { + hash = (37 * hash) + CODE_FIELD_NUMBER; + hash = (53 * hash) + getCode(); + } + if (hasMechanismName()) { + hash = (37 * hash) + MECHANISMNAME_FIELD_NUMBER; + hash = (53 * hash) + getMechanismName().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.AuthMethodResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AuthMethodResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AuthMethodResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + code_ = 0; + bitField0_ = (bitField0_ & ~0x00000001); + mechanismName_ = ""; + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_AuthMethodResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw 
newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.code_ = code_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.mechanismName_ = mechanismName_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance()) return this; + if (other.hasCode()) { + setCode(other.getCode()); + } + if (other.hasMechanismName()) { + bitField0_ |= 0x00000002; + mechanismName_ = other.mechanismName_; + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasCode()) { + + return false; + } + if (!hasMechanismName()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, 
extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required int32 code = 1; + private int code_ ; + /** + * required int32 code = 1; + */ + public boolean hasCode() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required int32 code = 1; + */ + public int getCode() { + return code_; + } + /** + * required int32 code = 1; + */ + public Builder setCode(int value) { + bitField0_ |= 0x00000001; + code_ = value; + onChanged(); + return this; + } + /** + * required int32 code = 1; + */ + public Builder clearCode() { + bitField0_ = (bitField0_ & ~0x00000001); + code_ = 0; + onChanged(); + return this; + } + + // required string mechanismName = 2; + private java.lang.Object mechanismName_ = ""; + /** + * required string mechanismName = 2; + */ + public boolean hasMechanismName() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * required string mechanismName = 2; + */ + public java.lang.String getMechanismName() { + java.lang.Object ref = mechanismName_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + mechanismName_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * required string mechanismName = 2; + */ + public com.google.protobuf.ByteString + getMechanismNameBytes() { + java.lang.Object ref = mechanismName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + mechanismName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * required string mechanismName = 2; + */ + public Builder setMechanismName( + 
java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + mechanismName_ = value; + onChanged(); + return this; + } + /** + * required string mechanismName = 2; + */ + public Builder clearMechanismName() { + bitField0_ = (bitField0_ & ~0x00000002); + mechanismName_ = getDefaultInstance().getMechanismName(); + onChanged(); + return this; + } + /** + * required string mechanismName = 2; + */ + public Builder setMechanismNameBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + mechanismName_ = value; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.AuthMethodResponseProto) + } + + static { + defaultInstance = new AuthMethodResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.AuthMethodResponseProto) + } + + public interface UserResponseProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required string user = 1; + /** + * required string user = 1; + */ + boolean hasUser(); + /** + * required string user = 1; + */ + java.lang.String getUser(); + /** + * required string user = 1; + */ + com.google.protobuf.ByteString + getUserBytes(); + } + /** + * Protobuf type {@code hadoop.common.UserResponseProto} + */ + public static final class UserResponseProto extends + com.google.protobuf.GeneratedMessage + implements UserResponseProtoOrBuilder { + // Use UserResponseProto.newBuilder() to construct. 
+ private UserResponseProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private UserResponseProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final UserResponseProto defaultInstance; + public static UserResponseProto getDefaultInstance() { + return defaultInstance; + } + + public UserResponseProto getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private UserResponseProto( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + user_ = input.readBytes(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_UserResponseProto_descriptor; + } + + protected 
com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_UserResponseProto_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public UserResponseProto parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new UserResponseProto(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required string user = 1; + public static final int USER_FIELD_NUMBER = 1; + private java.lang.Object user_; + /** + * required string user = 1; + */ + public boolean hasUser() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string user = 1; + */ + public java.lang.String getUser() { + java.lang.Object ref = user_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + user_ = s; + } + return s; + } + } + /** + * required string user = 1; + */ + public com.google.protobuf.ByteString + getUserBytes() { + java.lang.Object ref = user_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + user_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + private void initFields() { + user_ = ""; + } + private byte 
memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasUser()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getUserBytes()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getUserBytes()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto) obj; + + boolean result = true; + result = result && (hasUser() == other.hasUser()); + if (hasUser()) { + result = result && getUser() + .equals(other.getUser()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + 
getDescriptorForType().hashCode(); + if (hasUser()) { + hash = (37 * hash) + USER_FIELD_NUMBER; + hash = (53 * hash) + getUser().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.UserResponseProto} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProtoOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_UserResponseProto_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_UserResponseProto_fieldAccessorTable + 
.ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + user_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_UserResponseProto_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + 
result.user_ = user_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance()) return this; + if (other.hasUser()) { + bitField0_ |= 0x00000001; + user_ = other.user_; + onChanged(); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasUser()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required string user = 1; + private java.lang.Object user_ = ""; + /** + * required string user = 1; + */ + public boolean hasUser() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required string user = 1; + */ + public java.lang.String getUser() { + java.lang.Object ref = user_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + user_ = s; + 
return s; + } else { + return (java.lang.String) ref; + } + } + /** + * required string user = 1; + */ + public com.google.protobuf.ByteString + getUserBytes() { + java.lang.Object ref = user_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + user_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * required string user = 1; + */ + public Builder setUser( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + user_ = value; + onChanged(); + return this; + } + /** + * required string user = 1; + */ + public Builder clearUser() { + bitField0_ = (bitField0_ & ~0x00000001); + user_ = getDefaultInstance().getUser(); + onChanged(); + return this; + } + /** + * required string user = 1; + */ + public Builder setUserBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + user_ = value; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.UserResponseProto) + } + + static { + defaultInstance = new UserResponseProto(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.UserResponseProto) + } + + public interface SleepRequestProto2OrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional int64 sleep_time = 1; + /** + * optional int64 sleep_time = 1; + */ + boolean hasSleepTime(); + /** + * optional int64 sleep_time = 1; + */ + long getSleepTime(); + } + /** + * Protobuf type {@code hadoop.common.SleepRequestProto2} + */ + public static final class SleepRequestProto2 extends + com.google.protobuf.GeneratedMessage + implements SleepRequestProto2OrBuilder { + // Use SleepRequestProto2.newBuilder() to construct. 
+ private SleepRequestProto2(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private SleepRequestProto2(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final SleepRequestProto2 defaultInstance; + public static SleepRequestProto2 getDefaultInstance() { + return defaultInstance; + } + + public SleepRequestProto2 getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private SleepRequestProto2( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + sleepTime_ = input.readInt64(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto2_descriptor; + } + + protected 
com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public SleepRequestProto2 parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new SleepRequestProto2(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // optional int64 sleep_time = 1; + public static final int SLEEP_TIME_FIELD_NUMBER = 1; + private long sleepTime_; + /** + * optional int64 sleep_time = 1; + */ + public boolean hasSleepTime() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional int64 sleep_time = 1; + */ + public long getSleepTime() { + return sleepTime_; + } + + private void initFields() { + sleepTime_ = 0L; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeInt64(1, sleepTime_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if 
(((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeInt64Size(1, sleepTime_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2)) { + return super.equals(obj); + } + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2) obj; + + boolean result = true; + result = result && (hasSleepTime() == other.hasSleepTime()); + if (hasSleepTime()) { + result = result && (getSleepTime() + == other.getSleepTime()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasSleepTime()) { + hash = (37 * hash) + SLEEP_TIME_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getSleepTime()); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws 
com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws 
java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.SleepRequestProto2} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2OrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new 
Builder(); + } + + public Builder clear() { + super.clear(); + sleepTime_ = 0L; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepRequestProto2_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.sleepTime_ = sleepTime_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.getDefaultInstance()) return this; + if (other.hasSleepTime()) { + 
setSleepTime(other.getSleepTime()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // optional int64 sleep_time = 1; + private long sleepTime_ ; + /** + * optional int64 sleep_time = 1; + */ + public boolean hasSleepTime() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional int64 sleep_time = 1; + */ + public long getSleepTime() { + return sleepTime_; + } + /** + * optional int64 sleep_time = 1; + */ + public Builder setSleepTime(long value) { + bitField0_ |= 0x00000001; + sleepTime_ = value; + onChanged(); + return this; + } + /** + * optional int64 sleep_time = 1; + */ + public Builder clearSleepTime() { + bitField0_ = (bitField0_ & ~0x00000001); + sleepTime_ = 0L; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.SleepRequestProto2) + } + + static { + defaultInstance = new SleepRequestProto2(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.SleepRequestProto2) + } + + public interface SleepResponseProto2OrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional int64 receive_time = 1; + /** + * optional int64 receive_time = 1; + */ + boolean hasReceiveTime(); + /** + * optional int64 
receive_time = 1; + */ + long getReceiveTime(); + + // optional int64 response_time = 2; + /** + * optional int64 response_time = 2; + */ + boolean hasResponseTime(); + /** + * optional int64 response_time = 2; + */ + long getResponseTime(); + } + /** + * Protobuf type {@code hadoop.common.SleepResponseProto2} + */ + public static final class SleepResponseProto2 extends + com.google.protobuf.GeneratedMessage + implements SleepResponseProto2OrBuilder { + // Use SleepResponseProto2.newBuilder() to construct. + private SleepResponseProto2(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private SleepResponseProto2(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final SleepResponseProto2 defaultInstance; + public static SleepResponseProto2 getDefaultInstance() { + return defaultInstance; + } + + public SleepResponseProto2 getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private SleepResponseProto2( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 8: { + bitField0_ |= 0x00000001; + receiveTime_ = input.readInt64(); + break; + } + case 16: { + bitField0_ |= 0x00000002; + responseTime_ 
= input.readInt64(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public SleepResponseProto2 parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new SleepResponseProto2(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // optional int64 receive_time = 1; + public static final int RECEIVE_TIME_FIELD_NUMBER = 1; + private long receiveTime_; + /** + * optional int64 receive_time = 1; + */ + public boolean hasReceiveTime() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional int64 receive_time = 1; + */ + public long getReceiveTime() { + return receiveTime_; + } + + // optional int64 response_time = 2; + public static final int 
RESPONSE_TIME_FIELD_NUMBER = 2; + private long responseTime_; + /** + * optional int64 response_time = 2; + */ + public boolean hasResponseTime() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional int64 response_time = 2; + */ + public long getResponseTime() { + return responseTime_; + } + + private void initFields() { + receiveTime_ = 0L; + responseTime_ = 0L; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeInt64(1, receiveTime_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeInt64(2, responseTime_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeInt64Size(1, receiveTime_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeInt64Size(2, responseTime_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2)) { + return super.equals(obj); + } + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 other = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2) obj; + + boolean result = true; + result = result && (hasReceiveTime() == other.hasReceiveTime()); + if (hasReceiveTime()) { + result = result && (getReceiveTime() + == other.getReceiveTime()); + } + result = result && (hasResponseTime() == other.hasResponseTime()); + if (hasResponseTime()) { + result = result && (getResponseTime() + == other.getResponseTime()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasReceiveTime()) { + hash = (37 * hash) + RECEIVE_TIME_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getReceiveTime()); + } + if (hasResponseTime()) { + hash = (37 * hash) + RESPONSE_TIME_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getResponseTime()); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 prototype) { + return 
newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code hadoop.common.SleepResponseProto2} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2OrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto2_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto2_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.class, org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.Builder.class); + } + + // Construct using org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + receiveTime_ = 0L; + bitField0_ = (bitField0_ & ~0x00000001); + responseTime_ = 0L; + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public 
com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.internal_static_hadoop_common_SleepResponseProto2_descriptor; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 getDefaultInstanceForType() { + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance(); + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 build() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 buildPartial() { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 result = new org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.receiveTime_ = receiveTime_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.responseTime_ = responseTime_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2) { + return mergeFrom((org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 other) { + if (other == org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance()) return this; + if (other.hasReceiveTime()) { + setReceiveTime(other.getReceiveTime()); + } + if (other.hasResponseTime()) { + 
setResponseTime(other.getResponseTime()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // optional int64 receive_time = 1; + private long receiveTime_ ; + /** + * optional int64 receive_time = 1; + */ + public boolean hasReceiveTime() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional int64 receive_time = 1; + */ + public long getReceiveTime() { + return receiveTime_; + } + /** + * optional int64 receive_time = 1; + */ + public Builder setReceiveTime(long value) { + bitField0_ |= 0x00000001; + receiveTime_ = value; + onChanged(); + return this; + } + /** + * optional int64 receive_time = 1; + */ + public Builder clearReceiveTime() { + bitField0_ = (bitField0_ & ~0x00000001); + receiveTime_ = 0L; + onChanged(); + return this; + } + + // optional int64 response_time = 2; + private long responseTime_ ; + /** + * optional int64 response_time = 2; + */ + public boolean hasResponseTime() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional int64 response_time = 2; + */ + public long getResponseTime() { + return responseTime_; + } + /** + * optional int64 response_time = 2; + */ + public Builder setResponseTime(long value) { + bitField0_ |= 0x00000002; + responseTime_ = value; + 
onChanged(); + return this; + } + /** + * optional int64 response_time = 2; + */ + public Builder clearResponseTime() { + bitField0_ = (bitField0_ & ~0x00000002); + responseTime_ = 0L; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:hadoop.common.SleepResponseProto2) + } + + static { + defaultInstance = new SleepResponseProto2(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:hadoop.common.SleepResponseProto2) + } + + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_EmptyRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_EmptyRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_EmptyResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_EmptyResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_EchoRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_EchoRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_EchoResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_EchoResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_OptRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_OptRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_OptResponseProto_descriptor; + private static + 
com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_OptResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_SleepRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_SleepRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_SleepResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_SleepResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_SlowPingRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_SlowPingRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_EchoRequestProto2_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_EchoRequestProto2_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_EchoResponseProto2_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_EchoResponseProto2_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_AddRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_AddRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_AddRequestProto2_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_AddRequestProto2_fieldAccessorTable; + 
private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_AddResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_AddResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_ExchangeRequestProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_ExchangeRequestProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_ExchangeResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_ExchangeResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_AuthMethodResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_AuthMethodResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_UserResponseProto_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_UserResponseProto_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_SleepRequestProto2_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_SleepRequestProto2_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_hadoop_common_SleepResponseProto2_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_hadoop_common_SleepResponseProto2_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return 
descriptor; + } + private static com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] descriptorData = { + "\n\021test_legacy.proto\022\rhadoop.common\"\023\n\021Em" + + "ptyRequestProto\"\024\n\022EmptyResponseProto\"#\n" + + "\020EchoRequestProto\022\017\n\007message\030\001 \002(\t\"$\n\021Ec" + + "hoResponseProto\022\017\n\007message\030\001 \002(\t\"\"\n\017OptR" + + "equestProto\022\017\n\007message\030\001 \001(\t\"#\n\020OptRespo" + + "nseProto\022\017\n\007message\030\001 \001(\t\")\n\021SleepReques" + + "tProto\022\024\n\014milliSeconds\030\001 \002(\005\"\024\n\022SleepRes" + + "ponseProto\"*\n\024SlowPingRequestProto\022\022\n\nsh" + + "ouldSlow\030\001 \002(\010\"$\n\021EchoRequestProto2\022\017\n\007m" + + "essage\030\001 \003(\t\"%\n\022EchoResponseProto2\022\017\n\007me", + "ssage\030\001 \003(\t\"1\n\017AddRequestProto\022\016\n\006param1" + + "\030\001 \002(\005\022\016\n\006param2\030\002 \002(\005\"\"\n\020AddRequestProt" + + "o2\022\016\n\006params\030\001 \003(\005\"\"\n\020AddResponseProto\022\016" + + "\n\006result\030\001 \002(\005\"&\n\024ExchangeRequestProto\022\016" + + "\n\006values\030\001 \003(\005\"\'\n\025ExchangeResponseProto\022" + + "\016\n\006values\030\001 \003(\005\">\n\027AuthMethodResponsePro" + + "to\022\014\n\004code\030\001 \002(\005\022\025\n\rmechanismName\030\002 \002(\t\"" + + "!\n\021UserResponseProto\022\014\n\004user\030\001 \002(\t\"(\n\022Sl" + + "eepRequestProto2\022\022\n\nsleep_time\030\001 \001(\003\"B\n\023" + + "SleepResponseProto2\022\024\n\014receive_time\030\001 \001(", + "\003\022\025\n\rresponse_time\030\002 \001(\003B5\n\036org.apache.h" + + "adoop.ipc.protobufB\020TestProtosLegacy\240\001\001" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + 
com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_hadoop_common_EmptyRequestProto_descriptor = + getDescriptor().getMessageTypes().get(0); + internal_static_hadoop_common_EmptyRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_EmptyRequestProto_descriptor, + new java.lang.String[] { }); + internal_static_hadoop_common_EmptyResponseProto_descriptor = + getDescriptor().getMessageTypes().get(1); + internal_static_hadoop_common_EmptyResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_EmptyResponseProto_descriptor, + new java.lang.String[] { }); + internal_static_hadoop_common_EchoRequestProto_descriptor = + getDescriptor().getMessageTypes().get(2); + internal_static_hadoop_common_EchoRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_EchoRequestProto_descriptor, + new java.lang.String[] { "Message", }); + internal_static_hadoop_common_EchoResponseProto_descriptor = + getDescriptor().getMessageTypes().get(3); + internal_static_hadoop_common_EchoResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_EchoResponseProto_descriptor, + new java.lang.String[] { "Message", }); + internal_static_hadoop_common_OptRequestProto_descriptor = + getDescriptor().getMessageTypes().get(4); + internal_static_hadoop_common_OptRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_OptRequestProto_descriptor, + new java.lang.String[] { "Message", }); + internal_static_hadoop_common_OptResponseProto_descriptor = + getDescriptor().getMessageTypes().get(5); + internal_static_hadoop_common_OptResponseProto_fieldAccessorTable = new + 
com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_OptResponseProto_descriptor, + new java.lang.String[] { "Message", }); + internal_static_hadoop_common_SleepRequestProto_descriptor = + getDescriptor().getMessageTypes().get(6); + internal_static_hadoop_common_SleepRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_SleepRequestProto_descriptor, + new java.lang.String[] { "MilliSeconds", }); + internal_static_hadoop_common_SleepResponseProto_descriptor = + getDescriptor().getMessageTypes().get(7); + internal_static_hadoop_common_SleepResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_SleepResponseProto_descriptor, + new java.lang.String[] { }); + internal_static_hadoop_common_SlowPingRequestProto_descriptor = + getDescriptor().getMessageTypes().get(8); + internal_static_hadoop_common_SlowPingRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_SlowPingRequestProto_descriptor, + new java.lang.String[] { "ShouldSlow", }); + internal_static_hadoop_common_EchoRequestProto2_descriptor = + getDescriptor().getMessageTypes().get(9); + internal_static_hadoop_common_EchoRequestProto2_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_EchoRequestProto2_descriptor, + new java.lang.String[] { "Message", }); + internal_static_hadoop_common_EchoResponseProto2_descriptor = + getDescriptor().getMessageTypes().get(10); + internal_static_hadoop_common_EchoResponseProto2_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_EchoResponseProto2_descriptor, + new java.lang.String[] { "Message", }); + internal_static_hadoop_common_AddRequestProto_descriptor = + 
getDescriptor().getMessageTypes().get(11); + internal_static_hadoop_common_AddRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_AddRequestProto_descriptor, + new java.lang.String[] { "Param1", "Param2", }); + internal_static_hadoop_common_AddRequestProto2_descriptor = + getDescriptor().getMessageTypes().get(12); + internal_static_hadoop_common_AddRequestProto2_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_AddRequestProto2_descriptor, + new java.lang.String[] { "Params", }); + internal_static_hadoop_common_AddResponseProto_descriptor = + getDescriptor().getMessageTypes().get(13); + internal_static_hadoop_common_AddResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_AddResponseProto_descriptor, + new java.lang.String[] { "Result", }); + internal_static_hadoop_common_ExchangeRequestProto_descriptor = + getDescriptor().getMessageTypes().get(14); + internal_static_hadoop_common_ExchangeRequestProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_ExchangeRequestProto_descriptor, + new java.lang.String[] { "Values", }); + internal_static_hadoop_common_ExchangeResponseProto_descriptor = + getDescriptor().getMessageTypes().get(15); + internal_static_hadoop_common_ExchangeResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_ExchangeResponseProto_descriptor, + new java.lang.String[] { "Values", }); + internal_static_hadoop_common_AuthMethodResponseProto_descriptor = + getDescriptor().getMessageTypes().get(16); + internal_static_hadoop_common_AuthMethodResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + 
internal_static_hadoop_common_AuthMethodResponseProto_descriptor, + new java.lang.String[] { "Code", "MechanismName", }); + internal_static_hadoop_common_UserResponseProto_descriptor = + getDescriptor().getMessageTypes().get(17); + internal_static_hadoop_common_UserResponseProto_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_UserResponseProto_descriptor, + new java.lang.String[] { "User", }); + internal_static_hadoop_common_SleepRequestProto2_descriptor = + getDescriptor().getMessageTypes().get(18); + internal_static_hadoop_common_SleepRequestProto2_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_SleepRequestProto2_descriptor, + new java.lang.String[] { "SleepTime", }); + internal_static_hadoop_common_SleepResponseProto2_descriptor = + getDescriptor().getMessageTypes().get(19); + internal_static_hadoop_common_SleepResponseProto2_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_hadoop_common_SleepResponseProto2_descriptor, + new java.lang.String[] { "ReceiveTime", "ResponseTime", }); + return null; + } + }; + com.google.protobuf.Descriptors.FileDescriptor + .internalBuildGeneratedFileFrom(descriptorData, + new com.google.protobuf.Descriptors.FileDescriptor[] { + }, assigner); + } + + // @@protoc_insertion_point(outer_class_scope) +} diff --git a/hadoop-common-project/hadoop-common/src/test/arm-java/org/apache/hadoop/ipc/protobuf/TestRpcServiceProtosLegacy.java b/hadoop-common-project/hadoop-common/src/test/arm-java/org/apache/hadoop/ipc/protobuf/TestRpcServiceProtosLegacy.java new file mode 100644 index 0000000000000..26cef9c75523c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/arm-java/org/apache/hadoop/ipc/protobuf/TestRpcServiceProtosLegacy.java @@ -0,0 +1,3313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// This class is added to source because a protoc 2.5.0 executable for arm +// is not available to generate the same code. +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: test_rpc_service_legacy.proto + +package org.apache.hadoop.ipc.protobuf; + +public final class TestRpcServiceProtosLegacy { + private TestRpcServiceProtosLegacy() {} + public static void registerAllExtensions( + com.google.protobuf.ExtensionRegistry registry) { + } + /** + * Protobuf service {@code hadoop.common.TestProtobufRpcProto} + * + *
      +   **
      +   * A protobuf service for use in tests
      +   * 
      + */ + public static abstract class TestProtobufRpcProto + implements com.google.protobuf.Service { + protected TestProtobufRpcProto() {} + + public interface Interface { + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.EchoRequestProto) returns (.hadoop.common.EchoResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc error(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void error( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc error2(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void error2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc slowPing(.hadoop.common.SlowPingRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void slowPing( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo2(.hadoop.common.EchoRequestProto2) returns (.hadoop.common.EchoResponseProto2); + */ + public abstract void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 request, + com.google.protobuf.RpcCallback done); + + 
/** + * rpc add(.hadoop.common.AddRequestProto) returns (.hadoop.common.AddResponseProto); + */ + public abstract void add( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc add2(.hadoop.common.AddRequestProto2) returns (.hadoop.common.AddResponseProto); + */ + public abstract void add2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 request, + com.google.protobuf.RpcCallback done); + + /** + * rpc testServerGet(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void testServerGet( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc exchange(.hadoop.common.ExchangeRequestProto) returns (.hadoop.common.ExchangeResponseProto); + */ + public abstract void exchange( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc sleep(.hadoop.common.SleepRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc lockAndSleep(.hadoop.common.SleepRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void lockAndSleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getAuthMethod(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.AuthMethodResponseProto); + */ + public abstract void getAuthMethod( + 
com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getAuthUser(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.UserResponseProto); + */ + public abstract void getAuthUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echoPostponed(.hadoop.common.EchoRequestProto) returns (.hadoop.common.EchoResponseProto); + */ + public abstract void echoPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc sendPostponed(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void sendPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getCurrentUser(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.UserResponseProto); + */ + public abstract void getCurrentUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getServerRemoteUser(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.UserResponseProto); + */ + public abstract void getServerRemoteUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new TestProtobufRpcProto() { + @java.lang.Override + public void ping( + com.google.protobuf.RpcController controller, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.ping(controller, request, done); + } + + @java.lang.Override + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.echo(controller, request, done); + } + + @java.lang.Override + public void error( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.error(controller, request, done); + } + + @java.lang.Override + public void error2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.error2(controller, request, done); + } + + @java.lang.Override + public void slowPing( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.slowPing(controller, request, done); + } + + @java.lang.Override + public void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 request, + com.google.protobuf.RpcCallback done) { + impl.echo2(controller, request, done); + } + + @java.lang.Override + public void add( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.add(controller, request, done); + } + + @java.lang.Override + public void add2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 request, + com.google.protobuf.RpcCallback done) { + impl.add2(controller, request, done); + } + + @java.lang.Override + public void 
testServerGet( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.testServerGet(controller, request, done); + } + + @java.lang.Override + public void exchange( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.exchange(controller, request, done); + } + + @java.lang.Override + public void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.sleep(controller, request, done); + } + + @java.lang.Override + public void lockAndSleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.lockAndSleep(controller, request, done); + } + + @java.lang.Override + public void getAuthMethod( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.getAuthMethod(controller, request, done); + } + + @java.lang.Override + public void getAuthUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.getAuthUser(controller, request, done); + } + + @java.lang.Override + public void echoPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.echoPostponed(controller, request, done); + } + + @java.lang.Override + public void sendPostponed( + com.google.protobuf.RpcController controller, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.sendPostponed(controller, request, done); + } + + @java.lang.Override + public void getCurrentUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.getCurrentUser(controller, request, done); + } + + @java.lang.Override + public void getServerRemoteUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.getServerRemoteUser(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 1: + return impl.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)request); + case 2: + return impl.error(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 3: + return impl.error2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 4: + 
return impl.slowPing(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto)request); + case 5: + return impl.echo2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2)request); + case 6: + return impl.add(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto)request); + case 7: + return impl.add2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2)request); + case 8: + return impl.testServerGet(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 9: + return impl.exchange(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto)request); + case 10: + return impl.sleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)request); + case 11: + return impl.lockAndSleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)request); + case 12: + return impl.getAuthMethod(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 13: + return impl.getAuthUser(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 14: + return impl.echoPostponed(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)request); + case 15: + return impl.sendPostponed(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 16: + return impl.getCurrentUser(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 17: + return impl.getServerRemoteUser(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new 
java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 3: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 4: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.getDefaultInstance(); + case 5: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.getDefaultInstance(); + case 6: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.getDefaultInstance(); + case 7: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.getDefaultInstance(); + case 8: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 9: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.getDefaultInstance(); + case 10: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + case 11: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + case 12: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 13: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 14: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + case 15: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 16: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 17: 
+ return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 3: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 4: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 5: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance(); + case 6: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(); + case 7: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(); + case 8: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 9: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance(); + case 10: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 11: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 12: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance(); + case 13: + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + case 14: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + case 15: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 16: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + case 17: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.EchoRequestProto) returns (.hadoop.common.EchoResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc error(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void error( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc error2(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void error2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc slowPing(.hadoop.common.SlowPingRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void slowPing( + com.google.protobuf.RpcController 
controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo2(.hadoop.common.EchoRequestProto2) returns (.hadoop.common.EchoResponseProto2); + */ + public abstract void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 request, + com.google.protobuf.RpcCallback done); + + /** + * rpc add(.hadoop.common.AddRequestProto) returns (.hadoop.common.AddResponseProto); + */ + public abstract void add( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc add2(.hadoop.common.AddRequestProto2) returns (.hadoop.common.AddResponseProto); + */ + public abstract void add2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 request, + com.google.protobuf.RpcCallback done); + + /** + * rpc testServerGet(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void testServerGet( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc exchange(.hadoop.common.ExchangeRequestProto) returns (.hadoop.common.ExchangeResponseProto); + */ + public abstract void exchange( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc sleep(.hadoop.common.SleepRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc 
lockAndSleep(.hadoop.common.SleepRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void lockAndSleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getAuthMethod(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.AuthMethodResponseProto); + */ + public abstract void getAuthMethod( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getAuthUser(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.UserResponseProto); + */ + public abstract void getAuthUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echoPostponed(.hadoop.common.EchoRequestProto) returns (.hadoop.common.EchoResponseProto); + */ + public abstract void echoPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc sendPostponed(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void sendPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getCurrentUser(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.UserResponseProto); + */ + public abstract void getCurrentUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc getServerRemoteUser(.hadoop.common.EmptyRequestProto) returns 
(.hadoop.common.UserResponseProto); + */ + public abstract void getServerRemoteUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(0); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + switch(method.getIndex()) { + case 0: + this.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 1: + this.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 2: + this.error(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 3: + this.error2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 4: + this.slowPing(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 5: + this.echo2(controller, 
(org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 6: + this.add(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 7: + this.add2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 8: + this.testServerGet(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 9: + this.exchange(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 10: + this.sleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 11: + this.lockAndSleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 12: + this.getAuthMethod(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 13: + this.getAuthUser(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 14: + this.echoPostponed(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 15: + this.sendPostponed(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + 
return; + case 16: + this.getCurrentUser(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 17: + this.getServerRemoteUser(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 3: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 4: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto.getDefaultInstance(); + case 5: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2.getDefaultInstance(); + case 6: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto.getDefaultInstance(); + case 7: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2.getDefaultInstance(); + case 8: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 9: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto.getDefaultInstance(); + case 10: + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + case 11: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + case 12: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 13: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 14: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + case 15: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 16: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 17: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 3: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 4: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 5: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance(); + case 6: + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(); + case 7: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(); + case 8: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 9: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance(); + case 10: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 11: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 12: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance(); + case 13: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + case 14: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + case 15: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 16: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + case 17: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.TestProtobufRpcProto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + 
com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance())); + } + + public void error( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(2), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void error2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(3), + controller, + request, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void slowPing( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(4), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(5), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance())); + } + + public void add( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(6), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance())); + } + + public void add2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(7), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance())); + } + + public void testServerGet( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(8), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void exchange( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(9), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance())); + } + + public void sleep( 
+ com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(10), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void lockAndSleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(11), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void getAuthMethod( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(12), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance())); + } + + public void getAuthUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { 
+ channel.callMethod( + getDescriptor().getMethods().get(13), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance())); + } + + public void echoPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(14), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance())); + } + + public void sendPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(15), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void getCurrentUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(16), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(), + 
com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance())); + } + + public void getServerRemoteUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(17), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance())); + } + } + + public static BlockingInterface newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto error( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto error2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto 
request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto slowPing( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto add( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto add2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto testServerGet( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto exchange( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto lockAndSleep( + com.google.protobuf.RpcController 
controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto getAuthMethod( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getAuthUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto echoPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto sendPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getCurrentUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getServerRemoteUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.BlockingRpcChannel channel; + + public 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto error( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(2), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto error2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(3), + controller, + request, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto slowPing( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SlowPingRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(4), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2 echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto2 request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2) channel.callBlockingMethod( + getDescriptor().getMethods().get(5), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto2.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto add( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(6), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto add2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddRequestProto2 request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto) 
channel.callBlockingMethod( + getDescriptor().getMethods().get(7), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AddResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto testServerGet( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(8), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto exchange( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(9), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.ExchangeResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(10), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto lockAndSleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto 
request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(11), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto getAuthMethod( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(12), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.AuthMethodResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getAuthUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(13), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto echoPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(14), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance()); + } + + + public 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto sendPostponed( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(15), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getCurrentUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(16), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto getServerRemoteUser( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(17), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.UserResponseProto.getDefaultInstance()); + } + + } + + // @@protoc_insertion_point(class_scope:hadoop.common.TestProtobufRpcProto) + } + + /** + * Protobuf service {@code hadoop.common.TestProtobufRpc2Proto} + */ + public static abstract class TestProtobufRpc2Proto + implements com.google.protobuf.Service { + protected TestProtobufRpc2Proto() {} + + public interface Interface { + /** + * rpc ping2(.hadoop.common.EmptyRequestProto) returns 
(.hadoop.common.EmptyResponseProto); + */ + public abstract void ping2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo2(.hadoop.common.EchoRequestProto) returns (.hadoop.common.EchoResponseProto); + */ + public abstract void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc sleep(.hadoop.common.SleepRequestProto) returns (.hadoop.common.SleepResponseProto); + */ + public abstract void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new TestProtobufRpc2Proto() { + @java.lang.Override + public void ping2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.ping2(controller, request, done); + } + + @java.lang.Override + public void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.echo2(controller, request, done); + } + + @java.lang.Override + public void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.sleep(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + 
getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.ping2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 1: + return impl.echo2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)request); + case 2: + return impl.sleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + 
"descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc ping2(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo2(.hadoop.common.EchoRequestProto) returns (.hadoop.common.EchoResponseProto); + */ + public abstract void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc sleep(.hadoop.common.SleepRequestProto) returns (.hadoop.common.SleepResponseProto); + */ + public abstract void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(1); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != 
getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + switch(method.getIndex()) { + case 0: + this.ping2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 1: + this.echo2(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 2: + this.sleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(); + case 2: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.TestProtobufRpc2Proto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void ping2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.class, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance())); + } + + public void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(2), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance())); + } + } + + public static BlockingInterface newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.BlockingRpcChannel channel; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping2( + 
com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto echo2( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EchoResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(2), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto.getDefaultInstance()); + } + + } + + // @@protoc_insertion_point(class_scope:hadoop.common.TestProtobufRpc2Proto) + } + + /** + * Protobuf service {@code hadoop.common.OldProtobufRpcProto} + */ + public static abstract class OldProtobufRpcProto + implements com.google.protobuf.Service { + protected OldProtobufRpcProto() {} + + public interface Interface { + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new OldProtobufRpcProto() { + @java.lang.Override + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.ping(controller, request, done); + } + + @java.lang.Override + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.echo(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 1: + return 
impl.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(2); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + switch(method.getIndex()) { + case 0: + this.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 1: + this.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public 
final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.OldProtobufRpcProto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(1), + controller, + request, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + } + + public static BlockingInterface newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.BlockingRpcChannel channel; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto 
request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + } + + // @@protoc_insertion_point(class_scope:hadoop.common.OldProtobufRpcProto) + } + + /** + * Protobuf service {@code hadoop.common.NewProtobufRpcProto} + */ + public static abstract class NewProtobufRpcProto + implements com.google.protobuf.Service { + protected NewProtobufRpcProto() {} + + public interface Interface { + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.OptRequestProto) returns (.hadoop.common.OptResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto request, + com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new NewProtobufRpcProto() { + @java.lang.Override + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.ping(controller, request, done); + } + + @java.lang.Override + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.echo(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final 
BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 1: + return impl.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.OptRequestProto) returns (.hadoop.common.OptResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(3); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + switch(method.getIndex()) { + case 0: + this.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 1: + this.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto)request, + 
com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.NewProtobufRpcProto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void ping( + com.google.protobuf.RpcController controller, + 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance())); + } + } + + public static BlockingInterface newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + 
private final com.google.protobuf.BlockingRpcChannel channel; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.OptResponseProto.getDefaultInstance()); + } + + } + + // @@protoc_insertion_point(class_scope:hadoop.common.NewProtobufRpcProto) + } + + /** + * Protobuf service {@code hadoop.common.NewerProtobufRpcProto} + */ + public static abstract class NewerProtobufRpcProto + implements com.google.protobuf.Service { + protected NewerProtobufRpcProto() {} + + public interface Interface { + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + 
com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new NewerProtobufRpcProto() { + @java.lang.Override + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.ping(controller, request, done); + } + + @java.lang.Override + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.echo(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + case 1: + return impl.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new 
java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + /** + * rpc echo(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(4); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + 
getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + switch(method.getIndex()) { + case 0: + this.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + case 1: + this.echo(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return 
org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + case 1: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.NewerProtobufRpcProto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + + public void echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + } + + public static BlockingInterface 
newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.BlockingRpcChannel channel; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto echo( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(1), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + } + + // 
@@protoc_insertion_point(class_scope:hadoop.common.NewerProtobufRpcProto) + } + + /** + * Protobuf service {@code hadoop.common.CustomProto} + */ + public static abstract class CustomProto + implements com.google.protobuf.Service { + protected CustomProto() {} + + public interface Interface { + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new CustomProto() { + @java.lang.Override + public void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + impl.ping(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + 
getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc ping(.hadoop.common.EmptyRequestProto) returns (.hadoop.common.EmptyResponseProto); + */ + public abstract void ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(5); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != getDescriptor()) { + throw new 
java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + switch(method.getIndex()) { + case 0: + this.ping(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.CustomProto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void ping( + 
com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request, + com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance())); + } + } + + public static BlockingInterface newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.BlockingRpcChannel channel; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto ping( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.EmptyResponseProto.getDefaultInstance()); + } + + } + + // @@protoc_insertion_point(class_scope:hadoop.common.CustomProto) + } + + /** + * Protobuf service {@code hadoop.common.TestProtobufRpcHandoffProto} + */ + public static abstract class 
TestProtobufRpcHandoffProto + implements com.google.protobuf.Service { + protected TestProtobufRpcHandoffProto() {} + + public interface Interface { + /** + * rpc sleep(.hadoop.common.SleepRequestProto2) returns (.hadoop.common.SleepResponseProto2); + */ + public abstract void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 request, + com.google.protobuf.RpcCallback done); + + } + + public static com.google.protobuf.Service newReflectiveService( + final Interface impl) { + return new TestProtobufRpcHandoffProto() { + @java.lang.Override + public void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 request, + com.google.protobuf.RpcCallback done) { + impl.sleep(controller, request, done); + } + + }; + } + + public static com.google.protobuf.BlockingService + newReflectiveBlockingService(final BlockingInterface impl) { + return new com.google.protobuf.BlockingService() { + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final com.google.protobuf.Message callBlockingMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request) + throws com.google.protobuf.ServiceException { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callBlockingMethod() given method descriptor for " + + "wrong service type."); + } + switch(method.getIndex()) { + case 0: + return impl.sleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2)request); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != 
getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + }; + } + + /** + * rpc sleep(.hadoop.common.SleepRequestProto2) returns (.hadoop.common.SleepResponseProto2); + */ + public abstract void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 request, + com.google.protobuf.RpcCallback done); + + public static final + com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptor() { + return org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.getDescriptor().getServices().get(6); + } + public final com.google.protobuf.Descriptors.ServiceDescriptor + getDescriptorForType() { + return getDescriptor(); + } + + public final void callMethod( + com.google.protobuf.Descriptors.MethodDescriptor method, + com.google.protobuf.RpcController controller, + com.google.protobuf.Message request, + com.google.protobuf.RpcCallback< + com.google.protobuf.Message> done) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.callMethod() given method descriptor for wrong " + + "service type."); + } + 
switch(method.getIndex()) { + case 0: + this.sleep(controller, (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2)request, + com.google.protobuf.RpcUtil.specializeCallback( + done)); + return; + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getRequestPrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getRequestPrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public final com.google.protobuf.Message + getResponsePrototype( + com.google.protobuf.Descriptors.MethodDescriptor method) { + if (method.getService() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "Service.getResponsePrototype() given method " + + "descriptor for wrong service type."); + } + switch(method.getIndex()) { + case 0: + return org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance(); + default: + throw new java.lang.AssertionError("Can't get here."); + } + } + + public static Stub newStub( + com.google.protobuf.RpcChannel channel) { + return new Stub(channel); + } + + public static final class Stub extends org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy.TestProtobufRpcHandoffProto implements Interface { + private Stub(com.google.protobuf.RpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.RpcChannel channel; + + public com.google.protobuf.RpcChannel getChannel() { + return channel; + } + + public void sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 request, + 
com.google.protobuf.RpcCallback done) { + channel.callMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance(), + com.google.protobuf.RpcUtil.generalizeCallback( + done, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.class, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance())); + } + } + + public static BlockingInterface newBlockingStub( + com.google.protobuf.BlockingRpcChannel channel) { + return new BlockingStub(channel); + } + + public interface BlockingInterface { + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 request) + throws com.google.protobuf.ServiceException; + } + + private static final class BlockingStub implements BlockingInterface { + private BlockingStub(com.google.protobuf.BlockingRpcChannel channel) { + this.channel = channel; + } + + private final com.google.protobuf.BlockingRpcChannel channel; + + public org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2 sleep( + com.google.protobuf.RpcController controller, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepRequestProto2 request) + throws com.google.protobuf.ServiceException { + return (org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2) channel.callBlockingMethod( + getDescriptor().getMethods().get(0), + controller, + request, + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.SleepResponseProto2.getDefaultInstance()); + } + + } + + // @@protoc_insertion_point(class_scope:hadoop.common.TestProtobufRpcHandoffProto) + } + + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return descriptor; + } + private static com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] 
descriptorData = { + "\n\035test_rpc_service_legacy.proto\022\rhadoop." + + "common\032\021test_legacy.proto2\330\013\n\024TestProtob" + + "ufRpcProto\022K\n\004ping\022 .hadoop.common.Empty" + + "RequestProto\032!.hadoop.common.EmptyRespon" + + "seProto\022I\n\004echo\022\037.hadoop.common.EchoRequ" + + "estProto\032 .hadoop.common.EchoResponsePro" + + "to\022L\n\005error\022 .hadoop.common.EmptyRequest" + + "Proto\032!.hadoop.common.EmptyResponseProto" + + "\022M\n\006error2\022 .hadoop.common.EmptyRequestP" + + "roto\032!.hadoop.common.EmptyResponseProto\022", + "R\n\010slowPing\022#.hadoop.common.SlowPingRequ" + + "estProto\032!.hadoop.common.EmptyResponsePr" + + "oto\022L\n\005echo2\022 .hadoop.common.EchoRequest" + + "Proto2\032!.hadoop.common.EchoResponseProto" + + "2\022F\n\003add\022\036.hadoop.common.AddRequestProto" + + "\032\037.hadoop.common.AddResponseProto\022H\n\004add" + + "2\022\037.hadoop.common.AddRequestProto2\032\037.had" + + "oop.common.AddResponseProto\022T\n\rtestServe" + + "rGet\022 .hadoop.common.EmptyRequestProto\032!" 
+ + ".hadoop.common.EmptyResponseProto\022U\n\010exc", + "hange\022#.hadoop.common.ExchangeRequestPro" + + "to\032$.hadoop.common.ExchangeResponseProto" + + "\022L\n\005sleep\022 .hadoop.common.SleepRequestPr" + + "oto\032!.hadoop.common.EmptyResponseProto\022S" + + "\n\014lockAndSleep\022 .hadoop.common.SleepRequ" + + "estProto\032!.hadoop.common.EmptyResponsePr" + + "oto\022Y\n\rgetAuthMethod\022 .hadoop.common.Emp" + + "tyRequestProto\032&.hadoop.common.AuthMetho" + + "dResponseProto\022Q\n\013getAuthUser\022 .hadoop.c" + + "ommon.EmptyRequestProto\032 .hadoop.common.", + "UserResponseProto\022R\n\rechoPostponed\022\037.had" + + "oop.common.EchoRequestProto\032 .hadoop.com" + + "mon.EchoResponseProto\022T\n\rsendPostponed\022 " + + ".hadoop.common.EmptyRequestProto\032!.hadoo" + + "p.common.EmptyResponseProto\022T\n\016getCurren" + + "tUser\022 .hadoop.common.EmptyRequestProto\032" + + " .hadoop.common.UserResponseProto\022Y\n\023get" + + "ServerRemoteUser\022 .hadoop.common.EmptyRe" + + "questProto\032 .hadoop.common.UserResponseP" + + "roto2\377\001\n\025TestProtobufRpc2Proto\022L\n\005ping2\022", + " .hadoop.common.EmptyRequestProto\032!.hado" + + "op.common.EmptyResponseProto\022J\n\005echo2\022\037." + + "hadoop.common.EchoRequestProto\032 .hadoop." 
+ + "common.EchoResponseProto\022L\n\005sleep\022 .hado" + + "op.common.SleepRequestProto\032!.hadoop.com" + + "mon.SleepResponseProto2\257\001\n\023OldProtobufRp" + + "cProto\022K\n\004ping\022 .hadoop.common.EmptyRequ" + + "estProto\032!.hadoop.common.EmptyResponsePr" + + "oto\022K\n\004echo\022 .hadoop.common.EmptyRequest" + + "Proto\032!.hadoop.common.EmptyResponseProto", + "2\253\001\n\023NewProtobufRpcProto\022K\n\004ping\022 .hadoo" + + "p.common.EmptyRequestProto\032!.hadoop.comm" + + "on.EmptyResponseProto\022G\n\004echo\022\036.hadoop.c" + + "ommon.OptRequestProto\032\037.hadoop.common.Op" + + "tResponseProto2\261\001\n\025NewerProtobufRpcProto" + + "\022K\n\004ping\022 .hadoop.common.EmptyRequestPro" + + "to\032!.hadoop.common.EmptyResponseProto\022K\n" + + "\004echo\022 .hadoop.common.EmptyRequestProto\032" + + "!.hadoop.common.EmptyResponseProto2Z\n\013Cu" + + "stomProto\022K\n\004ping\022 .hadoop.common.EmptyR", + "equestProto\032!.hadoop.common.EmptyRespons" + + "eProto2m\n\033TestProtobufRpcHandoffProto\022N\n" + + "\005sleep\022!.hadoop.common.SleepRequestProto" + + "2\032\".hadoop.common.SleepResponseProto2BB\n" + + "\036org.apache.hadoop.ipc.protobufB\032TestRpc" + + "ServiceProtosLegacy\210\001\001\240\001\001" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + return null; + } + }; + com.google.protobuf.Descriptors.FileDescriptor + .internalBuildGeneratedFileFrom(descriptorData, + new com.google.protobuf.Descriptors.FileDescriptor[] { + org.apache.hadoop.ipc.protobuf.TestProtosLegacy.getDescriptor(), + }, assigner); + } + + // @@protoc_insertion_point(outer_class_scope) +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java index ada4cd80e4882..f80c62535a1f0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java @@ -24,6 +24,8 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.XMLUtils; + import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -34,7 +36,6 @@ import org.xml.sax.helpers.DefaultHandler; import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; import java.io.File; import java.util.ArrayList; @@ -76,7 +77,7 @@ protected void readTestConfigFile() { boolean success = false; testConfigFile = TEST_CACHE_DATA_DIR + File.separator + testConfigFile; try { - SAXParser p = (SAXParserFactory.newInstance()).newSAXParser(); + SAXParser p = XMLUtils.newSecureSAXParserFactory().newSAXParser(); p.parse(testConfigFile, getConfigParser()); success = true; } catch (Exception e) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java index 1ce23a0eb81f2..8ca414400c8d0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java @@ -117,13 +117,31 @@ public void initializeMemberVariables() { xmlPrefixToSkipCompare.add("fs.abfs.impl"); xmlPrefixToSkipCompare.add("fs.abfss.impl"); - // ADL properties are in a different subtree // - org.apache.hadoop.hdfs.web.ADLConfKeys xmlPrefixToSkipCompare.add("adl."); xmlPrefixToSkipCompare.add("fs.adl."); 
xmlPropsToSkipCompare.add("fs.AbstractFileSystem.adl.impl"); + // ViewfsOverloadScheme target fs impl property keys are dynamically + // constructed and they are advanced props. + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.abfs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.abfss.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.file.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.ftp.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.gs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.hdfs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.http.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.https.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.ofs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.o3fs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.oss.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.s3a.impl"); + xmlPropsToSkipCompare. + add("fs.viewfs.overload.scheme.target.swebhdfs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.webhdfs.impl"); + xmlPropsToSkipCompare.add("fs.viewfs.overload.scheme.target.wasb.impl"); + // Azure properties are in a different class // - org.apache.hadoop.fs.azure.AzureNativeFileSystemStore // - org.apache.hadoop.fs.azure.SASKeyGeneratorImpl @@ -201,8 +219,7 @@ public void initializeMemberVariables() { xmlPropsToSkipCompare.add("hadoop.common.configuration.version"); // - org.apache.hadoop.fs.FileSystem xmlPropsToSkipCompare.add("fs.har.impl.disable.cache"); - // - org.apache.hadoop.fs.FileSystem#getFileSystemClass() - xmlPropsToSkipCompare.add("fs.swift.impl"); + // - package org.apache.hadoop.tracing.TraceUtils ? 
xmlPropsToSkipCompare.add("hadoop.htrace.span.receiver.classes"); // Private keys @@ -217,8 +234,6 @@ public void initializeMemberVariables() { // - org.apache.hadoop.net.NetUtils xmlPropsToSkipCompare .add("hadoop.rpc.socket.factory.class.ClientProtocol"); - // - Where is this used? - xmlPropsToSkipCompare.add("hadoop.ssl.enabled"); // Keys with no corresponding variable // - org.apache.hadoop.io.compress.bzip2.Bzip2Factory diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java index cf42219f6d8b5..5809aa4da5ba7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java @@ -38,12 +38,15 @@ import org.w3c.dom.NodeList; import org.xml.sax.InputSource; -import com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.hadoop.http.HttpServer2; +import org.apache.hadoop.util.XMLUtils; + import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Mockito; + import static org.mockito.Mockito.when; import static org.mockito.Mockito.mock; import static org.junit.Assert.*; @@ -217,8 +220,7 @@ public void testWriteXml() throws Exception { ConfServlet.writeResponse(getTestConf(), sw, "xml"); String xml = sw.toString(); - DocumentBuilderFactory docBuilderFactory - = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory docBuilderFactory = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); Document doc = builder.parse(new InputSource(new StringReader(xml))); NodeList nameNodes = doc.getElementsByTagName("name"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 81c53959478b4..0ff7f146fc3cc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -38,6 +38,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -45,6 +46,7 @@ import java.util.Properties; import java.util.Random; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; import static java.util.concurrent.TimeUnit.*; @@ -490,6 +492,62 @@ public void testEnvDefault() throws IOException { } } + /** + * Verify that when a configuration is restricted, environment + * variables and system properties will be unresolved. + * The fallback patterns for the variables are still parsed. + */ + @Test + public void testRestrictedEnv() throws IOException { + // this test relies on env.PATH being set on all platforms a + // test run will take place on, and the java.version sysprop + // set in all JVMs. + // Restricted configurations will not get access to these values, so + // will either be unresolved or, for env vars with fallbacks: the fallback + // values. 
+ + conf.setRestrictSystemProperties(true); + + out = new BufferedWriter(new FileWriter(CONFIG)); + startConfig(); + // a simple property to reference + declareProperty("d", "D", "D"); + + // system property evaluation stops working completely + declareProperty("system1", "${java.version}", "${java.version}"); + + // the env variable does not resolve + declareProperty("secret1", "${env.PATH}", "${env.PATH}"); + + // but all the fallback options do work + declareProperty("secret2", "${env.PATH-a}", "a"); + declareProperty("secret3", "${env.PATH:-b}", "b"); + declareProperty("secret4", "${env.PATH:-}", ""); + declareProperty("secret5", "${env.PATH-}", ""); + // special case + declareProperty("secret6", "${env.PATH:}", "${env.PATH:}"); + // safety check + declareProperty("secret7", "${env.PATH:--}", "-"); + + // recursive eval of the fallback + declareProperty("secret8", "${env.PATH:-${d}}", "D"); + + // if the fallback doesn't resolve, the result is the whole variable raw. + declareProperty("secret9", "${env.PATH:-$d}}", "${env.PATH:-$d}}"); + + endConfig(); + Path fileResource = new Path(CONFIG); + conf.addResource(fileResource); + + for (Prop p : props) { + System.out.println("p=" + p.name); + String gotVal = conf.get(p.name); + String gotRawVal = conf.getRaw(p.name); + assertEq(p.val, gotRawVal); + assertEq(p.expectEval, gotVal); + } + } + @Test public void testFinalParam() throws IOException { out=new BufferedWriter(new FileWriter(CONFIG)); @@ -1062,6 +1120,38 @@ public void testRelativeIncludes() throws Exception { new File(new File(relConfig).getParent()).delete(); } + @Test + public void testRelativeIncludesWithLoadingViaUri() throws Exception { + tearDown(); + File configFile = new File("./tmp/test-config.xml"); + File configFile2 = new File("./tmp/test-config2.xml"); + + new File(configFile.getParent()).mkdirs(); + out = new BufferedWriter(new FileWriter(configFile2)); + startConfig(); + appendProperty("a", "b"); + endConfig(); + + out = new 
BufferedWriter(new FileWriter(configFile)); + startConfig(); + // Add the relative path instead of the absolute one. + startInclude(configFile2.getName()); + endInclude(); + appendProperty("c", "d"); + endConfig(); + + // verify that the includes file contains all properties + Path fileResource = new Path(configFile.toURI()); + conf.addResource(fileResource); + assertEquals("b", conf.get("a")); + assertEquals("d", conf.get("c")); + + // Cleanup + configFile.delete(); + configFile2.delete(); + new File(configFile.getParent()).delete(); + } + @Test public void testIntegerRanges() { Configuration conf = new Configuration(); @@ -2590,4 +2680,31 @@ private static Configuration checkCDATA(byte[] bytes) { assertEquals(" prefix >cdata\nsuffix ", conf.get("cdata-whitespace")); return conf; } + + @Test + public void testConcurrentModificationDuringIteration() throws InterruptedException { + Configuration configuration = new Configuration(); + new Thread(() -> { + while (true) { + configuration.set(String.valueOf(Math.random()), String.valueOf(Math.random())); + } + }).start(); + + AtomicBoolean exceptionOccurred = new AtomicBoolean(false); + + new Thread(() -> { + while (true) { + try { + configuration.iterator(); + } catch (final ConcurrentModificationException e) { + exceptionOccurred.set(true); + break; + } + } + }).start(); + + Thread.sleep(1000); //give enough time for threads to run + + assertFalse("ConcurrentModificationException occurred", exceptionOccurred.get()); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationDeprecation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationDeprecation.java index efb813164af69..2c0d6025f2688 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationDeprecation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationDeprecation.java @@ -47,8 
+47,8 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; public class TestConfigurationDeprecation { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationSubclass.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationSubclass.java index e15e699534d31..51d23d8038b0b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationSubclass.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationSubclass.java @@ -53,8 +53,9 @@ public void testReloadNotQuiet() throws Throwable { SubConf conf = new SubConf(true); conf.setQuietMode(false); assertFalse(conf.isReloaded()); + // adding a resource does not force a reload. 
conf.addResource("not-a-valid-resource"); - assertTrue(conf.isReloaded()); + assertFalse(conf.isReloaded()); try { Properties properties = conf.getProperties(); fail("Should not have got here"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestReconfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestReconfiguration.java index 39d3bae655d85..4948df9b1f4cb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestReconfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestReconfiguration.java @@ -18,8 +18,8 @@ package org.apache.hadoop.conf; -import com.google.common.base.Supplier; -import com.google.common.collect.Lists; +import java.util.function.Supplier; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.conf.ReconfigurationUtil.PropertyChange; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java index eca23a7f2a372..ce4e7ffeb519a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java @@ -43,7 +43,7 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.primitives.Longs; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Longs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoOutputStreamClosing.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoOutputStreamClosing.java index 
39e4bb8588068..04cdb962ac936 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoOutputStreamClosing.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoOutputStreamClosing.java @@ -17,12 +17,14 @@ */ package org.apache.hadoop.crypto; +import java.io.IOException; import java.io.OutputStream; import org.apache.hadoop.conf.Configuration; import org.junit.BeforeClass; import org.junit.Test; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.Mockito.*; /** @@ -54,4 +56,22 @@ public void testOutputStreamNotClosing() throws Exception { verify(outputStream, never()).close(); } + @Test + public void testUnderlyingOutputStreamClosedWhenExceptionClosing() throws Exception { + OutputStream outputStream = mock(OutputStream.class); + CryptoOutputStream cos = spy(new CryptoOutputStream(outputStream, codec, + new byte[16], new byte[16], 0L, true)); + + // exception while flushing during close + doThrow(new IOException("problem flushing wrapped stream")) + .when(cos).flush(); + + intercept(IOException.class, + () -> cos.close()); + + // We expect that the close of the CryptoOutputStream closes the + // wrapped OutputStream even though we got an exception + // during CryptoOutputStream::close (in the flush method) + verify(outputStream).close(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java index 55a9280d6260a..9b8638faa4b22 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java @@ -32,7 +32,7 @@ import org.junit.Assert; import org.junit.Test; -import com.google.common.collect.Sets; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.Sets; public class TestValueQueue { Logger LOG = LoggerFactory.getLogger(TestValueQueue.class); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java index 7804c73fd2afc..616c66b0748db 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java @@ -62,7 +62,7 @@ import org.junit.rules.Timeout; import org.mockito.Mockito; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; public class TestLoadBalancingKMSClientProvider { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FCStatisticsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FCStatisticsBaseTest.java index 73fd2802ab1d1..dc12f44fc2758 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FCStatisticsBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FCStatisticsBaseTest.java @@ -37,8 +37,8 @@ import org.junit.Assert; import org.junit.Test; -import com.google.common.base.Supplier; -import com.google.common.util.concurrent.Uninterruptibles; +import java.util.function.Supplier; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -178,7 +178,8 @@ public Boolean get() { * * @param stats */ - protected abstract void verifyWrittenBytes(Statistics stats); + protected abstract void verifyWrittenBytes(Statistics stats) + throws IOException; /** * Returns the filesystem 
uri. Should be set diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java index a4ccee3f7f58e..8065b3f61f52c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java @@ -862,7 +862,8 @@ private void assertListFilesFinds(Path dir, Path subdir) throws IOException { found); } - private void assertListStatusFinds(Path dir, Path subdir) throws IOException { + protected void assertListStatusFinds(Path dir, Path subdir) + throws IOException { FileStatus[] stats = fs.listStatus(dir); boolean found = false; StringBuilder builder = new StringBuilder(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestBlockLocation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestBlockLocation.java index 8569ea7cf781d..72e850b1313d5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestBlockLocation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestBlockLocation.java @@ -27,7 +27,7 @@ public class TestBlockLocation { private static final String[] EMPTY_STR_ARRAY = new String[0]; private static final StorageType[] EMPTY_STORAGE_TYPE_ARRAY = - new StorageType[0]; + StorageType.EMPTY_ARRAY; private static void checkBlockLocation(final BlockLocation loc) throws Exception { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestContentSummary.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestContentSummary.java index 7cc7ae4094974..98f9f2021f8b4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestContentSummary.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestContentSummary.java @@ -253,4 +253,40 @@ public void testToStringHumanNoShowQuota() { String expected = " 32.6 K 211.9 M 8.0 E "; assertEquals(expected, contentSummary.toString(false, true)); } + + // check the toSnapshot method with human readable. + @Test + public void testToSnapshotHumanReadable() { + long snapshotLength = Long.MAX_VALUE; + long snapshotFileCount = 222222222; + long snapshotDirectoryCount = 33333; + long snapshotSpaceConsumed = 222256578; + + ContentSummary contentSummary = new ContentSummary.Builder() + .snapshotLength(snapshotLength).snapshotFileCount(snapshotFileCount) + .snapshotDirectoryCount(snapshotDirectoryCount) + .snapshotSpaceConsumed(snapshotSpaceConsumed).build(); + String expected = + " 8.0 E 211.9 M 32.6 K " + + " 212.0 M "; + assertEquals(expected, contentSummary.toSnapshot(true)); + } + + // check the toSnapshot method with human readable disabled. + @Test + public void testToSnapshotNotHumanReadable() { + long snapshotLength = 1111; + long snapshotFileCount = 2222; + long snapshotDirectoryCount = 3333; + long snapshotSpaceConsumed = 4444; + + ContentSummary contentSummary = new ContentSummary.Builder() + .snapshotLength(snapshotLength).snapshotFileCount(snapshotFileCount) + .snapshotDirectoryCount(snapshotDirectoryCount) + .snapshotSpaceConsumed(snapshotSpaceConsumed).build(); + String expected = + " 1111 2222 3333 " + + " 4444 "; + assertEquals(expected, contentSummary.toSnapshot(false)); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemCaching.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemCaching.java index b3c38475d435b..67a933bb9e39c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemCaching.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemCaching.java @@ 
-21,22 +21,31 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.test.HadoopTestBase; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.util.BlockingThreadPoolExecutorService; +import org.assertj.core.api.Assertions; import org.junit.Test; -import java.security.PrivilegedExceptionAction; -import java.util.concurrent.Semaphore; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_CREATION_PARALLEL_COUNT; import static org.apache.hadoop.test.LambdaTestUtils.intercept; -import static org.junit.Assert.*; import static org.mockito.Mockito.*; -public class TestFileSystemCaching { +public class TestFileSystemCaching extends HadoopTestBase { @Test public void testCacheEnabled() throws Exception { @@ -336,4 +345,134 @@ public void testCacheIncludesURIUserInfo() throws Throwable { assertNotEquals(keyA, new FileSystem.Cache.Key( new URI("wasb://a:password@account.blob.core.windows.net"), conf)); } + + + /** + * Single semaphore: no surplus FS instances will be created + * and then discarded. 
+ */ + @Test + public void testCacheSingleSemaphoredConstruction() throws Exception { + FileSystem.Cache cache = semaphoredCache(1); + createFileSystems(cache, 10); + Assertions.assertThat(cache.getDiscardedInstances()) + .describedAs("Discarded FS instances") + .isEqualTo(0); + } + + /** + * Dual semaphore: thread 2 will get as far as + * blocking in the initialize() method while awaiting + * thread 1 to complete its initialization. + *

      + * The thread 2 FS instance will be discarded. + * All other threads will block for a cache semaphore, + * so when they are given an opportunity to proceed, + * they will find that an FS instance exists. + */ + @Test + public void testCacheDualSemaphoreConstruction() throws Exception { + FileSystem.Cache cache = semaphoredCache(2); + createFileSystems(cache, 10); + Assertions.assertThat(cache.getDiscardedInstances()) + .describedAs("Discarded FS instances") + .isEqualTo(1); + } + + /** + * Construct the FS instances in a cache with effectively no + * limit on the number of instances which can be created + * simultaneously. + *

      + * This is the effective state before HADOOP-17313. + *

      + * All but one thread's FS instance will be discarded. + */ + @Test + public void testCacheLargeSemaphoreConstruction() throws Exception { + FileSystem.Cache cache = semaphoredCache(999); + int count = 10; + createFileSystems(cache, count); + Assertions.assertThat(cache.getDiscardedInstances()) + .describedAs("Discarded FS instances") + .isEqualTo(count -1); + } + + /** + * Create a cache with a given semaphore size. + * @param semaphores number of semaphores + * @return the cache. + */ + private FileSystem.Cache semaphoredCache(final int semaphores) { + final Configuration conf1 = new Configuration(); + conf1.setInt(FS_CREATION_PARALLEL_COUNT, semaphores); + FileSystem.Cache cache = new FileSystem.Cache(conf1); + return cache; + } + + /** + * Attempt to create {@code count} filesystems in parallel, + * then assert that they are all equal. + * @param cache cache to use + * @param count count of filesystems to instantiate + */ + private void createFileSystems(final FileSystem.Cache cache, final int count) + throws URISyntaxException, InterruptedException, + java.util.concurrent.ExecutionException { + final Configuration conf = new Configuration(); + conf.set("fs.blocking.impl", BlockingInitializer.NAME); + // only one instance can be created at a time. + URI uri = new URI("blocking://a"); + ListeningExecutorService pool = + MoreExecutors.listeningDecorator( + BlockingThreadPoolExecutorService.newInstance(count * 2, 0, + 10, TimeUnit.SECONDS, + "creation-threads")); + + // submit a set of requests to create an FS instance. + // the semaphore will block all but one, and that will block until + // it is allowed to continue + List> futures = new ArrayList<>(count); + + // acquire the semaphore so blocking all FS instances from + // being initialized. 
+ Semaphore semaphore = BlockingInitializer.SEM; + semaphore.acquire(); + + for (int i = 0; i < count; i++) { + futures.add(pool.submit( + () -> cache.get(uri, conf))); + } + // now let all blocked initializers free + semaphore.release(); + // get that first FS + FileSystem createdFS = futures.get(0).get(); + // verify all the others are the same instance + for (int i = 1; i < count; i++) { + FileSystem fs = futures.get(i).get(); + Assertions.assertThat(fs) + .isSameAs(createdFS); + } + } + + /** + * An FS which blocks in initialize() until it can acquire the shared + * semaphore (which it then releases). + */ + private static final class BlockingInitializer extends LocalFileSystem { + + private static final String NAME = BlockingInitializer.class.getName(); + + private static final Semaphore SEM = new Semaphore(1); + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + try { + SEM.acquire(); + SEM.release(); + } catch (InterruptedException e) { + throw new IOException(e.toString(), e); + } + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java index 1ca1f241e5e9d..228a875c12294 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.fs; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ -41,27 +42,31 @@ import java.net.URL; import java.net.UnknownHostException; import java.nio.charset.StandardCharsets; -import java.nio.file.FileSystems; import java.nio.file.Files; +import java.nio.file.Paths; import 
java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.jar.Attributes; import java.util.jar.JarFile; import java.util.jar.Manifest; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.StringUtils; import org.apache.tools.tar.TarEntry; import org.apache.tools.tar.TarOutputStream; + +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -157,13 +162,12 @@ public void setup() throws IOException { FileUtils.forceMkdir(dir1); FileUtils.forceMkdir(dir2); - new File(del, FILE).createNewFile(); - File tmpFile = new File(tmp, FILE); - tmpFile.createNewFile(); + Verify.createNewFile(new File(del, FILE)); + File tmpFile = Verify.createNewFile(new File(tmp, FILE)); // create files - new File(dir1, FILE).createNewFile(); - new File(dir2, FILE).createNewFile(); + Verify.createNewFile(new File(dir1, FILE)); + Verify.createNewFile(new File(dir2, FILE)); // create a symlink to file File link = new File(del, LINK); @@ -172,7 +176,7 @@ public void setup() throws IOException { // create a symlink to dir File linkDir = new File(del, "tmpDir"); FileUtil.symLink(tmp.toString(), linkDir.toString()); - Assert.assertEquals(5, del.listFiles().length); + Assert.assertEquals(5, Objects.requireNonNull(del.listFiles()).length); // create files in partitioned directories createFile(partitioned, "part-r-00000", "foo"); 
@@ -199,13 +203,9 @@ public void tearDown() throws IOException { private File createFile(File directory, String name, String contents) throws IOException { File newFile = new File(directory, name); - PrintWriter pw = new PrintWriter(newFile); - try { + try (PrintWriter pw = new PrintWriter(newFile)) { pw.println(contents); } - finally { - pw.close(); - } return newFile; } @@ -217,11 +217,11 @@ public void testListFiles() throws IOException { //Test existing directory with no files case File newDir = new File(tmp.getPath(),"test"); - newDir.mkdir(); + Verify.mkdir(newDir); Assert.assertTrue("Failed to create test dir", newDir.exists()); files = FileUtil.listFiles(newDir); Assert.assertEquals(0, files.length); - newDir.delete(); + assertTrue(newDir.delete()); Assert.assertFalse("Failed to delete test dir", newDir.exists()); //Test non-existing directory case, this throws @@ -243,11 +243,11 @@ public void testListAPI() throws IOException { //Test existing directory with no files case File newDir = new File(tmp.getPath(),"test"); - newDir.mkdir(); + Verify.mkdir(newDir); Assert.assertTrue("Failed to create test dir", newDir.exists()); files = FileUtil.list(newDir); Assert.assertEquals("New directory unexpectedly contains files", 0, files.length); - newDir.delete(); + assertTrue(newDir.delete()); Assert.assertFalse("Failed to delete test dir", newDir.exists()); //Test non-existing directory case, this throws @@ -265,7 +265,7 @@ public void testListAPI() throws IOException { public void testFullyDelete() throws IOException { boolean ret = FileUtil.fullyDelete(del); Assert.assertTrue(ret); - Assert.assertFalse(del.exists()); + Verify.notExists(del); validateTmpDir(); } @@ -278,13 +278,13 @@ public void testFullyDelete() throws IOException { @Test (timeout = 30000) public void testFullyDeleteSymlinks() throws IOException { File link = new File(del, LINK); - Assert.assertEquals(5, del.list().length); + assertDelListLength(5); // Since tmpDir is symlink to tmp, 
fullyDelete(tmpDir) should not // delete contents of tmp. See setupDirs for details. boolean ret = FileUtil.fullyDelete(link); Assert.assertTrue(ret); - Assert.assertFalse(link.exists()); - Assert.assertEquals(4, del.list().length); + Verify.notExists(link); + assertDelListLength(4); validateTmpDir(); File linkDir = new File(del, "tmpDir"); @@ -292,8 +292,8 @@ public void testFullyDeleteSymlinks() throws IOException { // delete contents of tmp. See setupDirs for details. ret = FileUtil.fullyDelete(linkDir); Assert.assertTrue(ret); - Assert.assertFalse(linkDir.exists()); - Assert.assertEquals(3, del.list().length); + Verify.notExists(linkDir); + assertDelListLength(3); validateTmpDir(); } @@ -309,16 +309,16 @@ public void testFullyDeleteDanglingSymlinks() throws IOException { // to make y as a dangling link to file tmp/x boolean ret = FileUtil.fullyDelete(tmp); Assert.assertTrue(ret); - Assert.assertFalse(tmp.exists()); + Verify.notExists(tmp); // dangling symlink to file File link = new File(del, LINK); - Assert.assertEquals(5, del.list().length); + assertDelListLength(5); // Even though 'y' is dangling symlink to file tmp/x, fullyDelete(y) // should delete 'y' properly. ret = FileUtil.fullyDelete(link); Assert.assertTrue(ret); - Assert.assertEquals(4, del.list().length); + assertDelListLength(4); // dangling symlink to directory File linkDir = new File(del, "tmpDir"); @@ -326,22 +326,22 @@ public void testFullyDeleteDanglingSymlinks() throws IOException { // delete tmpDir properly. 
ret = FileUtil.fullyDelete(linkDir); Assert.assertTrue(ret); - Assert.assertEquals(3, del.list().length); + assertDelListLength(3); } @Test (timeout = 30000) public void testFullyDeleteContents() throws IOException { boolean ret = FileUtil.fullyDeleteContents(del); Assert.assertTrue(ret); - Assert.assertTrue(del.exists()); - Assert.assertEquals(0, del.listFiles().length); + Verify.exists(del); + Assert.assertEquals(0, Objects.requireNonNull(del.listFiles()).length); validateTmpDir(); } private void validateTmpDir() { - Assert.assertTrue(tmp.exists()); - Assert.assertEquals(1, tmp.listFiles().length); - Assert.assertTrue(new File(tmp, FILE).exists()); + Verify.exists(tmp); + Assert.assertEquals(1, Objects.requireNonNull(tmp.listFiles()).length); + Verify.exists(new File(tmp, FILE)); } /** @@ -365,15 +365,15 @@ private void validateTmpDir() { * @throws IOException */ private void setupDirsAndNonWritablePermissions() throws IOException { - new MyFile(del, FILE_1_NAME).createNewFile(); + Verify.createNewFile(new MyFile(del, FILE_1_NAME)); // "file1" is non-deletable by default, see MyFile.delete(). - xSubDir.mkdirs(); - file2.createNewFile(); + Verify.mkdirs(xSubDir); + Verify.createNewFile(file2); - xSubSubDir.mkdirs(); - file22.createNewFile(); + Verify.mkdirs(xSubSubDir); + Verify.createNewFile(file22); revokePermissions(file22); revokePermissions(xSubSubDir); @@ -381,8 +381,8 @@ private void setupDirsAndNonWritablePermissions() throws IOException { revokePermissions(file2); revokePermissions(xSubDir); - ySubDir.mkdirs(); - file3.createNewFile(); + Verify.mkdirs(ySubDir); + Verify.createNewFile(file3); File tmpFile = new File(tmp, FILE); tmpFile.createNewFile(); @@ -447,6 +447,88 @@ public void testFailFullyDeleteGrantPermissions() throws IOException { validateAndSetWritablePermissions(false, ret); } + /** + * Asserts if the {@link TestFileUtil#del} meets the given expected length. + * + * @param expectedLength The expected length of the {@link TestFileUtil#del}. 
+ */ + private void assertDelListLength(int expectedLength) { + Assertions.assertThat(del.list()).describedAs("del list").isNotNull().hasSize(expectedLength); + } + + /** + * Helper class to perform {@link File} operation and also verify them. + */ + public static class Verify { + /** + * Invokes {@link File#createNewFile()} on the given {@link File} instance. + * + * @param file The file to call {@link File#createNewFile()} on. + * @return The result of {@link File#createNewFile()}. + * @throws IOException As per {@link File#createNewFile()}. + */ + public static File createNewFile(File file) throws IOException { + assertTrue("Unable to create new file " + file, file.createNewFile()); + return file; + } + + /** + * Invokes {@link File#mkdir()} on the given {@link File} instance. + * + * @param file The file to call {@link File#mkdir()} on. + * @return The result of {@link File#mkdir()}. + */ + public static File mkdir(File file) { + assertTrue("Unable to mkdir for " + file, file.mkdir()); + return file; + } + + /** + * Invokes {@link File#mkdirs()} on the given {@link File} instance. + * + * @param file The file to call {@link File#mkdirs()} on. + * @return The result of {@link File#mkdirs()}. + */ + public static File mkdirs(File file) { + assertTrue("Unable to mkdirs for " + file, file.mkdirs()); + return file; + } + + /** + * Invokes {@link File#delete()} on the given {@link File} instance. + * + * @param file The file to call {@link File#delete()} on. + * @return The result of {@link File#delete()}. + */ + public static File delete(File file) { + assertTrue("Unable to delete " + file, file.delete()); + return file; + } + + /** + * Invokes {@link File#exists()} on the given {@link File} instance. + * + * @param file The file to call {@link File#exists()} on. + * @return The result of {@link File#exists()}. 
+ */ + public static File exists(File file) { + assertTrue("Expected file " + file + " doesn't exist", file.exists()); + return file; + } + + /** + * Invokes {@link File#exists()} on the given {@link File} instance to check if the + * {@link File} doesn't exists. + * + * @param file The file to call {@link File#exists()} on. + * @return The negation of the result of {@link File#exists()}. + */ + public static File notExists(File file) { + assertFalse("Expected file " + file + " must not exist", file.exists()); + return file; + } + } + /** * Extend {@link File}. Same as {@link File} except for two things: (1) This * treats file1Name as a very special file which is not delete-able @@ -579,14 +661,13 @@ public void testGetDU() throws Exception { FileUtil.chmod(partitioned.getAbsolutePath(), "0777", true/*recursive*/); } } - + @Test (timeout = 30000) - public void testUnTar() throws IOException { + public void testUnTar() throws Exception { // make a simple tar: final File simpleTar = new File(del, FILE); - OutputStream os = new FileOutputStream(simpleTar); - TarOutputStream tos = new TarOutputStream(os); - try { + OutputStream os = new FileOutputStream(simpleTar); + try (TarOutputStream tos = new TarOutputStream(os)) { TarEntry te = new TarEntry("/bar/foo"); byte[] data = "some-content".getBytes("UTF-8"); te.setSize(data.length); @@ -595,55 +676,42 @@ public void testUnTar() throws IOException { tos.closeEntry(); tos.flush(); tos.finish(); - } finally { - tos.close(); } // successfully untar it into an existing dir: FileUtil.unTar(simpleTar, tmp); // check result: - assertTrue(new File(tmp, "/bar/foo").exists()); + Verify.exists(new File(tmp, "/bar/foo")); assertEquals(12, new File(tmp, "/bar/foo").length()); - - final File regularFile = new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog"); - regularFile.createNewFile(); - assertTrue(regularFile.exists()); - try { - FileUtil.unTar(simpleTar, regularFile); - assertTrue("An IOException expected.", false); - } catch 
(IOException ioe) { - // okay - } + + final File regularFile = + Verify.createNewFile(new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog")); + LambdaTestUtils.intercept(IOException.class, () -> FileUtil.unTar(simpleTar, regularFile)); } @Test (timeout = 30000) public void testReplaceFile() throws IOException { - final File srcFile = new File(tmp, "src"); - // src exists, and target does not exist: - srcFile.createNewFile(); - assertTrue(srcFile.exists()); + final File srcFile = Verify.createNewFile(new File(tmp, "src")); final File targetFile = new File(tmp, "target"); - assertTrue(!targetFile.exists()); + Verify.notExists(targetFile); FileUtil.replaceFile(srcFile, targetFile); - assertTrue(!srcFile.exists()); - assertTrue(targetFile.exists()); + Verify.notExists(srcFile); + Verify.exists(targetFile); // src exists and target is a regular file: - srcFile.createNewFile(); - assertTrue(srcFile.exists()); + Verify.createNewFile(srcFile); + Verify.exists(srcFile); FileUtil.replaceFile(srcFile, targetFile); - assertTrue(!srcFile.exists()); - assertTrue(targetFile.exists()); + Verify.notExists(srcFile); + Verify.exists(targetFile); // src exists, and target is a non-empty directory: - srcFile.createNewFile(); - assertTrue(srcFile.exists()); - targetFile.delete(); - targetFile.mkdirs(); - File obstacle = new File(targetFile, "obstacle"); - obstacle.createNewFile(); - assertTrue(obstacle.exists()); + Verify.createNewFile(srcFile); + Verify.exists(srcFile); + Verify.delete(targetFile); + Verify.mkdirs(targetFile); + File obstacle = Verify.createNewFile(new File(targetFile, "obstacle")); assertTrue(targetFile.exists() && targetFile.isDirectory()); try { FileUtil.replaceFile(srcFile, targetFile); @@ -652,9 +720,9 @@ public void testReplaceFile() throws IOException { // okay } // check up the post-condition: nothing is deleted: - assertTrue(srcFile.exists()); + Verify.exists(srcFile); assertTrue(targetFile.exists() && targetFile.isDirectory()); - assertTrue(obstacle.exists()); + 
Verify.exists(obstacle); } @Test (timeout = 30000) @@ -667,45 +735,84 @@ public void testCreateLocalTempFile() throws IOException { assertTrue(tmp1.exists() && tmp2.exists()); assertTrue(tmp1.canWrite() && tmp2.canWrite()); assertTrue(tmp1.canRead() && tmp2.canRead()); - tmp1.delete(); - tmp2.delete(); + Verify.delete(tmp1); + Verify.delete(tmp2); assertTrue(!tmp1.exists() && !tmp2.exists()); } @Test (timeout = 30000) - public void testUnZip() throws IOException { + public void testUnZip() throws Exception { // make sa simple zip final File simpleZip = new File(del, FILE); - OutputStream os = new FileOutputStream(simpleZip); - ZipOutputStream tos = new ZipOutputStream(os); - try { - ZipEntry ze = new ZipEntry("foo"); - byte[] data = "some-content".getBytes("UTF-8"); - ze.setSize(data.length); - tos.putNextEntry(ze); - tos.write(data); - tos.closeEntry(); + try (OutputStream os = new FileOutputStream(simpleZip); + ZipArchiveOutputStream tos = new ZipArchiveOutputStream(os)) { + List ZipArchiveList = new ArrayList<>(7); + int count = 0; + // create 7 files to verify permissions + for (int i = 0; i < 7; i++) { + ZipArchiveList.add(new ZipArchiveEntry("foo_" + i)); + ZipArchiveEntry archiveEntry = ZipArchiveList.get(i); + archiveEntry.setUnixMode(count += 0100); + byte[] data = "some-content".getBytes("UTF-8"); + archiveEntry.setSize(data.length); + tos.putArchiveEntry(archiveEntry); + tos.write(data); + } + tos.closeArchiveEntry(); tos.flush(); tos.finish(); - } finally { - tos.close(); } - + // successfully unzip it into an existing dir: FileUtil.unZip(simpleZip, tmp); + File foo0 = new File(tmp, "foo_0"); + File foo1 = new File(tmp, "foo_1"); + File foo2 = new File(tmp, "foo_2"); + File foo3 = new File(tmp, "foo_3"); + File foo4 = new File(tmp, "foo_4"); + File foo5 = new File(tmp, "foo_5"); + File foo6 = new File(tmp, "foo_6"); // check result: - assertTrue(new File(tmp, "foo").exists()); - assertEquals(12, new File(tmp, "foo").length()); - - final File regularFile 
= new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog"); - regularFile.createNewFile(); - assertTrue(regularFile.exists()); - try { - FileUtil.unZip(simpleZip, regularFile); - assertTrue("An IOException expected.", false); - } catch (IOException ioe) { - // okay - } + assertTrue(foo0.exists()); + assertTrue(foo1.exists()); + assertTrue(foo2.exists()); + assertTrue(foo3.exists()); + assertTrue(foo4.exists()); + assertTrue(foo5.exists()); + assertTrue(foo6.exists()); + assertEquals(12, foo0.length()); + // tests whether file foo_0 has executable permissions + assertTrue("file lacks execute permissions", foo0.canExecute()); + assertFalse("file has write permissions", foo0.canWrite()); + assertFalse("file has read permissions", foo0.canRead()); + // tests whether file foo_1 has writable permissions + assertFalse("file has execute permissions", foo1.canExecute()); + assertTrue("file lacks write permissions", foo1.canWrite()); + assertFalse("file has read permissions", foo1.canRead()); + // tests whether file foo_2 has executable and writable permissions + assertTrue("file lacks execute permissions", foo2.canExecute()); + assertTrue("file lacks write permissions", foo2.canWrite()); + assertFalse("file has read permissions", foo2.canRead()); + // tests whether file foo_3 has readable permissions + assertFalse("file has execute permissions", foo3.canExecute()); + assertFalse("file has write permissions", foo3.canWrite()); + assertTrue("file lacks read permissions", foo3.canRead()); + // tests whether file foo_4 has readable and executable permissions + assertTrue("file lacks execute permissions", foo4.canExecute()); + assertFalse("file has write permissions", foo4.canWrite()); + assertTrue("file lacks read permissions", foo4.canRead()); + // tests whether file foo_5 has readable and writable permissions + assertFalse("file has execute permissions", foo5.canExecute()); + assertTrue("file lacks write permissions", foo5.canWrite()); + assertTrue("file lacks read permissions", 
foo5.canRead()); + // tests whether file foo_6 has readable, writable and executable permissions + assertTrue("file lacks execute permissions", foo6.canExecute()); + assertTrue("file lacks write permissions", foo6.canWrite()); + assertTrue("file lacks read permissions", foo6.canRead()); + + final File regularFile = + Verify.createNewFile(new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog")); + LambdaTestUtils.intercept(IOException.class, () -> FileUtil.unZip(simpleZip, regularFile)); } @Test (timeout = 30000) @@ -713,14 +820,14 @@ public void testUnZip2() throws IOException { // make a simple zip final File simpleZip = new File(del, FILE); OutputStream os = new FileOutputStream(simpleZip); - try (ZipOutputStream tos = new ZipOutputStream(os)) { + try (ZipArchiveOutputStream tos = new ZipArchiveOutputStream(os)) { // Add an entry that contains invalid filename - ZipEntry ze = new ZipEntry("../foo"); + ZipArchiveEntry ze = new ZipArchiveEntry("../foo"); byte[] data = "some-content".getBytes(StandardCharsets.UTF_8); ze.setSize(data.length); - tos.putNextEntry(ze); + tos.putArchiveEntry(ze); tos.write(data); - tos.closeEntry(); + tos.closeArchiveEntry(); tos.flush(); tos.finish(); } @@ -751,24 +858,24 @@ public void testCopy5() throws IOException { final File dest = new File(del, "dest"); boolean result = FileUtil.copy(fs, srcPath, dest, false, conf); assertTrue(result); - assertTrue(dest.exists()); + Verify.exists(dest); assertEquals(content.getBytes().length + System.getProperty("line.separator").getBytes().length, dest.length()); - assertTrue(srcFile.exists()); // should not be deleted + Verify.exists(srcFile); // should not be deleted // copy regular file, delete src: - dest.delete(); - assertTrue(!dest.exists()); + Verify.delete(dest); + Verify.notExists(dest); result = FileUtil.copy(fs, srcPath, dest, true, conf); assertTrue(result); - assertTrue(dest.exists()); + Verify.exists(dest); assertEquals(content.getBytes().length + 
System.getProperty("line.separator").getBytes().length, dest.length()); - assertTrue(!srcFile.exists()); // should be deleted + Verify.notExists(srcFile); // should be deleted // copy a dir: - dest.delete(); - assertTrue(!dest.exists()); + Verify.delete(dest); + Verify.notExists(dest); srcPath = new Path(partitioned.toURI()); result = FileUtil.copy(fs, srcPath, dest, true, conf); assertTrue(result); @@ -780,7 +887,7 @@ public void testCopy5() throws IOException { assertEquals(3 + System.getProperty("line.separator").getBytes().length, f.length()); } - assertTrue(!partitioned.exists()); // should be deleted + Verify.notExists(partitioned); // should be deleted } @Test (timeout = 30000) @@ -868,8 +975,8 @@ public void testSymlinkRenameTo() throws Exception { // create the symlink FileUtil.symLink(file.getAbsolutePath(), link.getAbsolutePath()); - Assert.assertTrue(file.exists()); - Assert.assertTrue(link.exists()); + Verify.exists(file); + Verify.exists(link); File link2 = new File(del, "_link2"); @@ -879,10 +986,10 @@ public void testSymlinkRenameTo() throws Exception { // Make sure the file still exists // (NOTE: this would fail on Java6 on Windows if we didn't // copy the file in FileUtil#symlink) - Assert.assertTrue(file.exists()); + Verify.exists(file); - Assert.assertTrue(link2.exists()); - Assert.assertFalse(link.exists()); + Verify.exists(link2); + Verify.notExists(link); } /** @@ -897,13 +1004,13 @@ public void testSymlinkDelete() throws Exception { // create the symlink FileUtil.symLink(file.getAbsolutePath(), link.getAbsolutePath()); - Assert.assertTrue(file.exists()); - Assert.assertTrue(link.exists()); + Verify.exists(file); + Verify.exists(link); // make sure that deleting a symlink works properly - Assert.assertTrue(link.delete()); - Assert.assertFalse(link.exists()); - Assert.assertTrue(file.exists()); + Verify.delete(link); + Verify.notExists(link); + Verify.exists(file); } /** @@ -930,13 +1037,13 @@ public void testSymlinkLength() throws Exception { 
Assert.assertEquals(data.length, file.length()); Assert.assertEquals(data.length, link.length()); - file.delete(); - Assert.assertFalse(file.exists()); + Verify.delete(file); + Verify.notExists(file); Assert.assertEquals(0, link.length()); - link.delete(); - Assert.assertFalse(link.exists()); + Verify.delete(link); + Verify.notExists(link); } /** @@ -1002,7 +1109,7 @@ public void testSymlinkFileAlreadyExists() throws IOException { public void testSymlinkSameFile() throws IOException { File file = new File(del, FILE); - file.delete(); + Verify.delete(file); // Create a symbolic link // The operation should succeed @@ -1075,21 +1182,21 @@ private void doUntarAndVerify(File tarFile, File untarDir) String parentDir = untarDir.getCanonicalPath() + Path.SEPARATOR + "name"; File testFile = new File(parentDir + Path.SEPARATOR + "version"); - Assert.assertTrue(testFile.exists()); + Verify.exists(testFile); Assert.assertTrue(testFile.length() == 0); String imageDir = parentDir + Path.SEPARATOR + "image"; testFile = new File(imageDir + Path.SEPARATOR + "fsimage"); - Assert.assertTrue(testFile.exists()); + Verify.exists(testFile); Assert.assertTrue(testFile.length() == 157); String currentDir = parentDir + Path.SEPARATOR + "current"; testFile = new File(currentDir + Path.SEPARATOR + "fsimage"); - Assert.assertTrue(testFile.exists()); + Verify.exists(testFile); Assert.assertTrue(testFile.length() == 4331); testFile = new File(currentDir + Path.SEPARATOR + "edits"); - Assert.assertTrue(testFile.exists()); + Verify.exists(testFile); Assert.assertTrue(testFile.length() == 1033); testFile = new File(currentDir + Path.SEPARATOR + "fstime"); - Assert.assertTrue(testFile.exists()); + Verify.exists(testFile); Assert.assertTrue(testFile.length() == 8); } @@ -1106,6 +1213,38 @@ public void testUntar() throws IOException { doUntarAndVerify(new File(tarFileName), untarDir); } + /** + * Verify we can't unTar a file which isn't there. 
+ * This will test different codepaths on Windows from unix, + * but both MUST throw an IOE of some kind. + */ + @Test(timeout = 30000) + public void testUntarMissingFile() throws Throwable { + File dataDir = GenericTestUtils.getTestDir(); + File tarFile = new File(dataDir, "missing; true"); + File untarDir = new File(dataDir, "untarDir"); + intercept(IOException.class, () -> + FileUtil.unTar(tarFile, untarDir)); + } + + /** + * Verify we can't unTar a file which isn't there + * through the java untar code. + * This is how {@code FileUtil.unTar(File, File} + * will behave on Windows, + */ + @Test(timeout = 30000) + public void testUntarMissingFileThroughJava() throws Throwable { + File dataDir = GenericTestUtils.getTestDir(); + File tarFile = new File(dataDir, "missing; true"); + File untarDir = new File(dataDir, "untarDir"); + // java8 on unix throws java.nio.file.NoSuchFileException here; + // leaving as an IOE intercept in case windows throws something + // else. + intercept(IOException.class, () -> + FileUtil.unTarUsingJava(tarFile, untarDir, false)); + } + @Test (timeout = 30000) public void testCreateJarWithClassPath() throws Exception { // create files expected to match a wildcard @@ -1118,9 +1257,9 @@ public void testCreateJarWithClassPath() throws Exception { } // create non-jar files, which we expect to not be included in the classpath - Assert.assertTrue(new File(tmp, "text.txt").createNewFile()); - Assert.assertTrue(new File(tmp, "executable.exe").createNewFile()); - Assert.assertTrue(new File(tmp, "README").createNewFile()); + Verify.createNewFile(new File(tmp, "text.txt")); + Verify.createNewFile(new File(tmp, "executable.exe")); + Verify.createNewFile(new File(tmp, "README")); // create classpath jar String wildcardPath = tmp.getCanonicalPath() + File.separator + "*"; @@ -1153,16 +1292,16 @@ public void testCreateJarWithClassPath() throws Exception { if (wildcardPath.equals(classPath)) { // add wildcard matches for (File wildcardMatch: 
wildcardMatches) { - expectedClassPaths.add(wildcardMatch.toURI().toURL() + expectedClassPaths.add(wildcardMatch.getCanonicalFile().toURI().toURL() .toExternalForm()); } } else { File fileCp = null; if(!new Path(classPath).isAbsolute()) { - fileCp = new File(tmp, classPath); + fileCp = new File(tmp, classPath).getCanonicalFile(); } else { - fileCp = new File(classPath); + fileCp = new File(classPath).getCanonicalFile(); } if (nonExistentSubdir.equals(classPath)) { // expect to maintain trailing path separator if present in input, even @@ -1206,9 +1345,9 @@ public void testGetJarsInDirectory() throws Exception { } // create non-jar files, which we expect to not be included in the result - assertTrue(new File(tmp, "text.txt").createNewFile()); - assertTrue(new File(tmp, "executable.exe").createNewFile()); - assertTrue(new File(tmp, "README").createNewFile()); + Verify.createNewFile(new File(tmp, "text.txt")); + Verify.createNewFile(new File(tmp, "executable.exe")); + Verify.createNewFile(new File(tmp, "README")); // pass in the directory String directory = tmp.getCanonicalPath(); @@ -1217,7 +1356,8 @@ public void testGetJarsInDirectory() throws Exception { for (Path jar: jars) { URL url = jar.toUri().toURL(); assertTrue("the jar should match either of the jars", - url.equals(jar1.toURI().toURL()) || url.equals(jar2.toURI().toURL())); + url.equals(jar1.getCanonicalFile().toURI().toURL()) || + url.equals(jar2.getCanonicalFile().toURI().toURL())); } } @@ -1242,7 +1382,7 @@ public void setupCompareFs() { uri4 = new URI(uris4); uri5 = new URI(uris5); uri6 = new URI(uris6); - } catch (URISyntaxException use) { + } catch (URISyntaxException ignored) { } // Set up InetAddress inet1 = mock(InetAddress.class); @@ -1265,7 +1405,7 @@ public void setupCompareFs() { when(InetAddress.getByName(uris3)).thenReturn(inet3); when(InetAddress.getByName(uris4)).thenReturn(inet4); when(InetAddress.getByName(uris5)).thenReturn(inet5); - } catch (UnknownHostException ue) { + } catch 
(UnknownHostException ignored) { } fs1 = mock(FileSystem.class); @@ -1285,62 +1425,87 @@ public void setupCompareFs() { @Test public void testCompareFsNull() throws Exception { setupCompareFs(); - assertEquals(FileUtil.compareFs(null,fs1),false); - assertEquals(FileUtil.compareFs(fs1,null),false); + assertFalse(FileUtil.compareFs(null, fs1)); + assertFalse(FileUtil.compareFs(fs1, null)); } @Test public void testCompareFsDirectories() throws Exception { setupCompareFs(); - assertEquals(FileUtil.compareFs(fs1,fs1),true); - assertEquals(FileUtil.compareFs(fs1,fs2),false); - assertEquals(FileUtil.compareFs(fs1,fs5),false); - assertEquals(FileUtil.compareFs(fs3,fs4),true); - assertEquals(FileUtil.compareFs(fs1,fs6),false); + assertTrue(FileUtil.compareFs(fs1, fs1)); + assertFalse(FileUtil.compareFs(fs1, fs2)); + assertFalse(FileUtil.compareFs(fs1, fs5)); + assertTrue(FileUtil.compareFs(fs3, fs4)); + assertFalse(FileUtil.compareFs(fs1, fs6)); } @Test(timeout = 8000) public void testCreateSymbolicLinkUsingJava() throws IOException { final File simpleTar = new File(del, FILE); OutputStream os = new FileOutputStream(simpleTar); - TarArchiveOutputStream tos = new TarArchiveOutputStream(os); - File untarFile = null; - try { + try (TarArchiveOutputStream tos = new TarArchiveOutputStream(os)) { // Files to tar final String tmpDir = "tmp/test"; File tmpDir1 = new File(tmpDir, "dir1/"); File tmpDir2 = new File(tmpDir, "dir2/"); - // Delete the directories if they already exist - tmpDir1.mkdirs(); - tmpDir2.mkdirs(); + Verify.mkdirs(tmpDir1); + Verify.mkdirs(tmpDir2); - java.nio.file.Path symLink = FileSystems - .getDefault().getPath(tmpDir1.getPath() + "/sl"); + java.nio.file.Path symLink = Paths.get(tmpDir1.getPath(), "sl"); // Create Symbolic Link - Files.createSymbolicLink(symLink, - FileSystems.getDefault().getPath(tmpDir2.getPath())).toString(); + Files.createSymbolicLink(symLink, Paths.get(tmpDir2.getPath())); assertTrue(Files.isSymbolicLink(symLink.toAbsolutePath())); - // 
put entries in tar file + // Put entries in tar file putEntriesInTar(tos, tmpDir1.getParentFile()); tos.close(); - untarFile = new File(tmpDir, "2"); - // Untar using java + File untarFile = new File(tmpDir, "2"); + // Untar using Java FileUtil.unTarUsingJava(simpleTar, untarFile, false); // Check symbolic link and other directories are there in untar file assertTrue(Files.exists(untarFile.toPath())); - assertTrue(Files.exists(FileSystems.getDefault().getPath(untarFile - .getPath(), tmpDir))); - assertTrue(Files.isSymbolicLink(FileSystems.getDefault().getPath(untarFile - .getPath().toString(), symLink.toString()))); - + assertTrue(Files.exists(Paths.get(untarFile.getPath(), tmpDir))); + assertTrue(Files.isSymbolicLink(Paths.get(untarFile.getPath(), symLink.toString()))); } finally { FileUtils.deleteDirectory(new File("tmp")); - tos.close(); } + } + @Test(expected = IOException.class) + public void testCreateArbitrarySymlinkUsingJava() throws IOException { + final File simpleTar = new File(del, FILE); + OutputStream os = new FileOutputStream(simpleTar); + + File rootDir = new File("tmp"); + try (TarArchiveOutputStream tos = new TarArchiveOutputStream(os)) { + tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); + + // Create arbitrary dir + File arbitraryDir = new File(rootDir, "arbitrary-dir/"); + Verify.mkdirs(arbitraryDir); + + // We will tar from the tar-root lineage + File tarRoot = new File(rootDir, "tar-root/"); + File symlinkRoot = new File(tarRoot, "dir1/"); + Verify.mkdirs(symlinkRoot); + + // Create Symbolic Link to an arbitrary dir + java.nio.file.Path symLink = Paths.get(symlinkRoot.getPath(), "sl"); + Files.createSymbolicLink(symLink, arbitraryDir.toPath().toAbsolutePath()); + + // Put entries in tar file + putEntriesInTar(tos, tarRoot); + putEntriesInTar(tos, new File(symLink.toFile(), "dir-outside-tar-root/")); + tos.close(); + + // Untar using Java + File untarFile = new File(rootDir, "extracted"); + FileUtil.unTarUsingJava(simpleTar, 
untarFile, false); + } finally { + FileUtils.deleteDirectory(rootDir); + } } private void putEntriesInTar(TarArchiveOutputStream tos, File f) @@ -1417,7 +1582,7 @@ public void testReadSymlinkWithAFileAsInput() throws IOException { String result = FileUtil.readLink(file); Assert.assertEquals("", result); - file.delete(); + Verify.delete(file); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java index f0057a6c6d902..c6d2ff056a746 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java @@ -137,6 +137,17 @@ public Token[] addDelegationTokens(String renewer, Credentials creds) void setQuota(Path f, long namespaceQuota, long storagespaceQuota); void setQuotaByStorageType(Path f, StorageType type, long quota); StorageStatistics getStorageStatistics(); + + /* + Not passed through as the inner implementation will miss features + of the filter such as checksums. + */ + MultipartUploaderBuilder createMultipartUploader(Path basePath); + + FSDataOutputStream append(Path f, boolean appendToNewBlock) throws IOException; + + FSDataOutputStream append(Path f, int bufferSize, + Progressable progress, boolean appendToNewBlock) throws IOException; } @Test @@ -278,6 +289,23 @@ public void testRenameOptions() throws Exception { verify(mockFs).rename(eq(src), eq(dst), eq(opt)); } + /** + * Verify that filterFS always returns false, even if local/rawlocal + * ever implement multipart uploads. 
+ */ + @Test + public void testFilterPathCapabilites() throws Exception { + try (FilterFileSystem flfs = new FilterLocalFileSystem()) { + flfs.initialize(URI.create("filter:/"), conf); + Path src = new Path("/src"); + assertFalse( + "hasPathCapability(FS_MULTIPART_UPLOADER) should have failed for " + + flfs, + flfs.hasPathCapability(src, + CommonPathCapabilities.FS_MULTIPART_UPLOADER)); + } + } + private void checkInit(FilterFileSystem fs, boolean expectInit) throws Exception { URI uri = URI.create("filter:/"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java index e83e30e41e939..1c9781881b4b7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java @@ -21,10 +21,7 @@ import org.apache.hadoop.fs.shell.Command; import org.apache.hadoop.fs.shell.CommandFactory; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.tracing.SetSpanReceiver; import org.apache.hadoop.util.ToolRunner; -import org.apache.htrace.core.AlwaysSampler; -import org.apache.htrace.core.Tracer; import org.hamcrest.core.StringContains; import org.junit.Assert; import org.junit.Test; @@ -53,10 +50,6 @@ public void testConfWithInvalidFile() throws Throwable { public void testTracing() throws Throwable { Configuration conf = new Configuration(); String prefix = "fs.shell.htrace."; - conf.set(prefix + Tracer.SPAN_RECEIVER_CLASSES_KEY, - SetSpanReceiver.class.getName()); - conf.set(prefix + Tracer.SAMPLER_CLASSES_KEY, - AlwaysSampler.class.getName()); conf.setQuietMode(false); FsShell shell = new FsShell(conf); int res; @@ -65,10 +58,6 @@ public void testTracing() throws Throwable { } finally { shell.close(); } - SetSpanReceiver.assertSpanNamesFound(new String[]{"help"}); - Assert.assertEquals("-help ls cat", 
- SetSpanReceiver.getMap() - .get("help").get(0).getKVAnnotations().get("args")); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java index 72ae296c957b5..91cc9bc56d6af 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java @@ -23,6 +23,7 @@ import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -683,4 +684,27 @@ public void testPutSrcFileNoPerm() lfs.setPermission(src, new FsPermission((short)0755)); } } + + @Test + public void testLazyPersistDirectOverwrite() throws Exception { + Path testRoot = new Path(testRootDir, "testLazyPersistDirectOverwrite"); + try { + lfs.delete(testRoot, true); + lfs.mkdirs(testRoot); + Path filePath = new Path(testRoot, new Path("srcFile")); + lfs.create(filePath).close(); + // Put with overwrite in direct mode. + String[] argv = + new String[] {"-put", "-f", "-l", "-d", filePath.toString(), + filePath.toString()}; + assertEquals(0, shell.run(argv)); + + // Put without overwrite in direct mode shouldn't be success. 
+ argv = new String[] {"-put", "-l", "-d", filePath.toString(), + filePath.toString()}; + assertNotEquals(0, shell.run(argv)); + } finally { + lfs.delete(testRoot, true); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java index 2e7cb5d6342b3..62e7990674d3b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java @@ -101,7 +101,7 @@ public void testTouch() throws Exception { { assertThat( "Expected successful touch on a non-existent file with -c option", - shellRun("-touch", "-c", newFileName), is(not(0))); + shellRun("-touch", "-c", newFileName), is(0)); assertThat(lfs.exists(newFile), is(false)); } @@ -140,7 +140,7 @@ public void testTouch() throws Exception { Date dateObj = parseTimestamp(strTime); assertThat( - "Expected successful touch with a specified modificatiom time", + "Expected successful touch with a specified modification time", shellRun("-touch", "-m", "-t", strTime, newFileName), is(0)); // Verify if modification time is recorded correctly (and access time // remains unchanged). @@ -179,6 +179,16 @@ public void testTouch() throws Exception { assertThat("Expected failed touch with a missing timestamp", shellRun("-touch", "-t", newFileName), is(not(0))); } + + // Verify -c option when file exists. 
+ String strTime = formatTimestamp(System.currentTimeMillis()); + Date dateObj = parseTimestamp(strTime); + assertThat( + "Expected successful touch on a non-existent file with -c option", + shellRun("-touch", "-c", "-t", strTime, newFileName), is(0)); + FileStatus fileStatus = lfs.getFileStatus(newFile); + assertThat(fileStatus.getAccessTime(), is(dateObj.getTime())); + assertThat(fileStatus.getModificationTime(), is(dateObj.getTime())); } private String formatTimestamp(long timeInMillis) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java index 2097633839112..b227e16908828 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java @@ -41,7 +41,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.CompletableFuture; import static org.apache.hadoop.fs.Options.ChecksumOpt; @@ -248,6 +247,14 @@ CompletableFuture openFileWithOptions( CompletableFuture openFileWithOptions( Path path, OpenFileParameters parameters) throws IOException; + + MultipartUploaderBuilder createMultipartUploader(Path basePath) + throws IOException; + + FSDataOutputStream append(Path f, boolean appendToNewBlock) throws IOException; + + FSDataOutputStream append(Path f, int bufferSize, + Progressable progress, boolean appendToNewBlock) throws IOException; } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java index c58e731b82b21..6415df6310fc2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java @@ -33,7 +33,10 @@ import java.util.HashSet; import java.util.Set; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + /** * This test class checks basic operations with {@link HarFileSystem} including diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java index acda898ea1342..3693b4f0acde3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java @@ -26,6 +26,7 @@ import java.util.NoSuchElementException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.Shell; @@ -532,4 +533,39 @@ public void testGetLocalPathForWriteForInvalidPaths() throws Exception { } } + /** + * Test to verify LocalDirAllocator log details to provide diagnostics when file creation fails. + * + * @throws Exception + */ + @Test(timeout = 30000) + public void testGetLocalPathForWriteForLessSpace() throws Exception { + String dir0 = buildBufferDir(ROOT, 0); + String dir1 = buildBufferDir(ROOT, 1); + conf.set(CONTEXT, dir0 + "," + dir1); + LambdaTestUtils.intercept(DiskErrorException.class, + String.format("Could not find any valid local directory for %s with requested size %s", + "p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.", + () -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf)); + } + + /** + * Test for HADOOP-18636 LocalDirAllocator cannot recover from directory tree deletion. 
+ */ + @Test(timeout = 30000) + public void testDirectoryRecovery() throws Throwable { + String dir0 = buildBufferDir(ROOT, 0); + String subdir = dir0 + "/subdir1/subdir2"; + + conf.set(CONTEXT, subdir); + // get local path and an ancestor + final Path pathForWrite = dirAllocator.getLocalPathForWrite("file", -1, conf); + final Path ancestor = pathForWrite.getParent().getParent(); + + // delete that ancestor + localFs.delete(ancestor, true); + // and expect to get a new file back + dirAllocator.getLocalPathForWrite("file2", -1, conf); + } } + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java new file mode 100644 index 0000000000000..15466af7c16fb --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import java.io.File; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.localfs.LocalFSContract; + +public class TestLocalFSCopyFromLocal extends AbstractContractCopyFromLocalTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } + + @Test + public void testDestinationFileIsToParentDirectory() throws Throwable { + describe("Source is a file and destination is its own parent directory. " + + "Copying will cause the source file to be deleted."); + + File file = createTempFile("local"); + Path dest = new Path(file.getParentFile().toURI()); + Path src = new Path(file.toURI()); + + getFileSystem().copyFromLocalFile(true, true, src, dest); + assertPathDoesNotExist("Source found", src); + } + + @Test + public void testDestinationDirectoryToSelf() throws Throwable { + describe("Source is a directory and it is copied into itself with " + + "delSrc flag set, destination must not exist"); + + File source = createTempDirectory("srcDir"); + Path dest = new Path(source.toURI()); + getFileSystem().copyFromLocalFile( true, true, dest, dest); + + assertPathDoesNotExist("Source found", dest); + } + + @Test + public void testSourceIntoDestinationSubDirectoryWithDelSrc() throws Throwable { + describe("Copying a parent folder inside a child folder with" + + " delSrc=TRUE"); + File parent = createTempDirectory("parent"); + File child = createTempDirectory(parent, "child"); + + Path src = new Path(parent.toURI()); + Path dest = new Path(child.toURI()); + getFileSystem().copyFromLocalFile(true, true, src, dest); + + assertPathDoesNotExist("Source found", src); + assertPathDoesNotExist("Destination found", dest); + } + + @Test + public void testSourceIntoDestinationSubDirectory() throws 
Throwable { + describe("Copying a parent folder inside a child folder with" + + " delSrc=FALSE"); + File parent = createTempDirectory("parent"); + File child = createTempDirectory(parent, "child"); + + Path src = new Path(parent.toURI()); + Path dest = new Path(child.toURI()); + getFileSystem().copyFromLocalFile(false, true, src, dest); + + Path recursiveParent = new Path(dest, parent.getName()); + Path recursiveChild = new Path(recursiveParent, child.getName()); + + // This definitely counts as interesting behaviour which needs documented + // Depending on the underlying system this can recurse 15+ times + recursiveParent = new Path(recursiveChild, parent.getName()); + recursiveChild = new Path(recursiveParent, child.getName()); + assertPathExists("Recursive parent not found", recursiveParent); + assertPathExists("Recursive child not found", recursiveChild); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java index 517f6ce016544..e7b72a93f3378 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.fs; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.permission.FsPermission; @@ -312,7 +312,7 @@ public void testHasFileDescriptor() throws IOException { .new LocalFSFileInputStream(path), 1024); assertNotNull(bis.getFileDescriptor()); } finally { - IOUtils.cleanup(null, bis); + IOUtils.cleanupWithLogger(null, bis); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java index c0d79c145f781..e775d1d069dcb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java @@ -34,7 +34,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows; @@ -528,4 +528,11 @@ public void testSerDeser() throws Throwable { } } + + @Test(timeout = 30000) + public void testSuffixFromRoot() { + Path root = new Path("/"); + Assert.assertNull(root.getParent()); + Assert.assertEquals(new Path("/bar"), root.suffix("bar")); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java index e8e028732b2a8..72287782baac6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java @@ -123,9 +123,10 @@ public static void trashShell(final FileSystem fs, final Path base) /** * Test trash for the shell's delete command for the default file system - * specified in the paramter conf - * @param conf + * specified in the parameter conf + * @param conf - configuration object for the filesystem * @param base - the base path where files are created + * @param trashRootFs - the filesystem object to test trash * @param trashRoot - the expected place where the trashbin resides * @throws IOException */ @@ -793,7 +794,7 @@ public void tearDown() throws IOException { } } - static class TestLFS extends LocalFileSystem { + public 
static class TestLFS extends LocalFileSystem { private URI uriName = null; Path home; TestLFS() { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java new file mode 100644 index 0000000000000..e964d23f4b750 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java @@ -0,0 +1,487 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.IntFunction; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; + +import org.apache.hadoop.fs.impl.CombinedFileRange; +import org.apache.hadoop.test.HadoopTestBase; + +import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges; +import static org.apache.hadoop.fs.VectoredReadUtils.validateNonOverlappingAndReturnSortedRanges; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.MoreAsserts.assertFutureCompletedSuccessfully; +import static org.apache.hadoop.test.MoreAsserts.assertFutureFailedExceptionally; + +/** + * Test behavior of {@link VectoredReadUtils}. + */ +public class TestVectoredReadUtils extends HadoopTestBase { + + @Test + public void testSliceTo() { + final int size = 64 * 1024; + ByteBuffer buffer = ByteBuffer.allocate(size); + // fill the buffer with data + IntBuffer intBuffer = buffer.asIntBuffer(); + for(int i=0; i < size / Integer.BYTES; ++i) { + intBuffer.put(i); + } + // ensure we don't make unnecessary slices + ByteBuffer slice = VectoredReadUtils.sliceTo(buffer, 100, + FileRange.createFileRange(100, size)); + Assertions.assertThat(buffer) + .describedAs("Slicing on the same offset shouldn't " + + "create a new buffer") + .isEqualTo(slice); + Assertions.assertThat(slice.position()) + .describedAs("Slicing should return buffers starting from position 0") + .isEqualTo(0); + + // try slicing a range + final int offset = 100; + final int sliceStart = 1024; + final int sliceLength = 16 * 1024; + slice = VectoredReadUtils.sliceTo(buffer, offset, + FileRange.createFileRange(offset + sliceStart, sliceLength)); + // make sure they aren't the same, but use the same 
backing data + Assertions.assertThat(buffer) + .describedAs("Slicing on new offset should " + + "create a new buffer") + .isNotEqualTo(slice); + Assertions.assertThat(buffer.array()) + .describedAs("Slicing should use the same underlying " + + "data") + .isEqualTo(slice.array()); + Assertions.assertThat(slice.position()) + .describedAs("Slicing should return buffers starting from position 0") + .isEqualTo(0); + // test the contents of the slice + intBuffer = slice.asIntBuffer(); + for(int i=0; i < sliceLength / Integer.BYTES; ++i) { + assertEquals("i = " + i, i + sliceStart / Integer.BYTES, intBuffer.get()); + } + } + + @Test + public void testRounding() { + for(int i=5; i < 10; ++i) { + assertEquals("i = "+ i, 5, VectoredReadUtils.roundDown(i, 5)); + assertEquals("i = "+ i, 10, VectoredReadUtils.roundUp(i+1, 5)); + } + assertEquals("Error while roundDown", 13, VectoredReadUtils.roundDown(13, 1)); + assertEquals("Error while roundUp", 13, VectoredReadUtils.roundUp(13, 1)); + } + + @Test + public void testMerge() { + // a reference to use for tracking + Object tracker1 = "one"; + Object tracker2 = "two"; + FileRange base = FileRange.createFileRange(2000, 1000, tracker1); + CombinedFileRange mergeBase = new CombinedFileRange(2000, 3000, base); + + // test when the gap between is too big + assertFalse("Large gap ranges shouldn't get merged", mergeBase.merge(5000, 6000, + FileRange.createFileRange(5000, 1000), 2000, 4000)); + assertEquals("Number of ranges in merged range shouldn't increase", + 1, mergeBase.getUnderlying().size()); + assertFileRange(mergeBase, 2000, 1000); + + // test when the total size gets exceeded + assertFalse("Large size ranges shouldn't get merged", mergeBase.merge(5000, 6000, + FileRange.createFileRange(5000, 1000), 2001, 3999)); + assertEquals("Number of ranges in merged range shouldn't increase", + 1, mergeBase.getUnderlying().size()); + assertFileRange(mergeBase, 2000, 1000); + + // test when the merge works + assertTrue("ranges should get 
merged ", mergeBase.merge(5000, 6000, + FileRange.createFileRange(5000, 1000, tracker2), + 2001, 4000)); + assertEquals("post merge size", 2, mergeBase.getUnderlying().size()); + assertFileRange(mergeBase, 2000, 4000); + + Assertions.assertThat(mergeBase.getUnderlying().get(0).getReference()) + .describedAs("reference of range %s", mergeBase.getUnderlying().get(0)) + .isSameAs(tracker1); + Assertions.assertThat(mergeBase.getUnderlying().get(1).getReference()) + .describedAs("reference of range %s", mergeBase.getUnderlying().get(1)) + .isSameAs(tracker2); + + // reset the mergeBase and test with a 10:1 reduction + mergeBase = new CombinedFileRange(200, 300, base); + assertFileRange(mergeBase, 200, 100); + + assertTrue("ranges should get merged ", mergeBase.merge(500, 600, + FileRange.createFileRange(5000, 1000), 201, 400)); + assertEquals("post merge size", 2, mergeBase.getUnderlying().size()); + assertFileRange(mergeBase, 200, 400); + } + + @Test + public void testSortAndMerge() { + List input = Arrays.asList( + FileRange.createFileRange(3000, 100, "1"), + FileRange.createFileRange(2100, 100, null), + FileRange.createFileRange(1000, 100, "3") + ); + assertFalse("Ranges are non disjoint", VectoredReadUtils.isOrderedDisjoint(input, 100, 800)); + final List outputList = VectoredReadUtils.mergeSortedRanges( + Arrays.asList(sortRanges(input)), 100, 1001, 2500); + Assertions.assertThat(outputList) + .describedAs("merged range size") + .hasSize(1); + CombinedFileRange output = outputList.get(0); + Assertions.assertThat(output.getUnderlying()) + .describedAs("merged range underlying size") + .hasSize(3); + // range[1000,3100) + assertFileRange(output, 1000, 2100); + assertTrue("merged output ranges are disjoint", + VectoredReadUtils.isOrderedDisjoint(outputList, 100, 800)); + + // the minSeek doesn't allow the first two to merge + assertFalse("Ranges are non disjoint", + VectoredReadUtils.isOrderedDisjoint(input, 100, 1000)); + final List list2 = 
VectoredReadUtils.mergeSortedRanges( + Arrays.asList(sortRanges(input)), + 100, 1000, 2100); + Assertions.assertThat(list2) + .describedAs("merged range size") + .hasSize(2); + assertFileRange(list2.get(0), 1000, 100); + + // range[2100,3100) + assertFileRange(list2.get(1), 2100, 1000); + + assertTrue("merged output ranges are disjoint", + VectoredReadUtils.isOrderedDisjoint(list2, 100, 1000)); + + // the maxSize doesn't allow the third range to merge + assertFalse("Ranges are non disjoint", + VectoredReadUtils.isOrderedDisjoint(input, 100, 800)); + final List list3 = VectoredReadUtils.mergeSortedRanges( + Arrays.asList(sortRanges(input)), + 100, 1001, 2099); + Assertions.assertThat(list3) + .describedAs("merged range size") + .hasSize(2); + // range[1000,2200) + CombinedFileRange range0 = list3.get(0); + assertFileRange(range0, 1000, 1200); + assertFileRange(range0.getUnderlying().get(0), + 1000, 100, "3"); + assertFileRange(range0.getUnderlying().get(1), + 2100, 100, null); + CombinedFileRange range1 = list3.get(1); + // range[3000,3100) + assertFileRange(range1, 3000, 100); + assertFileRange(range1.getUnderlying().get(0), + 3000, 100, "1"); + + assertTrue("merged output ranges are disjoint", + VectoredReadUtils.isOrderedDisjoint(list3, 100, 800)); + + // test the round up and round down (the maxSize doesn't allow any merges) + assertFalse("Ranges are non disjoint", + VectoredReadUtils.isOrderedDisjoint(input, 16, 700)); + final List list4 = VectoredReadUtils.mergeSortedRanges( + Arrays.asList(sortRanges(input)), + 16, 1001, 100); + Assertions.assertThat(list4) + .describedAs("merged range size") + .hasSize(3); + // range[992,1104) + assertFileRange(list4.get(0), 992, 112); + // range[2096,2208) + assertFileRange(list4.get(1), 2096, 112); + // range[2992,3104) + assertFileRange(list4.get(2), 2992, 112); + assertTrue("merged output ranges are disjoint", + VectoredReadUtils.isOrderedDisjoint(list4, 16, 700)); + } + + /** + * Assert that a file range satisfies the 
conditions. + * @param range range to validate + * @param offset offset of range + * @param length range length + */ + private void assertFileRange(FileRange range, long offset, int length) { + Assertions.assertThat(range) + .describedAs("file range %s", range) + .isNotNull(); + Assertions.assertThat(range.getOffset()) + .describedAs("offset of %s", range) + .isEqualTo(offset); + Assertions.assertThat(range.getLength()) + .describedAs("length of %s", range) + .isEqualTo(length); + } + + /** + * Assert that a file range satisfies the conditions. + * @param range range to validate + * @param offset offset of range + * @param length range length + * @param reference reference; may be null. + */ + private void assertFileRange(FileRange range, long offset, int length, Object reference) { + assertFileRange(range, offset, length); + Assertions.assertThat(range.getReference()) + .describedAs("reference field of file range %s", range) + .isEqualTo(reference); + } + + + @Test + public void testSortAndMergeMoreCases() throws Exception { + List input = Arrays.asList( + FileRange.createFileRange(3000, 110), + FileRange.createFileRange(3000, 100), + FileRange.createFileRange(2100, 100), + FileRange.createFileRange(1000, 100) + ); + assertFalse("Ranges are non disjoint", + VectoredReadUtils.isOrderedDisjoint(input, 100, 800)); + List outputList = VectoredReadUtils.mergeSortedRanges( + Arrays.asList(sortRanges(input)), 1, 1001, 2500); + Assertions.assertThat(outputList) + .describedAs("merged range size") + .hasSize(1); + CombinedFileRange output = outputList.get(0); + Assertions.assertThat(output.getUnderlying()) + .describedAs("merged range underlying size") + .hasSize(4); + + assertFileRange(output, 1000, 2110); + + assertTrue("merged output ranges are disjoint", + VectoredReadUtils.isOrderedDisjoint(outputList, 1, 800)); + + outputList = VectoredReadUtils.mergeSortedRanges( + Arrays.asList(sortRanges(input)), 100, 1001, 2500); + Assertions.assertThat(outputList) + 
.describedAs("merged range size") + .hasSize(1); + output = outputList.get(0); + Assertions.assertThat(output.getUnderlying()) + .describedAs("merged range underlying size") + .hasSize(4); + assertFileRange(output, 1000, 2200); + + assertTrue("merged output ranges are disjoint", + VectoredReadUtils.isOrderedDisjoint(outputList, 1, 800)); + + } + + @Test + public void testValidateOverlappingRanges() throws Exception { + List input = Arrays.asList( + FileRange.createFileRange(100, 100), + FileRange.createFileRange(200, 100), + FileRange.createFileRange(250, 100) + ); + + intercept(UnsupportedOperationException.class, + () -> validateNonOverlappingAndReturnSortedRanges(input)); + + List input1 = Arrays.asList( + FileRange.createFileRange(100, 100), + FileRange.createFileRange(500, 100), + FileRange.createFileRange(1000, 100), + FileRange.createFileRange(1000, 100) + ); + + intercept(UnsupportedOperationException.class, + () -> validateNonOverlappingAndReturnSortedRanges(input1)); + + List input2 = Arrays.asList( + FileRange.createFileRange(100, 100), + FileRange.createFileRange(200, 100), + FileRange.createFileRange(300, 100) + ); + // consecutive ranges should pass. 
+ validateNonOverlappingAndReturnSortedRanges(input2); + } + + @Test + public void testMaxSizeZeroDisablesMering() throws Exception { + List randomRanges = Arrays.asList( + FileRange.createFileRange(3000, 110), + FileRange.createFileRange(3000, 100), + FileRange.createFileRange(2100, 100) + ); + assertEqualRangeCountsAfterMerging(randomRanges, 1, 1, 0); + assertEqualRangeCountsAfterMerging(randomRanges, 1, 0, 0); + assertEqualRangeCountsAfterMerging(randomRanges, 1, 100, 0); + } + + private void assertEqualRangeCountsAfterMerging(List inputRanges, + int chunkSize, + int minimumSeek, + int maxSize) { + List combinedFileRanges = VectoredReadUtils + .mergeSortedRanges(inputRanges, chunkSize, minimumSeek, maxSize); + Assertions.assertThat(combinedFileRanges) + .describedAs("Mismatch in number of ranges post merging") + .hasSize(inputRanges.size()); + } + + interface Stream extends PositionedReadable, ByteBufferPositionedReadable { + // nothing + } + + static void fillBuffer(ByteBuffer buffer) { + byte b = 0; + while (buffer.remaining() > 0) { + buffer.put(b++); + } + } + + @Test + public void testReadRangeFromByteBufferPositionedReadable() throws Exception { + Stream stream = Mockito.mock(Stream.class); + Mockito.doAnswer(invocation -> { + fillBuffer(invocation.getArgument(1)); + return null; + }).when(stream).readFully(ArgumentMatchers.anyLong(), + ArgumentMatchers.any(ByteBuffer.class)); + CompletableFuture result = + VectoredReadUtils.readRangeFrom(stream, FileRange.createFileRange(1000, 100), + ByteBuffer::allocate); + assertFutureCompletedSuccessfully(result); + ByteBuffer buffer = result.get(); + assertEquals("Size of result buffer", 100, buffer.remaining()); + byte b = 0; + while (buffer.remaining() > 0) { + assertEquals("remain = " + buffer.remaining(), b++, buffer.get()); + } + + // test an IOException + Mockito.reset(stream); + Mockito.doThrow(new IOException("foo")) + .when(stream).readFully(ArgumentMatchers.anyLong(), + 
ArgumentMatchers.any(ByteBuffer.class)); + result = + VectoredReadUtils.readRangeFrom(stream, FileRange.createFileRange(1000, 100), + ByteBuffer::allocate); + assertFutureFailedExceptionally(result); + } + + static void runReadRangeFromPositionedReadable(IntFunction allocate) + throws Exception { + PositionedReadable stream = Mockito.mock(PositionedReadable.class); + Mockito.doAnswer(invocation -> { + byte b=0; + byte[] buffer = invocation.getArgument(1); + for(int i=0; i < buffer.length; ++i) { + buffer[i] = b++; + } + return null; + }).when(stream).readFully(ArgumentMatchers.anyLong(), + ArgumentMatchers.any(), ArgumentMatchers.anyInt(), + ArgumentMatchers.anyInt()); + CompletableFuture result = + VectoredReadUtils.readRangeFrom(stream, FileRange.createFileRange(1000, 100), + allocate); + assertFutureCompletedSuccessfully(result); + ByteBuffer buffer = result.get(); + assertEquals("Size of result buffer", 100, buffer.remaining()); + byte b = 0; + while (buffer.remaining() > 0) { + assertEquals("remain = " + buffer.remaining(), b++, buffer.get()); + } + + // test an IOException + Mockito.reset(stream); + Mockito.doThrow(new IOException("foo")) + .when(stream).readFully(ArgumentMatchers.anyLong(), + ArgumentMatchers.any(), ArgumentMatchers.anyInt(), + ArgumentMatchers.anyInt()); + result = + VectoredReadUtils.readRangeFrom(stream, FileRange.createFileRange(1000, 100), + ByteBuffer::allocate); + assertFutureFailedExceptionally(result); + } + + @Test + public void testReadRangeArray() throws Exception { + runReadRangeFromPositionedReadable(ByteBuffer::allocate); + } + + @Test + public void testReadRangeDirect() throws Exception { + runReadRangeFromPositionedReadable(ByteBuffer::allocateDirect); + } + + static void validateBuffer(String message, ByteBuffer buffer, int start) { + byte expected = (byte) start; + while (buffer.remaining() > 0) { + assertEquals(message + " remain: " + buffer.remaining(), expected++, + buffer.get()); + } + } + + @Test + public void 
testReadVectored() throws Exception { + List input = Arrays.asList(FileRange.createFileRange(0, 100), + FileRange.createFileRange(100_000, 100), + FileRange.createFileRange(200_000, 100)); + runAndValidateVectoredRead(input); + } + + @Test + public void testReadVectoredZeroBytes() throws Exception { + List input = Arrays.asList(FileRange.createFileRange(0, 0), + FileRange.createFileRange(100_000, 100), + FileRange.createFileRange(200_000, 0)); + runAndValidateVectoredRead(input); + } + + + private void runAndValidateVectoredRead(List input) + throws Exception { + Stream stream = Mockito.mock(Stream.class); + Mockito.doAnswer(invocation -> { + fillBuffer(invocation.getArgument(1)); + return null; + }).when(stream).readFully(ArgumentMatchers.anyLong(), + ArgumentMatchers.any(ByteBuffer.class)); + // should not merge the ranges + VectoredReadUtils.readVectored(stream, input, ByteBuffer::allocate); + Mockito.verify(stream, Mockito.times(3)) + .readFully(ArgumentMatchers.anyLong(), ArgumentMatchers.any(ByteBuffer.class)); + for (int b = 0; b < input.size(); ++b) { + validateBuffer("buffer " + b, input.get(b).getData().get(), 0); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/audit/TestCommonAuditContext.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/audit/TestCommonAuditContext.java new file mode 100644 index 0000000000000..9782eb276d306 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/audit/TestCommonAuditContext.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.audit; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import org.assertj.core.api.AbstractStringAssert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_COMMAND; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PROCESS; +import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD1; +import static org.apache.hadoop.fs.audit.CommonAuditContext.PROCESS_ID; +import static org.apache.hadoop.fs.audit.CommonAuditContext.removeGlobalContextEntry; +import static org.apache.hadoop.fs.audit.CommonAuditContext.currentAuditContext; +import static org.apache.hadoop.fs.audit.CommonAuditContext.getGlobalContextEntry; +import static org.apache.hadoop.fs.audit.CommonAuditContext.getGlobalContextEntries; +import static org.apache.hadoop.fs.audit.CommonAuditContext.noteEntryPoint; +import static org.apache.hadoop.fs.audit.CommonAuditContext.setGlobalContextEntry; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests of the common audit context. 
+ */ +public class TestCommonAuditContext extends AbstractHadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestCommonAuditContext.class); + + private final CommonAuditContext context = currentAuditContext(); + /** + * We can set, get and enumerate global context values. + */ + @Test + public void testGlobalSetGetEnum() throws Throwable { + + String s = "command"; + setGlobalContextEntry(PARAM_COMMAND, s); + assertGlobalEntry(PARAM_COMMAND) + .isEqualTo(s); + // and the iterators. + List> list = StreamSupport + .stream(getGlobalContextEntries().spliterator(), + false) + .filter(e -> e.getKey().equals(PARAM_COMMAND)) + .collect(Collectors.toList()); + assertThat(list) + .hasSize(1) + .allMatch(e -> e.getValue().equals(s)); + } + + @Test + public void testVerifyProcessID() throws Throwable { + assertThat( + getGlobalContextEntry(PARAM_PROCESS)) + .describedAs("global context value of %s", PARAM_PROCESS) + .isEqualTo(PROCESS_ID); + } + + + @Test + public void testNullValue() throws Throwable { + assertThat(context.get(PARAM_PROCESS)) + .describedAs("Value of context element %s", PARAM_PROCESS) + .isNull(); + } + + @Test + public void testThreadId() throws Throwable { + String t1 = getContextValue(PARAM_THREAD1); + Long tid = Long.valueOf(t1); + assertThat(tid).describedAs("thread ID") + .isEqualTo(Thread.currentThread().getId()); + } + + /** + * Verify functions are dynamically evaluated. + */ + @Test + public void testDynamicEval() throws Throwable { + context.reset(); + final AtomicBoolean ab = new AtomicBoolean(false); + context.put("key", () -> + Boolean.toString(ab.get())); + assertContextValue("key") + .isEqualTo("false"); + // update the reference and the next get call will + // pick up the new value. 
+ ab.set(true); + assertContextValue("key") + .isEqualTo("true"); + } + + private String getContextValue(final String key) { + String val = context.get(key); + assertThat(val).isNotBlank(); + return val; + } + + /** + * Start an assertion on a context value. + * @param key key to look up + * @return an assert which can be extended call + */ + private AbstractStringAssert assertContextValue(final String key) { + String val = context.get(key); + return assertThat(val) + .describedAs("Value of context element %s", key) + .isNotBlank(); + } + /** + * Assert a context value is null. + * @param key key to look up + */ + private void assertContextValueIsNull(final String key) { + assertThat(context.get(key)) + .describedAs("Value of context element %s", key) + .isNull(); + } + + @Test + public void testNoteEntryPoint() throws Throwable { + setAndAssertEntryPoint(this).isEqualTo("TestCommonAuditContext"); + + } + + @Test + public void testNoteNullEntryPoint() throws Throwable { + setAndAssertEntryPoint(null).isNull(); + } + + private AbstractStringAssert setAndAssertEntryPoint(final Object tool) { + removeGlobalContextEntry(PARAM_COMMAND); + noteEntryPoint(tool); + AbstractStringAssert anAssert = assertGlobalEntry( + PARAM_COMMAND); + return anAssert; + } + + private AbstractStringAssert assertGlobalEntry(final String key) { + AbstractStringAssert anAssert = assertThat(getGlobalContextEntry(key)) + .describedAs("Global context value %s", key); + return anAssert; + } + + @Test + public void testAddRemove() throws Throwable { + final String key = "testAddRemove"; + assertContextValueIsNull(key); + context.put(key, key); + assertContextValue(key).isEqualTo(key); + context.remove(key); + assertContextValueIsNull(key); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractContentSummaryTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractContentSummaryTest.java 
new file mode 100644 index 0000000000000..5e5c917395413 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractContentSummaryTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.contract; + +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import java.io.FileNotFoundException; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public abstract class AbstractContractContentSummaryTest extends AbstractFSContractTestBase { + + @Test + public void testGetContentSummary() throws Throwable { + FileSystem fs = getFileSystem(); + + Path parent = path("parent"); + Path nested = path(parent + "/a/b/c"); + Path filePath = path(nested + "/file.txt"); + + fs.mkdirs(parent); + fs.mkdirs(nested); + touch(getFileSystem(), filePath); + + ContentSummary summary = fs.getContentSummary(parent); + + Assertions.assertThat(summary.getDirectoryCount()).as("Summary " + summary).isEqualTo(4); + + Assertions.assertThat(summary.getFileCount()).as("Summary " + summary).isEqualTo(1); + } + + @Test + public void testGetContentSummaryIncorrectPath() throws Throwable { + FileSystem fs = getFileSystem(); + + Path parent = path("parent"); + Path nested = path(parent + "/a"); + + fs.mkdirs(parent); + + intercept(FileNotFoundException.class, () -> fs.getContentSummary(nested)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java new file mode 100644 index 0000000000000..e24eb7181ec9f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; + +import org.junit.Test; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public abstract class AbstractContractCopyFromLocalTest extends + AbstractFSContractTestBase { + + private static final Charset ASCII = StandardCharsets.US_ASCII; + private File file; + + @Override + public void teardown() throws Exception { + super.teardown(); + if (file != null) { + file.delete(); + } + } + + @Test + public void testCopyEmptyFile() throws Throwable { + file = File.createTempFile("test", ".txt"); + Path dest = copyFromLocal(file, true); + assertPathExists("uploaded file not found", dest); + } + + @Test + public void testCopyFile() throws Throwable { + String message = "hello"; + file = createTempFile(message); + Path dest = 
copyFromLocal(file, true); + + assertPathExists("uploaded file not found", dest); + assertTrue("source file deleted", Files.exists(file.toPath())); + + FileSystem fs = getFileSystem(); + FileStatus status = fs.getFileStatus(dest); + assertEquals("File length not equal " + status, + message.getBytes(ASCII).length, status.getLen()); + assertFileTextEquals(dest, message); + } + + @Test + public void testCopyFileNoOverwrite() throws Throwable { + file = createTempFile("hello"); + copyFromLocal(file, true); + intercept(PathExistsException.class, + () -> copyFromLocal(file, false)); + } + + @Test + public void testCopyFileOverwrite() throws Throwable { + file = createTempFile("hello"); + Path dest = copyFromLocal(file, true); + String updated = "updated"; + FileUtils.write(file, updated, ASCII); + copyFromLocal(file, true); + assertFileTextEquals(dest, updated); + } + + @Test + public void testCopyMissingFile() throws Throwable { + describe("Copying a file that's not there must fail."); + file = createTempFile("test"); + file.delete(); + // first upload to create + intercept(FileNotFoundException.class, "", + () -> copyFromLocal(file, true)); + } + + @Test + public void testSourceIsFileAndDelSrcTrue() throws Throwable { + describe("Source is a file delSrc flag is set to true"); + + file = createTempFile("test"); + copyFromLocal(file, false, true); + + assertFalse("Source file not deleted", Files.exists(file.toPath())); + } + + @Test + public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { + describe("Source is a file and destination is a directory. 
" + + "File must be copied inside the directory."); + + file = createTempFile("test"); + Path source = new Path(file.toURI()); + FileSystem fs = getFileSystem(); + File dir = createTempDirectory("test"); + Path destination = fileToPath(dir); + + // Make sure there's nothing already existing at destination + fs.delete(destination, false); + mkdirs(destination); + fs.copyFromLocalFile(source, destination); + + Path expectedFile = path(dir.getName() + "/" + source.getName()); + assertPathExists("File not copied into directory", expectedFile); + } + + @Test + public void testSourceIsFileAndDestinationIsNonExistentDirectory() + throws Throwable { + describe("Source is a file and destination directory does not exist. " + + "Copy operation must still work."); + + file = createTempFile("test"); + Path source = new Path(file.toURI()); + FileSystem fs = getFileSystem(); + + File dir = createTempDirectory("test"); + Path destination = fileToPath(dir); + fs.delete(destination, false); + assertPathDoesNotExist("Destination not deleted", destination); + + fs.copyFromLocalFile(source, destination); + assertPathExists("Destination doesn't exist.", destination); + } + + @Test + public void testSrcIsDirWithFilesAndCopySuccessful() throws Throwable { + describe("Source is a directory with files, copy must copy all" + + " dir contents to destination"); + String firstChild = "childOne"; + String secondChild = "childTwo"; + File parent = createTempDirectory("parent"); + File root = parent.getParentFile(); + File childFile = createTempFile(parent, firstChild, firstChild); + File secondChildFile = createTempFile(parent, secondChild, secondChild); + + copyFromLocal(parent, false); + + assertPathExists("Parent directory not copied", fileToPath(parent)); + assertFileTextEquals(fileToPath(childFile, root), firstChild); + assertFileTextEquals(fileToPath(secondChildFile, root), secondChild); + } + + @Test + public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { + describe("Source 
is an empty directory, copy must succeed"); + File source = createTempDirectory("source"); + Path dest = copyFromLocal(source, false); + + assertPathExists("Empty directory not copied", dest); + } + + @Test + public void testSrcIsDirWithOverwriteOptions() throws Throwable { + describe("Source is a directory, destination exists and " + + "must be overwritten."); + + FileSystem fs = getFileSystem(); + File source = createTempDirectory("source"); + Path sourcePath = new Path(source.toURI()); + String contents = "test file"; + File child = createTempFile(source, "child", contents); + + Path dest = path(source.getName()).getParent(); + fs.copyFromLocalFile(sourcePath, dest); + intercept(PathExistsException.class, + () -> fs.copyFromLocalFile(false, false, + sourcePath, dest)); + + String updated = "updated contents"; + FileUtils.write(child, updated, ASCII); + fs.copyFromLocalFile(sourcePath, dest); + + assertPathExists("Parent directory not copied", fileToPath(source)); + assertFileTextEquals(fileToPath(child, source.getParentFile()), + updated); + } + + @Test + public void testSrcIsDirWithDelSrcOptions() throws Throwable { + describe("Source is a directory containing a file and delSrc flag is set" + + ", this must delete the source after the copy."); + File source = createTempDirectory("source"); + String contents = "child file"; + File child = createTempFile(source, "child", contents); + + copyFromLocal(source, false, true); + Path dest = fileToPath(child, source.getParentFile()); + + assertFalse("Directory not deleted", Files.exists(source.toPath())); + assertFileTextEquals(dest, contents); + } + + /* + * The following path is being created on disk and copied over + * /parent/ (directory) + * /parent/test1.txt + * /parent/child/test.txt + * /parent/secondChild/ (directory) + */ + @Test + public void testCopyTreeDirectoryWithoutDelete() throws Throwable { + File srcDir = createTempDirectory("parent"); + File childDir = createTempDirectory(srcDir, "child"); + File 
secondChild = createTempDirectory(srcDir, "secondChild"); + File parentFile = createTempFile(srcDir, "test1", ".txt"); + File childFile = createTempFile(childDir, "test2", ".txt"); + + copyFromLocal(srcDir, false, false); + File root = srcDir.getParentFile(); + + assertPathExists("Parent directory not found", + fileToPath(srcDir)); + assertPathExists("Child directory not found", + fileToPath(childDir, root)); + assertPathExists("Second child directory not found", + fileToPath(secondChild, root)); + assertPathExists("Parent file not found", + fileToPath(parentFile, root)); + assertPathExists("Child file not found", + fileToPath(childFile, root)); + } + + @Test + public void testCopyDirectoryWithDelete() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + Files.createTempFile(srcDir, "test1", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + getFileSystem().copyFromLocalFile(true, true, src, dst); + + assertFalse("Source directory was not deleted", + Files.exists(srcDir)); + } + + @Test + public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { + describe("Source is a directory and destination is a file must fail"); + + file = createTempFile("local"); + File source = createTempDirectory("srcDir"); + Path destination = copyFromLocal(file, false); + Path sourcePath = new Path(source.toURI()); + + intercept(FileAlreadyExistsException.class, + () -> getFileSystem().copyFromLocalFile(false, true, + sourcePath, destination)); + } + + protected Path fileToPath(File file) throws IOException { + return path(file.getName()); + } + + protected Path fileToPath(File file, File parent) throws IOException { + return path(parent + .toPath() + .relativize(file.toPath()) + .toString()); + } + + protected File createTempDirectory(String name) throws IOException { + return Files.createTempDirectory(name).toFile(); + } + + protected Path copyFromLocal(File srcFile, boolean 
overwrite) throws + IOException { + return copyFromLocal(srcFile, overwrite, false); + } + + protected Path copyFromLocal(File srcFile, boolean overwrite, boolean delSrc) + throws IOException { + Path src = new Path(srcFile.toURI()); + Path dst = path(srcFile.getName()); + getFileSystem().copyFromLocalFile(delSrc, overwrite, src, dst); + return dst; + } + + /** + * Create a temp file with some text. + * @param text text for the file + * @return the file + * @throws IOException on a failure + */ + protected File createTempFile(String text) throws IOException { + File f = File.createTempFile("test", ".txt"); + FileUtils.write(f, text, ASCII); + return f; + } + + protected File createTempFile(File parent, String name, String text) + throws IOException { + File f = File.createTempFile(name, ".txt", parent); + FileUtils.write(f, text, ASCII); + return f; + } + + protected File createTempDirectory(File parent, String name) + throws IOException { + return Files.createTempDirectory(parent.toPath(), name).toFile(); + } + + private void assertFileTextEquals(Path path, String expected) + throws IOException { + assertEquals("Wrong data in " + path, + expected, IOUtils.toString(getFileSystem().open(path), ASCII)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java index 79222ce67d6cf..85e1f849998c3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java @@ -18,23 +18,31 @@ package org.apache.hadoop.fs.contract; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; 
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StreamCapabilities; + import org.junit.Test; import org.junit.AssumptionViolatedException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.FileNotFoundException; import java.io.IOException; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertCapabilities; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.getFileStatusEventually; import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsSourceToString; /** * Test creating files, overwrite options etc. @@ -42,6 +50,9 @@ public abstract class AbstractContractCreateTest extends AbstractFSContractTestBase { + private static final Logger LOG = + LoggerFactory.getLogger(AbstractContractCreateTest.class); + /** * How long to wait for a path to become visible. */ @@ -436,4 +447,145 @@ private void createFile(Path path) throws IOException { writeDataset(fs, path, data, data.length, 1024 * 1024, true); } + + @Test + public void testSyncable() throws Throwable { + describe("test declared and actual Syncable behaviors"); + FileSystem fs = getFileSystem(); + boolean supportsFlush = isSupported(SUPPORTS_HFLUSH); + boolean supportsSync = isSupported(SUPPORTS_HSYNC); + boolean metadataUpdatedOnHSync = isSupported(METADATA_UPDATED_ON_HSYNC); + + validateSyncableSemantics(fs, + supportsSync, + supportsFlush, + metadataUpdatedOnHSync); + } + + /** + * Validate the semantics of syncable. 
+ * @param fs filesystem + * @param supportsSync sync is present + * @param supportsFlush flush is present. + * @param metadataUpdatedOnHSync Is the metadata updated after an hsync? + * @throws IOException failure + */ + protected void validateSyncableSemantics(final FileSystem fs, + final boolean supportsSync, + final boolean supportsFlush, + final boolean metadataUpdatedOnHSync) + throws IOException { + Path path = methodPath(); + LOG.info("Expecting files under {} to have supportsSync={}" + + " and supportsFlush={}; metadataUpdatedOnHSync={}", + path, supportsSync, supportsFlush, metadataUpdatedOnHSync); + + try (FSDataOutputStream out = fs.create(path, true)) { + LOG.info("Created output stream {}", out); + + // probe stream for support for flush/sync, whose capabilities + // of supports/does not support must match what is expected + String[] hflushCapabilities = { + StreamCapabilities.HFLUSH + }; + String[] hsyncCapabilities = { + StreamCapabilities.HSYNC + }; + if (supportsFlush) { + assertCapabilities(out, hflushCapabilities, null); + } else { + assertCapabilities(out, null, hflushCapabilities); + } + if (supportsSync) { + assertCapabilities(out, hsyncCapabilities, null); + } else { + assertCapabilities(out, null, hsyncCapabilities); + } + + // write one byte, then hflush it + out.write('a'); + try { + out.hflush(); + if (!supportsFlush) { + // FSDataOutputStream silently downgrades to flush() here. + // This is not good, but if changed some applications + // break writing to some stores. + LOG.warn("FS doesn't support Syncable.hflush()," + + " but doesn't reject it either."); + } + } catch (UnsupportedOperationException e) { + if (supportsFlush) { + throw new AssertionError("hflush not supported", e); + } + } + + // write a second byte, then hsync it. 
+ out.write('b'); + try { + out.hsync(); + } catch (UnsupportedOperationException e) { + if (supportsSync) { + throw new AssertionError("HSync not supported", e); + } + } + + if (supportsSync) { + // if sync really worked, data MUST be visible here + + // first the metadata which MUST be present + final FileStatus st = fs.getFileStatus(path); + if (metadataUpdatedOnHSync) { + // not all stores reliably update it, HDFS/webHDFS in particular + assertEquals("Metadata not updated during write " + st, + 2, st.getLen()); + } + + // there's no way to verify durability, but we can + // at least verify a new file input stream reads + // the data + try (FSDataInputStream in = fs.open(path)) { + assertEquals('a', in.read()); + assertEquals('b', in.read()); + assertEquals(-1, in.read()); + LOG.info("Successfully read synced data on a new reader {}", in); + } + } else { + // no sync. Let's do a flush and see what happens. + out.flush(); + // Now look at the filesystem. + try (FSDataInputStream in = fs.open(path)) { + int c = in.read(); + if (c == -1) { + // nothing was synced; sync and flush really aren't there. + LOG.info("sync and flush are declared unsupported" + + " -flushed changes were not saved"); + + } else { + LOG.info("sync and flush are declared unsupported" + + " - but the stream does offer some sync/flush semantics"); + } + // close outside a finally as we do want to see any exception raised. 
+ in.close(); + + } catch (FileNotFoundException e) { + // that's OK if it's an object store, but not if its a real + // FS + if (!isSupported(IS_BLOBSTORE)) { + throw e; + } else { + LOG.warn( + "Output file was not created; this is an object store with different" + + " visibility semantics"); + } + } + } + // close the output stream + out.close(); + + final String stats = ioStatisticsSourceToString(out); + if (!stats.isEmpty()) { + LOG.info("IOStatistics {}", stats); + } + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java index 328c8e1377904..08df1d4d883a6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java @@ -49,18 +49,17 @@ public void testDeleteNonexistentPathRecursive() throws Throwable { Path path = path("testDeleteNonexistentPathRecursive"); assertPathDoesNotExist("leftover", path); ContractTestUtils.rejectRootOperation(path); - assertFalse("Returned true attempting to delete" + assertFalse("Returned true attempting to recursively delete" + " a nonexistent path " + path, - getFileSystem().delete(path, false)); + getFileSystem().delete(path, true)); } - @Test public void testDeleteNonexistentPathNonRecursive() throws Throwable { Path path = path("testDeleteNonexistentPathNonRecursive"); assertPathDoesNotExist("leftover", path); ContractTestUtils.rejectRootOperation(path); - assertFalse("Returned true attempting to recursively delete" + assertFalse("Returned true attempting to non recursively delete" + " a nonexistent path " + path, getFileSystem().delete(path, false)); } diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractEtagTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractEtagTest.java new file mode 100644 index 0000000000000..e7a121b704677 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractEtagTest.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract; + +import java.nio.charset.StandardCharsets; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.EtagSource; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; + +import static org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_AVAILABLE; +import static org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME; + +/** + * For filesystems which support etags, validate correctness + * of their implementation. 
+ */ +public abstract class AbstractContractEtagTest extends + AbstractFSContractTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(AbstractContractEtagTest.class); + + /** + * basic consistency across operations, as well as being non-empty. + */ + @Test + public void testEtagConsistencyAcrossListAndHead() throws Throwable { + describe("Etag values must be non-empty and consistent across LIST and HEAD Calls."); + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + + Assertions.assertThat(fs.hasPathCapability(path, ETAGS_AVAILABLE)) + .describedAs("path capability %s of %s", + ETAGS_AVAILABLE, path) + .isTrue(); + + ContractTestUtils.touch(fs, path); + + final FileStatus st = fs.getFileStatus(path); + final String etag = etagFromStatus(st); + LOG.info("etag of empty file is \"{}\"", etag); + + final FileStatus[] statuses = fs.listStatus(path); + Assertions.assertThat(statuses) + .describedAs("List(%s)", path) + .hasSize(1); + final FileStatus lsStatus = statuses[0]; + Assertions.assertThat(etagFromStatus(lsStatus)) + .describedAs("etag of list status (%s) compared to HEAD value of %s", lsStatus, st) + .isEqualTo(etag); + } + + /** + * Get an etag from a FileStatus which MUST BE + * an implementation of EtagSource and + * whose etag MUST NOT BE null/empty. + * @param st the status + * @return the etag + */ + String etagFromStatus(FileStatus st) { + Assertions.assertThat(st) + .describedAs("FileStatus %s", st) + .isInstanceOf(EtagSource.class); + final String etag = ((EtagSource) st).getEtag(); + Assertions.assertThat(etag) + .describedAs("Etag of %s", st) + .isNotBlank(); + return etag; + } + + /** + * Overwritten data has different etags. 
+ */ + @Test + public void testEtagsOfDifferentDataDifferent() throws Throwable { + describe("Verify that two different blocks of data written have different tags"); + + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + Path src = new Path(path, "src"); + + ContractTestUtils.createFile(fs, src, true, + "data1234".getBytes(StandardCharsets.UTF_8)); + final FileStatus srcStatus = fs.getFileStatus(src); + final String srcTag = etagFromStatus(srcStatus); + LOG.info("etag of file 1 is \"{}\"", srcTag); + + // now overwrite with data of same length + // (ensure that path or length aren't used exclusively as tag) + ContractTestUtils.createFile(fs, src, true, + "1234data".getBytes(StandardCharsets.UTF_8)); + + // validate + final String tag2 = etagFromStatus(fs.getFileStatus(src)); + LOG.info("etag of file 2 is \"{}\"", tag2); + + Assertions.assertThat(tag2) + .describedAs("etag of updated file") + .isNotEqualTo(srcTag); + } + + /** + * If supported, rename preserves etags. 
+ */ + @Test + public void testEtagConsistencyAcrossRename() throws Throwable { + describe("Verify that when a file is renamed, the etag remains unchanged"); + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + Assume.assumeTrue( + "Filesystem does not declare that etags are preserved across renames", + fs.hasPathCapability(path, ETAGS_PRESERVED_IN_RENAME)); + Path src = new Path(path, "src"); + Path dest = new Path(path, "dest"); + + ContractTestUtils.createFile(fs, src, true, + "sample data".getBytes(StandardCharsets.UTF_8)); + final FileStatus srcStatus = fs.getFileStatus(src); + LOG.info("source file status string value " + srcStatus); + + final String srcTag = etagFromStatus(srcStatus); + LOG.info("etag of short file is \"{}\"", srcTag); + + Assertions.assertThat(srcTag) + .describedAs("Etag of %s", srcStatus) + .isNotBlank(); + + // rename + fs.rename(src, dest); + + // validate + FileStatus destStatus = fs.getFileStatus(dest); + final String destTag = etagFromStatus(destStatus); + Assertions.assertThat(destTag) + .describedAs("etag of renamed file (%s) compared to source value of %s", + destStatus, srcStatus) + .isEqualTo(srcTag); + } + + /** + * For effective use of etags, listLocatedStatus SHOULD return status entries + * with consistent values. + * This ensures that listing during query planning can collect and use the etags. 
+ */ + @Test + public void testLocatedStatusAlsoHasEtag() throws Throwable { + describe("verify that listLocatedStatus() and listFiles() are etag sources"); + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + Path src = new Path(path, "src"); + ContractTestUtils.createFile(fs, src, true, + "sample data".getBytes(StandardCharsets.UTF_8)); + final FileStatus srcStatus = fs.getFileStatus(src); + final String srcTag = etagFromStatus(srcStatus); + final LocatedFileStatus entry = fs.listLocatedStatus(path).next(); + LOG.info("located file status string value " + entry); + final String listTag = etagFromStatus(entry); + Assertions.assertThat(listTag) + .describedAs("etag of listLocatedStatus (%s) compared to HEAD value of %s", + entry, srcStatus) + .isEqualTo(srcTag); + + final LocatedFileStatus entry2 = fs.listFiles(path, false).next(); + Assertions.assertThat(etagFromStatus(entry2)) + .describedAs("etag of listFiles (%s) compared to HEAD value of %s", + entry2, srcStatus) + .isEqualTo(srcTag); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java index f63314d39292e..c0d9733bbb9a7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java @@ -24,6 +24,8 @@ import java.util.List; import java.util.UUID; +import org.assertj.core.api.Assertions; + import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FilterFileSystem; @@ -148,6 +150,7 @@ public void testListLocatedStatusEmptyDirectory() throws IOException { public void testComplexDirActions() throws Throwable { TreeScanResults tree = createTestTree(); 
checkListStatusStatusComplexDir(tree); + checkListStatusIteratorComplexDir(tree); checkListLocatedStatusStatusComplexDir(tree); checkListFilesComplexDirNonRecursive(tree); checkListFilesComplexDirRecursive(tree); @@ -169,6 +172,34 @@ protected void checkListStatusStatusComplexDir(TreeScanResults tree) listing.assertSizeEquals("listStatus()", TREE_FILES, TREE_WIDTH, 0); } + /** + * Test {@link FileSystem#listStatusIterator(Path)} on a complex + * directory tree. + * @param tree directory tree to list. + * @throws Throwable on any failure + */ + protected void checkListStatusIteratorComplexDir(TreeScanResults tree) + throws Throwable { + describe("Expect listStatusIterator to list all entries in top dir only"); + + FileSystem fs = getFileSystem(); + TreeScanResults listing = new TreeScanResults( + fs.listStatusIterator(tree.getBasePath())); + listing.assertSizeEquals("listStatusIterator()", TREE_FILES, TREE_WIDTH, 0); + + List resWithoutCheckingHasNext = + iteratorToListThroughNextCallsAlone(fs + .listStatusIterator(tree.getBasePath())); + + List resWithCheckingHasNext = iteratorToList(fs + .listStatusIterator(tree.getBasePath())); + Assertions.assertThat(resWithCheckingHasNext) + .describedAs("listStatusIterator() should return correct " + + "results even if hasNext() calls are not made.") + .hasSameElementsAs(resWithoutCheckingHasNext); + + } + /** * Test {@link FileSystem#listLocatedStatus(Path)} on a complex * directory tree. 
@@ -322,6 +353,45 @@ public void testListStatusFile() throws Throwable { verifyStatusArrayMatchesFile(f, getFileSystem().listStatus(f)); } + @Test + public void testListStatusIteratorFile() throws Throwable { + describe("test the listStatusIterator(path) on a file"); + Path f = touchf("listStItrFile"); + + List statusList = (List) iteratorToList( + getFileSystem().listStatusIterator(f)); + validateListingForFile(f, statusList, false); + + List statusList2 = + (List) iteratorToListThroughNextCallsAlone( + getFileSystem().listStatusIterator(f)); + validateListingForFile(f, statusList2, true); + } + + /** + * Validate listing result for an input path which is file. + * @param f file. + * @param statusList list status of a file. + * @param nextCallAlone whether the listing generated just using + * next() calls. + */ + private void validateListingForFile(Path f, + List statusList, + boolean nextCallAlone) { + String msg = String.format("size of file list returned using %s should " + + "be 1", nextCallAlone ? 
+ "next() calls alone" : "hasNext() and next() calls"); + Assertions.assertThat(statusList) + .describedAs(msg) + .hasSize(1); + Assertions.assertThat(statusList.get(0).getPath()) + .describedAs("path returned should match with the input path") + .isEqualTo(f); + Assertions.assertThat(statusList.get(0).isFile()) + .describedAs("path returned should be a file") + .isEqualTo(true); + } + @Test public void testListFilesFile() throws Throwable { describe("test the listStatus(path) on a file"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractLeaseRecoveryTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractLeaseRecoveryTest.java new file mode 100644 index 0000000000000..e99b62ae1e37f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractLeaseRecoveryTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.contract; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LeaseRecoverable; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.test.LambdaTestUtils; + +import static org.apache.hadoop.fs.CommonPathCapabilities.LEASE_RECOVERABLE; + +public abstract class AbstractContractLeaseRecoveryTest extends + AbstractFSContractTestBase { + + @Test + public void testLeaseRecovery() throws Throwable { + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + ContractTestUtils.touch(fs, path); + LeaseRecoverable leaseRecoverableFs = verifyAndGetLeaseRecoverableInstance(fs, path); + + Assertions.assertThat(leaseRecoverableFs.recoverLease(path)) + .describedAs("Issuing lease recovery on a closed file must be successful") + .isTrue(); + + Assertions.assertThat(leaseRecoverableFs.isFileClosed(path)) + .describedAs("Get the isFileClose status on a closed file must be successful") + .isTrue(); + } + + @Test + public void testLeaseRecoveryFileNotExist() throws Throwable { + final Path path = new Path("notExist"); + final FileSystem fs = getFileSystem(); + LeaseRecoverable leaseRecoverableFs = verifyAndGetLeaseRecoverableInstance(fs, path); + + LambdaTestUtils.intercept(FileNotFoundException.class, "File does not exist", + () -> leaseRecoverableFs.recoverLease(path)); + + LambdaTestUtils.intercept(FileNotFoundException.class, "File does not exist", + () -> leaseRecoverableFs.isFileClosed(path)); + } + + @Test + public void testLeaseRecoveryFileOnDirectory() throws Throwable { + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + LeaseRecoverable leaseRecoverableFs = verifyAndGetLeaseRecoverableInstance(fs, path); + final Path parentDirectory = path.getParent(); + + LambdaTestUtils.intercept(FileNotFoundException.class, "Path is not a file", + 
() -> leaseRecoverableFs.recoverLease(parentDirectory)); + + LambdaTestUtils.intercept(FileNotFoundException.class, "Path is not a file", + () -> leaseRecoverableFs.isFileClosed(parentDirectory)); + } + + private LeaseRecoverable verifyAndGetLeaseRecoverableInstance(FileSystem fs, Path path) + throws IOException { + Assertions.assertThat(fs.hasPathCapability(path, LEASE_RECOVERABLE)) + .describedAs("path capability %s of %s", LEASE_RECOVERABLE, path) + .isTrue(); + Assertions.assertThat(fs) + .describedAs("filesystem %s", fs) + .isInstanceOf(LeaseRecoverable.class); + return (LeaseRecoverable) fs; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java index 7a8f0830eda37..c395afdb3779b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java @@ -26,8 +26,10 @@ import java.util.HashMap; import java.util.Map; import java.util.Random; +import java.util.concurrent.CompletableFuture; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.assertj.core.api.Assertions; import org.junit.Assume; import org.junit.Test; import org.slf4j.Logger; @@ -35,22 +37,32 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BBUploadHandle; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.MultipartUploader; -import org.apache.hadoop.fs.MultipartUploaderFactory; import 
org.apache.hadoop.fs.PartHandle; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathHandle; import org.apache.hadoop.fs.UploadHandle; +import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.util.DurationInfo; import static org.apache.hadoop.fs.contract.ContractTestUtils.verifyPathExists; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsSourceToString; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.test.LambdaTestUtils.eventually; import static org.apache.hadoop.test.LambdaTestUtils.intercept; - +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; + +/** + * Tests of multipart uploads. + *

      + * Note: some of the tests get a random uploader between + * the two which are available. If tests fail intermittently, + * it may be because different uploaders are being selected. + */ public abstract class AbstractContractMultipartUploaderTest extends AbstractFSContractTestBase { @@ -63,36 +75,49 @@ public abstract class AbstractContractMultipartUploaderTest extends */ protected static final int SMALL_FILE = 100; - private MultipartUploader mpu; - private MultipartUploader mpu2; + protected static final int CONSISTENCY_INTERVAL = 1000; + + private MultipartUploader uploader0; + private MultipartUploader uploader1; private final Random random = new Random(); private UploadHandle activeUpload; private Path activeUploadPath; - protected String getMethodName() { - return methodName.getMethodName(); - } - @Override public void setup() throws Exception { super.setup(); - Configuration conf = getContract().getConf(); - mpu = MultipartUploaderFactory.get(getFileSystem(), conf); - mpu2 = MultipartUploaderFactory.get(getFileSystem(), conf); + + final FileSystem fs = getFileSystem(); + Path testPath = getContract().getTestPath(); + Assume.assumeTrue("Multipart uploader is not supported", + fs.hasPathCapability(testPath, + CommonPathCapabilities.FS_MULTIPART_UPLOADER)); + uploader0 = fs.createMultipartUploader(testPath).build(); + uploader1 = fs.createMultipartUploader(testPath).build(); } @Override public void teardown() throws Exception { - if (mpu!= null && activeUpload != null) { + MultipartUploader uploader = getUploader(1); + if (uploader != null) { + if (activeUpload != null) { + abortUploadQuietly(activeUpload, activeUploadPath); + } try { - mpu.abort(activeUploadPath, activeUpload); - } catch (FileNotFoundException ignored) { - /* this is fine */ + // round off with an abort of all uploads + Path teardown = getContract().getTestPath(); + LOG.info("Teardown: aborting outstanding uploads under {}", teardown); + CompletableFuture f + = 
uploader.abortUploadsUnderPath(teardown); + f.get(); + LOG.info("Statistics {}", + ioStatisticsSourceToString(uploader)); } catch (Exception e) { - LOG.info("in teardown", e); + LOG.warn("Exception in teardown", e); } } - cleanupWithLogger(LOG, mpu, mpu2); + + cleanupWithLogger(LOG, uploader0, uploader1); super.teardown(); } @@ -192,16 +217,16 @@ protected int timeToBecomeConsistentMillis() { * @param index index of upload * @return an uploader */ - protected MultipartUploader mpu(int index) { - return (index % 2 == 0) ? mpu : mpu2; + protected MultipartUploader getUploader(int index) { + return (index % 2 == 0) ? uploader0 : uploader1; } /** * Pick a multipart uploader at random. * @return an uploader */ - protected MultipartUploader randomMpu() { - return mpu(random.nextInt(10)); + protected MultipartUploader getRandomUploader() { + return getUploader(random.nextInt(10)); } /** @@ -211,39 +236,71 @@ protected MultipartUploader randomMpu() { @Test public void testSingleUpload() throws Exception { Path file = methodPath(); - UploadHandle uploadHandle = initializeUpload(file); + UploadHandle uploadHandle = startUpload(file); Map partHandles = new HashMap<>(); MessageDigest origDigest = DigestUtils.getMd5Digest(); int size = SMALL_FILE; byte[] payload = generatePayload(1, size); origDigest.update(payload); + // use a single uploader + // note: the same is used here as it found a bug in the S3Guard + // DDB bulk operation state upload -the previous operation had + // added an entry to the ongoing state; this second call + // was interpreted as an inconsistent write. 
+ MultipartUploader completer = uploader0; + // and upload with uploader 1 to validate cross-uploader uploads PartHandle partHandle = putPart(file, uploadHandle, 1, payload); partHandles.put(1, partHandle); - PathHandle fd = completeUpload(file, uploadHandle, partHandles, - origDigest, - size); + PathHandle fd = complete(completer, uploadHandle, file, + partHandles); + validateUpload(file, origDigest, size); + + // verify that if the implementation processes data immediately + // then a second attempt at the upload will fail. if (finalizeConsumesUploadIdImmediately()) { intercept(FileNotFoundException.class, - () -> mpu.complete(file, partHandles, uploadHandle)); + () -> complete(completer, uploadHandle, file, partHandles)); } else { - PathHandle fd2 = mpu.complete(file, partHandles, uploadHandle); + // otherwise, the same or other uploader can try again. + PathHandle fd2 = complete(completer, uploadHandle, file, partHandles); assertArrayEquals("Path handles differ", fd.toByteArray(), fd2.toByteArray()); } } /** - * Initialize an upload. + * Complete IO for a specific uploader; await the response. + * @param uploader uploader + * @param uploadHandle Identifier + * @param file Target path for upload + * @param partHandles handles map of part number to part handle + * @return unique PathHandle identifier for the uploaded file. + */ + protected PathHandle complete( + final MultipartUploader uploader, + final UploadHandle uploadHandle, + final Path file, + final Map partHandles) + throws IOException { + try (DurationInfo d = + new DurationInfo(LOG, "Complete upload to %s", file)) { + return awaitFuture( + uploader.complete(uploadHandle, file, partHandles)); + } + } + + /** + * start an upload. 
* This saves the path and upload handle as the active * upload, for aborting in teardown * @param dest destination * @return the handle * @throws IOException failure to initialize */ - protected UploadHandle initializeUpload(final Path dest) throws IOException { + protected UploadHandle startUpload(final Path dest) throws IOException { activeUploadPath = dest; - activeUpload = randomMpu().initialize(dest); + activeUpload = awaitFuture(getRandomUploader().startUpload(dest)); return activeUpload; } @@ -283,12 +340,17 @@ protected PartHandle putPart(final Path file, final int index, final byte[] payload) throws IOException { ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - PartHandle partHandle = mpu(index) - .putPart(file, - new ByteArrayInputStream(payload), - index, - uploadHandle, - payload.length); + PartHandle partHandle; + try (DurationInfo d = + new DurationInfo(LOG, "Put part %d (size %s) %s", + index, + payload.length, + file)) { + partHandle = awaitFuture(getUploader(index) + .putPart(uploadHandle, index, file, + new ByteArrayInputStream(payload), + payload.length)); + } timer.end("Uploaded part %s", index); LOG.info("Upload bandwidth {} MB/s", timer.bandwidthDescription(payload.length)); @@ -296,7 +358,7 @@ protected PartHandle putPart(final Path file, } /** - * Complete an upload with the active MPU instance. + * Complete an upload with a random uploader. * @param file destination * @param uploadHandle handle * @param partHandles map of handles @@ -312,36 +374,64 @@ private PathHandle completeUpload(final Path file, final int expectedLength) throws IOException { PathHandle fd = complete(file, uploadHandle, partHandles); - FileStatus status = verifyPathExists(getFileSystem(), - "Completed file", file); - assertEquals("length of " + status, - expectedLength, status.getLen()); + validateUpload(file, origDigest, expectedLength); + return fd; + } + + /** + * Validate a completed upload: the path exists, has the expected length and, when a digest is supplied, matching contents. 
+ * @param file destination + * @param origDigest digest of source data (may be null) + * @param expectedLength expected length of result. + * @throws IOException IO failure + */ + private void validateUpload(final Path file, + final MessageDigest origDigest, + final int expectedLength) throws IOException { + verifyPathExists(getFileSystem(), + "Completed file", file); + verifyFileLength(file, expectedLength); if (origDigest != null) { verifyContents(file, origDigest, expectedLength); } - return fd; } /** * Verify the contents of a file. * @param file path * @param origDigest digest - * @param expectedLength expected length (for logging B/W) + * @param expectedLength expected length (for logging download bandwidth) * @throws IOException IO failure */ protected void verifyContents(final Path file, final MessageDigest origDigest, final int expectedLength) throws IOException { ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer(); - assertArrayEquals("digest of source and " + file - + " differ", - origDigest.digest(), digest(file)); + Assertions.assertThat(digest(file)) + .describedAs("digest of uploaded file %s", file) + .isEqualTo(origDigest.digest()); timer2.end("Completed digest", file); LOG.info("Download bandwidth {} MB/s", timer2.bandwidthDescription(expectedLength)); } + /** + * Verify the length of a file. + * @param file path + * @param expectedLength expected length + * @throws IOException IO failure + */ + private void verifyFileLength(final Path file, final long expectedLength) + throws IOException { + FileStatus st = getFileSystem().getFileStatus(file); + Assertions.assertThat(st) + .describedAs("Uploaded file %s", st) + .matches(FileStatus::isFile) + .extracting(FileStatus::getLen) + .isEqualTo(expectedLength); + } + /** * Perform the inner complete without verification. 
* @param file destination path @@ -353,21 +443,37 @@ protected void verifyContents(final Path file, private PathHandle complete(final Path file, final UploadHandle uploadHandle, final Map partHandles) throws IOException { - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - PathHandle fd = randomMpu().complete(file, partHandles, uploadHandle); - timer.end("Completed upload to %s", file); - return fd; + return complete(getRandomUploader(), uploadHandle, file, + partHandles); } /** * Abort an upload. - * @param file path * @param uploadHandle handle + * @param file path * @throws IOException failure */ - private void abortUpload(final Path file, UploadHandle uploadHandle) + private void abortUpload(UploadHandle uploadHandle, + final Path file) throws IOException { - randomMpu().abort(file, uploadHandle); + try (DurationInfo d = + new DurationInfo(LOG, "Abort upload to %s", file)) { + awaitFuture(getRandomUploader().abort(uploadHandle, file)); + } + } + + /** + * Abort an upload; swallows exceptions. 
+ * @param uploadHandle handle + * @param file path + */ + private void abortUploadQuietly(UploadHandle uploadHandle, Path file) { + try { + abortUpload(uploadHandle, file); + } catch (FileNotFoundException ignored) { + } catch (Exception e) { + LOG.info("aborting {}: {}", file, e.toString()); + } } /** @@ -377,10 +483,10 @@ private void abortUpload(final Path file, UploadHandle uploadHandle) @Test public void testMultipartUpload() throws Exception { Path file = methodPath(); - UploadHandle uploadHandle = initializeUpload(file); + UploadHandle uploadHandle = startUpload(file); Map partHandles = new HashMap<>(); MessageDigest origDigest = DigestUtils.getMd5Digest(); - final int payloadCount = getTestPayloadCount(); + int payloadCount = getTestPayloadCount(); for (int i = 1; i <= payloadCount; ++i) { PartHandle partHandle = buildAndPutPart(file, uploadHandle, i, origDigest); @@ -400,16 +506,16 @@ public void testMultipartUploadEmptyPart() throws Exception { FileSystem fs = getFileSystem(); Path file = path("testMultipartUpload"); try (MultipartUploader uploader = - MultipartUploaderFactory.get(fs, null)) { - UploadHandle uploadHandle = uploader.initialize(file); + fs.createMultipartUploader(file).build()) { + UploadHandle uploadHandle = uploader.startUpload(file).get(); Map partHandles = new HashMap<>(); MessageDigest origDigest = DigestUtils.getMd5Digest(); byte[] payload = new byte[0]; origDigest.update(payload); InputStream is = new ByteArrayInputStream(payload); - PartHandle partHandle = uploader.putPart(file, is, 1, uploadHandle, - payload.length); + PartHandle partHandle = awaitFuture( + uploader.putPart(uploadHandle, 1, file, is, payload.length)); partHandles.put(1, partHandle); completeUpload(file, uploadHandle, partHandles, origDigest, 0); } @@ -422,7 +528,7 @@ public void testMultipartUploadEmptyPart() throws Exception { @Test public void testUploadEmptyBlock() throws Exception { Path file = methodPath(); - UploadHandle uploadHandle = 
initializeUpload(file); + UploadHandle uploadHandle = startUpload(file); Map partHandles = new HashMap<>(); partHandles.put(1, putPart(file, uploadHandle, 1, new byte[0])); completeUpload(file, uploadHandle, partHandles, null, 0); @@ -435,10 +541,10 @@ public void testUploadEmptyBlock() throws Exception { @Test public void testMultipartUploadReverseOrder() throws Exception { Path file = methodPath(); - UploadHandle uploadHandle = initializeUpload(file); + UploadHandle uploadHandle = startUpload(file); Map partHandles = new HashMap<>(); MessageDigest origDigest = DigestUtils.getMd5Digest(); - final int payloadCount = getTestPayloadCount(); + int payloadCount = getTestPayloadCount(); for (int i = 1; i <= payloadCount; ++i) { byte[] payload = generatePayload(i); origDigest.update(payload); @@ -459,7 +565,7 @@ public void testMultipartUploadReverseOrderNonContiguousPartNumbers() throws Exception { describe("Upload in reverse order and the part numbers are not contiguous"); Path file = methodPath(); - UploadHandle uploadHandle = initializeUpload(file); + UploadHandle uploadHandle = startUpload(file); MessageDigest origDigest = DigestUtils.getMd5Digest(); int payloadCount = 2 * getTestPayloadCount(); for (int i = 2; i <= payloadCount; i += 2) { @@ -482,22 +588,22 @@ public void testMultipartUploadReverseOrderNonContiguousPartNumbers() public void testMultipartUploadAbort() throws Exception { describe("Upload and then abort it before completing"); Path file = methodPath(); - UploadHandle uploadHandle = initializeUpload(file); - int end = 10; + UploadHandle uploadHandle = startUpload(file); Map partHandles = new HashMap<>(); for (int i = 12; i > 10; i--) { partHandles.put(i, buildAndPutPart(file, uploadHandle, i, null)); } - abortUpload(file, uploadHandle); + abortUpload(uploadHandle, file); String contents = "ThisIsPart49\n"; int len = contents.getBytes(Charsets.UTF_8).length; InputStream is = IOUtils.toInputStream(contents, "UTF-8"); intercept(IOException.class, - () -> 
mpu.putPart(file, is, 49, uploadHandle, len)); + () -> awaitFuture( + uploader0.putPart(uploadHandle, 49, file, is, len))); intercept(IOException.class, - () -> mpu.complete(file, partHandles, uploadHandle)); + () -> complete(uploader0, uploadHandle, file, partHandles)); assertPathDoesNotExist("Uploaded file should not exist", file); @@ -505,9 +611,9 @@ public void testMultipartUploadAbort() throws Exception { // consumed by finalization operations (complete, abort). if (finalizeConsumesUploadIdImmediately()) { intercept(FileNotFoundException.class, - () -> abortUpload(file, uploadHandle)); + () -> abortUpload(uploadHandle, file)); } else { - abortUpload(file, uploadHandle); + abortUpload(uploadHandle, file); } } @@ -519,31 +625,55 @@ public void testAbortUnknownUpload() throws Exception { Path file = methodPath(); ByteBuffer byteBuffer = ByteBuffer.wrap( "invalid-handle".getBytes(Charsets.UTF_8)); - UploadHandle uploadHandle = BBUploadHandle.from(byteBuffer); intercept(FileNotFoundException.class, - () -> abortUpload(file, uploadHandle)); + () -> abortUpload(BBUploadHandle.from(byteBuffer), file)); } /** - * Trying to abort with a handle of size 0 must fail. + * Trying to abort an upload with no data does not create a file. */ @Test public void testAbortEmptyUpload() throws Exception { describe("initialize upload and abort before uploading data"); Path file = methodPath(); - abortUpload(file, initializeUpload(file)); + abortUpload(startUpload(file), file); assertPathDoesNotExist("Uploaded file should not exist", file); } + + /** + * Abort all pending uploads under a path; where the uploader reports a non-negative count, at least one upload must be aborted and no file created. 
+ */ + @Test + public void testAbortAllPendingUploads() throws Exception { + describe("initialize upload and abort the pending upload"); + Path path = methodPath(); + Path file = new Path(path, "child"); + UploadHandle upload = startUpload(file); + try { + CompletableFuture oF + = getRandomUploader().abortUploadsUnderPath(path.getParent()); + int abortedUploads = awaitFuture(oF); + if (abortedUploads >= 0) { + // uploads can be aborted + Assertions.assertThat(abortedUploads) + .describedAs("Number of uploads aborted") + .isGreaterThanOrEqualTo(1); + assertPathDoesNotExist("Uploaded file should not exist", file); + } + } finally { + abortUploadQuietly(upload, file); + } + } + /** * Trying to abort with a handle of size 0 must fail. */ @Test public void testAbortEmptyUploadHandle() throws Exception { ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[0]); - UploadHandle uploadHandle = BBUploadHandle.from(byteBuffer); intercept(IllegalArgumentException.class, - () -> abortUpload(methodPath(), uploadHandle)); + () -> abortUpload(BBUploadHandle.from(byteBuffer), methodPath())); } /** @@ -553,10 +683,10 @@ public void testAbortEmptyUploadHandle() throws Exception { public void testCompleteEmptyUpload() throws Exception { describe("Expect an empty MPU to fail, but still be abortable"); Path dest = methodPath(); - UploadHandle handle = initializeUpload(dest); + UploadHandle handle = startUpload(dest); intercept(IllegalArgumentException.class, - () -> mpu.complete(dest, new HashMap<>(), handle)); - abortUpload(dest, handle); + () -> complete(uploader0, handle, dest, new HashMap<>())); + abortUpload(handle, dest); } /** @@ -571,7 +701,7 @@ public void testPutPartEmptyUploadID() throws Exception { byte[] payload = generatePayload(1); InputStream is = new ByteArrayInputStream(payload); intercept(IllegalArgumentException.class, - () -> mpu.putPart(dest, is, 1, emptyHandle, payload.length)); + () -> uploader0.putPart(emptyHandle, 1, dest, is, payload.length)); } /** @@ -581,7 
+711,7 @@ public void testPutPartEmptyUploadID() throws Exception { public void testCompleteEmptyUploadID() throws Exception { describe("Expect IllegalArgumentException when complete uploadID is empty"); Path dest = methodPath(); - UploadHandle realHandle = initializeUpload(dest); + UploadHandle realHandle = startUpload(dest); UploadHandle emptyHandle = BBUploadHandle.from(ByteBuffer.wrap(new byte[0])); Map partHandles = new HashMap<>(); @@ -590,14 +720,14 @@ public void testCompleteEmptyUploadID() throws Exception { partHandles.put(1, partHandle); intercept(IllegalArgumentException.class, - () -> mpu.complete(dest, partHandles, emptyHandle)); + () -> complete(uploader0, emptyHandle, dest, partHandles)); // and, while things are setup, attempt to complete with // a part index of 0 partHandles.clear(); partHandles.put(0, partHandle); intercept(IllegalArgumentException.class, - () -> mpu.complete(dest, partHandles, realHandle)); + () -> complete(uploader0, realHandle, dest, partHandles)); } /** @@ -610,7 +740,7 @@ public void testCompleteEmptyUploadID() throws Exception { public void testDirectoryInTheWay() throws Exception { FileSystem fs = getFileSystem(); Path file = methodPath(); - UploadHandle uploadHandle = initializeUpload(file); + UploadHandle uploadHandle = startUpload(file); Map partHandles = new HashMap<>(); int size = SMALL_FILE; PartHandle partHandle = putPart(file, uploadHandle, 1, @@ -622,7 +752,7 @@ public void testDirectoryInTheWay() throws Exception { () -> completeUpload(file, uploadHandle, partHandles, null, size)); // abort should still work - abortUpload(file, uploadHandle); + abortUpload(uploadHandle, file); } @Test @@ -630,46 +760,44 @@ public void testConcurrentUploads() throws Throwable { // if the FS doesn't support concurrent uploads, this test is // required to fail during the second initialization. 
- final boolean concurrent = supportsConcurrentUploadsToSamePath(); + boolean concurrent = supportsConcurrentUploadsToSamePath(); describe("testing concurrent uploads, MPU support for this is " + concurrent); - final FileSystem fs = getFileSystem(); - final Path file = methodPath(); - final int size1 = SMALL_FILE; - final int partId1 = 1; - final byte[] payload1 = generatePayload(partId1, size1); - final MessageDigest digest1 = DigestUtils.getMd5Digest(); + Path file = methodPath(); + int size1 = SMALL_FILE; + int partId1 = 1; + byte[] payload1 = generatePayload(partId1, size1); + MessageDigest digest1 = DigestUtils.getMd5Digest(); digest1.update(payload1); - final UploadHandle upload1 = initializeUpload(file); - final Map partHandles1 = new HashMap<>(); + UploadHandle upload1 = startUpload(file); + Map partHandles1 = new HashMap<>(); // initiate part 2 // by using a different size, it's straightforward to see which // version is visible, before reading/digesting the contents - final int size2 = size1 * 2; - final int partId2 = 2; - final byte[] payload2 = generatePayload(partId1, size2); - final MessageDigest digest2 = DigestUtils.getMd5Digest(); + int size2 = size1 * 2; + int partId2 = 2; + byte[] payload2 = generatePayload(partId1, size2); + MessageDigest digest2 = DigestUtils.getMd5Digest(); digest2.update(payload2); - final UploadHandle upload2; + UploadHandle upload2; try { - upload2 = initializeUpload(file); + upload2 = startUpload(file); Assume.assumeTrue( "The Filesystem is unexpectedly supporting concurrent uploads", concurrent); } catch (IOException e) { if (!concurrent) { // this is expected, so end the test - LOG.debug("Expected exception raised on concurrent uploads {}", e); + LOG.debug("Expected exception raised on concurrent uploads", e); return; } else { throw e; } } - final Map partHandles2 = new HashMap<>(); - + Map partHandles2 = new HashMap<>(); assertNotEquals("Upload handles match", upload1, upload2); @@ -685,14 +813,27 @@ public void 
testConcurrentUploads() throws Throwable { // now upload part 2. complete(file, upload2, partHandles2); - // and await the visible length to match - eventually(timeToBecomeConsistentMillis(), 500, - () -> { - FileStatus status = fs.getFileStatus(file); - assertEquals("File length in " + status, - size2, status.getLen()); - }); + + // and await the visible length to match, if this FS is not + // consistent. + final int consistencyDelay = timeToBecomeConsistentMillis(); + if (consistencyDelay > 0) { + eventually(consistencyDelay, + () -> verifyFileLength(file, size2), + new LambdaTestUtils.ProportionalRetryInterval( + CONSISTENCY_INTERVAL, + consistencyDelay)); + } verifyContents(file, digest2, size2); } + + @Test + public void testPathCapabilities() throws Throwable { + FileSystem fs = getFileSystem(); + Assertions.assertThat(fs.hasPathCapability(getContract().getTestPath(), + CommonPathCapabilities.FS_MULTIPART_UPLOADER)) + .describedAs("fs %s, lacks multipart upload capability", fs) + .isTrue(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java index a43053180fbf8..3598d33680e30 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java @@ -30,15 +30,20 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FutureDataInputStreamBuilder; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.impl.FutureIOSupport; import org.apache.hadoop.io.IOUtils; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static 
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.fs.contract.ContractTestUtils.compareByteArrays; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; +import org.assertj.core.api.Assertions; import org.junit.Test; /** @@ -182,7 +187,7 @@ public void testSequentialRead() throws Throwable { @Test public void testOpenFileReadZeroByte() throws Throwable { - describe("create & read a 0 byte file through the builders"); + describe("create & read a 0 byte file through the builders; use a negative length"); Path path = path("zero.txt"); FileSystem fs = getFileSystem(); fs.createFile(path).overwrite(true).build().close(); @@ -190,6 +195,7 @@ public void testOpenFileReadZeroByte() throws Throwable { .opt("fs.test.something", true) .opt("fs.test.something2", 3) .opt("fs.test.something3", "3") + .optLong(FS_OPTION_OPENFILE_LENGTH, -1L) .build().get()) { assertMinusOne("initial byte read", is.read()); } @@ -206,6 +212,17 @@ public void testOpenFileUnknownOption() throws Throwable { () -> builder.build()); } + @Test + public void testOpenFileUnknownOptionLong() throws Throwable { + describe("calling openFile fails when a 'must()' option is unknown"); + FutureDataInputStreamBuilder builder = + getFileSystem().openFile(path("testOpenFileUnknownOption")) + .optLong("fs.test.something", 1L) + .mustLong("fs.test.something2", 1L); + intercept(IllegalArgumentException.class, + () -> builder.build()); + } + @Test public void testOpenFileLazyFail() throws Throwable { describe("openFile fails on a missing file in the get() and not before"); @@ -232,7 +249,7 @@ public void 
testAwaitFutureFailToFNFE() throws Throwable { getFileSystem().openFile(path("testAwaitFutureFailToFNFE")) .opt("fs.test.something", true); intercept(FileNotFoundException.class, - () -> FutureIOSupport.awaitFuture(builder.build())); + () -> awaitFuture(builder.build())); } @Test @@ -242,7 +259,7 @@ public void testAwaitFutureTimeoutFailToFNFE() throws Throwable { getFileSystem().openFile(path("testAwaitFutureFailToFNFE")) .opt("fs.test.something", true); intercept(FileNotFoundException.class, - () -> FutureIOSupport.awaitFuture(builder.build(), + () -> awaitFuture(builder.build(), 10, TimeUnit.DAYS)); } @@ -250,7 +267,7 @@ public void testAwaitFutureTimeoutFailToFNFE() throws Throwable { public void testOpenFileExceptionallyTranslating() throws Throwable { describe("openFile missing file chains into exceptionally()"); CompletableFuture f = getFileSystem() - .openFile(path("testOpenFileUnknownOption")).build(); + .openFile(path("testOpenFileExceptionallyTranslating")).build(); interceptFuture(RuntimeException.class, "exceptionally", f.exceptionally(ex -> { @@ -262,11 +279,12 @@ public void testOpenFileExceptionallyTranslating() throws Throwable { public void testChainedFailureAwaitFuture() throws Throwable { describe("await Future handles chained failures"); CompletableFuture f = getFileSystem() - .openFile(path("testOpenFileUnknownOption")) + .openFile(path("testChainedFailureAwaitFuture")) + .withFileStatus(null) .build(); intercept(RuntimeException.class, "exceptionally", - () -> FutureIOSupport.awaitFuture( + () -> awaitFuture( f.exceptionally(ex -> { throw new RuntimeException("exceptionally", ex); }))); @@ -280,13 +298,34 @@ public void testOpenFileApplyRead() throws Throwable { int len = 4096; createFile(fs, path, true, dataset(len, 0x40, 0x80)); + FileStatus st = fs.getFileStatus(path); CompletableFuture readAllBytes = fs.openFile(path) - .withFileStatus(fs.getFileStatus(path)) + .withFileStatus(st) .build() .thenApply(ContractTestUtils::readStream); 
assertEquals("Wrong number of bytes read value", len, (long) readAllBytes.get()); + // now reattempt with a new FileStatus and a different path + // other than the final name element + // implementations MUST use path in openFile() call + FileStatus st2 = new FileStatus( + len, false, + st.getReplication(), + st.getBlockSize(), + st.getModificationTime(), + st.getAccessTime(), + st.getPermission(), + st.getOwner(), + st.getGroup(), + new Path("gopher:///localhost:/" + path.getName())); + assertEquals("Wrong number of bytes read value", + len, + (long) fs.openFile(path) + .withFileStatus(st2) + .build() + .thenApply(ContractTestUtils::readStream) + .get()); } @Test @@ -294,21 +333,79 @@ public void testOpenFileApplyAsyncRead() throws Throwable { describe("verify that async accept callbacks are evaluated"); Path path = path("testOpenFileApplyAsyncRead"); FileSystem fs = getFileSystem(); + final int len = 512; createFile(fs, path, true, - dataset(4, 0x40, 0x80)); - CompletableFuture future = fs.openFile(path).build(); + dataset(len, 0x40, 0x80)); + CompletableFuture future = fs.openFile(path) + .mustDouble(FS_OPTION_OPENFILE_LENGTH, 43.2e60) // pass in a double + .build(); AtomicBoolean accepted = new AtomicBoolean(false); - future.thenAcceptAsync(i -> accepted.set(true)).get(); + final Long bytes = future.thenApply(stream -> { + accepted.set(true); + return ContractTestUtils.readStream(stream); + }).get(); assertTrue("async accept operation not invoked", accepted.get()); + Assertions.assertThat(bytes) + .describedAs("bytes read from stream") + .isEqualTo(len); } + /** + * Open a file with a null status, and the length + * passed in as an opt() option (along with sequential IO). + * The file is opened, the data read, and it must match + * the source data. + * opt() is used so that integration testing with external + * filesystem connectors will downgrade if the option is not + * recognized. 
+ */ @Test - public void testOpenFileNullStatus() throws Throwable { - describe("use openFile() with a null status"); + public void testOpenFileNullStatusButFileLength() throws Throwable { + describe("use openFile() with a null status and expect the status to be" + + " ignored. block size, fadvise and length are passed in as" + + " opt() options"); Path path = path("testOpenFileNullStatus"); - intercept(NullPointerException.class, - () -> getFileSystem().openFile(path).withFileStatus(null)); + FileSystem fs = getFileSystem(); + int len = 4; + byte[] result = new byte[len]; + byte[] dataset = dataset(len, 0x40, 0x80); + createFile(fs, path, true, + dataset); + CompletableFuture future = fs.openFile(path) + .withFileStatus(null) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + "unknown, sequential, random") + .optLong(FS_OPTION_OPENFILE_BUFFER_SIZE, 32768) + .optLong(FS_OPTION_OPENFILE_LENGTH, len) + .build(); + + try (FSDataInputStream in = future.get()) { + in.readFully(result); + } + compareByteArrays(dataset, result, len); + } + + /** + * open a file with a length set as a double; verifies resilience + * of the parser. 
+ */ + @Test + public void testFloatingPointLength() throws Throwable { + describe("Open file with a length"); + Path path = methodPath(); + FileSystem fs = getFileSystem(); + int len = 4096; + createFile(fs, path, true, + dataset(len, 0x40, 0x80)); + final Long l = fs.openFile(path) + .mustDouble(FS_OPTION_OPENFILE_LENGTH, len) + .build() + .thenApply(ContractTestUtils::readStream) + .get(); + Assertions.assertThat(l) + .describedAs("bytes read from file %s", path) + .isEqualTo(len); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java index 78ff2541483a3..e032604b5788c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java @@ -104,29 +104,43 @@ public void testRenameFileOverExistingFile() throws Throwable { assertIsFile(destFile); boolean renameOverwritesDest = isSupported(RENAME_OVERWRITES_DEST); boolean renameReturnsFalseOnRenameDestExists = - !isSupported(RENAME_RETURNS_FALSE_IF_DEST_EXISTS); + isSupported(RENAME_RETURNS_FALSE_IF_DEST_EXISTS); + assertFalse(RENAME_OVERWRITES_DEST + " and " + + RENAME_RETURNS_FALSE_IF_DEST_EXISTS + " cannot be both supported", + renameOverwritesDest && renameReturnsFalseOnRenameDestExists); + String expectedTo = "expected rename(" + srcFile + ", " + destFile + ") to "; + boolean destUnchanged = true; try { + // rename is rejected by returning 'false' or throwing an exception boolean renamed = rename(srcFile, destFile); + destUnchanged = !renamed; if (renameOverwritesDest) { - // the filesystem supports rename(file, file2) by overwriting file2 - - assertTrue("Rename returned false", renamed); - destUnchanged = false; + assertTrue(expectedTo + "overwrite 
destination, but got false", + renamed); + } else if (renameReturnsFalseOnRenameDestExists) { + assertFalse(expectedTo + "be rejected with false, but destination " + + "was overwritten", renamed); + } else if (renamed) { + String destDirLS = generateAndLogErrorListing(srcFile, destFile); + getLogger().error("dest dir {}", destDirLS); + + fail(expectedTo + "be rejected with exception, but got overwritten"); } else { - // rename is rejected by returning 'false' or throwing an exception - if (renamed && !renameReturnsFalseOnRenameDestExists) { - //expected an exception - String destDirLS = generateAndLogErrorListing(srcFile, destFile); - getLogger().error("dest dir {}", destDirLS); - fail("expected rename(" + srcFile + ", " + destFile + " ) to fail," + - " but got success and destination of " + destDirLS); - } + fail(expectedTo + "be rejected with exception, but got false"); } } catch (FileAlreadyExistsException e) { + // rename(file, file2) should throw exception iff + // it neither overwrites nor returns false + assertFalse(expectedTo + "overwrite destination, but got exception", + renameOverwritesDest); + assertFalse(expectedTo + "be rejected with false, but got exception", + renameReturnsFalseOnRenameDestExists); + handleExpectedException(e); } + // verify that the destination file is as expected based on the expected // outcome verifyFileContents(getFileSystem(), destFile, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java index 27c6933ae1885..924ebd0065d96 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java @@ -22,13 +22,16 @@ import 
org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.junit.Test; +import org.assertj.core.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.RemoteIterator; @@ -39,6 +42,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.deleteChildren; import static org.apache.hadoop.fs.contract.ContractTestUtils.dumpStats; +import static org.apache.hadoop.fs.contract.ContractTestUtils.iteratorToList; import static org.apache.hadoop.fs.contract.ContractTestUtils.listChildren; import static org.apache.hadoop.fs.contract.ContractTestUtils.toList; import static org.apache.hadoop.fs.contract.ContractTestUtils.treeWalk; @@ -191,10 +195,9 @@ public void testListEmptyRootDirectory() throws IOException { for (FileStatus status : statuses) { ContractTestUtils.assertDeleted(fs, status.getPath(), false, true, false); } - FileStatus[] rootListStatus = fs.listStatus(root); - assertEquals("listStatus on empty root-directory returned found: " - + join("\n", rootListStatus), - 0, rootListStatus.length); + Assertions.assertThat(fs.listStatus(root)) + .describedAs("ls /") + .hasSize(0); assertNoElements("listFiles(/, false)", fs.listFiles(root, false)); assertNoElements("listFiles(/, true)", @@ -242,6 +245,13 @@ public void testSimpleRootListing() throws IOException { + "listStatus = " + listStatusResult + "listFiles = " + listFilesResult, fileList.size() <= statuses.length); + List statusList = (List) iteratorToList( + fs.listStatusIterator(root)); + Assertions.assertThat(statusList) + .describedAs("Result of listStatus(/) and listStatusIterator(/)" + + " must match") + 
.hasSameElementsAs(Arrays.stream(statuses) + .collect(Collectors.toList())); } @Test @@ -254,7 +264,7 @@ public void testRecursiveRootListing() throws IOException { fs.listFiles(root, true)); describe("verifying consistency with treewalk's files"); ContractTestUtils.TreeScanResults treeWalk = treeWalk(fs, root); - treeWalk.assertFieldsEquivalent("files", listing, + treeWalk.assertFieldsEquivalent("treewalk vs listFiles(/, true)", listing, treeWalk.getFiles(), listing.getFiles()); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSafeModeTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSafeModeTest.java new file mode 100644 index 0000000000000..72d0dce9ff9e7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSafeModeTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.contract; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.SafeMode; +import org.apache.hadoop.fs.SafeModeAction; + +public abstract class AbstractContractSafeModeTest extends AbstractFSContractTestBase { + + @Test + public void testSafeMode() throws Throwable { + final FileSystem fs = getFileSystem(); + SafeMode fsWithSafeMode = verifyAndGetSafeModeInstance(fs); + Assertions.assertThat(fsWithSafeMode.setSafeMode(SafeModeAction.GET)) + .describedAs("Getting the status of safe mode before entering should be off.").isFalse(); + Assertions.assertThat(fsWithSafeMode.setSafeMode(SafeModeAction.ENTER)) + .describedAs("Entering Safe mode and safe mode turns on.").isTrue(); + Assertions.assertThat(fsWithSafeMode.setSafeMode(SafeModeAction.GET)) + .describedAs("Getting the status of safe mode after entering, safe mode should be on.") + .isTrue(); + Assertions.assertThat(fsWithSafeMode.setSafeMode(SafeModeAction.LEAVE)) + .describedAs("Leaving safe mode, and safe mode switches off.").isFalse(); + Assertions.assertThat(fsWithSafeMode.setSafeMode(SafeModeAction.FORCE_EXIT)) + .describedAs("Force exist safe mode at any time, safe mode should always switches off.") + .isFalse(); + } + + private SafeMode verifyAndGetSafeModeInstance(FileSystem fs) { + Assertions.assertThat(fs) + .describedAs("File system %s must be an instance of %s", fs, SafeMode.class) + .isInstanceOf(SafeMode.class); + return (SafeMode) fs; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java index ca8e4a053beac..d34178489c81d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java +++
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java @@ -317,7 +317,7 @@ public void testPositionedBulkReadDoesntChangePosition() throws Throwable { int v = 256; byte[] readBuffer = new byte[v]; - assertEquals(v, instream.read(128, readBuffer, 0, v)); + instream.readFully(128, readBuffer, 0, v); //have gone back assertEquals(40000, instream.getPos()); //content is the same too @@ -572,8 +572,7 @@ public void testReadSmallFile() throws Throwable { // now read the entire file in one go byte[] fullFile = new byte[TEST_FILE_LEN]; - assertEquals(TEST_FILE_LEN, - instream.read(0, fullFile, 0, fullFile.length)); + instream.readFully(0, fullFile, 0, fullFile.length); assertEquals(0, instream.getPos()); // now read past the end of the file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractStreamIOStatisticsTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractStreamIOStatisticsTest.java new file mode 100644 index 0000000000000..89b21c497083b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractStreamIOStatisticsTest.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract; + +import java.util.Collections; +import java.util.List; + +import org.assertj.core.api.Assertions; +import org.junit.AfterClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.demandStringifyIOStatisticsSource; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.snapshotIOStatistics; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_WRITE_BYTES; + +/** + * Tests {@link IOStatistics} support in input and output streams. + *

      + * Requires both the input and output streams to offer the basic + * bytes read/written statistics. + *

      + * If the IO is buffered, that information must be provided, + * especially the input buffer size. + */ +public abstract class AbstractContractStreamIOStatisticsTest + extends AbstractFSContractTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(AbstractContractStreamIOStatisticsTest.class); + + /** + * FileSystem statistics are collected across every test case. + */ + protected static final IOStatisticsSnapshot FILESYSTEM_IOSTATS = + snapshotIOStatistics(); + + @Override + public void teardown() throws Exception { + final FileSystem fs = getFileSystem(); + if (fs instanceof IOStatisticsSource) { + FILESYSTEM_IOSTATS.aggregate(((IOStatisticsSource)fs).getIOStatistics()); + } + super.teardown(); + } + + /** + * Dump the filesystem statistics after the class if contains any values. + */ + @AfterClass + public static void dumpFileSystemIOStatistics() { + if (!FILESYSTEM_IOSTATS.counters().isEmpty()) { + // if there is at least one counter + LOG.info("Aggregate FileSystem Statistics {}", + ioStatisticsToPrettyString(FILESYSTEM_IOSTATS)); + } + } + + @Test + public void testOutputStreamStatisticKeys() throws Throwable { + describe("Look at the statistic keys of an output stream"); + Path path = methodPath(); + FileSystem fs = getFileSystem(); + fs.mkdirs(path.getParent()); + try (FSDataOutputStream out = fs.create(path, true)) { + IOStatistics statistics = extractStatistics(out); + final List keys = outputStreamStatisticKeys(); + Assertions.assertThat(statistics.counters().keySet()) + .describedAs("statistic keys of %s", statistics) + .containsAll(keys); + Assertions.assertThat(keys) + .describedAs("Statistics supported by the stream %s", out) + .contains(STREAM_WRITE_BYTES); + } finally { + fs.delete(path, false); + } + } + + /** + * If the stream writes in blocks, then counters during the write may be + * zero until a whole block is written -or the write has finished. + * @return true if writes are buffered into whole blocks. 
+ */ + public boolean streamWritesInBlocks() { + return false; + } + + @Test + public void testWriteSingleByte() throws Throwable { + describe("Write a byte to a file and verify" + + " the stream statistics are updated"); + Path path = methodPath(); + FileSystem fs = getFileSystem(); + fs.mkdirs(path.getParent()); + boolean writesInBlocks = streamWritesInBlocks(); + try (FSDataOutputStream out = fs.create(path, true)) { + IOStatistics statistics = extractStatistics(out); + // before a write, no bytes + verifyStatisticCounterValue(statistics, STREAM_WRITE_BYTES, 0); + out.write('0'); + verifyStatisticCounterValue(statistics, STREAM_WRITE_BYTES, + writesInBlocks ? 0 : 1); + // close the stream + out.close(); + // statistics are still valid after the close + // always call the output stream to check that behavior + statistics = extractStatistics(out); + final String strVal = statistics.toString(); + LOG.info("Statistics = {}", strVal); + verifyStatisticCounterValue(statistics, STREAM_WRITE_BYTES, 1); + } finally { + fs.delete(path, false); + } + } + + @Test + public void testWriteByteArrays() throws Throwable { + describe("Write byte arrays to a file and verify" + + " the stream statistics are updated"); + Path path = methodPath(); + FileSystem fs = getFileSystem(); + fs.mkdirs(path.getParent()); + boolean writesInBlocks = streamWritesInBlocks(); + try (FSDataOutputStream out = fs.create(path, true)) { + Object demandStatsString = demandStringifyIOStatisticsSource(out); + // before a write, no bytes + final byte[] bytes = ContractTestUtils.toAsciiByteArray( + "statistically-speaking"); + final long len = bytes.length; + out.write(bytes); + out.flush(); + LOG.info("stats {}", demandStatsString); + IOStatistics statistics = extractStatistics(out); + verifyStatisticCounterValue(statistics, STREAM_WRITE_BYTES, + writesInBlocks ? 0 : len); + out.write(bytes); + out.flush(); + verifyStatisticCounterValue(statistics, STREAM_WRITE_BYTES, + writesInBlocks ? 
0 : len * 2); + // close the stream + out.close(); + LOG.info("stats {}", demandStatsString); + // statistics are still valid after the close + // always call the output stream to check that behavior + statistics = extractStatistics(out); + verifyStatisticCounterValue(statistics, STREAM_WRITE_BYTES, len * 2); + // the toString() value must contain the same counter value + Assertions.assertThat(demandStatsString.toString()) + .contains(Long.toString(len * 2)); + } finally { + fs.delete(path, false); + } + } + + @Test + public void testInputStreamStatisticKeys() throws Throwable { + describe("Look at the statistic keys of an input stream"); + Path path = methodPath(); + FileSystem fs = getFileSystem(); + ContractTestUtils.touch(fs, path); + try (FSDataInputStream in = fs.open(path)) { + IOStatistics statistics = extractStatistics(in); + final List keys = inputStreamStatisticKeys(); + Assertions.assertThat(statistics.counters().keySet()) + .describedAs("statistic keys of %s", statistics) + .containsAll(keys); + Assertions.assertThat(keys) + .describedAs("Statistics supported by the stream %s", in) + .contains(STREAM_READ_BYTES); + verifyStatisticCounterValue(statistics, STREAM_READ_BYTES, 0); + } finally { + fs.delete(path, false); + } + } + + @Test + public void testInputStreamStatisticRead() throws Throwable { + describe("Read Data from an input stream"); + Path path = methodPath(); + FileSystem fs = getFileSystem(); + final int fileLen = 1024; + final byte[] ds = dataset(fileLen, 'a', 26); + ContractTestUtils.writeDataset(fs, path, ds, fileLen, 8_000, true); + + try (FSDataInputStream in = fs.open(path)) { + long current = 0; + IOStatistics statistics = extractStatistics(in); + verifyStatisticCounterValue(statistics, STREAM_READ_BYTES, 0); + Assertions.assertThat(in.read()).isEqualTo('a'); + int bufferSize = readBufferSize(); + // either a single byte was read or a whole block + current = verifyBytesRead(statistics, current, 1, bufferSize); + final int
bufferLen = 128; + byte[] buf128 = new byte[bufferLen]; + in.read(buf128); + current = verifyBytesRead(statistics, current, bufferLen, bufferSize); + in.readFully(buf128); + current = verifyBytesRead(statistics, current, bufferLen, bufferSize); + in.readFully(0, buf128); + current = verifyBytesRead(statistics, current, bufferLen, bufferSize); + // seek must not increment the read counter + in.seek(256); + verifyBytesRead(statistics, current, 0, bufferSize); + + // if a stream implements lazy-seek the seek operation + // may be postponed until the read + final int sublen = 32; + Assertions.assertThat(in.read(buf128, 0, sublen)) + .isEqualTo(sublen); + current = verifyBytesRead(statistics, current, sublen, bufferSize); + + // perform some read operations near the end of the file such that + // the buffer will not be completely read. + // skip these tests for buffered IO as it is too complex to work out + if (bufferSize == 0) { + final int pos = fileLen - sublen; + in.seek(pos); + Assertions.assertThat(in.read(buf128)) + .describedAs("Read overlapping EOF") + .isEqualTo(sublen); + current = verifyStatisticCounterValue(statistics, STREAM_READ_BYTES, + current + sublen); + Assertions.assertThat(in.read(pos, buf128, 0, bufferLen)) + .describedAs("Read(buffer) overlapping EOF") + .isEqualTo(sublen); + verifyStatisticCounterValue(statistics, STREAM_READ_BYTES, + current + sublen); + } + } finally { + fs.delete(path, false); + } + } + + /** + * Verify the bytes read value, taking into account block size. + * @param statistics stats + * @param current current count + * @param bytesRead bytes explicitly read + * @param bufferSize buffer size of stream + * @return the current count of bytes read ignoring block size + */ + public long verifyBytesRead(final IOStatistics statistics, + final long current, + final int bytesRead, final int bufferSize) { + // final position. 
for unbuffered read, this is the expected value + long finalPos = current + bytesRead; + long expected = finalPos; + if (bufferSize > 0) { + // buffered. count of read is number of buffers already read + // plus the current buffer, multiplied by that buffer size + expected = bufferSize * (1 + (current / bufferSize)); + } + verifyStatisticCounterValue(statistics, STREAM_READ_BYTES, expected); + return finalPos; + } + + /** + * Buffer size for reads. + * Filesystems performing block reads (checksum, etc) + * must return their buffer size here. + * @return buffer capacity; 0 for unbuffered + */ + public int readBufferSize() { + return 0; + } + + /** + * Keys which the output stream must support. + * @return a list of keys + */ + public List outputStreamStatisticKeys() { + return Collections.singletonList(STREAM_WRITE_BYTES); + } + + /** + * Keys which the input stream must support. + * @return a list of keys + */ + public List inputStreamStatisticKeys() { + return Collections.singletonList(STREAM_READ_BYTES); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java index 5eb1e892f83d5..adaf0a910c620 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java @@ -137,7 +137,8 @@ protected void validateFileContents(FSDataInputStream stream, int length, throws IOException { byte[] streamData = new byte[length]; assertEquals("failed to read expected number of bytes from " - + "stream", length, stream.read(streamData)); + + "stream.
This may be transient", + length, stream.read(streamData)); byte[] validateFileBytes; if (startIndex == 0 && length == fileBytes.length) { validateFileBytes = fileBytes; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java new file mode 100644 index 0000000000000..a39201df24943 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java @@ -0,0 +1,485 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.contract; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.IntFunction; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileRange; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.impl.FutureIOSupport; +import org.apache.hadoop.io.WeakReferencedElasticByteBufferPool; +import org.apache.hadoop.util.concurrent.HadoopExecutors; +import org.apache.hadoop.util.functional.FutureIO; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertCapabilities; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertDatasetEquals; +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.returnBuffersToPoolPostRead; +import static org.apache.hadoop.fs.contract.ContractTestUtils.validateVectoredReadResult; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; + +@RunWith(Parameterized.class) +public abstract class AbstractContractVectoredReadTest extends 
AbstractFSContractTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractContractVectoredReadTest.class); + + public static final int DATASET_LEN = 64 * 1024; + protected static final byte[] DATASET = ContractTestUtils.dataset(DATASET_LEN, 'a', 32); + protected static final String VECTORED_READ_FILE_NAME = "vectored_file.txt"; + + private final IntFunction allocate; + + private final WeakReferencedElasticByteBufferPool pool = + new WeakReferencedElasticByteBufferPool(); + + private final String bufferType; + + @Parameterized.Parameters(name = "Buffer type : {0}") + public static List params() { + return Arrays.asList("direct", "array"); + } + + public AbstractContractVectoredReadTest(String bufferType) { + this.bufferType = bufferType; + this.allocate = value -> { + boolean isDirect = !"array".equals(bufferType); + return pool.getBuffer(isDirect, value); + }; + } + + public IntFunction getAllocate() { + return allocate; + } + + public WeakReferencedElasticByteBufferPool getPool() { + return pool; + } + + @Override + public void setup() throws Exception { + super.setup(); + Path path = path(VECTORED_READ_FILE_NAME); + FileSystem fs = getFileSystem(); + createFile(fs, path, true, DATASET); + } + + @Override + public void teardown() throws Exception { + super.teardown(); + pool.release(); + } + + @Test + public void testVectoredReadCapability() throws Exception { + FileSystem fs = getFileSystem(); + String[] vectoredReadCapability = new String[]{StreamCapabilities.VECTOREDIO}; + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + assertCapabilities(in, vectoredReadCapability, null); + } + } + + @Test + public void testVectoredReadMultipleRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + FileRange fileRange = FileRange.createFileRange(i * 100, 100); + fileRanges.add(fileRange); + } + try (FSDataInputStream in = 
fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + CompletableFuture[] completableFutures = new CompletableFuture[fileRanges.size()]; + int i = 0; + for (FileRange res : fileRanges) { + completableFutures[i++] = res.getData(); + } + CompletableFuture combinedFuture = CompletableFuture.allOf(completableFutures); + combinedFuture.get(); + + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testVectoredReadAndReadFully() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(100, 100)); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + byte[] readFullRes = new byte[100]; + in.readFully(100, readFullRes); + ByteBuffer vecRes = FutureIOSupport.awaitFuture(fileRanges.get(0).getData()); + Assertions.assertThat(vecRes) + .describedAs("Result from vectored read and readFully must match") + .isEqualByComparingTo(ByteBuffer.wrap(readFullRes)); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + /** + * As the minimum seek value is 4*1024,none of the below ranges + * will get merged. + */ + @Test + public void testDisjointRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(0, 100)); + fileRanges.add(FileRange.createFileRange(4_000 + 101, 100)); + fileRanges.add(FileRange.createFileRange(16_000 + 101, 100)); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + /** + * As the minimum seek value is 4*1024, all the below ranges + * will get merged into one. 
+ */ + @Test + public void testAllRangesMergedIntoOne() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(0, 100)); + fileRanges.add(FileRange.createFileRange(4_000 - 101, 100)); + fileRanges.add(FileRange.createFileRange(8_000 - 101, 100)); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + /** + * As the minimum seek value is 4*1024, the first three ranges will be + * merged into and other two will remain as it is. + */ + @Test + public void testSomeRangesMergedSomeUnmerged() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(8 * 1024, 100)); + fileRanges.add(FileRange.createFileRange(14 * 1024, 100)); + fileRanges.add(FileRange.createFileRange(10 * 1024, 100)); + fileRanges.add(FileRange.createFileRange(2 * 1024 - 101, 100)); + fileRanges.add(FileRange.createFileRange(40 * 1024, 1024)); + FileStatus fileStatus = fs.getFileStatus(path(VECTORED_READ_FILE_NAME)); + CompletableFuture builder = + fs.openFile(path(VECTORED_READ_FILE_NAME)) + .withFileStatus(fileStatus) + .build(); + try (FSDataInputStream in = builder.get()) { + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testOverlappingRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = getSampleOverlappingRanges(); + FileStatus fileStatus = fs.getFileStatus(path(VECTORED_READ_FILE_NAME)); + CompletableFuture builder = + fs.openFile(path(VECTORED_READ_FILE_NAME)) + .withFileStatus(fileStatus) + .build(); + try (FSDataInputStream in = builder.get()) { + in.readVectored(fileRanges, allocate); + 
validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testSameRanges() throws Exception { + // Same ranges are special case of overlapping only. + FileSystem fs = getFileSystem(); + List fileRanges = getSampleSameRanges(); + CompletableFuture builder = + fs.openFile(path(VECTORED_READ_FILE_NAME)) + .build(); + try (FSDataInputStream in = builder.get()) { + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testSomeRandomNonOverlappingRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(500, 100)); + fileRanges.add(FileRange.createFileRange(1000, 200)); + fileRanges.add(FileRange.createFileRange(50, 10)); + fileRanges.add(FileRange.createFileRange(10, 5)); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testConsecutiveRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(500, 100)); + fileRanges.add(FileRange.createFileRange(600, 200)); + fileRanges.add(FileRange.createFileRange(800, 100)); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + /** + * Test to validate EOF ranges. Default implementation fails with EOFException + * while reading the ranges. Some implementation like s3, checksum fs fail fast + * as they already have the file length calculated. 
+ */ + @Test + public void testEOFRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(DATASET_LEN, 100)); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + for (FileRange res : fileRanges) { + CompletableFuture data = res.getData(); + interceptFuture(EOFException.class, + "", + ContractTestUtils.VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS, + data); + } + } + } + + @Test + public void testNegativeLengthRange() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(0, -50)); + verifyExceptionalVectoredRead(fs, fileRanges, IllegalArgumentException.class); + } + + @Test + public void testNegativeOffsetRange() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(-1, 50)); + verifyExceptionalVectoredRead(fs, fileRanges, EOFException.class); + } + + @Test + public void testNormalReadAfterVectoredRead() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = createSampleNonOverlappingRanges(); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, allocate); + // read starting 200 bytes + byte[] res = new byte[200]; + in.read(res, 0, 200); + ByteBuffer buffer = ByteBuffer.wrap(res); + assertDatasetEquals(0, "normal_read", buffer, 200, DATASET); + Assertions.assertThat(in.getPos()) + .describedAs("Vectored read shouldn't change file pointer.") + .isEqualTo(200); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testVectoredReadAfterNormalRead() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = createSampleNonOverlappingRanges(); + try (FSDataInputStream in = 
fs.open(path(VECTORED_READ_FILE_NAME))) { + // read starting 200 bytes + byte[] res = new byte[200]; + in.read(res, 0, 200); + ByteBuffer buffer = ByteBuffer.wrap(res); + assertDatasetEquals(0, "normal_read", buffer, 200, DATASET); + Assertions.assertThat(in.getPos()) + .describedAs("Vectored read shouldn't change file pointer.") + .isEqualTo(200); + in.readVectored(fileRanges, allocate); + validateVectoredReadResult(fileRanges, DATASET); + returnBuffersToPoolPostRead(fileRanges, pool); + } + } + + @Test + public void testMultipleVectoredReads() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges1 = createSampleNonOverlappingRanges(); + List fileRanges2 = createSampleNonOverlappingRanges(); + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges1, allocate); + in.readVectored(fileRanges2, allocate); + validateVectoredReadResult(fileRanges2, DATASET); + validateVectoredReadResult(fileRanges1, DATASET); + returnBuffersToPoolPostRead(fileRanges1, pool); + returnBuffersToPoolPostRead(fileRanges2, pool); + } + } + + /** + * This test creates list of ranges and then submit a readVectored + * operation and then uses a separate thread pool to process the + * results asynchronously. 
+ */ + @Test + public void testVectoredIOEndToEnd() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(8 * 1024, 100)); + fileRanges.add(FileRange.createFileRange(14 * 1024, 100)); + fileRanges.add(FileRange.createFileRange(10 * 1024, 100)); + fileRanges.add(FileRange.createFileRange(2 * 1024 - 101, 100)); + fileRanges.add(FileRange.createFileRange(40 * 1024, 1024)); + + ExecutorService dataProcessor = Executors.newFixedThreadPool(5); + CountDownLatch countDown = new CountDownLatch(fileRanges.size()); + + try (FSDataInputStream in = fs.open(path(VECTORED_READ_FILE_NAME))) { + in.readVectored(fileRanges, value -> pool.getBuffer(true, value)); + for (FileRange res : fileRanges) { + dataProcessor.submit(() -> { + try { + readBufferValidateDataAndReturnToPool(res, countDown); + } catch (Exception e) { + String error = String.format("Error while processing result for %s", res); + LOG.error(error, e); + ContractTestUtils.fail(error, e); + } + }); + } + // user can perform other computations while waiting for IO. + if (!countDown.await(VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + ContractTestUtils.fail("Timeout/Error while processing vectored io results"); + } + } finally { + HadoopExecutors.shutdown(dataProcessor, LOG, + VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, TimeUnit.SECONDS); + } + } + + private void readBufferValidateDataAndReturnToPool(FileRange res, + CountDownLatch countDownLatch) + throws IOException, TimeoutException { + CompletableFuture data = res.getData(); + // Read the data and perform custom operation. Here we are just + // validating it with original data. + FutureIO.awaitFuture(data.thenAccept(buffer -> { + assertDatasetEquals((int) res.getOffset(), + "vecRead", buffer, res.getLength(), DATASET); + // return buffer to the pool once read. 
+ pool.putBuffer(buffer); + }), + VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, TimeUnit.SECONDS); + + // countdown to notify main thread that processing has been done. + countDownLatch.countDown(); + } + + + protected List createSampleNonOverlappingRanges() { + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(0, 100)); + fileRanges.add(FileRange.createFileRange(110, 50)); + return fileRanges; + } + + protected List getSampleSameRanges() { + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(8_000, 1000)); + fileRanges.add(FileRange.createFileRange(8_000, 1000)); + fileRanges.add(FileRange.createFileRange(8_000, 1000)); + return fileRanges; + } + + protected List getSampleOverlappingRanges() { + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(100, 500)); + fileRanges.add(FileRange.createFileRange(400, 500)); + return fileRanges; + } + + protected List getConsecutiveRanges() { + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(100, 500)); + fileRanges.add(FileRange.createFileRange(600, 500)); + return fileRanges; + } + + /** + * Validate that exceptions must be thrown during a vectored + * read operation with specific input ranges. + * @param fs FileSystem instance. + * @param fileRanges input file ranges. + * @param clazz type of exception expected. + * @throws Exception any other IOE. 
+ */ + protected void verifyExceptionalVectoredRead( + FileSystem fs, + List fileRanges, + Class clazz) throws Exception { + + CompletableFuture builder = + fs.openFile(path(VECTORED_READ_FILE_NAME)) + .build(); + try (FSDataInputStream in = builder.get()) { + intercept(clazz, + () -> in.readVectored(fileRanges, allocate)); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContract.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContract.java index f09496a6082c8..76d3116c3abdc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContract.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContract.java @@ -69,6 +69,14 @@ public void init() throws IOException { } + /** + * Any teardown logic can go here. + * @throws IOException IO problems + */ + public void teardown() throws IOException { + + } + /** * Add a configuration resource to this instance's configuration * @param resource resource reference @@ -113,7 +121,7 @@ public FileSystem getFileSystem(URI uri) throws IOException { public abstract FileSystem getTestFileSystem() throws IOException; /** - * Get the scheme of this FS + * Get the scheme of this FS. 
* @return the scheme this FS supports */ public abstract String getScheme(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java index 60373f67992eb..ac9de6d7bfe8c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java @@ -213,6 +213,9 @@ public void teardown() throws Exception { Thread.currentThread().setName("teardown"); LOG.debug("== Teardown =="); deleteTestDirInTeardown(); + if (contract != null) { + contract.teardown(); + } LOG.debug("== Teardown complete =="); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java index 3f31c07742c59..29cd29dfaf225 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java @@ -241,4 +241,19 @@ public interface ContractOptions { */ String TEST_RANDOM_SEEK_COUNT = "test.random-seek-count"; + /** + * Is hflush supported in API and StreamCapabilities? + */ + String SUPPORTS_HFLUSH = "supports-hflush"; + + /** + * Is hsync supported in API and StreamCapabilities? + */ + String SUPPORTS_HSYNC = "supports-hsync"; + + /** + * Is the metadata updated after an hsync? + * HDFS does not do this. 
+ */ + String METADATA_UPDATED_ON_HSYNC = "metadata_updated_on_hsync"; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java index 4789630f95f1c..b61abddd43426 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileRange; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -28,7 +29,11 @@ import org.apache.hadoop.fs.PathCapabilities; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.io.ByteBufferPool; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.functional.RemoteIterators; +import org.apache.hadoop.util.functional.FutureIO; + import org.junit.Assert; import org.junit.AssumptionViolatedException; import org.slf4j.Logger; @@ -39,6 +44,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -49,6 +55,9 @@ import java.util.Properties; import java.util.Set; import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; @@ -68,6 +77,11 @@ public class ContractTestUtils 
extends Assert { public static final String IO_CHUNK_MODULUS_SIZE = "io.chunk.modulus.size"; public static final int DEFAULT_IO_CHUNK_MODULUS_SIZE = 128; + /** + * Timeout in seconds for vectored read operation in tests : {@value}. + */ + public static final int VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS = 5 * 60; + /** * Assert that a property in the property set matches the expected value. * @param props property set @@ -233,8 +247,8 @@ public static byte[] readDataset(FileSystem fs, Path path, int len) public static void verifyFileContents(FileSystem fs, Path path, byte[] original) throws IOException { - assertIsFile(fs, path); FileStatus stat = fs.getFileStatus(path); + assertIsFile(path, stat); String statText = stat.toString(); assertEquals("wrong length " + statText, original.length, stat.getLen()); byte[] bytes = readDataset(fs, path, original.length); @@ -399,9 +413,7 @@ public static boolean rm(FileSystem fileSystem, IOException { if (fileSystem != null) { rejectRootOperation(path, allowRootDelete); - if (fileSystem.exists(path)) { - return fileSystem.delete(path, recursive); - } + return fileSystem.delete(path, recursive); } return false; @@ -728,8 +740,10 @@ public static void assertDeleted(FileSystem fs, assertPathExists(fs, "about to be deleted file", file); } boolean deleted = fs.delete(file, recursive); - String dir = ls(fs, file.getParent()); - assertTrue("Delete failed on " + file + ": " + dir, deleted); + if (!deleted) { + String dir = ls(fs, file.getParent()); + assertTrue("Delete failed on " + file + ": " + dir, deleted); + } assertPathDoesNotExist(fs, "Deleted file", file); } @@ -1095,6 +1109,78 @@ public static void validateFileContent(byte[] concat, byte[][] bytes) { mismatch); } + /** + * Utility to validate vectored read results. + * @param fileRanges input ranges. + * @param originalData original data. + * @throws IOException any ioe. 
+ */ + public static void validateVectoredReadResult(List fileRanges, + byte[] originalData) + throws IOException, TimeoutException { + CompletableFuture[] completableFutures = new CompletableFuture[fileRanges.size()]; + int i = 0; + for (FileRange res : fileRanges) { + completableFutures[i++] = res.getData(); + } + CompletableFuture combinedFuture = CompletableFuture.allOf(completableFutures); + FutureIO.awaitFuture(combinedFuture, + VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS); + + for (FileRange res : fileRanges) { + CompletableFuture data = res.getData(); + ByteBuffer buffer = FutureIO.awaitFuture(data, + VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS); + assertDatasetEquals((int) res.getOffset(), "vecRead", + buffer, res.getLength(), originalData); + } + } + + /** + * Utility to return buffers back to the pool once all + * data has been read for each file range. + * @param fileRanges list of file range. + * @param pool buffer pool. + * @throws IOException any IOE + * @throws TimeoutException ideally this should never occur. + */ + public static void returnBuffersToPoolPostRead(List fileRanges, + ByteBufferPool pool) + throws IOException, TimeoutException { + for (FileRange range : fileRanges) { + ByteBuffer buffer = FutureIO.awaitFuture(range.getData(), + VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS); + pool.putBuffer(buffer); + } + } + + + /** + * Assert that the data read matches the dataset at the given offset. + * This helps verify that the seek process is moving the read pointer + * to the correct location in the file. + * @param readOffset the offset in the file where the read began. + * @param operation operation name for the assertion. + * @param data data read in. + * @param length length of data to check. + * @param originalData original data. 
+ */ + public static void assertDatasetEquals( + final int readOffset, + final String operation, + final ByteBuffer data, + int length, byte[] originalData) { + for (int i = 0; i < length; i++) { + int o = readOffset + i; + assertEquals(operation + " with read offset " + readOffset + + ": data[" + i + "] != DATASET[" + o + "]", + originalData[o], data.get()); + } + } + /** * Receives test data from the given input file and checks the size of the * data as well as the pattern inside the received data. @@ -1446,9 +1532,47 @@ public static TreeScanResults treeWalk(FileSystem fs, Path path) */ public static List toList( RemoteIterator iterator) throws IOException { - ArrayList list = new ArrayList<>(); - while (iterator.hasNext()) { - list.add(iterator.next()); + return RemoteIterators.toList(iterator); + } + + /** + * Convert a remote iterator over file status results into a list. + * The utility equivalents in commons collection and guava cannot be + * used here, as this is a different interface, one whose operators + * can throw IOEs. + * @param iterator input iterator + * @return the file status entries as a list. + * @throws IOException + */ + public static List iteratorToList( + RemoteIterator iterator) throws IOException { + return RemoteIterators.toList(iterator); + } + + + /** + * Convert a remote iterator over file status results into a list. + * This uses {@link RemoteIterator#next()} calls only, expecting + * a raised {@link NoSuchElementException} exception to indicate that + * the end of the listing has been reached. This iteration strategy is + * designed to verify that the implementation of the remote iterator + * generates results and terminates consistently with the {@code hasNext/next} + * iteration. More succinctly "verifies that the {@code next()} operator + * isn't relying on {@code hasNext()} to always be called during an iteration. + * @param iterator input iterator + * @return the status entries as a list. 
+ * @throws IOException IO problems + */ + @SuppressWarnings("InfiniteLoopStatement") + public static List iteratorToListThroughNextCallsAlone( + RemoteIterator iterator) throws IOException { + List list = new ArrayList<>(); + try { + while (true) { + list.add(iterator.next()); + } + } catch (NoSuchElementException expected) { + // ignored } return list; } @@ -1496,19 +1620,49 @@ public static void assertCapabilities( StreamCapabilities source = (StreamCapabilities) stream; if (shouldHaveCapabilities != null) { for (String shouldHaveCapability : shouldHaveCapabilities) { - assertTrue("Should have capability: " + shouldHaveCapability, + assertTrue("Should have capability: " + shouldHaveCapability + + " in " + source, source.hasCapability(shouldHaveCapability)); } } if (shouldNotHaveCapabilities != null) { for (String shouldNotHaveCapability : shouldNotHaveCapabilities) { - assertFalse("Should not have capability: " + shouldNotHaveCapability, + assertFalse("Should not have capability: " + shouldNotHaveCapability + + " in " + source, source.hasCapability(shouldNotHaveCapability)); } } } + + /** + * Custom assert to verify capabilities supported by + * an object through {@link StreamCapabilities}. + * + * @param source The object to test for StreamCapabilities + * @param capabilities The list of expected capabilities + */ + public static void assertHasStreamCapabilities( + final Object source, + final String... capabilities) { + assertCapabilities(source, capabilities, null); + } + + /** + * Custom assert to verify capabilities NOT supported by + * an object through {@link StreamCapabilities}. + * + * @param source The object to test for StreamCapabilities + * @param capabilities The list of capabilities which must not be + * supported. + */ + public static void assertLacksStreamCapabilities( + final Object source, + final String... capabilities) { + assertCapabilities(source, null, capabilities); + } + /** * Custom assert to test {@link PathCapabilities}. 
* @@ -1523,7 +1677,8 @@ public static void assertHasPathCapabilities( for (String shouldHaveCapability: capabilities) { assertTrue("Should have capability: " + shouldHaveCapability - + " under " + path, + + " under " + path + + " in " + source, source.hasPathCapability(path, shouldHaveCapability)); } } @@ -1565,17 +1720,22 @@ public static int read(InputStream in) { /** * Read a whole stream; downgrades an IOE to a runtime exception. + * Closes the stream afterwards. * @param in input * @return the number of bytes read. * @throws AssertionError on any IOException */ public static long readStream(InputStream in) { - long count = 0; + try { + long count = 0; - while (read(in) >= 0) { - count++; + while (read(in) >= 0) { + count++; + } + return count; + } finally { + IOUtils.cleanupWithLogger(LOG, in); } - return count; } @@ -1602,7 +1762,7 @@ public TreeScanResults(Path basePath) { * @param results results of the listFiles/listStatus call. * @throws IOException IO problems during the iteration. */ - public TreeScanResults(RemoteIterator results) + public TreeScanResults(RemoteIterator results) throws IOException { while (results.hasNext()) { add(results.next()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractContentSummary.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractContentSummary.java new file mode 100644 index 0000000000000..7555cf85158f9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractContentSummary.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.localfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractContentSummaryTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestLocalFSContractContentSummary extends AbstractContractContentSummaryTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractCreate.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractCreate.java index f8eeb961e92ff..3cea68c221000 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractCreate.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractCreate.java @@ -18,7 +18,10 @@ package org.apache.hadoop.fs.contract.localfs; +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.contract.AbstractContractCreateTest; import org.apache.hadoop.fs.contract.AbstractFSContract; @@ -29,4 +32,17 @@ protected AbstractFSContract createContract(Configuration conf) { return new 
LocalFSContract(conf); } + @Test + public void testSyncablePassthroughIfChecksumDisabled() throws Throwable { + describe("Create an instance of the local fs, disable the checksum" + + " and verify that Syncable now works"); + LocalFileSystem fs = (LocalFileSystem) getFileSystem(); + try (LocalFileSystem lfs = new LocalFileSystem( + fs.getRawFileSystem())) { + // disable checksumming output + lfs.setWriteChecksum(false); + // now the filesystem supports Sync with immediate update of file status + validateSyncableSemantics(lfs, true, true, true); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractMultipartUploader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractMultipartUploader.java deleted file mode 100644 index f675ddfa0db82..0000000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractMultipartUploader.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.contract.localfs; - -import org.junit.Assume; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractMultipartUploaderTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Test the FileSystemMultipartUploader on local file system. - */ -public class TestLocalFSContractMultipartUploader - extends AbstractContractMultipartUploaderTest { - - @Override - public void setup() throws Exception { - Assume.assumeTrue("Skipping until HDFS-13934", false); - super.setup(); - } - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new LocalFSContract(conf); - } - - /** - * There is no real need to upload any particular size. - * @return 1 kilobyte - */ - @Override - protected int partSizeInBytes() { - return 1024; - } - - @Override - protected boolean finalizeConsumesUploadIdImmediately() { - return true; - } - - @Override - protected boolean supportsConcurrentUploadsToSamePath() { - return true; - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractStreamIOStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractStreamIOStatistics.java new file mode 100644 index 0000000000000..642baec502d2e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractStreamIOStatistics.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.localfs; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractStreamIOStatisticsTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_EXCEPTIONS; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SEEK_OPERATIONS; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SKIP_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SKIP_OPERATIONS; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_WRITE_BYTES; +import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_WRITE_EXCEPTIONS; + +/** + * Test IOStatistics through the local FS. + */ +public class TestLocalFSContractStreamIOStatistics extends + AbstractContractStreamIOStatisticsTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } + + /** + * Keys which the input stream must support. + * @return a list of keys + */ + public List inputStreamStatisticKeys() { + return Arrays.asList(STREAM_READ_BYTES, + STREAM_READ_EXCEPTIONS, + STREAM_READ_SEEK_OPERATIONS, + STREAM_READ_SKIP_OPERATIONS, + STREAM_READ_SKIP_BYTES); + } + + /** + * Keys which the output stream must support. 
+ * @return a list of keys + */ + @Override + public List outputStreamStatisticKeys() { + return Arrays.asList(STREAM_WRITE_BYTES, + STREAM_WRITE_EXCEPTIONS); + } + + @Override + public int readBufferSize() { + return 1024; + } + + @Override + public boolean streamWritesInBlocks() { + return true; + } + + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java new file mode 100644 index 0000000000000..5ee888015315c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.contract.localfs; + +import java.io.EOFException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ChecksumException; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileRange; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractVectoredReadTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.validateVectoredReadResult; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class TestLocalFSContractVectoredRead extends AbstractContractVectoredReadTest { + + public TestLocalFSContractVectoredRead(String bufferType) { + super(bufferType); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } + + @Test + public void testChecksumValidationDuringVectoredRead() throws Exception { + Path testPath = path("big_range_checksum_file"); + List someRandomRanges = new ArrayList<>(); + someRandomRanges.add(FileRange.createFileRange(10, 1024)); + someRandomRanges.add(FileRange.createFileRange(1025, 1024)); + validateCheckReadException(testPath, DATASET_LEN, someRandomRanges); + } + + + /** + * Test for file size less than checksum chunk size. + * {@code ChecksumFileSystem#bytesPerChecksum}. 
+ */ + @Test + public void testChecksumValidationDuringVectoredReadSmallFile() throws Exception { + Path testPath = path("big_range_checksum_file"); + final int length = 471; + List smallFileRanges = new ArrayList<>(); + smallFileRanges.add(FileRange.createFileRange(10, 50)); + smallFileRanges.add(FileRange.createFileRange(100, 20)); + validateCheckReadException(testPath, length, smallFileRanges); + } + + private void validateCheckReadException(Path testPath, + int length, + List ranges) throws Exception { + LocalFileSystem localFs = (LocalFileSystem) getFileSystem(); + final byte[] datasetCorrect = ContractTestUtils.dataset(length, 'a', 32); + try (FSDataOutputStream out = localFs.create(testPath, true)){ + out.write(datasetCorrect); + } + Path checksumPath = localFs.getChecksumFile(testPath); + Assertions.assertThat(localFs.exists(checksumPath)) + .describedAs("Checksum file should be present") + .isTrue(); + CompletableFuture fis = localFs.openFile(testPath).build(); + try (FSDataInputStream in = fis.get()){ + in.readVectored(ranges, getAllocate()); + validateVectoredReadResult(ranges, datasetCorrect); + } + final byte[] datasetCorrupted = ContractTestUtils.dataset(length, 'a', 64); + try (FSDataOutputStream out = localFs.getRaw().create(testPath, true)){ + out.write(datasetCorrupted); + } + CompletableFuture fisN = localFs.openFile(testPath).build(); + try (FSDataInputStream in = fisN.get()){ + in.readVectored(ranges, getAllocate()); + // Expect checksum exception when data is updated directly through + // raw local fs instance. 
+ intercept(ChecksumException.class, + () -> validateVectoredReadResult(ranges, datasetCorrupted)); + } + } + @Test + public void tesChecksumVectoredReadBoundaries() throws Exception { + Path testPath = path("boundary_range_checksum_file"); + final int length = 1071; + LocalFileSystem localFs = (LocalFileSystem) getFileSystem(); + final byte[] datasetCorrect = ContractTestUtils.dataset(length, 'a', 32); + try (FSDataOutputStream out = localFs.create(testPath, true)){ + out.write(datasetCorrect); + } + Path checksumPath = localFs.getChecksumFile(testPath); + Assertions.assertThat(localFs.exists(checksumPath)) + .describedAs("Checksum file should be present at %s ", checksumPath) + .isTrue(); + CompletableFuture fis = localFs.openFile(testPath).build(); + List smallRange = new ArrayList<>(); + smallRange.add(FileRange.createFileRange(1000, 71)); + try (FSDataInputStream in = fis.get()){ + in.readVectored(smallRange, getAllocate()); + validateVectoredReadResult(smallRange, datasetCorrect); + } + } + + + /** + * Overriding in checksum fs as vectored read api fails fast + * in case of EOF requested range. + */ + @Override + public void testEOFRanges() throws Exception { + FileSystem fs = getFileSystem(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(DATASET_LEN, 100)); + verifyExceptionalVectoredRead(fs, fileRanges, EOFException.class); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractVectoredRead.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractVectoredRead.java new file mode 100644 index 0000000000000..cbb31ffe27a59 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractVectoredRead.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.rawlocal; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractVectoredReadTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestRawLocalContractVectoredRead extends AbstractContractVectoredReadTest { + + public TestRawLocalContractVectoredRead(String bufferType) { + super(bufferType); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new RawlocalFSContract(conf); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java new file mode 100644 index 0000000000000..f72a2aec86242 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.sftp; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.sftp.SFTPFileSystem; +import org.apache.sshd.common.NamedFactory; +import org.apache.sshd.server.SshServer; +import org.apache.sshd.server.auth.UserAuth; +import org.apache.sshd.server.auth.password.UserAuthPasswordFactory; +import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider; +import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory; + +public class SFTPContract extends AbstractFSContract { + + private static final String CONTRACT_XML = "contract/sftp.xml"; + private static final URI TEST_URI = + URI.create("sftp://user:password@localhost"); + private final String testDataDir = + new FileSystemTestHelper().getTestRootDir(); + private final Configuration conf; + private SshServer sshd; + + public SFTPContract(Configuration conf) { + super(conf); + addConfResource(CONTRACT_XML); + this.conf = conf; + } + + @Override + public void init() throws IOException { + sshd = 
SshServer.setUpDefaultServer(); + // ask OS to assign a port + sshd.setPort(0); + sshd.setKeyPairProvider(new SimpleGeneratorHostKeyProvider()); + + List> userAuthFactories = new ArrayList<>(); + userAuthFactories.add(new UserAuthPasswordFactory()); + + sshd.setUserAuthFactories(userAuthFactories); + sshd.setPasswordAuthenticator((username, password, session) -> + username.equals("user") && password.equals("password") + ); + + sshd.setSubsystemFactories( + Collections.singletonList(new SftpSubsystemFactory())); + + sshd.start(); + int port = sshd.getPort(); + + conf.setClass("fs.sftp.impl", SFTPFileSystem.class, FileSystem.class); + conf.setInt("fs.sftp.host.port", port); + conf.setBoolean("fs.sftp.impl.disable.cache", true); + } + + @Override + public void teardown() throws IOException { + if (sshd != null) { + sshd.stop(); + } + } + + @Override + public FileSystem getTestFileSystem() throws IOException { + return FileSystem.get(TEST_URI, conf); + } + + @Override + public String getScheme() { + return "sftp"; + } + + @Override + public Path getTestPath() { + try { + FileSystem fs = FileSystem.get( + URI.create("sftp://user:password@localhost"), conf + ); + return fs.makeQualified(new Path(testDataDir)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/TestSFTPContractSeek.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/TestSFTPContractSeek.java new file mode 100644 index 0000000000000..20f4116b98019 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/TestSFTPContractSeek.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.sftp; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractSeekTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class TestSFTPContractSeek extends AbstractContractSeekTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new SFTPContract(conf); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/FtpTestServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/FtpTestServer.java new file mode 100644 index 0000000000000..eca26dea5b39b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/FtpTestServer.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.ftp; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +import org.apache.ftpserver.FtpServer; +import org.apache.ftpserver.FtpServerFactory; +import org.apache.ftpserver.ftplet.Authority; +import org.apache.ftpserver.ftplet.FtpException; +import org.apache.ftpserver.ftplet.UserManager; +import org.apache.ftpserver.impl.DefaultFtpServer; +import org.apache.ftpserver.listener.Listener; +import org.apache.ftpserver.listener.ListenerFactory; +import org.apache.ftpserver.usermanager.PropertiesUserManagerFactory; +import org.apache.ftpserver.usermanager.impl.BaseUser; + +/** + * Helper class facilitating to manage a local ftp + * server for unit tests purposes only. 
+ */ +public class FtpTestServer { + + private int port; + private Path ftpRoot; + private UserManager userManager; + private FtpServer server; + + public FtpTestServer(Path ftpRoot) { + this.ftpRoot = ftpRoot; + this.userManager = new PropertiesUserManagerFactory().createUserManager(); + FtpServerFactory serverFactory = createServerFactory(); + serverFactory.setUserManager(userManager); + this.server = serverFactory.createServer(); + } + + public FtpTestServer start() throws Exception { + server.start(); + Listener listener = ((DefaultFtpServer) server) + .getListeners() + .get("default"); + port = listener.getPort(); + return this; + } + + public Path getFtpRoot() { + return ftpRoot; + } + + public int getPort() { + return port; + } + + public void stop() { + if (!server.isStopped()) { + server.stop(); + } + } + + public BaseUser addUser(String name, String password, + Authority... authorities) throws IOException, FtpException { + + BaseUser user = new BaseUser(); + user.setName(name); + user.setPassword(password); + Path userHome = Files.createDirectory(ftpRoot.resolve(name)); + user.setHomeDirectory(userHome.toString()); + user.setAuthorities(Arrays.asList(authorities)); + userManager.save(user); + return user; + } + + private FtpServerFactory createServerFactory() { + FtpServerFactory serverFactory = new FtpServerFactory(); + ListenerFactory defaultListener = new ListenerFactory(); + defaultListener.setPort(0); + serverFactory.addListener("default", defaultListener.createListener()); + return serverFactory; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/TestFTPFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/TestFTPFileSystem.java index 3d41ccb91d6c4..8155d8e2b2ba1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/TestFTPFileSystem.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/ftp/TestFTPFileSystem.java @@ -17,18 +17,35 @@ */ package org.apache.hadoop.fs.ftp; -import com.google.common.base.Preconditions; -import org.apache.commons.net.ftp.FTP; +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.Comparator; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.commons.net.ftp.FTP; import org.apache.commons.net.ftp.FTPClient; import org.apache.commons.net.ftp.FTPFile; +import org.apache.ftpserver.usermanager.impl.BaseUser; +import org.apache.ftpserver.usermanager.impl.WritePermission; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.After; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; - +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertEquals; /** @@ -37,9 +54,75 @@ */ public class TestFTPFileSystem { + private FtpTestServer server; + private java.nio.file.Path testDir; @Rule public Timeout testTimeout = new Timeout(180000); + @Before + public void setUp() throws Exception { + testDir = Files.createTempDirectory( + GenericTestUtils.getTestDir().toPath(), getClass().getName() + ); + server = new FtpTestServer(testDir).start(); + } + + @After + @SuppressWarnings("ResultOfMethodCallIgnored") + public void tearDown() throws Exception { + if (server != null) { + server.stop(); + Files.walk(testDir) + .sorted(Comparator.reverseOrder()) + 
.map(java.nio.file.Path::toFile) + .forEach(File::delete); + } + } + + @Test + public void testCreateWithWritePermissions() throws Exception { + BaseUser user = server.addUser("test", "password", new WritePermission()); + Configuration configuration = new Configuration(); + configuration.set("fs.defaultFS", "ftp:///"); + configuration.set("fs.ftp.host", "localhost"); + configuration.setInt("fs.ftp.host.port", server.getPort()); + configuration.set("fs.ftp.user.localhost", user.getName()); + configuration.set("fs.ftp.password.localhost", user.getPassword()); + configuration.setBoolean("fs.ftp.impl.disable.cache", true); + + FileSystem fs = FileSystem.get(configuration); + byte[] bytesExpected = "hello world".getBytes(StandardCharsets.UTF_8); + try (FSDataOutputStream outputStream = fs.create(new Path("test1.txt"))) { + outputStream.write(bytesExpected); + } + try (FSDataInputStream input = fs.open(new Path("test1.txt"))) { + assertThat(bytesExpected, equalTo(IOUtils.readFullyToByteArray(input))); + } + } + + @Test + public void testCreateWithoutWritePermissions() throws Exception { + BaseUser user = server.addUser("test", "password"); + Configuration configuration = new Configuration(); + configuration.set("fs.defaultFS", "ftp:///"); + configuration.set("fs.ftp.host", "localhost"); + configuration.setInt("fs.ftp.host.port", server.getPort()); + configuration.set("fs.ftp.user.localhost", user.getName()); + configuration.set("fs.ftp.password.localhost", user.getPassword()); + configuration.setBoolean("fs.ftp.impl.disable.cache", true); + + FileSystem fs = FileSystem.get(configuration); + byte[] bytesExpected = "hello world".getBytes(StandardCharsets.UTF_8); + LambdaTestUtils.intercept( + IOException.class, "Unable to create file: test1.txt, Aborting", + () -> { + try (FSDataOutputStream out = fs.create(new Path("test1.txt"))) { + out.write(bytesExpected); + } + } + ); + } + @Test public void testFTPDefaultPort() throws Exception { FTPFileSystem ftp = new 
FTPFileSystem(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java index 0902c04c79b66..4c6cf823a7659 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java @@ -25,14 +25,17 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; +import org.junit.Before; import org.junit.Test; + import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.util.stream.IntStream; import static org.junit.Assert.assertEquals; @@ -40,28 +43,48 @@ * Testing HttpFileSystem. */ public class TestHttpFileSystem { + private final Configuration conf = new Configuration(false); + + @Before + public void setUp() { + conf.set("fs.http.impl", HttpFileSystem.class.getCanonicalName()); + } + @Test public void testHttpFileSystem() throws IOException, URISyntaxException, InterruptedException { - Configuration conf = new Configuration(false); - conf.set("fs.http.impl", HttpFileSystem.class.getCanonicalName()); final String data = "foo"; - try (MockWebServer server = new MockWebServer()) { - server.enqueue(new MockResponse().setBody(data)); + IntStream.rangeClosed(1, 3).forEach(i -> server.enqueue(new MockResponse().setBody(data))); server.start(); URI uri = URI.create(String.format("http://%s:%d", server.getHostName(), server.getPort())); FileSystem fs = FileSystem.get(uri, conf); - try (InputStream is = fs.open( - new Path(new URL(uri.toURL(), "/foo").toURI()), - 4096)) { - byte[] buf = new byte[data.length()]; - IOUtils.readFully(is, buf, 0, buf.length); - assertEquals(data, new String(buf, 
StandardCharsets.UTF_8)); - } + assertSameData(fs, new Path(new URL(uri.toURL(), "/foo").toURI()), data); + assertSameData(fs, new Path("/foo"), data); + assertSameData(fs, new Path("foo"), data); RecordedRequest req = server.takeRequest(); assertEquals("/foo", req.getPath()); } } + + @Test + public void testHttpFileStatus() throws IOException, URISyntaxException, InterruptedException { + URI uri = new URI("http://www.example.com"); + FileSystem fs = FileSystem.get(uri, conf); + URI expectedUri = uri.resolve("/foo"); + assertEquals(expectedUri, fs.getFileStatus(new Path(new Path(uri), "/foo")).getPath().toUri()); + assertEquals(expectedUri, fs.getFileStatus(new Path("/foo")).getPath().toUri()); + assertEquals(expectedUri, fs.getFileStatus(new Path("foo")).getPath().toUri()); + } + + private void assertSameData(FileSystem fs, Path path, String data) throws IOException { + try (InputStream is = fs.open( + path, + 4096)) { + byte[] buf = new byte[data.length()]; + IOUtils.readFully(is, buf, 0, buf.length); + assertEquals(data, new String(buf, StandardCharsets.UTF_8)); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/ExceptionAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/ExceptionAsserts.java new file mode 100644 index 0000000000000..82348d97798ea --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/ExceptionAsserts.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import org.apache.hadoop.test.LambdaTestUtils; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public final class ExceptionAsserts { + + private ExceptionAsserts() { + } + + + /** + * Asserts that the given code throws an exception of the given type + * and that the exception message contains the given sub-message. + * + * Usage: + * + * ExceptionAsserts.assertThrows( + * IllegalArgumentException.class, + * "'nullArg' must not be null", + * () -> Preconditions.checkNotNull(null, "nullArg")); + * + * Note: JUnit 5 has similar functionality but it will be a long time before + * we move to that framework because of significant differences and lack of + * backward compatibility for some JUnit rules. 
+ */ + public static void assertThrows( + Class expectedExceptionClass, + String partialMessage, + LambdaTestUtils.VoidCallable code) throws Exception { + + intercept(expectedExceptionClass, partialMessage, code); + + } + + public static void assertThrows( + Class expectedExceptionClass, + LambdaTestUtils.VoidCallable code) throws Exception { + + intercept(expectedExceptionClass, code); + + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/SampleDataForTests.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/SampleDataForTests.java new file mode 100644 index 0000000000000..b6f744582d3e2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/SampleDataForTests.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Frequently used test data items. + */ +public final class SampleDataForTests { + + private SampleDataForTests() { + } + + + // Array data. 
+ public static final Object[] NULL_ARRAY = null; + + public static final Object[] EMPTY_ARRAY = new Object[0]; + + public static final Object[] NON_EMPTY_ARRAY = new Object[1]; + + public static final byte[] NULL_BYTE_ARRAY = null; + + public static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; + + public static final byte[] NON_EMPTY_BYTE_ARRAY = new byte[1]; + + public static final short[] NULL_SHORT_ARRAY = null; + + public static final short[] EMPTY_SHORT_ARRAY = new short[0]; + + public static final short[] NON_EMPTY_SHORT_ARRAY = new short[1]; + + public static final int[] NULL_INT_ARRAY = null; + + public static final int[] EMPTY_INT_ARRAY = new int[0]; + + public static final int[] NON_EMPTY_INT_ARRAY = new int[1]; + + public static final long[] NULL_LONG_ARRAY = null; + + public static final long[] EMPTY_LONG_ARRAY = new long[0]; + + public static final long[] NON_EMPTY_LONG_ARRAY = new long[1]; + + public static final List NULL_LIST = null; + + public static final List EMPTY_LIST = new ArrayList(); + + public static final List VALID_LIST = Arrays.asList(new Object[1]); +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java new file mode 100644 index 0000000000000..3b60c1c795336 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.ByteBuffer; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_TMP_DIR; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; + +public class TestBlockCache extends AbstractHadoopTestBase { + + private static final int BUFFER_SIZE = 16; + + private static final Configuration CONF = new Configuration(); + + @Test + public void testArgChecks() throws Exception { + // Should not throw. + BlockCache cache = + new SingleFilePerBlockCache(EmptyPrefetchingStatistics.getInstance()); + + ByteBuffer buffer = ByteBuffer.allocate(16); + + // Verify it throws correctly. 
+ intercept(IllegalArgumentException.class, "'buffer' must not be null", + () -> cache.put(42, null, null, null)); + + + intercept(NullPointerException.class, null, + () -> new SingleFilePerBlockCache(null)); + + } + + + @Test + public void testPutAndGet() throws Exception { + BlockCache cache = + new SingleFilePerBlockCache(EmptyPrefetchingStatistics.getInstance()); + + ByteBuffer buffer1 = ByteBuffer.allocate(BUFFER_SIZE); + for (byte i = 0; i < BUFFER_SIZE; i++) { + buffer1.put(i); + } + + assertEquals(0, cache.size()); + assertFalse(cache.containsBlock(0)); + cache.put(0, buffer1, CONF, new LocalDirAllocator(HADOOP_TMP_DIR)); + assertEquals(1, cache.size()); + assertTrue(cache.containsBlock(0)); + ByteBuffer buffer2 = ByteBuffer.allocate(BUFFER_SIZE); + cache.get(0, buffer2); + assertNotSame(buffer1, buffer2); + assertBuffersEqual(buffer1, buffer2); + + assertEquals(1, cache.size()); + assertFalse(cache.containsBlock(1)); + cache.put(1, buffer1, CONF, new LocalDirAllocator(HADOOP_TMP_DIR)); + assertEquals(2, cache.size()); + assertTrue(cache.containsBlock(1)); + ByteBuffer buffer3 = ByteBuffer.allocate(BUFFER_SIZE); + cache.get(1, buffer3); + assertNotSame(buffer1, buffer3); + assertBuffersEqual(buffer1, buffer3); + } + + private void assertBuffersEqual(ByteBuffer buffer1, ByteBuffer buffer2) { + assertNotNull(buffer1); + assertNotNull(buffer2); + assertEquals(buffer1.limit(), buffer2.limit()); + assertEquals(BUFFER_SIZE, buffer1.limit()); + for (int i = 0; i < BUFFER_SIZE; i++) { + assertEquals(buffer1.get(i), buffer2.get(i)); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockData.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockData.java new file mode 100644 index 0000000000000..50ce220f6527e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockData.java @@ -0,0 +1,159 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestBlockData extends AbstractHadoopTestBase { + + @Test + public void testArgChecks() throws Exception { + // Should not throw. + new BlockData(10, 5); + new BlockData(5, 10); + new BlockData(0, 10); + + // Verify it throws correctly. 
+ + + intercept(IllegalArgumentException.class, "'fileSize' must not be negative", + () -> new BlockData(-1, 2)); + + intercept(IllegalArgumentException.class, + "'blockSize' must be a positive integer", + () -> new BlockData(10, 0)); + + intercept(IllegalArgumentException.class, + "'blockSize' must be a positive integer", + () -> new BlockData(10, -2)); + + intercept(IllegalArgumentException.class, + "'blockNumber' (-1) must be within the range [0, 3]", + () -> new BlockData(10, 3).isLastBlock( + -1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' (11) must be within the range [0, 3]", + () -> new BlockData(10, 3).isLastBlock( + 11)); + + } + + @Test + public void testComputedFields() throws Exception { + testComputedFieldsHelper(0, 10); + testComputedFieldsHelper(1, 10); + testComputedFieldsHelper(10, 1); + testComputedFieldsHelper(10, 2); + testComputedFieldsHelper(10, 3); + } + + private void testComputedFieldsHelper(long fileSize, int blockSize) + throws Exception { + BlockData bd = new BlockData(fileSize, blockSize); + + if (fileSize == 0) { + assertFalse(bd.isLastBlock(0)); + assertFalse(bd.isLastBlock(1)); + assertFalse(bd.isValidOffset(0)); + assertEquals(0, bd.getSize(0)); + assertEquals("", bd.getStateString()); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'offset' (0) must be within the range [0, -1]", + () -> bd.getBlockNumber(0)); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'blockNumber' (0) must be within the range [0, -1]", + () -> bd.getStartOffset(0)); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'offset' (0) must be within the range [0, -1]", + () -> bd.getRelativeOffset(0, 0)); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'blockNumber' (0) must be within the range [0, -1]", + () -> bd.getState(0)); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'blockNumber' (0) must be within the range [0, -1]", 
+ () -> bd.setState(0, BlockData.State.READY)); + + return; + } + + assertEquals(fileSize, bd.getFileSize()); + assertEquals(blockSize, bd.getBlockSize()); + + int expectedNumBlocks = (int) (fileSize / blockSize); + if (fileSize % blockSize > 0) { + expectedNumBlocks++; + } + assertEquals(expectedNumBlocks, bd.getNumBlocks()); + + int lastBlockNumber = expectedNumBlocks - 1; + for (int b = 0; b < lastBlockNumber; b++) { + assertFalse(bd.isLastBlock(b)); + assertEquals(blockSize, bd.getSize(b)); + } + assertTrue(bd.isLastBlock(lastBlockNumber)); + int lastBlockSize = (int) (fileSize - blockSize * (expectedNumBlocks - 1)); + assertEquals(lastBlockSize, bd.getSize(lastBlockNumber)); + + // Offset related methods. + for (long offset = 0; offset < fileSize; offset++) { + int expectedBlockNumber = (int) (offset / blockSize); + assertEquals(expectedBlockNumber, bd.getBlockNumber(offset)); + + for (int b = 0; b < expectedNumBlocks - 1; b++) { + long expectedStartOffset = b * blockSize; + assertEquals(expectedStartOffset, bd.getStartOffset(b)); + + int expectedRelativeOffset = (int) (offset - expectedStartOffset); + assertEquals(expectedRelativeOffset, bd.getRelativeOffset(b, offset)); + } + } + + + // State methods. 
+ for (int b = 0; b < expectedNumBlocks; b++) { + assertEquals(b * blockSize, bd.getStartOffset(b)); + assertEquals(BlockData.State.NOT_READY, bd.getState(b)); + bd.setState(b, BlockData.State.QUEUED); + assertEquals(BlockData.State.QUEUED, bd.getState(b)); + bd.setState(b, BlockData.State.READY); + assertEquals(BlockData.State.READY, bd.getState(b)); + bd.setState(b, BlockData.State.CACHED); + assertEquals(BlockData.State.CACHED, bd.getState(b)); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockOperations.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockOperations.java new file mode 100644 index 0000000000000..703041379ab6e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockOperations.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.lang.reflect.Method; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertTrue; + +public class TestBlockOperations extends AbstractHadoopTestBase { + + @Test + public void testArgChecks() throws Exception { + // Should not throw. + BlockOperations ops = new BlockOperations(); + + // Verify it throws correctly. + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> ops.getPrefetched(-1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> ops.getCached(-1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> ops.getRead(-1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> ops.release(-1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> ops.requestPrefetch(-1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> ops.requestCaching(-1)); + + } + + @Test + public void testGetSummary() throws Exception { + verifySummary("getPrefetched", "GP"); + verifySummary("getCached", "GC"); + verifySummary("getRead", "GR"); + verifySummary("release", "RL"); + verifySummary("requestPrefetch", "RP"); + verifySummary("prefetch", "PF"); + verifySummary("requestCaching", "RC"); + verifySummary("addToCache", "C+"); + + verifySummaryNoArg("cancelPrefetches", "CP"); + verifySummaryNoArg("close", "CX"); + } + + private void verifySummary(String methodName, String shortName) + throws Exception { + int blockNumber = 42; + BlockOperations ops = new BlockOperations(); + Method method = ops.getClass().getDeclaredMethod(methodName, int.class); + BlockOperations.Operation op = + (BlockOperations.Operation) method.invoke(ops, blockNumber); + 
ops.end(op); + String summary = ops.getSummary(false); + String opSummary = String.format("%s(%d)", shortName, blockNumber); + String expectedSummary = String.format("%s;E%s;", opSummary, opSummary); + assertTrue(summary.startsWith(expectedSummary)); + } + + private void verifySummaryNoArg(String methodName, String shortName) + throws Exception { + BlockOperations ops = new BlockOperations(); + Method method = ops.getClass().getDeclaredMethod(methodName); + BlockOperations.Operation op = + (BlockOperations.Operation) method.invoke(ops); + ops.end(op); + String summary = ops.getSummary(false); + String expectedSummary = String.format("%s;E%s;", shortName, shortName); + assertTrue(summary.startsWith(expectedSummary)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBoundedResourcePool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBoundedResourcePool.java new file mode 100644 index 0000000000000..fc29e1b725405 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBoundedResourcePool.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.Set; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +public class TestBoundedResourcePool extends AbstractHadoopTestBase { + + static class BufferPool extends BoundedResourcePool { + + BufferPool(int size) { + super(size); + } + + @Override + protected ByteBuffer createNew() { + return ByteBuffer.allocate(10); + } + } + + @Test + public void testArgChecks() throws Exception { + + // Should not throw. + BufferPool pool = new BufferPool(5); + + // Verify it throws correctly. + + intercept(IllegalArgumentException.class, + "'size' must be a positive integer", + () -> new BufferPool(-1)); + + intercept(IllegalArgumentException.class, + "'size' must be a positive integer", + () -> new BufferPool(0)); + + intercept(IllegalArgumentException.class, "'item' must not be null", + () -> pool.release(null)); + + intercept(IllegalArgumentException.class, + "This item is not a part of this pool", + () -> pool.release(ByteBuffer.allocate(4))); + + } + + @Test + public void testAcquireReleaseSingle() { + final int numBuffers = 5; + BufferPool pool = new BufferPool(numBuffers); + + assertEquals(0, pool.numCreated()); + assertEquals(numBuffers, pool.numAvailable()); + + ByteBuffer buffer1 = pool.acquire(); + assertNotNull(buffer1); + assertEquals(1, pool.numCreated()); + assertEquals(numBuffers - 1, pool.numAvailable()); + + // Release and immediately reacquire => should not end up creating new buffer. 
+ pool.release(buffer1); + assertEquals(1, pool.numCreated()); + + ByteBuffer buffer2 = pool.acquire(); + assertNotNull(buffer2); + assertSame(buffer1, buffer2); + assertEquals(1, pool.numCreated()); + } + + @Test + public void testAcquireReleaseMultiple() { + final int numBuffers = 5; + BufferPool pool = new BufferPool(numBuffers); + Set buffers = + Collections.newSetFromMap(new IdentityHashMap()); + + assertEquals(0, pool.numCreated()); + + // Acquire all one by one. + for (int i = 0; i < numBuffers; i++) { + assertEquals(numBuffers - i, pool.numAvailable()); + ByteBuffer buffer = pool.acquire(); + assertNotNull(buffer); + assertFalse(buffers.contains(buffer)); + buffers.add(buffer); + assertEquals(i + 1, pool.numCreated()); + } + + assertEquals(numBuffers, pool.numCreated()); + assertEquals(0, pool.numAvailable()); + + int releaseCount = 0; + + // Release all one by one. + for (ByteBuffer buffer : buffers) { + assertEquals(releaseCount, pool.numAvailable()); + releaseCount++; + pool.release(buffer); + assertEquals(releaseCount, pool.numAvailable()); + + // Releasing the same buffer again should not have any ill effect. + pool.release(buffer); + assertEquals(releaseCount, pool.numAvailable()); + pool.release(buffer); + assertEquals(releaseCount, pool.numAvailable()); + } + + // Acquire all one by one again to ensure that they are the same ones we got earlier. 
+ for (int i = 0; i < numBuffers; i++) { + ByteBuffer buffer = pool.acquire(); + assertTrue(buffers.contains(buffer)); + } + + assertEquals(numBuffers, pool.numCreated()); + assertEquals(0, pool.numAvailable()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBufferData.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBufferData.java new file mode 100644 index 0000000000000..ee5f95ca6bbb6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBufferData.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.ByteBuffer; +import java.nio.ReadOnlyBufferException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; + +public class TestBufferData extends AbstractHadoopTestBase { + + @Test + public void testArgChecks() throws Exception { + // Should not throw. + ByteBuffer buffer = ByteBuffer.allocate(1); + BufferData data = new BufferData(1, buffer); + + // Verify it throws correctly. + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> new BufferData(-1, buffer)); + + intercept(IllegalArgumentException.class, "'buffer' must not be null", + () -> new BufferData(1, null)); + + intercept(IllegalArgumentException.class, "'actionFuture' must not be null", + () -> data.setPrefetch(null)); + + intercept(IllegalArgumentException.class, "'actionFuture' must not be null", + () -> data.setCaching(null)); + + intercept(IllegalArgumentException.class, "'states' must not be null", + () -> data.throwIfStateIncorrect((BufferData.State[]) null)); + + intercept(IllegalStateException.class, + "Expected buffer state to be 'READY or CACHING' but found", + () -> data.throwIfStateIncorrect(BufferData.State.READY, + BufferData.State.CACHING)); + + } + + @Test + public void testValidStateUpdates() { + ByteBuffer buffer = ByteBuffer.allocate(1); + BufferData data = new BufferData(1, buffer); + + assertEquals(BufferData.State.BLANK, data.getState()); + + CompletableFuture actionFuture = new CompletableFuture<>(); + 
actionFuture.complete(null); + data.setPrefetch(actionFuture); + assertEquals(BufferData.State.PREFETCHING, data.getState()); + assertNotNull(data.getActionFuture()); + assertSame(actionFuture, data.getActionFuture()); + + CompletableFuture actionFuture2 = new CompletableFuture<>(); + data.setCaching(actionFuture2); + assertEquals(BufferData.State.CACHING, data.getState()); + assertNotNull(data.getActionFuture()); + assertSame(actionFuture2, data.getActionFuture()); + assertNotSame(actionFuture, actionFuture2); + + List states = Arrays.asList( + BufferData.State.BLANK, + BufferData.State.PREFETCHING, + BufferData.State.CACHING, + BufferData.State.READY + ); + + BufferData data2 = new BufferData(1, buffer); + BufferData.State prevState = null; + for (BufferData.State state : states) { + if (prevState != null) { + assertEquals(prevState, data2.getState()); + data2.updateState(state, prevState); + assertEquals(state, data2.getState()); + } + prevState = state; + } + } + + @Test + public void testInvalidStateUpdates() throws Exception { + CompletableFuture actionFuture = new CompletableFuture<>(); + actionFuture.complete(null); + testInvalidStateUpdatesHelper( + (d) -> d.setPrefetch(actionFuture), + BufferData.State.BLANK, + BufferData.State.READY); + + testInvalidStateUpdatesHelper( + (d) -> d.setCaching(actionFuture), + BufferData.State.PREFETCHING, + BufferData.State.READY); + } + + @Test + public void testSetReady() throws Exception { + byte[] bytes1 = new byte[5]; + initBytes(bytes1); + + ByteBuffer buffer = ByteBuffer.allocate(10); + buffer.put(bytes1); + buffer.limit(bytes1.length); + BufferData data = new BufferData(1, buffer); + assertNotEquals(BufferData.State.READY, data.getState()); + assertEquals(0, data.getChecksum()); + + data.setReady(BufferData.State.BLANK); + assertEquals(BufferData.State.READY, data.getState()); + assertNotEquals(0, data.getChecksum()); + + // Verify that buffer cannot be modified once in READY state. 
+ ExceptionAsserts.assertThrows( + ReadOnlyBufferException.class, + null, + () -> data.getBuffer().put(bytes1)); + + // Verify that buffer cannot be set to READY state more than once. + ExceptionAsserts.assertThrows( + IllegalStateException.class, + "Checksum cannot be changed once set", + () -> data.setReady(BufferData.State.BLANK)); + + // Verify that we detect post READY buffer modification. + buffer.array()[2] = (byte) 42; + ExceptionAsserts.assertThrows( + IllegalStateException.class, + "checksum changed after setReady()", + () -> data.setDone()); + } + + @Test + public void testChecksum() { + byte[] bytes1 = new byte[5]; + byte[] bytes2 = new byte[10]; + + initBytes(bytes1); + initBytes(bytes2); + + ByteBuffer buffer1 = ByteBuffer.wrap(bytes1); + ByteBuffer buffer2 = ByteBuffer.wrap(bytes2); + buffer2.limit(bytes1.length); + + long checksum1 = BufferData.getChecksum(buffer1); + long checksum2 = BufferData.getChecksum(buffer2); + + assertEquals(checksum1, checksum2); + } + + private void initBytes(byte[] bytes) { + for (int i = 0; i < bytes.length; i++) { + bytes[i] = (byte) i; + } + } + + @FunctionalInterface + public interface StateChanger { + + void run(BufferData data) throws Exception; + } + + private void testInvalidStateUpdatesHelper( + StateChanger changeState, + BufferData.State... 
validFromState) throws Exception { + + ByteBuffer buffer = ByteBuffer.allocate(1); + BufferData data = new BufferData(1, buffer); + data.updateState(validFromState[0], BufferData.State.BLANK); + List states = this.getStatesExcept(validFromState); + BufferData.State prevState = validFromState[0]; + String expectedMessage = + String.format("Expected buffer state to be '%s", validFromState[0]); + for (BufferData.State s : states) { + data.updateState(s, prevState); + + ExceptionAsserts.assertThrows( + IllegalStateException.class, + expectedMessage, + () -> changeState.run(data)); + + assertEquals(s, data.getState()); + prevState = s; + } + } + + static final List ALL_STATES = Arrays.asList( + BufferData.State.UNKNOWN, + BufferData.State.BLANK, + BufferData.State.PREFETCHING, + BufferData.State.CACHING, + BufferData.State.READY + ); + + private List getStatesExcept(BufferData.State... states) { + + List result = new ArrayList<>(); + for (BufferData.State s : ALL_STATES) { + boolean found = false; + for (BufferData.State ss : states) { + if (s == ss) { + found = true; + } + } + + if (!found) { + result.add(s); + } + } + + return result; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBufferPool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBufferPool.java new file mode 100644 index 0000000000000..b8375fe66dcb1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBufferPool.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; + +public class TestBufferPool extends AbstractHadoopTestBase { + + private static final int POOL_SIZE = 2; + + private static final int BUFFER_SIZE = 10; + + private final PrefetchingStatistics statistics = + EmptyPrefetchingStatistics.getInstance(); + + @Test + public void testArgChecks() throws Exception { + // Should not throw. + BufferPool pool = new BufferPool(POOL_SIZE, BUFFER_SIZE, statistics); + + // Verify it throws correctly. 
+ + intercept(IllegalArgumentException.class, + "'size' must be a positive integer", + () -> new BufferPool(0, 10, statistics)); + + intercept(IllegalArgumentException.class, + "'size' must be a positive integer", + () -> new BufferPool(-1, 10, statistics)); + + intercept(IllegalArgumentException.class, + "'bufferSize' must be a positive integer", + () -> new BufferPool(10, 0, statistics)); + + intercept(IllegalArgumentException.class, + "'bufferSize' must be a positive integer", + () -> new BufferPool(1, -10, statistics)); + + intercept(NullPointerException.class, + () -> new BufferPool(1, 10, null)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> pool.acquire(-1)); + + intercept(IllegalArgumentException.class, + "'blockNumber' must not be negative", + () -> pool.tryAcquire(-1)); + + intercept(NullPointerException.class, "data", + () -> pool.release((BufferData) null)); + + } + + @Test + public void testGetAndRelease() { + BufferPool pool = new BufferPool(POOL_SIZE, BUFFER_SIZE, statistics); + assertInitialState(pool, POOL_SIZE); + + int count = 0; + for (BufferData data : pool.getAll()) { + count++; + } + assertEquals(0, count); + + BufferData data1 = this.acquire(pool, 1); + BufferData data2 = this.acquire(pool, 2); + BufferData data3 = pool.tryAcquire(3); + assertNull(data3); + + count = 0; + for (BufferData data : pool.getAll()) { + count++; + } + assertEquals(2, count); + + assertEquals(2, pool.numCreated()); + assertEquals(0, pool.numAvailable()); + + data1.updateState(BufferData.State.READY, BufferData.State.BLANK); + pool.release(data1); + + assertEquals(2, pool.numCreated()); + assertEquals(1, pool.numAvailable()); + + data2.updateState(BufferData.State.READY, BufferData.State.BLANK); + pool.release(data2); + + assertEquals(2, pool.numCreated()); + assertEquals(2, pool.numAvailable()); + } + + @Test + public void testRelease() throws Exception { + testReleaseHelper(BufferData.State.BLANK, true); + 
testReleaseHelper(BufferData.State.PREFETCHING, true); + testReleaseHelper(BufferData.State.CACHING, true); + testReleaseHelper(BufferData.State.READY, false); + } + + private void testReleaseHelper(BufferData.State stateBeforeRelease, + boolean expectThrow) + throws Exception { + + BufferPool pool = new BufferPool(POOL_SIZE, BUFFER_SIZE, statistics); + assertInitialState(pool, POOL_SIZE); + + BufferData data = this.acquire(pool, 1); + data.updateState(stateBeforeRelease, BufferData.State.BLANK); + + if (expectThrow) { + + intercept(IllegalArgumentException.class, "Unable to release buffer", + () -> pool.release(data)); + + } else { + pool.release(data); + } + } + + private BufferData acquire(BufferPool pool, int blockNumber) { + BufferData data = pool.acquire(blockNumber); + assertNotNull(data); + assertSame(data, pool.acquire(blockNumber)); + assertEquals(blockNumber, data.getBlockNumber()); + return data; + } + + private void assertInitialState(BufferPool pool, int poolSize) { + assertEquals(poolSize, pool.numAvailable()); + assertEquals(0, pool.numCreated()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestExecutorServiceFuturePool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestExecutorServiceFuturePool.java new file mode 100644 index 0000000000000..3b8bc75f14989 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestExecutorServiceFuturePool.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; +import static org.junit.Assert.assertTrue; + +public class TestExecutorServiceFuturePool extends AbstractHadoopTestBase { + + private ExecutorService executorService; + + @Before + public void setUp() { + executorService = Executors.newFixedThreadPool(3); + } + + @After + public void tearDown() { + if (executorService != null) { + executorService.shutdownNow(); + } + } + + @Test + public void testRunnableSucceeds() throws Exception { + ExecutorServiceFuturePool futurePool = + new ExecutorServiceFuturePool(executorService); + final AtomicBoolean atomicBoolean = new AtomicBoolean(false); + Future future = + futurePool.executeRunnable(() -> atomicBoolean.set(true)); + future.get(30, TimeUnit.SECONDS); + assertTrue("atomicBoolean set to true?", atomicBoolean.get()); + } + + @Test + public void testSupplierSucceeds() throws Exception { + ExecutorServiceFuturePool futurePool = + new ExecutorServiceFuturePool(executorService); + final AtomicBoolean atomicBoolean = new AtomicBoolean(false); + Future future = futurePool.executeFunction(() -> { + atomicBoolean.set(true); + return null; + }); 
+ future.get(30, TimeUnit.SECONDS); + assertTrue("atomicBoolean set to true?", atomicBoolean.get()); + } + + @Test + public void testRunnableFails() throws Exception { + ExecutorServiceFuturePool futurePool = + new ExecutorServiceFuturePool(executorService); + Future future = futurePool.executeRunnable(() -> { + throw new IllegalStateException("deliberate"); + }); + interceptFuture(IllegalStateException.class, "deliberate", 30, + TimeUnit.SECONDS, future); + } + + @Test + public void testSupplierFails() throws Exception { + ExecutorServiceFuturePool futurePool = + new ExecutorServiceFuturePool(executorService); + Future future = futurePool.executeFunction(() -> { + throw new IllegalStateException("deliberate"); + }); + interceptFuture(IllegalStateException.class, "deliberate", 30, + TimeUnit.SECONDS, future); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestFilePosition.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestFilePosition.java new file mode 100644 index 0000000000000..12ab62556a104 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestFilePosition.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.ByteBuffer; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestFilePosition extends AbstractHadoopTestBase { + + @Test + public void testArgChecks() throws Exception { + ByteBuffer buffer = ByteBuffer.allocate(10); + BufferData data = new BufferData(0, buffer); + + // Should not throw. + new FilePosition(0, 0); + new FilePosition(0, 5); + new FilePosition(10, 5); + new FilePosition(5, 10); + new FilePosition(10, 5).setData(data, 3, 4); + new FilePosition(10, 10).setData(data, 3, 13); + + // Verify it throws correctly. + + intercept(IllegalArgumentException.class, "'fileSize' must not be negative", + () -> new FilePosition(-1, 2)); + + intercept(IllegalArgumentException.class, + "'blockSize' must be a positive integer", + () -> new FilePosition(1, 0)); + + intercept(IllegalArgumentException.class, + "'blockSize' must be a positive integer", + () -> new FilePosition(1, -1)); + + FilePosition pos = new FilePosition(10, 3); + + // Verify that we cannot obtain buffer properties without setting buffer. 
+ + intercept(IllegalStateException.class, "'buffer' must not be null", + () -> pos.buffer()); + + intercept(IllegalStateException.class, "'buffer' must not be null", + () -> pos.absolute()); + + intercept(IllegalStateException.class, "'buffer' must not be null", + () -> pos.isWithinCurrentBuffer(2)); + + intercept(IllegalStateException.class, "'buffer' must not be null", + () -> pos.blockNumber()); + + intercept(IllegalStateException.class, "'buffer' must not be null", + () -> pos.isLastBlock()); + + intercept(IllegalStateException.class, "'buffer' must not be null", + () -> pos.bufferFullyRead()); + + // Verify that we cannot set invalid buffer parameters. + + intercept(IllegalArgumentException.class, "'bufferData' must not be null", + () -> pos.setData(null, 4, 4)); + + intercept(IllegalArgumentException.class, + "'startOffset' must not be negative", () -> pos.setData(data, -4, 4)); + + intercept(IllegalArgumentException.class, + "'readOffset' must not be negative", () -> pos.setData(data, 4, -4)); + + intercept(IllegalArgumentException.class, + "'readOffset' must not be negative", () -> pos.setData(data, 4, -4)); + + intercept(IllegalArgumentException.class, + "'readOffset' (15) must be within the range [4, 14]", + () -> pos.setData(data, 4, 15)); + + intercept(IllegalArgumentException.class, + "'readOffset' (3) must be within the range [4, 14]", + () -> pos.setData(data, 4, 3)); + + } + + @Test + public void testValidity() { + int bufferSize = 8; + long fileSize = 100; + long bufferStartOffset = 7; + long readStartOffset = 9; + + ByteBuffer buffer = ByteBuffer.allocate(bufferSize); + BufferData data = new BufferData(0, buffer); + FilePosition pos = new FilePosition(fileSize, bufferSize); + + assertFalse(pos.isValid()); + pos.setData(data, bufferStartOffset, readStartOffset); + assertTrue(pos.isValid()); + + pos.invalidate(); + assertFalse(pos.isValid()); + } + + @Test + public void testOffsets() { + int bufferSize = 8; + long fileSize = 100; + long 
bufferStartOffset = 7; + long readStartOffset = 9; + + ByteBuffer buffer = ByteBuffer.allocate(bufferSize); + BufferData data = new BufferData(0, buffer); + FilePosition pos = new FilePosition(fileSize, bufferSize); + pos.setData(data, bufferStartOffset, readStartOffset); + assertTrue(pos.isValid()); + + assertEquals(readStartOffset, pos.absolute()); + assertEquals(readStartOffset - bufferStartOffset, pos.relative()); + assertTrue(pos.isWithinCurrentBuffer(8)); + assertFalse(pos.isWithinCurrentBuffer(6)); + assertFalse(pos.isWithinCurrentBuffer(1)); + + int expectedBlockNumber = (int) (bufferStartOffset / bufferSize); + assertEquals(expectedBlockNumber, pos.blockNumber()); + assertFalse(pos.isLastBlock()); + + pos.setData(data, fileSize - 3, fileSize - 2); + assertTrue(pos.isLastBlock()); + } + + @Test + public void testBufferStats() { + int bufferSize = 8; + long fileSize = 100; + long bufferStartOffset = 7; + long readStartOffset = 9; + + ByteBuffer buffer = ByteBuffer.allocate(bufferSize); + BufferData data = new BufferData(0, buffer); + FilePosition pos = new FilePosition(fileSize, bufferSize); + pos.setData(data, bufferStartOffset, readStartOffset); + assertTrue(pos.isValid()); + assertEquals(bufferStartOffset, pos.bufferStartOffset()); + + assertEquals(0, pos.numBytesRead()); + assertEquals(0, pos.numSingleByteReads()); + assertEquals(0, pos.numBufferReads()); + + pos.incrementBytesRead(1); + pos.incrementBytesRead(1); + pos.incrementBytesRead(1); + pos.incrementBytesRead(5); + pos.incrementBytesRead(51); + + assertEquals(59, pos.numBytesRead()); + assertEquals(3, pos.numSingleByteReads()); + assertEquals(2, pos.numBufferReads()); + + assertFalse(pos.bufferFullyRead()); + + pos.setData(data, bufferStartOffset, bufferStartOffset); + assertTrue(pos.isValid()); + + assertEquals(0, pos.numBytesRead()); + assertEquals(0, pos.numSingleByteReads()); + assertEquals(0, pos.numBufferReads()); + + for (int i = 0; i < bufferSize; i++) { + pos.buffer().get(); + 
pos.incrementBytesRead(1); + } + assertTrue(pos.bufferFullyRead()); + } + + @Test + public void testBounds() { + int bufferSize = 8; + long fileSize = bufferSize; + + ByteBuffer buffer = ByteBuffer.allocate(bufferSize); + BufferData data = new BufferData(0, buffer); + FilePosition pos = new FilePosition(fileSize, bufferSize); + + long eofOffset = fileSize; + pos.setData(data, 0, eofOffset); + + assertThat(pos.isWithinCurrentBuffer(eofOffset)) + .describedAs("EOF offset %d should be within the current buffer", eofOffset) + .isTrue(); + assertThat(pos.absolute()) + .describedAs("absolute() should return the EOF offset") + .isEqualTo(eofOffset); + + assertThat(pos.setAbsolute(eofOffset)) + .describedAs("setAbsolute() should return true on the EOF offset %d", eofOffset) + .isTrue(); + assertThat(pos.absolute()) + .describedAs("absolute() should return the EOF offset") + .isEqualTo(eofOffset); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestRetryer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestRetryer.java new file mode 100644 index 0000000000000..50701c717a4b0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestRetryer.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.fs.impl.prefetch; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestRetryer extends AbstractHadoopTestBase { + + @Test + public void testArgChecks() throws Exception { + // Should not throw. + new Retryer(10, 50, 500); + + // Verify it throws correctly. + + intercept(IllegalArgumentException.class, + "'perRetryDelay' must be a positive integer", + () -> new Retryer(-1, 50, 500)); + + intercept(IllegalArgumentException.class, + "'perRetryDelay' must be a positive integer", + () -> new Retryer(0, 50, 500)); + + intercept(IllegalArgumentException.class, + "'maxDelay' (5) must be greater than 'perRetryDelay' (10)", + () -> new Retryer(10, 5, 500)); + + intercept(IllegalArgumentException.class, + "'statusUpdateInterval' must be a positive integer", + () -> new Retryer(10, 50, -1)); + + intercept(IllegalArgumentException.class, + "'statusUpdateInterval' must be a positive integer", + () -> new Retryer(10, 50, 0)); + + } + + @Test + public void testRetry() { + int perRetryDelay = 1; + int statusUpdateInterval = 3; + int maxDelay = 10; + + Retryer retryer = + new Retryer(perRetryDelay, maxDelay, statusUpdateInterval); + for (int t = 1; t <= maxDelay; t++) { + assertTrue(retryer.continueRetry()); + if (t % statusUpdateInterval == 0) { + assertTrue(retryer.updateStatus()); + } else { + 
assertFalse(retryer.updateStatus()); + } + } + + assertFalse(retryer.continueRetry()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestValidate.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestValidate.java new file mode 100644 index 0000000000000..a42462b3355af --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestValidate.java @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.fs.impl.prefetch; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; + +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.EMPTY_BYTE_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.EMPTY_INT_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.EMPTY_LIST; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.EMPTY_LONG_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.EMPTY_SHORT_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NON_EMPTY_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NON_EMPTY_BYTE_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NON_EMPTY_INT_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NON_EMPTY_LONG_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NON_EMPTY_SHORT_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NULL_BYTE_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NULL_INT_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NULL_LIST; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NULL_LONG_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.NULL_SHORT_ARRAY; +import static org.apache.hadoop.fs.impl.prefetch.SampleDataForTests.VALID_LIST; +import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class TestValidate extends AbstractHadoopTestBase { + + @Test + public void testCheckNotNull() throws Exception { + String nonNullArg = "nonNullArg"; + String nullArg = null; + + // Should not throw. 
+ Validate.checkNotNull(nonNullArg, "nonNullArg"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, "'nullArg' must not be null", + () -> Validate.checkNotNull(nullArg, "nullArg")); + + } + + @Test + public void testCheckPositiveInteger() throws Exception { + int positiveArg = 1; + int zero = 0; + int negativeArg = -1; + + // Should not throw. + checkPositiveInteger(positiveArg, "positiveArg"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'negativeArg' must be a positive integer", + () -> checkPositiveInteger(negativeArg, "negativeArg")); + + intercept(IllegalArgumentException.class, + "'zero' must be a positive integer", + () -> checkPositiveInteger(zero, "zero")); + + } + + @Test + public void testCheckNotNegative() throws Exception { + int positiveArg = 1; + int zero = 0; + int negativeArg = -1; + + // Should not throw. + Validate.checkNotNegative(zero, "zeroArg"); + Validate.checkNotNegative(positiveArg, "positiveArg"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'negativeArg' must not be negative", + () -> Validate.checkNotNegative(negativeArg, "negativeArg")); + + } + + @Test + public void testCheckRequired() throws Exception { + // Should not throw. + Validate.checkRequired(true, "arg"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, "'arg' is required", + () -> Validate.checkRequired(false, "arg")); + + } + + @Test + public void testCheckValid() throws Exception { + // Should not throw. + Validate.checkValid(true, "arg"); + + // Verify it throws. + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'arg' is invalid", + () -> Validate.checkValid(false, "arg")); + } + + @Test + public void testCheckValidWithValues() throws Exception { + String validValues = "foo, bar"; + + // Should not throw. + Validate.checkValid(true, "arg", validValues); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'arg' is invalid. 
Valid values are: foo, bar", + () -> Validate.checkValid(false, "arg", validValues)); + + } + + @Test + public void testCheckNotNullAndNotEmpty() throws Exception { + // Should not throw. + Validate.checkNotNullAndNotEmpty(NON_EMPTY_ARRAY, "array"); + Validate.checkNotNullAndNotEmpty(NON_EMPTY_BYTE_ARRAY, "array"); + Validate.checkNotNullAndNotEmpty(NON_EMPTY_SHORT_ARRAY, "array"); + Validate.checkNotNullAndNotEmpty(NON_EMPTY_INT_ARRAY, "array"); + Validate.checkNotNullAndNotEmpty(NON_EMPTY_LONG_ARRAY, "array"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, "'string' must not be empty", + () -> Validate.checkNotNullAndNotEmpty("", "string")); + + intercept(IllegalArgumentException.class, "'array' must not be null", () -> + Validate.checkNotNullAndNotEmpty(SampleDataForTests.NULL_ARRAY, + "array")); + + intercept(IllegalArgumentException.class, + "'array' must have at least one element", () -> + Validate.checkNotNullAndNotEmpty(SampleDataForTests.EMPTY_ARRAY, + "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must not be null", + () -> Validate.checkNotNullAndNotEmpty(NULL_BYTE_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must have at least one element", + () -> Validate.checkNotNullAndNotEmpty(EMPTY_BYTE_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must not be null", + () -> Validate.checkNotNullAndNotEmpty(NULL_SHORT_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must have at least one element", + () -> Validate.checkNotNullAndNotEmpty(EMPTY_SHORT_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must not be null", + () -> Validate.checkNotNullAndNotEmpty(NULL_INT_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must have at least one element", + () 
-> Validate.checkNotNullAndNotEmpty(EMPTY_INT_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must not be null", + () -> Validate.checkNotNullAndNotEmpty(NULL_LONG_ARRAY, "array")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'array' must have at least one element", + () -> Validate.checkNotNullAndNotEmpty(EMPTY_LONG_ARRAY, "array")); + } + + @Test + public void testCheckListNotNullAndNotEmpty() throws Exception { + // Should not throw. + Validate.checkNotNullAndNotEmpty(VALID_LIST, "list"); + + // Verify it throws. + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'list' must not be null", + () -> Validate.checkNotNullAndNotEmpty(NULL_LIST, "list")); + + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "'list' must have at least one element", + () -> Validate.checkNotNullAndNotEmpty(EMPTY_LIST, "list")); + } + + @Test + public void testCheckNotNullAndNumberOfElements() throws Exception { + // Should not throw. + Validate.checkNotNullAndNumberOfElements(Arrays.asList(1, 2, 3), 3, "arg"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, "'arg' must not be null", + () -> Validate.checkNotNullAndNumberOfElements(null, 3, "arg")); + + // Verify it throws. + ExceptionAsserts.assertThrows( + IllegalArgumentException.class, + "Number of elements in 'arg' must be exactly 3, 2 given.", + () -> Validate.checkNotNullAndNumberOfElements(Arrays.asList(1, 2), 3, + "arg") + ); + } + + @Test + public void testCheckValuesEqual() throws Exception { + // Should not throw. + Validate.checkValuesEqual(1, "arg1", 1, "arg2"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'arg1' (1) must equal 'arg2' (2)", + () -> Validate.checkValuesEqual(1, "arg1", 2, "arg2")); + + } + + @Test + public void testCheckIntegerMultiple() throws Exception { + // Should not throw. 
+ Validate.checkIntegerMultiple(10, "arg1", 5, "arg2"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'arg1' (10) must be an integer multiple of 'arg2' (3)", + () -> Validate.checkIntegerMultiple(10, "arg1", 3, "arg2")); + + } + + @Test + public void testCheckGreater() throws Exception { + // Should not throw. + Validate.checkGreater(10, "arg1", 5, "arg2"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'arg1' (5) must be greater than 'arg2' (10)", + () -> Validate.checkGreater(5, "arg1", 10, "arg2")); + + } + + @Test + public void testCheckGreaterOrEqual() throws Exception { + // Should not throw. + Validate.checkGreaterOrEqual(10, "arg1", 5, "arg2"); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'arg1' (5) must be greater than or equal to 'arg2' (10)", + () -> Validate.checkGreaterOrEqual(5, "arg1", 10, "arg2")); + + } + + @Test + public void testCheckWithinRange() throws Exception { + // Should not throw. + Validate.checkWithinRange(10, "arg", 5, 15); + Validate.checkWithinRange(10.0, "arg", 5.0, 15.0); + + // Verify it throws. + + intercept(IllegalArgumentException.class, + "'arg' (5) must be within the range [10, 20]", + () -> Validate.checkWithinRange(5, "arg", 10, 20)); + + intercept(IllegalArgumentException.class, + "'arg' (5.0) must be within the range [10.0, 20.0]", + () -> Validate.checkWithinRange(5.0, "arg", 10.0, 20.0)); + + } + + @Test + public void testCheckPathExists() throws Exception { + Path tempFile = Files.createTempFile("foo", "bar"); + Path tempDir = tempFile.getParent(); + Path notFound = Paths.get(""); + + // Should not throw. + Validate.checkPathExists(tempFile, "tempFile"); + Validate.checkPathExists(tempDir, "tempDir"); + + // Verify it throws. 
+ + intercept(IllegalArgumentException.class, "'nullArg' must not be null", + () -> Validate.checkPathExists(null, "nullArg")); + + intercept(IllegalArgumentException.class, + "Path notFound () does not exist", + () -> Validate.checkPathExists(notFound, "notFound")); + + intercept(IllegalArgumentException.class, "must point to a directory", + () -> Validate.checkPathExistsAsDir(tempFile, "tempFile")); + + intercept(IllegalArgumentException.class, "must point to a file", + () -> Validate.checkPathExistsAsFile(tempDir, "tempDir")); + + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java index 4b3bd2f94075c..1ccc3400788d1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java @@ -47,7 +47,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java index 693926242c95d..58452f86f5999 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java @@ -374,4 +374,15 @@ public void testMkDirs() throws IOException { assertThat(((SFTPFileSystem) sftpFs).getConnectionPool().getLiveConnCount(), is(1)); } + + @Test + public void testCloseFileSystemClosesConnectionPool() throws 
Exception { + SFTPFileSystem fs = (SFTPFileSystem) sftpFs; + fs.getHomeDirectory(); + assertThat(fs.getConnectionPool().getLiveConnCount(), is(1)); + fs.close(); + assertThat(fs.getConnectionPool().getLiveConnCount(), is(0)); + ///making sure that re-entrant close calls are safe + fs.close(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyFromLocal.java index e7f36fc85013b..757c588104ea1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyFromLocal.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyFromLocal.java @@ -17,23 +17,25 @@ */ package org.apache.hadoop.fs.shell; +import java.io.IOException; +import java.util.LinkedList; +import java.util.concurrent.ThreadPoolExecutor; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; -import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal; -import org.junit.BeforeClass; -import org.junit.AfterClass; -import org.junit.Test; -import org.junit.Assert; - -import java.io.IOException; -import java.util.LinkedList; -import java.util.concurrent.ThreadPoolExecutor; import static org.junit.Assert.assertEquals; @@ -48,6 +50,9 @@ public class TestCopyFromLocal { private static Path testDir; private static 
Configuration conf; + private Path dir = null; + private int numFiles = 0; + public static int initialize(Path dir) throws Exception { fs.mkdirs(dir); Path fromDirPath = new Path(dir, FROM_DIR_NAME); @@ -66,7 +71,7 @@ public static int initialize(Path dir) throws Exception { Path subFile = new Path(subDirPath, "file" + fileCount); fs.createNewFile(subFile); FSDataOutputStream output = fs.create(subFile, true); - for(int i = 0; i < 100; ++i) { + for (int i = 0; i < 100; ++i) { output.writeInt(i); output.writeChar('\n'); } @@ -96,48 +101,36 @@ public static void cleanup() throws Exception { fs.close(); } + @Before + public void initDirectory() throws Exception { + dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); + numFiles = initialize(dir); + } + + private void run(CommandWithDestination cmd, String... args) { cmd.setConf(conf); assertEquals(0, cmd.run(args)); } @Test(timeout = 10000) - public void testCopyFromLocal() throws Exception { - Path dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); - TestCopyFromLocal.initialize(dir); + public void testCopyFromLocal() { run(new TestMultiThreadedCopy(1, 0), new Path(dir, FROM_DIR_NAME).toString(), new Path(dir, TO_DIR_NAME).toString()); } @Test(timeout = 10000) - public void testCopyFromLocalWithThreads() throws Exception { - Path dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); - int numFiles = TestCopyFromLocal.initialize(dir); - int maxThreads = Runtime.getRuntime().availableProcessors() * 2; - int randThreads = RandomUtils.nextInt(0, maxThreads - 1) + 1; - String numThreads = Integer.toString(randThreads); - run(new TestMultiThreadedCopy(randThreads, - randThreads == 1 ? 
0 : numFiles), "-t", numThreads, - new Path(dir, FROM_DIR_NAME).toString(), - new Path(dir, TO_DIR_NAME).toString()); - } - - @Test(timeout = 10000) - public void testCopyFromLocalWithThreadWrong() throws Exception { - Path dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); - int numFiles = TestCopyFromLocal.initialize(dir); - int maxThreads = Runtime.getRuntime().availableProcessors() * 2; - String numThreads = Integer.toString(maxThreads * 2); - run(new TestMultiThreadedCopy(maxThreads, numFiles), "-t", numThreads, + public void testCopyFromLocalWithThreads(){ + int threads = Runtime.getRuntime().availableProcessors() * 2 + 1; + run(new TestMultiThreadedCopy(threads, numFiles), + "-t", Integer.toString(threads), new Path(dir, FROM_DIR_NAME).toString(), new Path(dir, TO_DIR_NAME).toString()); } @Test(timeout = 10000) - public void testCopyFromLocalWithZeroThreads() throws Exception { - Path dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); - TestCopyFromLocal.initialize(dir); + public void testCopyFromLocalWithThreadWrong(){ run(new TestMultiThreadedCopy(1, 0), "-t", "0", new Path(dir, FROM_DIR_NAME).toString(), new Path(dir, TO_DIR_NAME).toString()); @@ -148,8 +141,7 @@ private class TestMultiThreadedCopy extends CopyFromLocal { private int expectedThreads; private int expectedCompletedTaskCount; - TestMultiThreadedCopy(int expectedThreads, - int expectedCompletedTaskCount) { + TestMultiThreadedCopy(int expectedThreads, int expectedCompletedTaskCount) { this.expectedThreads = expectedThreads; this.expectedCompletedTaskCount = expectedCompletedTaskCount; } @@ -158,17 +150,22 @@ private class TestMultiThreadedCopy extends CopyFromLocal { protected void processArguments(LinkedList args) throws IOException { // Check if the correct number of threads are spawned - Assert.assertEquals(expectedThreads, getNumThreads()); + Assert.assertEquals(expectedThreads, getThreadCount()); super.processArguments(args); - // Once the copy is complete, check 
following - // 1) number of completed tasks are same as expected - // 2) There are no active tasks in the executor - // 3) Executor has shutdown correctly - ThreadPoolExecutor executor = getExecutor(); - Assert.assertEquals(expectedCompletedTaskCount, - executor.getCompletedTaskCount()); - Assert.assertEquals(0, executor.getActiveCount()); - Assert.assertTrue(executor.isTerminated()); + + if (isMultiThreadNecessary(args)) { + // Once the copy is complete, check following + // 1) number of completed tasks are same as expected + // 2) There are no active tasks in the executor + // 3) Executor has shutdown correctly + ThreadPoolExecutor executor = getExecutor(); + Assert.assertEquals(expectedCompletedTaskCount, + executor.getCompletedTaskCount()); + Assert.assertEquals(0, executor.getActiveCount()); + Assert.assertTrue(executor.isTerminated()); + } else { + assert getExecutor() == null; + } } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyPreserveFlag.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyPreserveFlag.java index 8d2e1608723d1..b68be243c956e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyPreserveFlag.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyPreserveFlag.java @@ -17,11 +17,12 @@ */ package org.apache.hadoop.fs.shell; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; - import java.io.IOException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -31,13 +32,13 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import 
org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal; import org.apache.hadoop.fs.shell.CopyCommands.Cp; import org.apache.hadoop.fs.shell.CopyCommands.Get; import org.apache.hadoop.fs.shell.CopyCommands.Put; -import org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; public class TestCopyPreserveFlag { private static final int MODIFICATION_TIME = 12345000; @@ -122,6 +123,22 @@ public void testPutWithoutP() throws Exception { assertAttributesChanged(TO); } + @Test(timeout = 10000) + public void testPutWithPQ() throws Exception { + Put put = new Put(); + run(put, "-p", "-q", "100", FROM.toString(), TO.toString()); + assertEquals(put.getThreadPoolQueueSize(), 100); + assertAttributesPreserved(TO); + } + + @Test(timeout = 10000) + public void testPutWithQ() throws Exception { + Put put = new Put(); + run(put, "-q", "100", FROM.toString(), TO.toString()); + assertEquals(put.getThreadPoolQueueSize(), 100); + assertAttributesChanged(TO); + } + @Test(timeout = 10000) public void testPutWithSplCharacter() throws Exception { fs.mkdirs(DIR_FROM_SPL); @@ -160,6 +177,34 @@ public void testGetWithoutP() throws Exception { assertAttributesChanged(TO); } + @Test(timeout = 10000) + public void testGetWithPQ() throws Exception { + Get get = new Get(); + run(get, "-p", "-q", "100", FROM.toString(), TO.toString()); + assertEquals(get.getThreadPoolQueueSize(), 100); + assertAttributesPreserved(TO); + } + + @Test(timeout = 10000) + public void testGetWithQ() throws Exception { + Get get = new Get(); + run(get, "-q", "100", FROM.toString(), TO.toString()); + assertEquals(get.getThreadPoolQueueSize(), 100); + assertAttributesChanged(TO); + } + + @Test(timeout = 10000) + public void testGetWithThreads() throws Exception { + run(new Get(), "-t", "10", FROM.toString(), TO.toString()); + assertAttributesChanged(TO); + 
} + + @Test(timeout = 10000) + public void testGetWithThreadsPreserve() throws Exception { + run(new Get(), "-p", "-t", "10", FROM.toString(), TO.toString()); + assertAttributesPreserved(TO); + } + @Test(timeout = 10000) public void testCpWithP() throws Exception { run(new Cp(), "-p", FROM.toString(), TO.toString()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyToLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyToLocal.java new file mode 100644 index 0000000000000..202b81912c104 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCopyToLocal.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.shell; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.concurrent.ThreadPoolExecutor; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.shell.CopyCommands.CopyToLocal; + +import static org.apache.hadoop.fs.shell.CopyCommandWithMultiThread.DEFAULT_QUEUE_SIZE; +import static org.junit.Assert.assertEquals; + +public class TestCopyToLocal { + + private static final String FROM_DIR_NAME = "fromDir"; + private static final String TO_DIR_NAME = "toDir"; + + private static FileSystem fs; + private static Path testDir; + private static Configuration conf; + + private Path dir = null; + private int numFiles = 0; + + private static int initialize(Path dir) throws Exception { + fs.mkdirs(dir); + Path fromDirPath = new Path(dir, FROM_DIR_NAME); + fs.mkdirs(fromDirPath); + Path toDirPath = new Path(dir, TO_DIR_NAME); + fs.mkdirs(toDirPath); + + int numTotalFiles = 0; + int numDirs = RandomUtils.nextInt(0, 5); + for (int dirCount = 0; dirCount < numDirs; ++dirCount) { + Path subDirPath = new Path(fromDirPath, "subdir" + dirCount); + fs.mkdirs(subDirPath); + int numFiles = RandomUtils.nextInt(0, 10); + for (int fileCount = 0; fileCount < numFiles; ++fileCount) { + numTotalFiles++; + Path subFile = new Path(subDirPath, "file" + fileCount); + fs.createNewFile(subFile); + FSDataOutputStream output = fs.create(subFile, true); + for (int i = 0; i < 100; ++i) { + output.writeInt(i); + output.writeChar('\n'); + } + output.close(); + } + } + + 
return numTotalFiles; + } + + @BeforeClass + public static void init() throws Exception { + conf = new Configuration(false); + conf.set("fs.file.impl", LocalFileSystem.class.getName()); + fs = FileSystem.getLocal(conf); + testDir = new FileSystemTestHelper().getTestRootPath(fs); + // don't want scheme on the path, just an absolute path + testDir = new Path(fs.makeQualified(testDir).toUri().getPath()); + + FileSystem.setDefaultUri(conf, fs.getUri()); + fs.setWorkingDirectory(testDir); + } + + @AfterClass + public static void cleanup() throws Exception { + fs.delete(testDir, true); + fs.close(); + } + + private void run(CopyCommandWithMultiThread cmd, String... args) { + cmd.setConf(conf); + assertEquals(0, cmd.run(args)); + } + + @Before + public void initDirectory() throws Exception { + dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); + numFiles = initialize(dir); + } + + @Test(timeout = 10000) + public void testCopy() throws Exception { + MultiThreadedCopy copy = new MultiThreadedCopy(1, DEFAULT_QUEUE_SIZE, 0); + run(copy, new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + assert copy.getExecutor() == null; + } + + @Test(timeout = 10000) + public void testCopyWithThreads() { + run(new MultiThreadedCopy(5, DEFAULT_QUEUE_SIZE, numFiles), "-t", "5", + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCopyWithThreadWrong() { + run(new MultiThreadedCopy(1, DEFAULT_QUEUE_SIZE, 0), "-t", "0", + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCopyWithThreadsAndQueueSize() { + int queueSize = 256; + run(new MultiThreadedCopy(5, queueSize, numFiles), "-t", "5", "-q", + Integer.toString(queueSize), + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCopyWithThreadsAndQueueSizeWrong() { + 
int queueSize = 0; + run(new MultiThreadedCopy(5, DEFAULT_QUEUE_SIZE, numFiles), "-t", "5", "-q", + Integer.toString(queueSize), + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCopySingleFile() throws Exception { + Path fromDirPath = new Path(dir, FROM_DIR_NAME); + Path subFile = new Path(fromDirPath, "file0"); + fs.createNewFile(subFile); + FSDataOutputStream output = fs.create(subFile, true); + for (int i = 0; i < 100; ++i) { + output.writeInt(i); + output.writeChar('\n'); + } + output.close(); + + MultiThreadedCopy copy = new MultiThreadedCopy(5, DEFAULT_QUEUE_SIZE, 0); + run(copy, "-t", "5", subFile.toString(), + new Path(dir, TO_DIR_NAME).toString()); + assert copy.getExecutor() == null; + } + + private static class MultiThreadedCopy extends CopyToLocal { + public static final String NAME = "multiThreadCopy"; + private final int expectedThreads; + private final int expectedQueuePoolSize; + private final int expectedCompletedTaskCount; + + MultiThreadedCopy(int expectedThreads, int expectedQueuePoolSize, + int expectedCompletedTaskCount) { + this.expectedThreads = expectedThreads; + this.expectedQueuePoolSize = expectedQueuePoolSize; + this.expectedCompletedTaskCount = expectedCompletedTaskCount; + } + + @Override + protected void processArguments(LinkedList args) + throws IOException { + // Check if the number of threads are same as expected + Assert.assertEquals(expectedThreads, getThreadCount()); + // Check if the queue pool size of executor is same as expected + Assert.assertEquals(expectedQueuePoolSize, getThreadPoolQueueSize()); + + super.processArguments(args); + + if (isMultiThreadNecessary(args)) { + // Once the copy is complete, check following + // 1) number of completed tasks are same as expected + // 2) There are no active tasks in the executor + // 3) Executor has shutdown correctly + ThreadPoolExecutor executor = getExecutor(); + 
Assert.assertEquals(expectedCompletedTaskCount, + executor.getCompletedTaskCount()); + Assert.assertEquals(0, executor.getActiveCount()); + Assert.assertTrue(executor.isTerminated()); + } else { + assert getExecutor() == null; + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java index b5adfcf76157c..f101fed26bbf8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java @@ -411,6 +411,25 @@ public void processPathWithQuotasByMultipleStorageTypes() throws Exception { verifyNoMoreInteractions(out); } + @Test + public void processPathWithSnapshotHeader() throws Exception { + Path path = new Path("mockfs:/test"); + when(mockFs.getFileStatus(eq(path))).thenReturn(fileStat); + PrintStream out = mock(PrintStream.class); + Count count = new Count(); + count.out = out; + LinkedList options = new LinkedList(); + options.add("-s"); + options.add("-v"); + options.add("dummy"); + count.processOptions(options); + String withSnapshotHeader = " DIR_COUNT FILE_COUNT CONTENT_SIZE " + + " SNAPSHOT_LENGTH SNAPSHOT_FILE_COUNT " + + " SNAPSHOT_DIR_COUNT SNAPSHOT_SPACE_CONSUMED PATHNAME"; + verify(out).println(withSnapshotHeader); + verifyNoMoreInteractions(out); + } + @Test public void getCommandName() { Count count = new Count(); @@ -448,7 +467,8 @@ public void getUsage() { Count count = new Count(); String actual = count.getUsage(); String expected = - "-count [-q] [-h] [-v] [-t []] [-u] [-x] [-e] ..."; + "-count [-q] [-h] [-v] [-t []]" + + " [-u] [-x] [-e] [-s] ..."; assertEquals("Count.getUsage", expected, actual); } @@ -480,7 +500,8 @@ public void getDescription() { + "storage types.\n" + "The -u option shows the quota and \n" + "the usage against the quota without the detailed 
content summary." - + "The -e option shows the erasure coding policy."; + + "The -e option shows the erasure coding policy." + + "The -s option shows snapshot counts."; assertEquals("Count.getDescription", expected, actual); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCpCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCpCommand.java new file mode 100644 index 0000000000000..214f1a0686cd9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCpCommand.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.shell; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.concurrent.ThreadPoolExecutor; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.shell.CopyCommands.Cp; + +import static org.apache.hadoop.fs.shell.CopyCommandWithMultiThread.DEFAULT_QUEUE_SIZE; +import static org.junit.Assert.assertEquals; + +public class TestCpCommand { + + private static final String FROM_DIR_NAME = "fromDir"; + private static final String TO_DIR_NAME = "toDir"; + + private static FileSystem fs; + private static Path testDir; + private static Configuration conf; + + private Path dir = null; + private int numFiles = 0; + + private static int initialize(Path dir) throws Exception { + fs.mkdirs(dir); + Path fromDirPath = new Path(dir, FROM_DIR_NAME); + fs.mkdirs(fromDirPath); + Path toDirPath = new Path(dir, TO_DIR_NAME); + fs.mkdirs(toDirPath); + + int numTotalFiles = 0; + int numDirs = RandomUtils.nextInt(0, 5); + for (int dirCount = 0; dirCount < numDirs; ++dirCount) { + Path subDirPath = new Path(fromDirPath, "subdir" + dirCount); + fs.mkdirs(subDirPath); + int numFiles = RandomUtils.nextInt(0, 10); + for (int fileCount = 0; fileCount < numFiles; ++fileCount) { + numTotalFiles++; + Path subFile = new Path(subDirPath, "file" + fileCount); + fs.createNewFile(subFile); + FSDataOutputStream output = fs.create(subFile, true); + for (int i = 0; i < 100; ++i) { + output.writeInt(i); + output.writeChar('\n'); + } + output.close(); + } + } + + return 
numTotalFiles; + } + + @BeforeClass + public static void init() throws Exception { + conf = new Configuration(false); + conf.set("fs.file.impl", LocalFileSystem.class.getName()); + fs = FileSystem.getLocal(conf); + testDir = new FileSystemTestHelper().getTestRootPath(fs); + // don't want scheme on the path, just an absolute path + testDir = new Path(fs.makeQualified(testDir).toUri().getPath()); + + FileSystem.setDefaultUri(conf, fs.getUri()); + fs.setWorkingDirectory(testDir); + } + + @AfterClass + public static void cleanup() throws Exception { + fs.delete(testDir, true); + fs.close(); + } + + private void run(CopyCommandWithMultiThread cmd, String... args) { + cmd.setConf(conf); + assertEquals(0, cmd.run(args)); + } + + @Before + public void initDirectory() throws Exception { + dir = new Path("dir" + RandomStringUtils.randomNumeric(4)); + numFiles = initialize(dir); + } + + @Test(timeout = 10000) + public void testCp() throws Exception { + MultiThreadedCp copy = new MultiThreadedCp(1, DEFAULT_QUEUE_SIZE, 0); + run(copy, new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + assert copy.getExecutor() == null; + } + + @Test(timeout = 10000) + public void testCpWithThreads() { + run(new MultiThreadedCp(5, DEFAULT_QUEUE_SIZE, numFiles), "-t", "5", + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCpWithThreadWrong() { + run(new MultiThreadedCp(1, DEFAULT_QUEUE_SIZE, 0), "-t", "0", + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCpWithThreadsAndQueueSize() { + int queueSize = 256; + run(new MultiThreadedCp(5, queueSize, numFiles), "-t", "5", "-q", + Integer.toString(queueSize), + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCpWithThreadsAndQueueSizeWrong() { + int queueSize = 0; + 
run(new MultiThreadedCp(5, DEFAULT_QUEUE_SIZE, numFiles), "-t", "5", "-q", + Integer.toString(queueSize), + new Path(dir, FROM_DIR_NAME).toString(), + new Path(dir, TO_DIR_NAME).toString()); + } + + @Test(timeout = 10000) + public void testCpSingleFile() throws Exception { + Path fromDirPath = new Path(dir, FROM_DIR_NAME); + Path subFile = new Path(fromDirPath, "file0"); + fs.createNewFile(subFile); + FSDataOutputStream output = fs.create(subFile, true); + for (int i = 0; i < 100; ++i) { + output.writeInt(i); + output.writeChar('\n'); + } + output.close(); + + MultiThreadedCp copy = new MultiThreadedCp(5, DEFAULT_QUEUE_SIZE, 0); + run(copy, "-t", "5", subFile.toString(), + new Path(dir, TO_DIR_NAME).toString()); + assert copy.getExecutor() == null; + } + + private static class MultiThreadedCp extends Cp { + public static final String NAME = "multiThreadCp"; + private final int expectedThreads; + private final int expectedQueuePoolSize; + private final int expectedCompletedTaskCount; + + MultiThreadedCp(int expectedThreads, int expectedQueuePoolSize, + int expectedCompletedTaskCount) { + this.expectedThreads = expectedThreads; + this.expectedQueuePoolSize = expectedQueuePoolSize; + this.expectedCompletedTaskCount = expectedCompletedTaskCount; + } + + @Override + protected void processArguments(LinkedList args) + throws IOException { + // Check if the number of threads are same as expected + Assert.assertEquals(expectedThreads, getThreadCount()); + // Check if the queue pool size of executor is same as expected + Assert.assertEquals(expectedQueuePoolSize, getThreadPoolQueueSize()); + + super.processArguments(args); + + if (isMultiThreadNecessary(args)) { + // Once the copy is complete, check following + // 1) number of completed tasks are same as expected + // 2) There are no active tasks in the executor + // 3) Executor has shutdown correctly + ThreadPoolExecutor executor = getExecutor(); + Assert.assertEquals(expectedCompletedTaskCount, + 
executor.getCompletedTaskCount()); + Assert.assertEquals(0, executor.getActiveCount()); + Assert.assertTrue(executor.isTerminated()); + } else { + assert getExecutor() == null; + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestFsShellConcat.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestFsShellConcat.java new file mode 100644 index 0000000000000..a2c4d3a1972c7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestFsShellConcat.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.shell; + +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.util.Random; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.assertj.core.api.Assertions; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.mockito.ArgumentMatchers.any; +import static org.junit.Assert.assertEquals; + +/** + * Test Concat. + */ +public class TestFsShellConcat extends AbstractHadoopTestBase { + + private static Configuration conf; + private static FsShell shell; + private static LocalFileSystem lfs; + private static Path testRootDir; + private static Path dstPath; + + @Before + public void before() throws IOException { + conf = new Configuration(); + shell = new FsShell(conf); + lfs = FileSystem.getLocal(conf); + testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath( + "testFsShellCopy"))); + + lfs.delete(testRootDir, true); + lfs.mkdirs(testRootDir); + lfs.setWorkingDirectory(testRootDir); + dstPath = new Path(testRootDir, "dstFile"); + lfs.create(dstPath).close(); + + Random random = new Random(); + for (int i = 0; i < 10; i++) { + OutputStream out = + lfs.create(new Path(testRootDir, String.format("file-%02d", i))); + out.write(random.nextInt()); + out.close(); + } + } + + @Test + public void testConcat() throws Exception { + // Read concatenated files to build the expected file content. 
+ ByteArrayOutputStream out = new ByteArrayOutputStream(); + for (int i = 0; i < 10; i++) { + try (InputStream in = lfs + .open(new Path(testRootDir, String.format("file-%02d", i)))) { + IOUtils.copyBytes(in, out, 1024); + } + } + byte[] expectContent = out.toByteArray(); + + // Do concat. + FileSystem mockFs = Mockito.mock(FileSystem.class); + Mockito.doAnswer(invocation -> { + Object[] args = invocation.getArguments(); + Path target = (Path)args[0]; + Path[] src = (Path[]) args[1]; + mockConcat(target, src); + return null; + }).when(mockFs).concat(any(Path.class), any(Path[].class)); + Concat.setTestFs(mockFs); + shellRun(0, "-concat", dstPath.toString(), testRootDir+"/file-*"); + + // Verify concat result. + ContractTestUtils + .assertPathExists(lfs, "The target file doesn't exist.", dstPath); + Assertions.assertThat(lfs.listStatus(testRootDir).length).isEqualTo(1); + assertEquals(expectContent.length, lfs.getFileStatus(dstPath).getLen()); + out = new ByteArrayOutputStream(); + try (InputStream in = lfs.open(dstPath)) { + IOUtils.copyBytes(in, out, 1024); + } + // Verify content. 
+ byte[] concatedContent = out.toByteArray(); + assertEquals(expectContent.length, concatedContent.length); + ContractTestUtils.compareByteArrays(expectContent, concatedContent, + expectContent.length); + } + + @Test + public void testUnsupportedFs() throws Exception { + FileSystem mockFs = Mockito.mock(FileSystem.class); + Mockito.doThrow( + new UnsupportedOperationException("Mock unsupported exception.")) + .when(mockFs).concat(any(Path.class), any(Path[].class)); + Mockito.doAnswer(invocationOnMock -> new URI("mockfs:///")).when(mockFs) + .getUri(); + Concat.setTestFs(mockFs); + final ByteArrayOutputStream err = new ByteArrayOutputStream(); + PrintStream oldErr = System.err; + System.setErr(new PrintStream(err)); + try { + shellRun(1, "-concat", dstPath.toString(), testRootDir + "/file-*"); + } finally { + System.setErr(oldErr); + } + System.err.print(err.toString()); + String expectedErrMsg = "Dest filesystem 'mockfs' doesn't support concat"; + Assertions.assertThat(err.toString().contains(expectedErrMsg)) + .withFailMessage("The err message should contain \"" + expectedErrMsg + + "\" message.").isTrue(); + } + + private void shellRun(int n, String... args) { + assertEquals(n, shell.run(args)); + } + + /** + * Simple simulation of concat. 
+ */ + private void mockConcat(Path target, Path[] srcArray) throws IOException { + Path tmp = new Path(target.getParent(), target.getName() + ".bak"); + lfs.rename(target, tmp); + try (OutputStream out = lfs.create(target)) { + try (InputStream in = lfs.open(tmp)) { + IOUtils.copyBytes(in, out, 1024); + } + lfs.delete(tmp, true); + for (int i = 0; i < srcArray.length; i++) { + try (InputStream iin = lfs.open(srcArray[i])) { + IOUtils.copyBytes(iin, out, 1024); + } + lfs.delete(srcArray[i], true); + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestMove.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestMove.java index 1f379448ee86c..b9e87d3dacefe 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestMove.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestMove.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FilterFileSystem; import org.apache.hadoop.fs.PathExistsException; +import org.apache.hadoop.fs.shell.CommandFormat.UnknownOptionException; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -93,6 +94,12 @@ public void testMoveTargetExistsWithoutExplicitRename() throws Exception { assertTrue("Rename should have failed with path exists exception", cmd.error instanceof PathExistsException); } + + @Test(expected = UnknownOptionException.class) + public void testMoveFromLocalDoesNotAllowTOption() { + new MoveCommands.MoveFromLocal().run("-t", "2", + null, null); + } static class MockFileSystem extends FilterFileSystem { Configuration conf; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java index 8e09fc29744fe..91bfdd6d3948c 100644 --- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPrintableString.java @@ -76,8 +76,8 @@ public void testNonPrintableCharacters() throws Exception { "x\uDB80\uDC00y\uDBFF\uDFFDz\u1050", "x?y?z\u1050"); // Unassigned Unicode - expect("Should replace unassigned U+30000 and U+DFFFF", - "-\uD880\uDC00-\uDB3F\uDFFF-", "-?-?-"); + expect("Should replace unassigned U+DFFFF", + "-\uDB3F\uDFFF-", "-?-"); // Standalone surrogate character (not in a pair) expect("Should replace standalone surrogate U+DB80", "x\uDB80yz", "x?yz"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestTextCommand.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestTextCommand.java index 7b848363720df..c99b97e6e4021 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestTextCommand.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestTextCommand.java @@ -27,6 +27,7 @@ import java.io.StringWriter; import java.lang.reflect.Method; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; @@ -124,7 +125,7 @@ public InputStream getInputStream(PathData item) throws IOException { private String inputStreamToString(InputStream stream) throws IOException { StringWriter writer = new StringWriter(); - IOUtils.copy(stream, writer); + IOUtils.copy(stream, writer, StandardCharsets.UTF_8); return writer.toString(); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/IOStatisticAssertions.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/IOStatisticAssertions.java new file mode 100644 index 0000000000000..755599f0c390c --- /dev/null +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/IOStatisticAssertions.java @@ -0,0 +1,548 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.ObjectStreamClass; +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.assertj.core.api.AbstractLongAssert; +import org.assertj.core.api.ObjectAssert; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MAX; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MIN; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Assertions and any other support for IOStatistics testing. + * If used downstream: know it is unstable. 
+ */ + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class IOStatisticAssertions { + + private static final String COUNTER = "Counter"; + + private static final String GAUGE = "Gauge"; + + private static final String MINIMUM = "Minimum"; + + private static final String MAXIMUM = "Maximum"; + + private static final String MEAN = "Mean"; + + private IOStatisticAssertions() { + } + + /** + * Get a required counter statistic. + * @param stats statistics source + * @param key statistic key + * @return the value + */ + public static long lookupCounterStatistic( + final IOStatistics stats, + final String key) { + return lookupStatistic(COUNTER, key, + verifyStatisticsNotNull(stats).counters()); + } + + /** + * Given an IOStatistics instance, verify it is not null, + * and return the value for continued use in a test. + * @param stats statistics source. + * @param <T> type of statistics + * @return the value passed in. + */ + public static <T extends IOStatistics> T + verifyStatisticsNotNull(final T stats) { + assertThat(stats) + .describedAs("IO Statistics reference") + .isNotNull(); + return stats; + } + + /** + * Get a required gauge statistic. + * @param stats statistics source + * @param key statistic key + * @return the value + */ + public static long lookupGaugeStatistic( + final IOStatistics stats, + final String key) { + return lookupStatistic(GAUGE, key, + verifyStatisticsNotNull(stats).gauges()); + } + + /** + * Get a required maximum statistic. + * @param stats statistics source + * @param key statistic key + * @return the value + */ + public static long lookupMaximumStatistic( + final IOStatistics stats, + final String key) { + return lookupStatistic(MAXIMUM, key, + verifyStatisticsNotNull(stats).maximums()); + } + + /** + * Get a required minimum statistic.
+ * @param stats statistics source + * @param key statistic key + * @return the value + */ + public static long lookupMinimumStatistic( + final IOStatistics stats, + final String key) { + return lookupStatistic(MINIMUM, key, + verifyStatisticsNotNull(stats).minimums()); + } + + /** + * Get a required mean statistic. + * @param stats statistics source + * @param key statistic key + * @return the value + */ + public static MeanStatistic lookupMeanStatistic( + final IOStatistics stats, + final String key) { + return lookupStatistic(MEAN, key, + verifyStatisticsNotNull(stats).meanStatistics()); + } + + /** + * Get a required counter statistic. + * @param type of map element + * @param type type for error text + * @param key statistic key + * @param map map to probe + * @return the value + */ + private static E lookupStatistic( + final String type, + final String key, + final Map map) { + final E statistic = map.get(key); + assertThat(statistic) + .describedAs("%s named %s", type, key) + .isNotNull(); + return statistic; + } + + /** + * Assert that a counter has an expected value. + * @param stats statistics source + * @param key statistic key + * @param value expected value. + * @return the value (which always equals the expected value) + */ + public static long verifyStatisticCounterValue( + final IOStatistics stats, + final String key, + final long value) { + return verifyStatisticValue(COUNTER, key, + verifyStatisticsNotNull(stats).counters(), value); + } + + /** + * Assert that a gauge has an expected value. + * @param stats statistics source + * @param key statistic key + * @param value expected value. + * @return the value (which always equals the expected value) + */ + public static long verifyStatisticGaugeValue( + final IOStatistics stats, + final String key, + final long value) { + return verifyStatisticValue(GAUGE, key, + verifyStatisticsNotNull(stats).gauges(), value); + } + + /** + * Assert that a maximum has an expected value. 
+ * @param stats statistics source + * @param key statistic key + * @param value expected value. + * @return the value (which always equals the expected value) + */ + public static long verifyStatisticMaximumValue( + final IOStatistics stats, + final String key, + final long value) { + return verifyStatisticValue(MAXIMUM, key, + verifyStatisticsNotNull(stats).maximums(), value); + } + + /** + * Assert that a minimum has an expected value. + * @param stats statistics source + * @param key statistic key + * @param value expected value. + * @return the value (which always equals the expected value) + */ + public static long verifyStatisticMinimumValue( + final IOStatistics stats, + final String key, + final long value) { + return verifyStatisticValue(MINIMUM, key, + verifyStatisticsNotNull(stats).minimums(), value); + } + + /** + * Assert that a mean has an expected value. + * @param stats statistics source + * @param key statistic key + * @param value expected value. + * @return the value (which always equals the expected value) + */ + public static MeanStatistic verifyStatisticMeanValue( + final IOStatistics stats, + final String key, + final MeanStatistic value) { + return verifyStatisticValue(MEAN, key, + verifyStatisticsNotNull(stats).meanStatistics(), value); + } + + /** + * Assert that a given statistic has an expected value. + * @param type type for error text + * @param key statistic key + * @param map map to look up + * @param value expected value. + * @param type of map element + * @return the value (which always equals the expected value) + */ + private static E verifyStatisticValue( + final String type, + final String key, + final Map map, + final E value) { + final E statistic = lookupStatistic(type, key, map); + assertThat(statistic) + .describedAs("%s named %s with expected value %s", type, + key, value) + .isEqualTo(value); + return statistic; + } + + + /** + * Assert that a given statistic has an expected value. 
+ * @param type of map element + * @param type type for error text + * @param key statistic key + * @param map map to look up + * @return an ongoing assertion + */ + private static ObjectAssert assertThatStatistic( + final String type, + final String key, + final Map map) { + final E statistic = lookupStatistic(type, key, map); + return assertThat(statistic) + .describedAs("%s named %s", type, key); + } + + /** + * Assert that a given statistic has an expected value. + * @param type of map element + * @param type type for error text + * @param key statistic key + * @param map map to look up + * @return an ongoing assertion + */ + private static AbstractLongAssert assertThatStatisticLong( + final String type, + final String key, + final Map map) { + final long statistic = lookupStatistic(type, key, map); + return assertThat(statistic) + .describedAs("%s named %s", type, key); + } + + /** + * Start an assertion chain on + * a required counter statistic. + * @param stats statistics source + * @param key statistic key + * @return an ongoing assertion + */ + public static AbstractLongAssert assertThatStatisticCounter( + final IOStatistics stats, + final String key) { + return assertThatStatisticLong(COUNTER, key, + verifyStatisticsNotNull(stats).counters()); + } + + /** + * Start an assertion chain on + * a required gauge statistic. + * @param stats statistics source + * @param key statistic key + * @return an ongoing assertion + */ + public static AbstractLongAssert assertThatStatisticGauge( + final IOStatistics stats, + final String key) { + return assertThatStatisticLong(GAUGE, key, + verifyStatisticsNotNull(stats).gauges()); + } + + /** + * Start an assertion chain on + * a required minimum statistic. 
+ * @param stats statistics source + * @param key statistic key + * @return an ongoing assertion + */ + public static AbstractLongAssert assertThatStatisticMinimum( + final IOStatistics stats, + final String key) { + return assertThatStatisticLong(MINIMUM, key, + verifyStatisticsNotNull(stats).minimums()); + } + + /** + * Start an assertion chain on + * a required maximum statistic. + * @param stats statistics source + * @param key statistic key + * @return an ongoing assertion + */ + public static AbstractLongAssert assertThatStatisticMaximum( + final IOStatistics stats, + final String key) { + return assertThatStatisticLong(MAXIMUM, key, + verifyStatisticsNotNull(stats).maximums()); + } + + /** + * Assert that a duration is within a given minimum/maximum range. + * @param stats statistics source + * @param key statistic key without any suffix + * @param min minimum statistic must be equal to or greater than this. + * @param max maximum statistic must be equal to or less than this. + */ + public static void assertDurationRange( + final IOStatistics stats, + final String key, + final long min, + final long max) { + assertThatStatisticMinimum(stats, key + SUFFIX_MIN) + .isGreaterThanOrEqualTo(min); + assertThatStatisticMaximum(stats, key + SUFFIX_MAX) + .isLessThanOrEqualTo(max); + } + + /** + * Start an assertion chain on + * a required mean statistic. + * @param stats statistics source + * @param key statistic key + * @return an ongoing assertion + */ + public static ObjectAssert assertThatStatisticMean( + final IOStatistics stats, + final String key) { + return assertThatStatistic(MEAN, key, + verifyStatisticsNotNull(stats).meanStatistics()); + } + + /** + * Start an assertion chain on + * a required mean statistic with the initial validation on the + * sample count and sum. 
+ * @param stats statistics source + * @param key statistic key + * @return an ongoing assertion + */ + public static ObjectAssert assertThatStatisticMeanMatches( + final IOStatistics stats, + final String key, + final long samples, + final long sum) { + return assertThatStatisticMean(stats, key) + .matches(p -> (p.getSamples() == samples), + "samples == " + samples) + .matches(p -> (p.getSum() == sum), + "sum == " + sum); + } + + /** + * Assert that a given counter statistic is untracked. + * @param stats statistics source + * @param type type for error text + * @param key statistic key + * @param map map to probe + */ + private static void assertUntracked(final IOStatistics stats, + final String type, + final String key, + final Map map) { + assertThat(map.containsKey(key)) + .describedAs("%s %s is tracked in %s", type, key, stats) + .isFalse(); + } + + /** + * Assert that a given counter statistic is untracked. + * @param stats statistics source + * @param type type for error text + * @param key statistic key + * @param map map to probe + */ + private static void assertTracked(final IOStatistics stats, + final String type, + final String key, + final Map map) { + assertThat(map.containsKey(key)) + .describedAs("%s %s is not tracked in %s", type, key, stats) + .isTrue(); + } + + /** + * Assert that a given statistic is tracked. + * @param stats statistics source + * @param key statistic key + */ + public static void assertStatisticCounterIsTracked( + final IOStatistics stats, + final String key) { + assertTracked(stats, COUNTER, key, + verifyStatisticsNotNull(stats).counters()); + } + + /** + * Assert that a given counter statistic is untracked. 
+ * @param stats statistics source + * @param key statistic key + */ + public static void assertStatisticCounterIsUntracked( + final IOStatistics stats, + final String key) { + assertUntracked(stats, COUNTER, key, + verifyStatisticsNotNull(stats).counters()); + } + + /** + * Assert that an object is a statistics source and that the + * statistics is not null. + * @param source source object. + */ + public static void assertIsStatisticsSource(Object source) { + assertThat(source) + .describedAs("Object %s", source) + .isInstanceOf(IOStatisticsSource.class) + .extracting(o -> ((IOStatisticsSource) o).getIOStatistics()) + .isNotNull(); + } + + /** + * Query the source for the statistics; fails if the statistics + * returned are null or the class does not implement the API. + * @param source source object. + * @return the statistics it provides. + */ + public static IOStatistics extractStatistics(Object source) { + assertThat(source) + .describedAs("Object %s", source) + .isInstanceOf(IOStatisticsSource.class); + IOStatisticsSource ios = (IOStatisticsSource) source; + return extractStatistics(ios); + } + + /** + * Get the non-null statistics. + * @param ioStatisticsSource source + * @return the statistics, guaranteed to be non null + */ + private static IOStatistics extractStatistics( + final IOStatisticsSource ioStatisticsSource) { + IOStatistics statistics = ioStatisticsSource.getIOStatistics(); + assertThat(statistics) + .describedAs("Statistics from %s", ioStatisticsSource) + .isNotNull(); + return statistics; + } + + /** + * Perform a serialization round trip on a statistics instance. + * @param stat statistic + * @return the deserialized version. 
+ */ + public static IOStatistics statisticsJavaRoundTrip(final IOStatistics stat) + throws IOException, ClassNotFoundException { + assertThat(stat).isInstanceOf(Serializable.class); + ByteArrayOutputStream baos = new ByteArrayOutputStream(1024); + try (ObjectOutputStream oos = new ObjectOutputStream(baos)) { + oos.writeObject(stat); + } + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + IOStatistics deser; + try (ObjectInputStream ois = new RestrictedInput(bais, + IOStatisticsSnapshot.requiredSerializationClasses())) { + deser = (IOStatistics) ois.readObject(); + } + return deser; + } + + private static final class RestrictedInput extends ObjectInputStream { + + private final List<String> allowedClasses; + + private RestrictedInput(final InputStream in, + final List<Class> allowedClasses) throws IOException { + + super(in); + this.allowedClasses = allowedClasses.stream() + .map(Class::getName) + .collect(Collectors.toList()); + } + + @Override + protected Class<?> resolveClass(final ObjectStreamClass desc) + throws IOException, ClassNotFoundException { + final String classname = desc.getName(); + if (!allowedClasses.contains(classname)) { + throw new ClassNotFoundException("Class " + classname + + " Not in list of allowed classes"); + } + + return super.resolveClass(desc); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestDurationTracking.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestDurationTracking.java new file mode 100644 index 0000000000000..cfde1583e2c21 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestDurationTracking.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.functional.FunctionRaisingIOE; +import org.apache.hadoop.util.functional.FutureIO; + +import static org.apache.hadoop.fs.statistics.DurationStatisticSummary.fetchDurationSummary; +import static org.apache.hadoop.fs.statistics.DurationStatisticSummary.fetchSuccessSummary; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.*; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.*; +import static org.apache.hadoop.fs.statistics.impl.StubDurationTrackerFactory.STUB_DURATION_TRACKER_FACTORY; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Test the IOStatistic DurationTracker logic. 
+ */ +public class TestDurationTracking extends AbstractHadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestDurationTracking.class); + + private static final String REQUESTS = "requests"; + + public static final String UNKNOWN = "unknown"; + + private IOStatisticsStore stats; + + private final AtomicInteger invocationCounter = new AtomicInteger(0); + + @Before + public void setup() { + stats = iostatisticsStore() + .withDurationTracking(REQUESTS) + .build(); + } + + @After + public void teardown() { + LOG.info("stats {}", stats); + } + + /** + * Duration tracking through an explicit close() and through try-with-resources. + */ + @Test + public void testDurationTryWithResources() throws Throwable { + DurationTracker tracker = + stats.trackDuration(REQUESTS); + verifyStatisticCounterValue(stats, REQUESTS, 1L); + sleep(); + tracker.close(); + try (DurationTracker ignored = + stats.trackDuration(REQUESTS)) { + sleep(); + } + LOG.info("Statistics: {}", stats); + DurationStatisticSummary summary = fetchSuccessSummary(stats, REQUESTS); + assertSummaryValues(summary, 2, 1, 1); + assertSummaryMean(summary, 2, 0); + } + + /** + * A little sleep method; exceptions are swallowed. + * Increments {@link #invocationCounter}. + * Delegates to {@link #sleepf(int)} with a 10 millisecond sleep. + */ + public void sleep() { + sleepf(10); + } + + /** + * A little sleep function; exceptions are swallowed. + * Increments {@link #invocationCounter} and returns the requested sleep time. + */ + protected int sleepf(final int millis) { + invocationCounter.incrementAndGet(); + try { + Thread.sleep(millis); + } catch (InterruptedException ignored) { + } + return millis; + } + + /** + * Assert that the sleep counter has been invoked + * the expected number of times. + * @param expected expected value + */ + private void assertCounterValue(final int expected) { + assertThat(invocationCounter.get()) + .describedAs("Sleep invocation Counter") + .isEqualTo(expected); + } + + /** + * Test that a function raising an IOE can be wrapped.
+ */ + @Test + public void testDurationFunctionIOE() throws Throwable { + FunctionRaisingIOE fn = + trackFunctionDuration(stats, REQUESTS, + (Integer x) -> invocationCounter.getAndSet(x)); + assertThat(fn.apply(1)).isEqualTo(0); + assertCounterValue(1); + assertSummaryValues( + fetchSuccessSummary(stats, REQUESTS), + 1, 0, 0); + } + + /** + * Trigger a failure and verify its the failure statistics + * which go up. + */ + @Test + public void testDurationFunctionIOEFailure() throws Throwable { + FunctionRaisingIOE fn = + trackFunctionDuration(stats, REQUESTS, + (Integer x) -> { + sleep(); + return 100 / x; + }); + intercept(ArithmeticException.class, + () -> fn.apply(0)); + assertSummaryValues( + fetchSuccessSummary(stats, REQUESTS), + 1, -1, -1); + + DurationStatisticSummary failures = fetchDurationSummary(stats, REQUESTS, + false); + assertSummaryValues(failures, 1, 0, 0); + assertSummaryMean(failures, 1, 0); + } + + /** + * Trigger a failure and verify its the failure statistics + * which go up. + */ + @Test + public void testDurationJavaFunctionFailure() throws Throwable { + Function fn = + trackJavaFunctionDuration(stats, REQUESTS, + (Integer x) -> { + return 100 / x; + }); + intercept(ArithmeticException.class, + () -> fn.apply(0)); + assertSummaryValues( + fetchSuccessSummary(stats, REQUESTS), + 1, -1, -1); + + DurationStatisticSummary failures = fetchDurationSummary(stats, REQUESTS, + false); + assertSummaryValues(failures, 1, 0, 0); + } + + /** + * Test trackDurationOfCallable. + */ + @Test + public void testCallableDuration() throws Throwable { + // call the operation + assertThat( + trackDurationOfCallable(stats, REQUESTS, () -> sleepf(100)).call()) + .isEqualTo(100); + DurationStatisticSummary summary = fetchSuccessSummary(stats, REQUESTS); + assertSummaryValues(summary, 1, 0, 0); + assertSummaryMean(summary, 1, 0); + } + + /** + * Callable raising an RTE after a sleep; failure + * stats will be updated and the execution count will be + * 1. 
+ */ + @Test + public void testCallableFailureDuration() throws Throwable { + + intercept(RuntimeException.class, + trackDurationOfCallable(stats, REQUESTS, () -> { + sleepf(100); + throw new RuntimeException("oops"); + })); + assertCounterValue(1); + assertSummaryValues( + fetchSuccessSummary(stats, REQUESTS), + 1, -1, -1); + + assertSummaryValues(fetchDurationSummary(stats, REQUESTS, false), + 1, 0, 0); + } + + /** + * Duration of the successful execution of an InvocationRaisingIOE. + */ + @Test + public void testInvocationDuration() throws Throwable { + // call the operation + trackDurationOfInvocation(stats, REQUESTS, () -> { + sleepf(100); + }); + assertCounterValue(1); + DurationStatisticSummary summary = fetchSuccessSummary(stats, REQUESTS); + assertSummaryValues(summary, 1, 0, 0); + assertSummaryMean(summary, 1, 0); + } + + /** + * Duration of the successful execution of a CallableRaisingIOE. + */ + @Test + public void testCallableIOEDuration() throws Throwable { + // call the operation + assertThat( + trackDuration(stats, REQUESTS, () -> sleepf(100))) + .isEqualTo(100); + DurationStatisticSummary summary = fetchSuccessSummary(stats, REQUESTS); + assertSummaryValues(summary, 1, 0, 0); + assertSummaryMean(summary, 1, 0); + } + + /** + * Track the duration of an IOE raising callable which fails. + */ + @Test + public void testCallableIOEFailureDuration() throws Throwable { + intercept(IOException.class, + () -> + trackDuration(stats, REQUESTS, () -> { + sleepf(100); + throw new IOException("oops"); + })); + assertSummaryValues( + fetchSuccessSummary(stats, REQUESTS), + 1, -1, -1); + + assertSummaryValues(fetchDurationSummary(stats, REQUESTS, false), + 1, 0, 0); + } + + + /** + * Track the duration of a failing operation evaluated through FutureIO.eval().
+ */ + @Test + public void testDurationThroughEval() throws Throwable { + CompletableFuture eval = FutureIO.eval( + trackDurationOfOperation(stats, REQUESTS, () -> { + sleepf(100); + throw new FileNotFoundException("oops"); + })); + intercept(FileNotFoundException.class, "oops", () -> + FutureIO.awaitFuture(eval)); + assertSummaryValues(fetchDurationSummary(stats, REQUESTS, false), + 1, 0, 0); + } + + /** + * It's OK to track a duration against an unknown statistic. + */ + @Test + public void testUnknownDuration() throws Throwable { + trackDurationOfCallable(stats, UNKNOWN, () -> sleepf(1)).call(); + DurationStatisticSummary summary = fetchSuccessSummary(stats, UNKNOWN); + assertSummaryValues(summary, 0, -1, -1); + assertThat(summary.getMean()).isNull(); + } + + /** + * The stub duration tracker factory can be supplied as an input. + */ + @Test + public void testTrackDurationWithStubFactory() throws Throwable { + trackDuration(STUB_DURATION_TRACKER_FACTORY, UNKNOWN, () -> sleepf(1)); + } + + /** + * Make sure the tracker returned from the stub factory + * follows the basic lifecycle. + */ + @Test + public void testStubDurationLifecycle() throws Throwable { + DurationTracker tracker = STUB_DURATION_TRACKER_FACTORY + .trackDuration("k", 1); + tracker.failed(); + tracker.close(); + tracker.close(); + } + + /** + * Assert that a statistics summary has the specific values. + * @param summary summary data + * @param count count -must match exactly. + * @param minBase minimum value for the minimum field (inclusive) + * @param maxBase minimum value for the maximum field (inclusive) + */ + protected void assertSummaryValues( + final DurationStatisticSummary summary, + final int count, + final int minBase, + final int maxBase) { + assertThat(summary) + .matches(s -> s.getCount() == count, "Count value") + .matches(s -> s.getMax() >= maxBase, "Max value") + .matches(s -> s.getMin() >= minBase, "Min value"); + } + + /** + * Assert that at a summary has a matching mean value. 
+ * @param summary summary data. + * @param expectedSampleCount sample count -which must match + * @param meanGreaterThan the mean must be greater than this value. + */ + protected void assertSummaryMean( + final DurationStatisticSummary summary, + final int expectedSampleCount, + final double meanGreaterThan) { + String description = "mean of " + summary; + assertThat(summary.getMean()) + .describedAs(description) + .isNotNull(); + assertThat(summary.getMean().getSamples()) + .describedAs(description) + .isEqualTo(expectedSampleCount); + assertThat(summary.getMean().mean()) + .describedAs(description) + .isGreaterThan(meanGreaterThan); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestDynamicIOStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestDynamicIOStatistics.java new file mode 100644 index 0000000000000..9b929ac82ff11 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestDynamicIOStatistics.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.statistics.impl.SourceWrappedStatistics; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertStatisticCounterIsTracked; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertStatisticCounterIsUntracked; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.demandStringifyIOStatistics; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.demandStringifyIOStatisticsSource; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToString; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.ENTRY_PATTERN; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.NULL_SOURCE; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.dynamicIOStatistics; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.emptyStatistics; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * verify dynamic statistics are dynamic, except when you iterate through + * them, along with other tests of the class's behavior. 
+ */ +public class TestDynamicIOStatistics extends AbstractHadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestDynamicIOStatistics.class); + + private static final String ALONG = "along"; + + private static final String AINT = "aint"; + + private static final String COUNT = "count"; + + private static final String EVAL = "eval"; + + /** + * The statistics. + */ + private IOStatistics statistics = emptyStatistics(); + + /** + * A source of these statistics. + */ + private IOStatisticsSource statsSource; + + private final AtomicLong aLong = new AtomicLong(); + + private final AtomicInteger aInt = new AtomicInteger(); + + private final MutableCounterLong counter = new MutableCounterLong( + new Info("counter"), 0); + + private long evalLong; + + private static final String[] KEYS = new String[]{ALONG, AINT, COUNT, EVAL}; + + @Before + public void setUp() throws Exception { + statistics = dynamicIOStatistics() + .withAtomicLongCounter(ALONG, aLong) + .withAtomicIntegerCounter(AINT, aInt) + .withMutableCounter(COUNT, counter) + .withLongFunctionCounter(EVAL, x -> evalLong) + .build(); + statsSource = new SourceWrappedStatistics(statistics); + } + + /** + * The eval operation is foundational. + */ + @Test + public void testEval() throws Throwable { + verifyStatisticCounterValue(statistics, EVAL, 0); + evalLong = 10; + verifyStatisticCounterValue(statistics, EVAL, 10); + } + + /** + * Atomic Long statistic. + */ + @Test + public void testAlong() throws Throwable { + verifyStatisticCounterValue(statistics, ALONG, 0); + aLong.addAndGet(1); + verifyStatisticCounterValue(statistics, ALONG, 1); + } + + /** + * Atomic Int statistic. + */ + @Test + public void testAint() throws Throwable { + verifyStatisticCounterValue(statistics, AINT, 0); + aInt.addAndGet(1); + verifyStatisticCounterValue(statistics, AINT, 1); + } + + /** + * Metrics2 counter. 
+ */ + @Test + public void testCounter() throws Throwable { + verifyStatisticCounterValue(statistics, COUNT, 0); + counter.incr(); + verifyStatisticCounterValue(statistics, COUNT, 1); + } + + /** + * keys() returns all the keys. + */ + @Test + public void testKeys() throws Throwable { + Assertions.assertThat(statistics.counters().keySet()) + .describedAs("statistic keys of %s", statistics) + .containsExactlyInAnyOrder(KEYS); + } + + @Test + public void testIteratorHasAllKeys() throws Throwable { + // verify that the counter key set contains exactly + // the expected keys. + assertThat(statistics.counters().keySet()) + .containsExactlyInAnyOrder(KEYS); + } + + /** + * Verify that the iterator is taken from + * a snapshot of the values. + */ + @Test + public void testIteratorIsSnapshot() throws Throwable { + // set the counters all to 1 + incrementAllCounters(); + // take the snapshot + final Iterator<Map.Entry<String, Long>> it = + statistics.counters().entrySet().iterator(); + // increment the counters + incrementAllCounters(); + // now assert that all the iterator values are of value 1 + while (it.hasNext()) { + Map.Entry<String, Long> next = it.next(); + assertThat(next.getValue()) + .describedAs("Value of entry %s", next) + .isEqualTo(1); + } + } + + @Test + public void testUnknownStatistic() throws Throwable { + assertStatisticCounterIsUntracked(statistics, "anything"); + } + + @Test + public void testStatisticsTrackedAssertion() throws Throwable { + // expect an exception to be raised when an assertion + // is made that an unknown statistic is tracked.
+ assertThatThrownBy(() -> + assertStatisticCounterIsTracked(statistics, "anything")) + .isInstanceOf(AssertionError.class); + } + + @Test + public void testStatisticsValueAssertion() throws Throwable { + // expect an exception to be raised when + // an assertion is made about the value of an unknown statistic + assertThatThrownBy(() -> + verifyStatisticCounterValue(statistics, "anything", 0)) + .isInstanceOf(AssertionError.class); + } + + /** + * Serialization round trip will preserve all the values. + */ + @Test + public void testSerDeser() throws Throwable { + incrementAllCounters(); + IOStatistics stat = IOStatisticsSupport.snapshotIOStatistics(statistics); + incrementAllCounters(); + IOStatistics deser = IOStatisticAssertions.statisticsJavaRoundTrip(stat); + assertThat(deser.counters().keySet()) + .containsExactlyInAnyOrder(KEYS); + for (Map.Entry<String, Long> e : deser.counters().entrySet()) { + assertThat(e.getValue()) + .describedAs("Value of entry %s", e) + .isEqualTo(1); + } + } + + @Test + public void testStringification() throws Throwable { + assertThat(ioStatisticsToString(statistics)) + .isNotBlank() + .contains(KEYS); + } + + @Test + public void testDemandStringification() throws Throwable { + String counterPattern = ENTRY_PATTERN; + // this is not yet evaluated + Object demand = demandStringifyIOStatistics(statistics); + // nor is this.
+ Object demandSource = demandStringifyIOStatisticsSource(statsSource); + + // show it evaluates + String formatted1 = String.format(counterPattern, ALONG, aLong.get()); + assertThat(demand + .toString()) + .contains(formatted1); + assertThat(demandSource + .toString()) + .contains(formatted1); + + // when the counters are incremented + incrementAllCounters(); + incrementAllCounters(); + // there are new values to expect + String formatted2 = String.format(counterPattern, ALONG, aLong.get()); + assertThat(demand + .toString()) + .doesNotContain(formatted1) + .contains(formatted2); + assertThat(demandSource + .toString()) + .doesNotContain(formatted1) + .contains(formatted2); + } + + @Test + public void testNullSourceStringification() throws Throwable { + assertThat(demandStringifyIOStatisticsSource((IOStatisticsSource) null) + .toString()) + .isEqualTo(NULL_SOURCE); + } + + @Test + public void testNullStatStringification() throws Throwable { + assertThat(demandStringifyIOStatistics((IOStatistics) null) + .toString()) + .isEqualTo(NULL_SOURCE); + } + + @Test + public void testStringLogging() throws Throwable { + LOG.info("Output {}", demandStringifyIOStatistics(statistics)); + } + + /** + * Increment all the counters from their current value. + */ + private void incrementAllCounters() { + aLong.incrementAndGet(); + aInt.incrementAndGet(); + evalLong += 1; + counter.incr(); + } + + /** + * Needed to provide a metrics info instance for the counter + * constructor. 
+ */ + private static final class Info implements MetricsInfo { + + private final String name; + + private Info(final String name) { + this.name = name; + } + + @Override + public String name() { + return name; + } + + @Override + public String description() { + return name; + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestEmptyIOStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestEmptyIOStatistics.java new file mode 100644 index 0000000000000..296470abaa9bf --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestEmptyIOStatistics.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import org.junit.Test; + +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertStatisticCounterIsTracked; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertStatisticCounterIsUntracked; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToString; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.emptyStatistics; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Test handling of the empty IO statistics class. + */ +public class TestEmptyIOStatistics extends AbstractHadoopTestBase { + + private final IOStatistics empty = emptyStatistics(); + + @Test + public void testUnknownStatistic() throws Throwable { + assertStatisticCounterIsUntracked(empty, "anything"); + } + + @Test + public void testStatisticsTrackedAssertion() throws Throwable { + // expect an exception to be raised when an assertion + // is made that an unknown statistic is tracked. 
+ assertThatThrownBy(() -> + assertStatisticCounterIsTracked(empty, "anything")) + .isInstanceOf(AssertionError.class); + } + + @Test + public void testStatisticsValueAssertion() throws Throwable { + // expect an exception to be raised when + // an assertion is made about the value of an unknown statistics + assertThatThrownBy(() -> + verifyStatisticCounterValue(empty, "anything", 0)) + .isInstanceOf(AssertionError.class); + } + + @Test + public void testEmptySnapshot() throws Throwable { + final IOStatistics stat = IOStatisticsSupport.snapshotIOStatistics(empty); + assertThat(stat.counters().keySet()) + .describedAs("keys of snapshot") + .isEmpty(); + IOStatistics deser = IOStatisticAssertions.statisticsJavaRoundTrip(stat); + assertThat(deser.counters().keySet()) + .describedAs("keys of deserialized snapshot") + .isEmpty(); + } + + @Test + public void testStringification() throws Throwable { + assertThat(ioStatisticsToString(empty)) + .isNotBlank(); + } + + @Test + public void testWrap() throws Throwable { + IOStatisticsSource statisticsSource = IOStatisticsBinding.wrap(empty); + assertThat(statisticsSource.getIOStatistics()) + .isSameAs(empty); + } + + @Test + public void testStringifyNullSource() throws Throwable { + assertThat(IOStatisticsLogging.ioStatisticsSourceToString(null)) + .isEmpty(); + } + + @Test + public void testStringifyNullStats() throws Throwable { + assertThat( + IOStatisticsLogging.ioStatisticsSourceToString( + IOStatisticsBinding.wrap(null))) + .isEmpty(); + } + + @Test + public void testStringificationNull() throws Throwable { + assertThat(ioStatisticsToString(null)) + .describedAs("Null statistics should stringify to \"\"") + .isEmpty(); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsSetters.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsSetters.java new file mode 100644 index 0000000000000..7dfb540500457 --- 
/dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsSetters.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.util.Arrays; +import java.util.Collection; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.hadoop.fs.statistics.impl.ForwardingIOStatisticsStore; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticGauge; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticMaximum; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticMean; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticMinimum; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; + +/** + * Test the {@link 
IOStatisticsSetters} interface implementations through + * a parameterized run with each implementation. + * For each of the setters, the value is set, verified, + * updated, verified again. + * An option known to be undefined in all created IOStatisticsStore instances + * is set, to verify it is harmless. + */ + +@RunWith(Parameterized.class) + +public class TestIOStatisticsSetters extends AbstractHadoopTestBase { + + public static final String COUNTER = "counter"; + + public static final String GAUGE = "gauge"; + + public static final String MAXIMUM = "max"; + + public static final String MINIMUM = "min"; + + public static final String MEAN = "mean"; + + private final IOStatisticsSetters ioStatistics; + + private final boolean createsNewEntries; + + @Parameterized.Parameters(name="{0}") + public static Collection params() { + return Arrays.asList(new Object[][]{ + {"IOStatisticsSnapshot", new IOStatisticsSnapshot(), true}, + {"IOStatisticsStore", createTestStore(), false}, + {"ForwardingIOStatisticsStore", new ForwardingIOStatisticsStore(createTestStore()), false}, + }); + } + + /** + * Create a test store with the stats used for testing set up. 
+ * @return a set up store + */ + private static IOStatisticsStore createTestStore() { + return iostatisticsStore() + .withCounters(COUNTER) + .withGauges(GAUGE) + .withMaximums(MAXIMUM) + .withMinimums(MINIMUM) + .withMeanStatistics(MEAN) + .build(); + } + + public TestIOStatisticsSetters( + String source, + IOStatisticsSetters ioStatisticsSetters, + boolean createsNewEntries) { + this.ioStatistics = ioStatisticsSetters; + + this.createsNewEntries = createsNewEntries; + } + + @Test + public void testCounter() throws Throwable { + // write + ioStatistics.setCounter(COUNTER, 1); + assertThatStatisticCounter(ioStatistics, COUNTER) + .isEqualTo(1); + + // update + ioStatistics.setCounter(COUNTER, 2); + assertThatStatisticCounter(ioStatistics, COUNTER) + .isEqualTo(2); + + // unknown value + final String unknown = "unknown"; + ioStatistics.setCounter(unknown, 3); + if (createsNewEntries) { + assertThatStatisticCounter(ioStatistics, unknown) + .isEqualTo(3); + } else { + Assertions.assertThat(ioStatistics.counters()) + .describedAs("Counter map in {}", ioStatistics) + .doesNotContainKey(unknown); + } + } + + @Test + public void testMaximum() throws Throwable { + // write + ioStatistics.setMaximum(MAXIMUM, 1); + assertThatStatisticMaximum(ioStatistics, MAXIMUM) + .isEqualTo(1); + + // update + ioStatistics.setMaximum(MAXIMUM, 2); + assertThatStatisticMaximum(ioStatistics, MAXIMUM) + .isEqualTo(2); + + // unknown value + ioStatistics.setMaximum("mm2", 3); + } + + @Test + public void testMinimum() throws Throwable { + // write + ioStatistics.setMinimum(MINIMUM, 1); + assertThatStatisticMinimum(ioStatistics, MINIMUM) + .isEqualTo(1); + + // update + ioStatistics.setMinimum(MINIMUM, 2); + assertThatStatisticMinimum(ioStatistics, MINIMUM) + .isEqualTo(2); + + // unknown value + ioStatistics.setMinimum("c2", 3); + } + + @Test + public void testGauge() throws Throwable { + // write + ioStatistics.setGauge(GAUGE, 1); + assertThatStatisticGauge(ioStatistics, GAUGE) + 
.isEqualTo(1); + + // update + ioStatistics.setGauge(GAUGE, 2); + assertThatStatisticGauge(ioStatistics, GAUGE) + .isEqualTo(2); + + // unknown value + ioStatistics.setGauge("g2", 3); + } + + @Test + public void testMean() throws Throwable { + // write + final MeanStatistic mean11 = new MeanStatistic(1, 1); + ioStatistics.setMeanStatistic(MEAN, mean11); + assertThatStatisticMean(ioStatistics, MEAN) + .isEqualTo(mean11); + + // update + final MeanStatistic mean22 = new MeanStatistic(2, 2); + ioStatistics.setMeanStatistic(MEAN, mean22); + assertThatStatisticMean(ioStatistics, MEAN) + .isEqualTo(mean22); + + // unknown value + ioStatistics.setMeanStatistic("m2", mean11); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsSnapshot.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsSnapshot.java new file mode 100644 index 0000000000000..41e9bffefe834 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsSnapshot.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.JsonSerialization; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.*; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToString; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Test handling of the {@link IOStatisticsSnapshot} class. + */ +public class TestIOStatisticsSnapshot extends AbstractHadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestIOStatisticsSnapshot.class); + + /** + * Simple snapshot built up in test setup. + */ + private final IOStatisticsSnapshot snapshot = new IOStatisticsSnapshot(); + + /** Saved to the snapshot as "mean0". */ + private MeanStatistic mean0; + + /** Saved to the snapshot as "mean1". 
*/ + private MeanStatistic mean1; + + @Before + public void setup() throws Exception { + snapshot.counters().put("c1", 0L); + snapshot.gauges().put("g1", 1L); + snapshot.minimums().put("m1", -1L); + mean1 = new MeanStatistic(1, 1); + snapshot.meanStatistics().put("mean1", + mean1); + mean0 = new MeanStatistic(0, 1); + snapshot.meanStatistics().put("mean0", + mean0); + } + + @Test + public void testTrackedValues() throws Throwable { + verifyStatisticCounterValue(snapshot, "c1", 0L); + verifyStatisticGaugeValue(snapshot, "g1", 1L); + verifyStatisticMinimumValue(snapshot, "m1", -1L); + verifyStatisticMeanValue(snapshot, "mean0", + new MeanStatistic(0, 1)); + } + + @Test + public void testStatisticsValueAssertion() throws Throwable { + // expect an exception to be raised when + // an assertion is made about the value of an unknown statistics + assertThatThrownBy(() -> + verifyStatisticCounterValue(snapshot, "anything", 0)) + .isInstanceOf(AssertionError.class); + } + + @Test + public void testStringification() throws Throwable { + assertThat(ioStatisticsToString(snapshot)) + .isNotBlank(); + } + + @Test + public void testStringification2() throws Throwable { + + String ss = snapshot.toString(); + LOG.info("original {}", ss); + Assertions.assertThat(ss) + .describedAs("snapshot toString()") + .contains("c1=0") + .contains("g1=1"); + } + + @Test + public void testWrap() throws Throwable { + IOStatisticsSource statisticsSource = IOStatisticsBinding.wrap(snapshot); + assertThat(statisticsSource.getIOStatistics()) + .isSameAs(snapshot); + } + + @Test + public void testJsonRoundTrip() throws Throwable { + JsonSerialization serializer + = IOStatisticsSnapshot.serializer(); + + String json = serializer.toJson(snapshot); + LOG.info("serialized form\n{}", json); + IOStatisticsSnapshot deser = serializer.fromJson(json); + verifyDeserializedInstance(deser); + } + + /** + * Verify the deserialized instance's data + * matches the expected values. + * @param deser deserialized value. 
+ */ + public void verifyDeserializedInstance( + final IOStatistics deser) { + LOG.info("deserialized {}", deser); + verifyStatisticCounterValue(deser, "c1", 0L); + verifyStatisticGaugeValue(deser, "g1", 1L); + verifyStatisticMinimumValue(deser, "m1", -1L); + verifyStatisticMeanValue(deser, "mean0", + new MeanStatistic(0, 1)); + verifyStatisticMeanValue(deser, "mean1", + snapshot.meanStatistics().get("mean1")); + } + + @Test + public void testJavaRoundTrip() throws Throwable { + verifyDeserializedInstance( + IOStatisticAssertions.statisticsJavaRoundTrip( + snapshot)); + + + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsStore.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsStore.java new file mode 100644 index 0000000000000..778eab8315aa5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestIOStatisticsStore.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.statistics; + +import org.assertj.core.api.Assertions; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.JsonSerialization; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticMeanMatches; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticGaugeValue; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticMaximumValue; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticMinimumValue; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.snapshotIOStatistics; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; + +/** + * Test the IOStatisticsStore implementation. + */ +public class TestIOStatisticsStore extends AbstractHadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestIOStatisticsStore.class); + + + private static final String COUNT = "count"; + + private static final String GAUGE = "gauge"; + + private static final String MIN = "min"; + + private static final String MAX = "max"; + + private static final String MEAN = "mean"; + + public static final String UNKNOWN = "unknown"; + + private IOStatisticsStore stats; + + @Before + public void setup() { + stats = iostatisticsStore() + .withCounters(COUNT) + .withGauges(GAUGE) + .withMinimums(MIN) + .withMaximums(MAX) + .withMeanStatistics(MEAN) + .build(); + } + + @After + public void teardown() { + LOG.info("stats {}", stats); + } + + /** + * Gauges go up and down. 
+ */ + @Test + public void testGauges() throws Throwable { + stats.setGauge(GAUGE, 1); + verifyStatisticGaugeValue(stats, GAUGE, 1); + stats.incrementGauge(GAUGE, 1); + verifyStatisticGaugeValue(stats, GAUGE, 2); + stats.setGauge(GAUGE, -1); + verifyStatisticGaugeValue(stats, GAUGE, -1); + Assertions.assertThat(stats.incrementGauge(GAUGE, -1)) + .isEqualTo(-2); + verifyStatisticGaugeValue(stats, GAUGE, -2); + Assertions.assertThat(stats.getGaugeReference(GAUGE).get()) + .isEqualTo(-2); + stats.setGauge(UNKNOWN, 1); + Assertions.assertThat(stats.incrementGauge(UNKNOWN, 1)) + .isEqualTo(0); + } + + @Test + public void testMinimums() throws Throwable { + stats.setMinimum(MIN, 100); + verifyStatisticMinimumValue(stats, MIN, 100); + stats.setMinimum(MIN, 100); + // will do nothing as it is higher + stats.addMinimumSample(MIN, 200); + verifyStatisticMinimumValue(stats, MIN, 100); + stats.addMinimumSample(MIN, 10); + verifyStatisticMinimumValue(stats, MIN, 10); + stats.setMinimum(UNKNOWN, 100); + stats.addMinimumSample(UNKNOWN, 200); + } + + @Test + public void testMaximums() throws Throwable { + stats.setMaximum(MAX, 100); + verifyStatisticMaximumValue(stats, MAX, 100); + stats.setMaximum(MAX, 100); + stats.addMaximumSample(MAX, 200); + verifyStatisticMaximumValue(stats, MAX, 200); + stats.addMaximumSample(MAX, 10); + verifyStatisticMaximumValue(stats, MAX, 200); + stats.setMaximum(UNKNOWN, 100); + stats.addMaximumSample(UNKNOWN, 200); + } + + @Test + public void testMeans() throws Throwable { + stats.setMeanStatistic(MEAN, + new MeanStatistic(1, 1)); + + assertThatStatisticMeanMatches(stats, MEAN, 1, 1) + .matches(p -> p.mean() == 1, "mean"); + stats.addMeanStatisticSample(MEAN, 9); + assertThatStatisticMeanMatches(stats, MEAN, 2, 10) + .matches(p -> p.mean() == 5, "mean"); + } + + @Test + public void testRoundTrip() throws Throwable { + JsonSerialization serializer + = IOStatisticsSnapshot.serializer(); + stats.incrementCounter(COUNT); + stats.setGauge(GAUGE, -1); + 
stats.addMaximumSample(MAX, 200); + stats.addMinimumSample(MIN, -100); + stats.addMeanStatisticSample(MEAN, 1); + stats.addMeanStatisticSample(MEAN, 9); + + String json = serializer.toJson(snapshotIOStatistics(stats)); + LOG.info("serialized form\n{}", json); + IOStatisticsSnapshot deser = serializer.fromJson(json); + LOG.info("deserialized {}", deser); + verifyStatisticCounterValue(deser, COUNT, 1L); + verifyStatisticGaugeValue(deser, GAUGE, -1); + verifyStatisticMaximumValue(deser, MAX, 200); + verifyStatisticMinimumValue(deser, MIN, -100); + assertThatStatisticMeanMatches(deser, MEAN, 2, 10) + .matches(p -> p.mean() == 5, "mean"); + + } + + @Test + public void testUnknownCounter() throws Throwable { + Assertions.assertThat(stats.incrementCounter("unknown", -10)) + .isEqualTo(0); + } + + @Test + public void testNegativeCounterIncrementIgnored() throws Throwable { + Assertions.assertThat(stats.incrementCounter(COUNT, 2)) + .isEqualTo(2); + Assertions.assertThat(stats.incrementCounter(COUNT, -10)) + .isEqualTo(2); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestMeanStatistic.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestMeanStatistic.java new file mode 100644 index 0000000000000..749a6ee4d9eb4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/statistics/TestMeanStatistic.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.JsonSerialization; + +/** + * Test the {@link MeanStatistic} class. + */ +public class TestMeanStatistic extends AbstractHadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestMeanStatistic.class); + + private static final int TEN = 10; + + private static final double ZEROD = 0.0d; + + private static final double TEND = 10.0d; + + private final MeanStatistic empty = new MeanStatistic(0, 0); + + private final MeanStatistic tenFromOne = new MeanStatistic(1, TEN); + + private final MeanStatistic tenFromTen = new MeanStatistic(TEN, TEN); + + @Test + public void testEmptiness() throws Throwable { + Assertions.assertThat(empty) + .matches(MeanStatistic::isEmpty, "is empty") + .isEqualTo(new MeanStatistic(0, TEN)) + .isEqualTo(new MeanStatistic()) + .isNotEqualTo(tenFromOne); + Assertions.assertThat(empty.mean()) + .isEqualTo(ZEROD); + Assertions.assertThat(empty.toString()) + .contains("0.0"); + } + + @Test + public void testTenFromOne() throws Throwable { + Assertions.assertThat(tenFromOne) + .matches(p -> !p.isEmpty(), "is not empty") + .isEqualTo(tenFromOne) + .isNotEqualTo(tenFromTen); + Assertions.assertThat(tenFromOne.mean()) + .isEqualTo(TEND); + } + + @Test + public void testNegativeSamplesAreEmpty() throws Throwable { + MeanStatistic stat = new 
MeanStatistic(-10, 1); + Assertions.assertThat(stat) + .describedAs("stat with negative samples") + .matches(MeanStatistic::isEmpty, "is empty") + .isEqualTo(empty) + .extracting(MeanStatistic::mean) + .isEqualTo(ZEROD); + Assertions.assertThat(stat.toString()) + .contains("0.0"); + + } + + @Test + public void testCopyNonEmpty() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); + Assertions.assertThat(stat) + .describedAs("copy of " + tenFromOne) + .isEqualTo(tenFromOne) + .isNotSameAs(tenFromOne); + } + + @Test + public void testCopyEmpty() throws Throwable { + MeanStatistic stat = empty.copy(); + Assertions.assertThat(stat) + .describedAs("copy of " + empty) + .isEqualTo(empty) + .isNotSameAs(empty); + } + + @Test + public void testDoubleSamples() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); + Assertions.assertThat(stat.add(tenFromOne)) + .isEqualTo(new MeanStatistic(2, 20)) + .extracting(MeanStatistic::mean) + .isEqualTo(TEND); + } + + @Test + public void testAddEmptyR() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); + Assertions.assertThat(stat.add(empty)) + .isEqualTo(tenFromOne); + } + + @Test + public void testAddEmptyL() throws Throwable { + MeanStatistic stat = empty.copy(); + Assertions.assertThat(stat.add(tenFromOne)) + .isEqualTo(tenFromOne); + } + + @Test + public void testAddEmptyLR() throws Throwable { + MeanStatistic stat = empty.copy(); + Assertions.assertThat(stat.add(empty)) + .isEqualTo(empty); + } + + @Test + public void testAddSampleToEmpty() throws Throwable { + MeanStatistic stat = empty.copy(); + stat.addSample(TEN); + Assertions.assertThat(stat) + .isEqualTo(tenFromOne); + } + + @Test + public void testAddZeroValueSamples() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); + for (int i = 0; i < 9; i++) { + stat.addSample(0); + } + Assertions.assertThat(stat) + .isEqualTo(tenFromTen); + } + + @Test + public void testSetSamples() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); 
+ stat.setSamples(10); + Assertions.assertThat(stat) + .isEqualTo(tenFromTen); + } + + @Test + public void testSetSums() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); + stat.setSum(100); + stat.setSamples(20); + Assertions.assertThat(stat) + .isEqualTo(new MeanStatistic(20, 100)) + .extracting(MeanStatistic::mean) + .isEqualTo(5.0d); + } + + @Test + public void testSetNegativeSamplesMakesEmpty() throws Throwable { + MeanStatistic stat = tenFromOne.copy(); + stat.setSamples(-3); + Assertions.assertThat(stat) + .isEqualTo(empty); + } + + @Test + public void testJsonRoundTrip() throws Throwable { + JsonSerialization serializer = serializer(); + + String json = serializer.toJson(tenFromTen); + LOG.info("serialized form\n{}", json); + Assertions.assertThat(json) + .describedAs("JSON form of %s", tenFromTen) + .doesNotContain("empty") + .doesNotContain("mean"); + + MeanStatistic deser = serializer.fromJson(json); + LOG.info("deserialized {}", deser); + Assertions.assertThat(deser) + .isEqualTo(tenFromTen); + } + + /** + * negative sample counts in the json convert the stat to being empty. + */ + @Test + public void testHandleMaliciousStat() throws Throwable { + String json = "{\n" + + " \"sum\" : 10,\n" + + " \"samples\" : -10\n" + + "}"; + JsonSerialization serializer = serializer(); + MeanStatistic deser = serializer.fromJson(json); + LOG.info("deserialized {}", deser); + Assertions.assertThat(deser) + .isEqualTo(empty); + } + + /** + * Get a JSON serializer. + * @return a serializer. 
+   */
+  public static JsonSerialization serializer() {
+    return new JsonSerialization<>(MeanStatistic.class, true, true);
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestDataBlocks.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestDataBlocks.java
new file mode 100644
index 0000000000000..5698a08c7e16b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestDataBlocks.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.store;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BUFFER_ARRAY;
+import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BUFFER_DISK;
+import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BYTEBUFFER;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Unit tests for {@link DataBlocks}: each block factory is created by name
+ * and a block from it is taken through write, upload and close.
+ */
+public class TestDataBlocks {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestDataBlocks.class);
+  private static final int ONE_KB = 1024;
+  private final Configuration configuration = new Configuration();
+
+  /**
+   * Run the same block checks against every factory name, in the order
+   * disk buffer, array buffer, byte buffer.
+   */
+  @Test
+  public void testDataBlocksFactory() throws Exception {
+    final String[] factoryNames = {
+        DATA_BLOCKS_BUFFER_DISK,
+        DATA_BLOCKS_BUFFER_ARRAY,
+        DATA_BLOCKS_BYTEBUFFER,
+    };
+    for (String factoryName : factoryNames) {
+      testCreateFactory(factoryName);
+    }
+  }
+
+  /**
+   * Create the named factory, ask it for a block and run the write,
+   * toByteArray and close checks on that block, in that order.
+   *
+   * @param nameOfFactory Name of the DataBlock factory to be created.
+   * @throws Exception if creating the block or any of the checks fails.
+   */
+  public void testCreateFactory(String nameOfFactory) throws Exception {
+    LOG.info("Testing: {}", nameOfFactory);
+    final DataBlocks.DataBlock block = DataBlocks
+        .createFactory("Dir", configuration, nameOfFactory)
+        .create(0, ONE_KB, null);
+
+    assertWriteBlock(block);
+    assertToByteArray(block);
+    assertCloseBlock(block);
+  }
+
+  /**
+   * Write ONE_KB random bytes into the block and check its state, size and
+   * remaining capacity.
+   *
+   * @param dataBlock DataBlock to be tested.
+   * @throws IOException if the write or the state verification fails.
+   */
+  private void assertWriteBlock(DataBlocks.DataBlock dataBlock)
+      throws IOException {
+    final byte[] payload = new byte[ONE_KB];
+    final Random random = new Random();
+    random.nextBytes(payload);
+    dataBlock.write(payload, 0, ONE_KB);
+    // After a write the block has to be in the Writing state.
+    dataBlock.verifyState(DataBlocks.DataBlock.DestState.Writing);
+    // It has to report that it holds data ...
+    assertTrue("Expected Data block to have data", dataBlock.hasData());
+    // ... and exactly as much as was written.
+    assertEquals("Mismatch in data size in block", ONE_KB,
+        dataBlock.dataSize());
+    // ONE_KB was written, so not even one more byte may fit.
+    assertFalse("Expected the data block to have no capacity to write 1 byte "
+        + "of data", dataBlock.hasCapacity(1));
+  }
+
+  /**
+   * Start the upload and check that the upload data can be converted to a
+   * byte[] repeatedly, but no longer once it has been closed.
+   *
+   * @param dataBlock data block to be tested.
+   * @throws Exception Throw Exception in case of failures.
+   */
+  private void assertToByteArray(DataBlocks.DataBlock dataBlock)
+      throws Exception {
+    final DataBlocks.BlockUploadData uploadData = dataBlock.startUpload();
+    // startUpload() has to leave the block in the Upload state.
+    dataBlock.verifyState(DataBlocks.DataBlock.DestState.Upload);
+    final byte[] firstResult = uploadData.toByteArray();
+    // A second toByteArray() call has to give the same byte[].
+    // NOTE(review): assertEquals on arrays compares references, so this
+    // relies on toByteArray() handing back the same array object - confirm
+    // that identity, not content equality, is what is meant here.
+    assertEquals("Mismatch in byteArray provided by toByteArray() the second "
+        + "time", firstResult, uploadData.toByteArray());
+    IOUtils.close(uploadData);
+    // Once closed, toByteArray() is expected to be rejected.
+    LambdaTestUtils.intercept(IllegalStateException.class,
+        "Block is closed",
+        "Expected to throw IllegalStateException.java after closing "
+            + "blockUploadData and trying to call toByteArray()",
+        () -> {
+          uploadData.toByteArray();
+        });
+  }
+
+  /**
+   * Close the block and check that it ends up in the Closed state.
+   *
+   * @param dataBlock data block to be tested.
+   * @throws IOException if closing or the state verification fails.
+   */
+  private void assertCloseBlock(DataBlocks.DataBlock dataBlock)
+      throws IOException {
+    dataBlock.close();
+    // close() has to move the block to Closed.
+    dataBlock.verifyState(DataBlocks.DataBlock.DestState.Closed);
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestFSBuilderSupport.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestFSBuilderSupport.java
new file mode 100644
index 0000000000000..c34cdbe0ae59b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/store/TestFSBuilderSupport.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.store;
+
+import java.io.IOException;
+
+import javax.annotation.Nonnull;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSBuilder;
+import org.apache.hadoop.fs.impl.FSBuilderSupport;
+import org.apache.hadoop.test.AbstractHadoopTestBase;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Test builder support, forwarding of opt double/float to long,
+ * resilience.
+ */
+@SuppressWarnings("deprecation")
+public class TestFSBuilderSupport extends AbstractHadoopTestBase {
+
+  /**
+   * Float and double values passed to opt() are read back through
+   * getLong() as 1 (from 1.8f) and 2000 (from 2.0e3).
+   */
+  @Test
+  public void testOptFloatDoubleForwardsToLong() throws Throwable {
+    FSBuilderSupport c = builder()
+        .opt("f", 1.8f)
+        .opt("d", 2.0e3)
+        .build();
+    assertThat(c.getLong("f", 2))
+        .isEqualTo(1);
+    assertThat(c.getLong("d", 2))
+        .isEqualTo(2000);
+  }
+
+  /**
+   * Same expectation as the opt() case, but for values passed to must().
+   */
+  @Test
+  public void testMustFloatDoubleForwardsToLong() throws Throwable {
+    FSBuilderSupport c = builder()
+        .must("f", 1.8f)
+        .must("d", 2.0e3)
+        .build();
+    assertThat(c.getLong("f", 2))
+        .isEqualTo(1);
+    assertThat(c.getLong("d", 2))
+        .isEqualTo(2000);
+  }
+
+  /**
+   * Long values passed to opt() and must() are read back unchanged.
+   */
+  @Test
+  public void testLongOptStillWorks() throws Throwable {
+    FSBuilderSupport c = builder()
+        .opt("o", 1L)
+        .must("m", 1L)
+        .build();
+    assertThat(c.getLong("o", 2))
+        .isEqualTo(1L);
+    assertThat(c.getLong("m", 2))
+        .isEqualTo(1L);
+  }
+
+  /**
+   * String options "1.8f" and "1.8e20" make getLong() fall back to the
+   * supplied default.
+   */
+  @Test
+  public void testFloatParseFallback() throws Throwable {
+    FSBuilderSupport c = builder()
+        .opt("f", "1.8f")
+        .opt("d", "1.8e20")
+        .build();
+
+    assertThat(c.getLong("f", 2))
+        .isEqualTo(2);
+    assertThat(c.getLong("d", 2))
+        .isEqualTo(2);
+  }
+
+  /**
+   * A negative long is returned as is by getLong(), while getPositiveLong()
+   * returns the default instead.
+   */
+  @Test
+  public void testNegatives() throws Throwable {
+    FSBuilderSupport c = builder()
+        .optLong("-1", -1)
+        .mustLong("-2", -2)
+        .build();
+
+    // getLong gets the long value
+    assertThat(c.getLong("-1", 2))
+        .isEqualTo(-1);
+
+
+    // but getPositiveLong returns the positive default
+    assertThat(c.getPositiveLong("-1", 2))
+        .isEqualTo(2);
+  }
+
+  /**
+   * Boolean options are read back through the options Configuration; a
+   * non-boolean string yields the default passed to getBoolean().
+   */
+  @Test
+  public void testBoolean() throws Throwable {
+    final FSBuilderSupport c = builder()
+        .opt("f", false)
+        .opt("t", true)
+        .opt("o", "other")
+        .build();
+    assertThat(c.getOptions().getBoolean("f", true))
+        .isFalse();
+    assertThat(c.getOptions().getBoolean("t", false))
+        .isTrue();
+    // this is handled in Configuration itself.
+    assertThat(c.getOptions().getBoolean("o", true))
+        .isTrue();
+  }
+
+  /**
+   * @return a fresh builder backed by an empty Configuration.
+   */
+  private SimpleBuilder builder() {
+    return new BuilderImpl();
+  }
+
+  /**
+   * Builder type used by the tests. The type arguments bind build() to
+   * {@link FSBuilderSupport} and the chained opt()/must() calls to
+   * SimpleBuilder, which is what the fluent calls in the tests rely on.
+   */
+  private interface SimpleBuilder
+      extends FSBuilder<FSBuilderSupport, SimpleBuilder> {
+  }
+
+  /**
+   * This is a minimal builder which relies on default implementations of the interface.
+   * If it ever stops compiling, it means a new interface has been added which
+   * is not backwards compatible with external implementations, such as that
+   * in HBoss (see HBASE-26483).
+   *
+   */
+  private static final class BuilderImpl
+      implements SimpleBuilder {
+    private final Configuration options = new Configuration(false);
+
+    @Override
+    public SimpleBuilder opt(@Nonnull final String key, @Nonnull final String value) {
+      options.set(key, value);
+      return this;
+    }
+
+    @Override
+    public SimpleBuilder opt(@Nonnull final String key, @Nonnull final String... values) {
+      options.setStrings(key, values);
+      return this;
+    }
+
+    // must() is not tracked separately here: it stores the value like opt().
+    @Override
+    public SimpleBuilder must(@Nonnull final String key, @Nonnull final String value) {
+      return opt(key, value);
+    }
+
+    @Override
+    public SimpleBuilder must(@Nonnull final String key, @Nonnull final String... values) {
+      return opt(key, values);
+    }
+
+    @Override
+    public FSBuilderSupport build()
+        throws IllegalArgumentException, UnsupportedOperationException, IOException {
+      return new FSBuilderSupport(options);
+    }
+  }
+
+
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestHCFSMountTableConfigLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestHCFSMountTableConfigLoader.java
new file mode 100644
index 0000000000000..bf7a6e32c8e93
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestHCFSMountTableConfigLoader.java
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileSystemTestHelper;
+import org.apache.hadoop.fs.FsConstants;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests the mount table loading.
+ */
+public class TestHCFSMountTableConfigLoader {
+
+  private static final String DOT = ".";
+
+  private static final String TARGET_TWO = "/tar2";
+
+  private static final String TARGET_ONE = "/tar1";
+
+  private static final String SRC_TWO = "/src2";
+
+  private static final String SRC_ONE = "/src1";
+
+  private static final String TABLE_NAME = "test";
+
+  private MountTableConfigLoader loader = new HCFSMountTableConfigLoader();
+
+  private static FileSystem fsTarget;
+  private static Configuration conf;
+  private static Path targetTestRoot;
+  private static FileSystemTestHelper fileSystemTestHelper =
+      new FileSystemTestHelper();
+  private static File oldVersionMountTableFile;
+  private static File newVersionMountTableFile;
+  // Configuration keys under which the links for SRC_ONE / SRC_TWO of the
+  // test mount table are expected after a successful load.
+  private static final String MOUNT_LINK_KEY_SRC_ONE =
+      new StringBuilder(Constants.CONFIG_VIEWFS_PREFIX).append(DOT)
+          .append(TABLE_NAME).append(DOT).append(Constants.CONFIG_VIEWFS_LINK)
+          .append(DOT).append(SRC_ONE).toString();
+  private static final String MOUNT_LINK_KEY_SRC_TWO =
+      new StringBuilder(Constants.CONFIG_VIEWFS_PREFIX).append(DOT)
+          .append(TABLE_NAME).append(DOT).append(Constants.CONFIG_VIEWFS_LINK)
+          .append(DOT).append(SRC_TWO).toString();
+
+  /**
+   * Create the local target file system and an empty test root directory.
+   */
+  @BeforeClass
+  public static void init() throws Exception {
+    fsTarget = new LocalFileSystem();
+    fsTarget.initialize(new URI("file:///"), new Configuration());
+    targetTestRoot = fileSystemTestHelper.getAbsoluteTestRootPath(fsTarget);
+    fsTarget.delete(targetTestRoot, true);
+    fsTarget.mkdirs(targetTestRoot);
+  }
+
+  /**
+   * Start every test with a fresh Configuration and with the two mount
+   * table files table.1.xml and table.2.xml present in the test root.
+   */
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.set(String.format(
+        FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, "file"),
+        LocalFileSystem.class.getName());
+    oldVersionMountTableFile =
+        new File(new URI(targetTestRoot.toString() + "/table.1.xml"));
+    oldVersionMountTableFile.createNewFile();
+    newVersionMountTableFile =
+        new File(new URI(targetTestRoot.toString() + "/table.2.xml"));
+    newVersionMountTableFile.createNewFile();
+  }
+
+  /**
+   * With both table files in the directory and links written only to
+   * table.2.xml, loading the directory must make those links visible.
+   */
+  @Test
+  public void testMountTableFileLoadingWhenMultipleFilesExist()
+      throws Exception {
+    ViewFsTestSetup.addMountLinksToFile(TABLE_NAME,
+        new String[] {SRC_ONE, SRC_TWO }, new String[] {TARGET_ONE,
+            TARGET_TWO },
+        new Path(newVersionMountTableFile.toURI()), conf);
+    loader.load(targetTestRoot.toString(), conf);
+    Assert.assertEquals(TARGET_TWO, conf.get(MOUNT_LINK_KEY_SRC_TWO));
+    Assert.assertEquals(TARGET_ONE, conf.get(MOUNT_LINK_KEY_SRC_ONE));
+  }
+
+  /**
+   * A file named table.InvalidVersion.xml must be ignored by the loader even
+   * though it contains mount links.
+   */
+  @Test
+  public void testMountTableFileWithInvalidFormat() throws Exception {
+    Path path = new Path(new URI(
+        targetTestRoot.toString() + "/testMountTableFileWithInvalidFormat/"));
+    fsTarget.mkdirs(path);
+    File invalidMountFileName =
+        new File(new URI(path.toString() + "/table.InvalidVersion.xml"));
+    invalidMountFileName.createNewFile();
+    // Adding mount links to make sure it will not read it.
+    ViewFsTestSetup.addMountLinksToFile(TABLE_NAME,
+        new String[] {SRC_ONE, SRC_TWO }, new String[] {TARGET_ONE,
+            TARGET_TWO },
+        new Path(invalidMountFileName.toURI()), conf);
+    // Pass mount table directory
+    loader.load(path.toString(), conf);
+    Assert.assertNull(conf.get(MOUNT_LINK_KEY_SRC_TWO));
+    Assert.assertNull(conf.get(MOUNT_LINK_KEY_SRC_ONE));
+    invalidMountFileName.delete();
+  }
+
+  /**
+   * A file whose name contains no dots at all must be ignored as well.
+   */
+  @Test
+  public void testMountTableFileWithInvalidFormatWithNoDotsInName()
+      throws Exception {
+    Path path = new Path(new URI(targetTestRoot.toString()
+        + "/testMountTableFileWithInvalidFormatWithNoDots/"));
+    fsTarget.mkdirs(path);
+    File invalidMountFileName =
+        new File(new URI(path.toString() + "/tableInvalidVersionxml"));
+    invalidMountFileName.createNewFile();
+    // Pass mount table directory
+    loader.load(path.toString(), conf);
+    Assert.assertNull(conf.get(MOUNT_LINK_KEY_SRC_TWO));
+    Assert.assertNull(conf.get(MOUNT_LINK_KEY_SRC_ONE));
+    invalidMountFileName.delete();
+  }
+
+  /**
+   * Loading a path that does not exist must fail with
+   * FileNotFoundException.
+   * NOTE(review): this method and testLoadWithNonExistentMountFile appear to
+   * carry each other's names; they are left as is so that existing test
+   * selections keep working.
+   */
+  @Test(expected = FileNotFoundException.class)
+  public void testLoadWithMountFile() throws Exception {
+    loader.load(new URI(targetTestRoot.toString() + "/Non-Existent-File.xml")
+        .toString(), conf);
+  }
+
+  /**
+   * Loading an existing mount table file directly (rather than its
+   * directory) must make its links visible.
+   */
+  @Test
+  public void testLoadWithNonExistentMountFile() throws Exception {
+    ViewFsTestSetup.addMountLinksToFile(TABLE_NAME,
+        new String[] {SRC_ONE, SRC_TWO },
+        new String[] {TARGET_ONE, TARGET_TWO },
+        new Path(oldVersionMountTableFile.toURI()), conf);
+    loader.load(oldVersionMountTableFile.toURI().toString(), conf);
+    Assert.assertEquals(TARGET_TWO, conf.get(MOUNT_LINK_KEY_SRC_TWO));
+    Assert.assertEquals(TARGET_ONE, conf.get(MOUNT_LINK_KEY_SRC_ONE));
+  }
+
+  /**
+   * Remove the test root once all tests have run.
+   */
+  @AfterClass
+  public static void tearDown() throws IOException {
+    fsTarget.delete(targetTestRoot, true);
+  }
+
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPoint.java
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPoint.java
new file mode 100644
index 0000000000000..a5df2bab41322
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPoint.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import java.util.function.Function;
+import java.io.IOException;
+import java.net.URI;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test Regex Mount Point.
+ */
+public class TestRegexMountPoint {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(TestRegexMountPoint.class.getName());
+
+  private InodeTree inodeTree;
+  private Configuration conf;
+
+  /**
+   * Stand-in target "file system" that only remembers the URI it was
+   * created for, so the tests can inspect what a mount point resolved to.
+   */
+  class TestRegexMountPointFileSystem {
+    public URI getUri() {
+      return uri;
+    }
+
+    private URI uri;
+
+    TestRegexMountPointFileSystem(URI uri) {
+      String uriStr = uri == null ? "null" : uri.toString();
+      LOGGER.info("Create TestRegexMountPointFileSystem Via URI:" + uriStr);
+      this.uri = uri;
+    }
+  }
+
+  /**
+   * Build an InodeTree with a single "/mnt" link whose target file systems
+   * are {@link TestRegexMountPointFileSystem} instances.
+   */
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    ConfigUtil.addLink(conf, TestRegexMountPoint.class.getName(), "/mnt",
+        URI.create("file:///"));
+
+    inodeTree = new InodeTree<TestRegexMountPointFileSystem>(conf,
+        TestRegexMountPoint.class.getName(), null, false) {
+      @Override
+      protected Function<URI, TestRegexMountPointFileSystem>
+          initAndGetTargetFs() {
+        return new Function<URI, TestRegexMountPointFileSystem>() {
+          @Override
+          public TestRegexMountPointFileSystem apply(URI uri) {
+            return new TestRegexMountPointFileSystem(uri);
+          }
+        };
+      }
+
+      @Override
+      protected TestRegexMountPointFileSystem getTargetFileSystem(
+          final INodeDir dir) {
+        return new TestRegexMountPointFileSystem(null);
+      }
+
+      @Override
+      protected TestRegexMountPointFileSystem getTargetFileSystem(
+          final String settings, final URI[] mergeFsURIList) {
+        return new TestRegexMountPointFileSystem(null);
+      }
+    };
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    inodeTree = null;
+  }
+
+  /**
+   * The variables used in the destination path are grouped by name, with
+   * every distinct spelling ("$1" and "${1}") recorded once.
+   */
+  @Test
+  public void testGetVarListInString() throws IOException {
+    String srcRegex = "/(\\w+)";
+    String target = "/$0/${1}/$1/${2}/${2}";
+    RegexMountPoint regexMountPoint =
+        new RegexMountPoint(inodeTree, srcRegex, target, null);
+    regexMountPoint.initialize();
+    Map<String, Set<String>> varMap = regexMountPoint.getVarInDestPathMap();
+    Assert.assertEquals(3, varMap.size());
+    Assert.assertEquals(1, varMap.get("0").size());
+    Assert.assertTrue(varMap.get("0").contains("$0"));
+    Assert.assertEquals(2, varMap.get("1").size());
+    Assert.assertTrue(varMap.get("1").contains("${1}"));
+    Assert.assertTrue(varMap.get("1").contains("$1"));
+    Assert.assertEquals(1, varMap.get("2").size());
+    Assert.assertTrue(varMap.get("2").contains("${2}"));
+  }
+
+  /**
+   * A named group captured from the source path is substituted for
+   * "$username" in the destination path.
+   */
+  @Test
+  public void testResolve() throws IOException {
+    // The group has to be named "username" to match $username below.
+    String regexStr = "^/user/(?<username>\\w+)";
+    String dstPathStr = "/namenode1/testResolve/$username";
+    String settingsStr = null;
+    RegexMountPoint regexMountPoint =
+        new RegexMountPoint(inodeTree, regexStr, dstPathStr, settingsStr);
+    regexMountPoint.initialize();
+    InodeTree.ResolveResult resolveResult =
+        regexMountPoint.resolve("/user/hadoop/file1", true);
+    Assert.assertEquals(InodeTree.ResultKind.EXTERNAL_DIR, resolveResult.kind);
+    Assert.assertTrue(
+        resolveResult.targetFileSystem
+            instanceof TestRegexMountPointFileSystem);
+    Assert.assertEquals("/user/hadoop", resolveResult.resolvedPath);
+    Assert.assertEquals("/namenode1/testResolve/hadoop",
+        ((TestRegexMountPointFileSystem) resolveResult.targetFileSystem)
+            .getUri().toString());
+    Assert.assertEquals("/file1", resolveResult.remainingPath.toString());
+  }
+
+  /**
+   * Same as testResolve, with an interceptor that rewrites "_" to "-" in
+   * the resolved destination path only.
+   */
+  @Test
+  public void testResolveWithInterceptor() throws IOException {
+    // The group has to be named "username" to match $username below.
+    String regexStr = "^/user/(?<username>\\w+)";
+    String dstPathStr = "/namenode1/testResolve/$username";
+    // Replace "_" with "-"
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        new RegexMountPointResolvedDstPathReplaceInterceptor("_", "-");
+    // replaceresolvedpath:_:-
+    String settingsStr = interceptor.serializeToString();
+    RegexMountPoint regexMountPoint =
+        new RegexMountPoint(inodeTree, regexStr, dstPathStr, settingsStr);
+    regexMountPoint.initialize();
+    InodeTree.ResolveResult resolveResult =
+        regexMountPoint.resolve("/user/hadoop_user1/file_index", true);
+    Assert.assertEquals(InodeTree.ResultKind.EXTERNAL_DIR, resolveResult.kind);
+    Assert.assertTrue(
+        resolveResult.targetFileSystem
+            instanceof TestRegexMountPointFileSystem);
+    Assert.assertEquals("/user/hadoop_user1", resolveResult.resolvedPath);
+    Assert.assertEquals("/namenode1/testResolve/hadoop-user1",
+        ((TestRegexMountPointFileSystem) resolveResult.targetFileSystem)
+            .getUri().toString());
+    Assert.assertEquals("/file_index",
+        resolveResult.remainingPath.toString());
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPointInterceptorFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPointInterceptorFactory.java
new file mode 100644
index 0000000000000..c567944ffe307
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPointInterceptorFactory.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test Regex Mount Point Interceptor Factory: checks what
+ * {@link RegexMountPointInterceptorFactory#create(String)} returns.
+ */
+public class TestRegexMountPointInterceptorFactory {
+
+  /** A "type SEP src SEP replace" string yields the replace interceptor. */
+  @Test
+  public void testCreateNormalCase() {
+    String replaceInterceptorStr =
+        RegexMountPointInterceptorType.REPLACE_RESOLVED_DST_PATH.getConfigName()
+            + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + "src"
+            + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + "replace";
+    RegexMountPointInterceptor interceptor =
+        RegexMountPointInterceptorFactory.create(replaceInterceptorStr);
+    Assert.assertTrue(
+        interceptor
+            instanceof RegexMountPointResolvedDstPathReplaceInterceptor);
+  }
+
+  /** With "___" appended to the type name, create() must return null. */
+  @Test
+  public void testCreateBadCase() {
+    String replaceInterceptorStr =
+        RegexMountPointInterceptorType.REPLACE_RESOLVED_DST_PATH.getConfigName()
+            + "___" + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + "src"
+            + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + "replace";
+    RegexMountPointInterceptor interceptor =
+        RegexMountPointInterceptorFactory.create(replaceInterceptorStr);
+    Assert.assertNull(interceptor);
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPointResolvedDstPathReplaceInterceptor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPointResolvedDstPathReplaceInterceptor.java
new file mode 100644
index 0000000000000..9fdf0f6ac9c5c
--- /dev/null
+++
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPointResolvedDstPathReplaceInterceptor.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.hadoop.fs.viewfs.RegexMountPointInterceptorType.REPLACE_RESOLVED_DST_PATH;
+
+/**
+ * Test RegexMountPointResolvedDstPathReplaceInterceptor.
+ */
+public class TestRegexMountPointResolvedDstPathReplaceInterceptor {
+
+  /**
+   * Build the serialized form the tests expect: the interceptor's config
+   * name, the regex and the replacement, joined by the internal separator.
+   *
+   * @param regex source regex.
+   * @param replaceString replacement string.
+   * @return the expected serialized interceptor string.
+   */
+  public String createSerializedString(String regex, String replaceString) {
+    return REPLACE_RESOLVED_DST_PATH.getConfigName()
+        + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + regex
+        + RegexMountPoint.INTERCEPTOR_INTERNAL_SEP + replaceString;
+  }
+
+  /**
+   * Deserializing a well-formed string restores regex and replacement; the
+   * compiled pattern is only available after initialize().
+   */
+  @Test
+  public void testDeserializeFromStringNormalCase() throws IOException {
+    String srcRegex = "-";
+    String replaceString = "_";
+    String serializedString = createSerializedString(srcRegex, replaceString);
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        RegexMountPointResolvedDstPathReplaceInterceptor
+            .deserializeFromString(serializedString);
+    Assert.assertEquals(srcRegex, interceptor.getSrcRegexString());
+    Assert.assertEquals(replaceString, interceptor.getReplaceString());
+    Assert.assertNull(interceptor.getSrcRegexPattern());
+    interceptor.initialize();
+    Assert.assertEquals(srcRegex,
+        interceptor.getSrcRegexPattern().toString());
+  }
+
+  /**
+   * A serialized string with an extra ":ddd" field must deserialize to null.
+   */
+  @Test
+  public void testDeserializeFromStringBadCase() throws IOException {
+    String srcRegex = "-";
+    String replaceString = "_";
+    String serializedString = createSerializedString(srcRegex, replaceString);
+    serializedString = serializedString + ":ddd";
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        RegexMountPointResolvedDstPathReplaceInterceptor
+            .deserializeFromString(serializedString);
+    Assert.assertNull(interceptor);
+  }
+
+  /**
+   * serializeToString() must produce exactly the expected serialized form.
+   */
+  @Test
+  public void testSerialization() {
+    String srcRegex = "word1";
+    String replaceString = "word2";
+    String serializedString = createSerializedString(srcRegex, replaceString);
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        new RegexMountPointResolvedDstPathReplaceInterceptor(srcRegex,
+            replaceString);
+    Assert.assertEquals(serializedString, interceptor.serializeToString());
+  }
+
+  /**
+   * interceptSource() must return the source path unchanged.
+   */
+  @Test
+  public void testInterceptSource() {
+    String srcRegex = "word1";
+    String replaceString = "word2";
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        new RegexMountPointResolvedDstPathReplaceInterceptor(srcRegex,
+            replaceString);
+    String sourcePath = "/a/b/l3/dd";
+    Assert.assertEquals(sourcePath, interceptor.interceptSource(sourcePath));
+  }
+
+  /**
+   * After initialize(), the resolved destination path has its regex matches
+   * replaced: "hadoop" becomes "hdfs".
+   */
+  @Test
+  public void testInterceptResolve() throws IOException {
+    String pathAfterResolution = "/user-hadoop";
+
+    String srcRegex = "hadoop";
+    String replaceString = "hdfs";
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        new RegexMountPointResolvedDstPathReplaceInterceptor(srcRegex,
+            replaceString);
+    interceptor.initialize();
+    Assert.assertEquals("/user-hdfs",
+        interceptor.interceptResolvedDestPathStr(pathAfterResolution));
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFSOverloadSchemeCentralMountTableConfig.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFSOverloadSchemeCentralMountTableConfig.java
new file mode 100644
index 0000000000000..1527e3c1f30d8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFSOverloadSchemeCentralMountTableConfig.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.junit.Before;
+
+/**
+ * Test the TestViewFSOverloadSchemeCentralMountTableConfig with mount-table
+ * configuration files in configured fs location.
+ */
+public class TestViewFSOverloadSchemeCentralMountTableConfig
+    extends TestViewFileSystemOverloadSchemeLocalFileSystem {
+  private Path oldMountTablePath;
+  private Path latestMountTablepath;
+
+  /**
+   * Run the parent setup, then create two mount-table files in the test
+   * root and point the mount-table path configuration at that directory.
+   */
+  @Before
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    // Mount table name format: mount-table.<version>.xml
+    String mountTableFileName1 = "mount-table.1.xml";
+    String mountTableFileName2 = "mount-table.2.xml";
+    oldMountTablePath =
+        new Path(getTestRoot() + File.separator + mountTableFileName1);
+    latestMountTablepath =
+        new Path(getTestRoot() + File.separator + mountTableFileName2);
+    getConf().set(Constants.CONFIG_VIEWFS_MOUNTTABLE_PATH,
+        getTestRoot().toString());
+    File f = new File(oldMountTablePath.toUri());
+    f.createNewFile(); // Just creating empty mount-table file.
+    File f2 = new File(latestMountTablepath.toUri());
+    latestMountTablepath = new Path(f2.toURI());
+    f2.createNewFile();
+  }
+
+  /**
+   * This method saves the mount links in local files instead of in the
+   * passed configuration: the old-version file gets content that is not
+   * meant to be read, the latest-version file gets the actual links.
+   */
+  @Override
+  void addMountLinks(String mountTable, String[] sources, String[] targets,
+      Configuration conf) throws IOException, URISyntaxException {
+    // we don't use conf here, instead we use config paths to store links.
+    // Old-version mount-table file (mount-table.1.xml, see setUp()).
+    // NOTE(review): the comment below announces an invalid tag, yet the
+    // literals written here contain no markup at all - they look as if
+    // their tags were lost in this copy; confirm against the original file.
+    try (BufferedWriter out = new BufferedWriter(
+        new FileWriter(new File(oldMountTablePath.toUri())))) {
+      out.write("\n");
+      // Invalid tag. This file should not be read.
+      out.write("");
+      out.write("\n");
+      out.flush();
+    }
+    ViewFsTestSetup.addMountLinksToFile(mountTable, sources, targets,
+        latestMountTablepath, conf);
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemDelegation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemDelegation.java
index d8c39f79d0454..3a60d6ecdda94 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemDelegation.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemDelegation.java
@@ -83,12 +83,6 @@ public void testSanity() throws URISyntaxException { assertEquals(new URI("fs2:/").getAuthority(), fs2.getUri().getAuthority()); } - @Test - public void testVerifyChecksum() throws Exception { - checkVerifyChecksum(false); - checkVerifyChecksum(true); - } - /** * Tests that ViewFileSystem dispatches calls for every ACL method through the * mount table to the correct underlying FileSystem with all Path arguments
@@ -144,12 +138,6 @@ public void testAclMethods() throws Exception { verify(mockFs2).getAclStatus(mockFsPath2); } - void checkVerifyChecksum(boolean flag) { - viewFs.setVerifyChecksum(flag); - assertEquals(flag, fs1.getVerifyChecksum()); -
assertEquals(flag, fs2.getVerifyChecksum()); - } - static class FakeFileSystem extends LocalFileSystem { boolean verifyChecksum = true; URI uri; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemLocalFileSystem.java index 808d8b06c35ba..adc5db87e7725 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemLocalFileSystem.java @@ -33,6 +33,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.fs.FileSystem.TRASH_PREFIX; +import org.apache.hadoop.security.UserGroupInformation; import org.junit.After; import org.junit.Before; @@ -61,6 +63,13 @@ public void setUp() throws Exception { } + @Override + Path getTrashRootInFallBackFS() throws IOException { + return new Path( + "/" + TRASH_PREFIX + "/" + UserGroupInformation.getCurrentUser() + .getShortUserName()); + } + @Test public void testNflyWriteSimple() throws IOException { LOG.info("Starting testNflyWriteSimple"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeLocalFileSystem.java new file mode 100644 index 0000000000000..ac7a1a6899425 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeLocalFileSystem.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.FsConstants; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * + * Test the TestViewFileSystemOverloadSchemeLF using a file with authority: + * file://mountTableName/ i.e, the authority is used to load a mount table. 
+ */ +public class TestViewFileSystemOverloadSchemeLocalFileSystem { + private static final String FILE = "file"; + private static final Log LOG = + LogFactory.getLog(TestViewFileSystemOverloadSchemeLocalFileSystem.class); + private FileSystem fsTarget; + private Configuration conf; + private Path targetTestRoot; + private FileSystemTestHelper fileSystemTestHelper = + new FileSystemTestHelper(); + + @Before + public void setUp() throws Exception { + conf = new Configuration(); + conf.set(String.format("fs.%s.impl", FILE), + ViewFileSystemOverloadScheme.class.getName()); + conf.set(String.format( + FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, FILE), + LocalFileSystem.class.getName()); + fsTarget = new LocalFileSystem(); + fsTarget.initialize(new URI("file:///"), conf); + // create the test root on local_fs + targetTestRoot = fileSystemTestHelper.getAbsoluteTestRootPath(fsTarget); + fsTarget.delete(targetTestRoot, true); + fsTarget.mkdirs(targetTestRoot); + } + + /** + * Adds the given mount links to config. sources contains mount link src and + * the respective index location in targets contains the target uri. + */ + void addMountLinks(String mountTable, String[] sources, String[] targets, + Configuration config) throws IOException, URISyntaxException { + ViewFsTestSetup.addMountLinksToConf(mountTable, sources, targets, config); + } + + /** + * Tests write file and read file with ViewFileSystemOverloadScheme. 
+ */ + @Test + public void testLocalTargetLinkWriteSimple() + throws IOException, URISyntaxException { + LOG.info("Starting testLocalTargetLinkWriteSimple"); + final String testString = "Hello Local!..."; + final Path lfsRoot = new Path("/lfsRoot"); + addMountLinks(null, new String[] {lfsRoot.toString() }, + new String[] {targetTestRoot + "/local" }, conf); + try (FileSystem lViewFs = FileSystem.get(URI.create("file:///"), conf)) { + final Path testPath = new Path(lfsRoot, "test.txt"); + try (FSDataOutputStream fsDos = lViewFs.create(testPath)) { + fsDos.writeUTF(testString); + } + + try (FSDataInputStream lViewIs = lViewFs.open(testPath)) { + Assert.assertEquals(testString, lViewIs.readUTF()); + } + } + } + + /** + * Tests create file and delete file with ViewFileSystemOverloadScheme. + */ + @Test + public void testLocalFsCreateAndDelete() throws Exception { + LOG.info("Starting testLocalFsCreateAndDelete"); + addMountLinks("mt", new String[] {"/lfsroot" }, + new String[] {targetTestRoot + "/wd2" }, conf); + final URI mountURI = URI.create("file://mt/"); + try (FileSystem lViewFS = FileSystem.get(mountURI, conf)) { + Path testPath = new Path(mountURI.toString() + "/lfsroot/test"); + lViewFS.createNewFile(testPath); + Assert.assertTrue(lViewFS.exists(testPath)); + lViewFS.delete(testPath, true); + Assert.assertFalse(lViewFS.exists(testPath)); + } + } + + /** + * Tests root level file with linkMergeSlash with + * ViewFileSystemOverloadScheme. 
+ */ + @Test + public void testLocalFsLinkSlashMerge() throws Exception { + LOG.info("Starting testLocalFsLinkSlashMerge"); + addMountLinks("mt", + new String[] {Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH }, + new String[] {targetTestRoot + "/wd2" }, conf); + final URI mountURI = URI.create("file://mt/"); + try (FileSystem lViewFS = FileSystem.get(mountURI, conf)) { + Path fileOnRoot = new Path(mountURI.toString() + "/NewFile"); + lViewFS.createNewFile(fileOnRoot); + Assert.assertTrue(lViewFS.exists(fileOnRoot)); + } + } + + /** + * Tests with linkMergeSlash and other mounts in + * ViewFileSystemOverloadScheme. + */ + @Test(expected = IOException.class) + public void testLocalFsLinkSlashMergeWithOtherMountLinks() throws Exception { + LOG.info("Starting testLocalFsLinkSlashMergeWithOtherMountLinks"); + addMountLinks("mt", + new String[] {"/lfsroot", Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH }, + new String[] {targetTestRoot + "/wd2", targetTestRoot + "/wd2" }, conf); + final URI mountURI = URI.create("file://mt/"); + FileSystem.get(mountURI, conf); + Assert.fail("A merge slash cannot be configured with other mount links."); + } + + @After + public void tearDown() throws Exception { + if (null != fsTarget) { + fsTarget.delete(fileSystemTestHelper.getTestRootPath(fsTarget), true); + fsTarget.close(); + } + } + + /** + * Returns the test root dir. + */ + public Path getTestRoot() { + return this.targetTestRoot; + } + + /** + * Returns the conf. 
+ */ + public Configuration getConf() { + return this.conf; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAuthorityLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAuthorityLocalFileSystem.java index 877c2228c1eea..9223338f34bf5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAuthorityLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAuthorityLocalFileSystem.java @@ -25,6 +25,9 @@ import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.fs.FileSystem.TRASH_PREFIX; +import org.apache.hadoop.security.UserGroupInformation; +import java.io.IOException; import org.junit.After; import org.junit.Assert; @@ -63,6 +66,13 @@ public void tearDown() throws Exception { super.tearDown(); } + @Override + Path getTrashRootInFallBackFS() throws IOException { + return new Path( + "/" + TRASH_PREFIX + "/" + UserGroupInformation.getCurrentUser() + .getShortUserName()); + } + @Override @Test public void testBasicPaths() { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsConfig.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsConfig.java index 136837fc801c4..7c318654ecf1c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsConfig.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsConfig.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.fs.viewfs; +import java.util.function.Function; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; @@ -39,10 +40,10 @@ public void 
testInvalidConfig() throws IOException, URISyntaxException { class Foo { } - new InodeTree(conf, null) { + new InodeTree(conf, null, null, false) { @Override - protected Foo getTargetFileSystem(final URI uri) { + protected Function initAndGetTargetFs() { return null; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsOverloadSchemeListStatus.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsOverloadSchemeListStatus.java new file mode 100644 index 0000000000000..7afc78981f6e3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsOverloadSchemeListStatus.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.FsConstants; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * ViewFsOverloadScheme ListStatus. + */ +public class TestViewFsOverloadSchemeListStatus { + + private static final File TEST_DIR = + GenericTestUtils.getTestDir(TestViewfsFileStatus.class.getSimpleName()); + private Configuration conf; + private static final String FILE_NAME = "file"; + + @Before + public void setUp() { + conf = new Configuration(); + conf.set(String.format("fs.%s.impl", FILE_NAME), + ViewFileSystemOverloadScheme.class.getName()); + conf.set(String + .format(FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, + FILE_NAME), LocalFileSystem.class.getName()); + FileUtil.fullyDelete(TEST_DIR); + assertTrue(TEST_DIR.mkdirs()); + } + + @After + public void tearDown() throws IOException { + FileUtil.fullyDelete(TEST_DIR); + } + + /** + * Tests the ACL and isDirectory returned from listStatus for directories and + * files. 
+ */ + @Test + public void testListStatusACL() throws IOException, URISyntaxException { + String testfilename = "testFileACL"; + String childDirectoryName = "testDirectoryACL"; + TEST_DIR.mkdirs(); + File infile = new File(TEST_DIR, testfilename); + final byte[] content = "dingos".getBytes(); + + try (FileOutputStream fos = new FileOutputStream(infile)) { + fos.write(content); + } + assertEquals(content.length, infile.length()); + File childDir = new File(TEST_DIR, childDirectoryName); + childDir.mkdirs(); + + ConfigUtil.addLink(conf, "/file", infile.toURI()); + ConfigUtil.addLink(conf, "/dir", childDir.toURI()); + + String fileUriStr = "file:///"; + try (FileSystem vfs = FileSystem.get(new URI(fileUriStr), conf)) { + assertEquals(ViewFileSystemOverloadScheme.class, vfs.getClass()); + FileStatus[] statuses = vfs.listStatus(new Path("/")); + + FileSystem localFs = ((ViewFileSystemOverloadScheme) vfs) + .getRawFileSystem(new Path(fileUriStr), conf); + FileStatus fileStat = localFs.getFileStatus(new Path(infile.getPath())); + FileStatus dirStat = localFs.getFileStatus(new Path(childDir.getPath())); + for (FileStatus status : statuses) { + if (status.getPath().getName().equals(FILE_NAME)) { + assertEquals(fileStat.getPermission(), status.getPermission()); + } else { + assertEquals(dirStat.getPermission(), status.getPermission()); + } + } + + localFs.setPermission(new Path(infile.getPath()), + FsPermission.valueOf("-rwxr--r--")); + localFs.setPermission(new Path(childDir.getPath()), + FsPermission.valueOf("-r--rwxr--")); + + statuses = vfs.listStatus(new Path("/")); + for (FileStatus status : statuses) { + if (status.getPath().getName().equals(FILE_NAME)) { + assertEquals(FsPermission.valueOf("-rwxr--r--"), + status.getPermission()); + assertFalse(status.isDirectory()); + } else { + assertEquals(FsPermission.valueOf("-r--rwxr--"), + status.getPermission()); + assertTrue(status.isDirectory()); + } + } + } + } + + /** + * Tests that ViewFSOverloadScheme should consider 
initialized fs as fallback + * if there are no mount links configured. It should add fallback with the + * chrootedFS at it's uri's root. + */ + @Test(timeout = 30000) + public void testViewFSOverloadSchemeWithoutAnyMountLinks() throws Exception { + Path initUri = new Path(TEST_DIR.toURI().toString(), "init"); + try (FileSystem fs = FileSystem.get(initUri.toUri(), conf)) { + ViewFileSystemOverloadScheme vfs = (ViewFileSystemOverloadScheme) fs; + assertEquals(0, vfs.getMountPoints().length); + Path testOnFallbackPath = new Path(TEST_DIR.toURI().toString(), "test"); + assertTrue(vfs.mkdirs(testOnFallbackPath)); + FileStatus[] status = vfs.listStatus(testOnFallbackPath.getParent()); + assertEquals(Path.getPathWithoutSchemeAndAuthority(testOnFallbackPath), + Path.getPathWithoutSchemeAndAuthority(status[0].getPath())); + //Check directly on localFS. The fallBackFs(localFS) should be chrooted + //at it's root. So, after + FileSystem lfs = vfs.getRawFileSystem(testOnFallbackPath, conf); + FileStatus[] statusOnLocalFS = + lfs.listStatus(testOnFallbackPath.getParent()); + assertEquals(testOnFallbackPath.getName(), + statusOnLocalFS[0].getPath().getName()); + //initUri should not have exist in lfs, as it would have chrooted on it's + // root only. 
+ assertFalse(lfs.exists(initUri)); + } + } + + @AfterClass + public static void cleanup() throws IOException { + FileUtil.fullyDelete(TEST_DIR); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsTrash.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsTrash.java index 94c3262eaae92..06cbdab8d210f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsTrash.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsTrash.java @@ -17,9 +17,8 @@ */ package org.apache.hadoop.fs.viewfs; - +import java.io.DataOutputStream; import java.io.IOException; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; @@ -27,39 +26,40 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.TestTrash; +import org.apache.hadoop.fs.Trash; +import org.apache.hadoop.fs.TrashPolicyDefault; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.*; +import static org.apache.hadoop.fs.viewfs.Constants.*; +import static org.junit.Assert.*; public class TestViewFsTrash { FileSystem fsTarget; // the target file system - the mount will point here FileSystem fsView; Configuration conf; - FileSystemTestHelper fileSystemTestHelper = new FileSystemTestHelper(); - - class TestLFS extends LocalFileSystem { - Path home; - TestLFS() throws IOException { - this(new Path(fileSystemTestHelper.getTestRootDir())); - } - TestLFS(Path home) throws IOException { - super(); - this.home = home; - } - @Override - public Path getHomeDirectory() { - return home; - } - } + private FileSystemTestHelper fileSystemTestHelper; @Before public void setUp() throws 
Exception { - fsTarget = FileSystem.getLocal(new Configuration()); - fsTarget.mkdirs(new Path(fileSystemTestHelper. - getTestRootPath(fsTarget), "dir1")); + Configuration targetFSConf = new Configuration(); + targetFSConf.setClass("fs.file.impl", TestTrash.TestLFS.class, FileSystem.class); + + fsTarget = FileSystem.getLocal(targetFSConf); + fileSystemTestHelper = new FileSystemTestHelper(fsTarget.getHomeDirectory().toUri().getPath()); + conf = ViewFileSystemTestSetup.createConfig(); fsView = ViewFileSystemTestSetup.setupForViewFileSystem(conf, fileSystemTestHelper, fsTarget); conf.set("fs.defaultFS", FsConstants.VIEWFS_URI.toString()); + + /* + * Need to set the fs.file.impl to TestViewFsTrash.TestLFS. Otherwise, it will load + * LocalFileSystem implementation which uses System.getProperty("user.home") for homeDirectory. + */ + conf.setClass("fs.file.impl", TestTrash.TestLFS.class, FileSystem.class); + } @After @@ -72,7 +72,39 @@ public void tearDown() throws Exception { @Test public void testTrash() throws Exception { TestTrash.trashShell(conf, fileSystemTestHelper.getTestRootPath(fsView), - fsTarget, new Path(fsTarget.getHomeDirectory(), ".Trash/Current")); + fsView, new Path(fileSystemTestHelper.getTestRootPath(fsView), ".Trash/Current")); + } + + @Test + public void testLocalizedTrashInMoveToAppropriateTrash() throws IOException { + Configuration conf2 = new Configuration(conf); + Path testFile = new Path("/data/testfile.txt"); + + // Enable moveToTrash and add a mount point for /data + conf2.setLong(FS_TRASH_INTERVAL_KEY, 1); + ConfigUtil.addLink(conf2, "/data", new Path(fileSystemTestHelper.getAbsoluteTestRootPath(fsTarget), "data").toUri()); + + // Default case. 
file should be moved to fsTarget.getTrashRoot()/resolvedPath + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, false); + try (FileSystem fsView2 = FileSystem.get(conf2)) { + FileSystemTestHelper.createFile(fsView2, testFile); + Path resolvedFile = fsView2.resolvePath(testFile); + + Trash.moveToAppropriateTrash(fsView2, testFile, conf2); + Trash trash = new Trash(fsTarget, conf2); + Path movedPath = Path.mergePaths(trash.getCurrentTrashDir(testFile), resolvedFile); + ContractTestUtils.assertPathExists(fsTarget, "File not in trash", movedPath); + } + + // Turn on localized trash. File should be moved to viewfs:/data/.Trash/{user}/Current. + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, true); + try (FileSystem fsView2 = FileSystem.get(conf2)) { + FileSystemTestHelper.createFile(fsView2, testFile); + + Trash.moveToAppropriateTrash(fsView2, testFile, conf2); + Trash trash = new Trash(fsView2, conf2); + Path movedPath = Path.mergePaths(trash.getCurrentTrashDir(testFile), testFile); + ContractTestUtils.assertPathExists(fsView2, "File not in localized trash", movedPath); + } } - } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAuthorityLocalFs.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAuthorityLocalFs.java index 2e498f2c0a023..fd5de72ed71ad 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAuthorityLocalFs.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAuthorityLocalFs.java @@ -48,10 +48,9 @@ public void setUp() throws Exception { fcTarget = FileContext.getLocalFSFileContext(); super.setUp(); // this sets up conf (and fcView which we replace) - // Now create a viewfs using a mount table called "default" - // hence viewfs://default/ + // Now create a viewfs using a mount table using the {MOUNT_TABLE_NAME} 
schemeWithAuthority = - new URI(FsConstants.VIEWFS_SCHEME, "default", "/", null, null); + new URI(FsConstants.VIEWFS_SCHEME, MOUNT_TABLE_NAME, "/", null, null); fcView = FileContext.getFileContext(schemeWithAuthority, conf); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewfsFileStatus.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewfsFileStatus.java index 0c31c8ed6a901..8ac447eb02e9b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewfsFileStatus.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestViewfsFileStatus.java @@ -29,10 +29,13 @@ import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; import org.junit.AfterClass; +import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; @@ -48,6 +51,17 @@ public class TestViewfsFileStatus { private static final File TEST_DIR = GenericTestUtils.getTestDir( TestViewfsFileStatus.class.getSimpleName()); + @Before + public void setUp() { + FileUtil.fullyDelete(TEST_DIR); + assertTrue(TEST_DIR.mkdirs()); + } + + @After + public void tearDown() throws IOException { + FileUtil.fullyDelete(TEST_DIR); + } + @Test public void testFileStatusSerialziation() throws IOException, URISyntaxException { @@ -56,38 +70,92 @@ public void testFileStatusSerialziation() File infile = new File(TEST_DIR, testfilename); final byte[] content = "dingos".getBytes(); - FileOutputStream fos = null; - try { - fos = new FileOutputStream(infile); + try (FileOutputStream fos = new FileOutputStream(infile)) { fos.write(content); - } finally { - if (fos != null) { - 
fos.close(); - } } assertEquals((long)content.length, infile.length()); Configuration conf = new Configuration(); ConfigUtil.addLink(conf, "/foo/bar/baz", TEST_DIR.toURI()); - FileSystem vfs = FileSystem.get(FsConstants.VIEWFS_URI, conf); - assertEquals(ViewFileSystem.class, vfs.getClass()); - Path path = new Path("/foo/bar/baz", testfilename); - FileStatus stat = vfs.getFileStatus(path); - assertEquals(content.length, stat.getLen()); - ContractTestUtils.assertNotErasureCoded(vfs, path); - assertTrue(path + " should have erasure coding unset in " + - "FileStatus#toString(): " + stat, - stat.toString().contains("isErasureCoded=false")); - - // check serialization/deserialization - DataOutputBuffer dob = new DataOutputBuffer(); - stat.write(dob); - DataInputBuffer dib = new DataInputBuffer(); - dib.reset(dob.getData(), 0, dob.getLength()); - FileStatus deSer = new FileStatus(); - deSer.readFields(dib); - assertEquals(content.length, deSer.getLen()); - assertFalse(deSer.isErasureCoded()); + try (FileSystem vfs = FileSystem.get(FsConstants.VIEWFS_URI, conf)) { + assertEquals(ViewFileSystem.class, vfs.getClass()); + Path path = new Path("/foo/bar/baz", testfilename); + FileStatus stat = vfs.getFileStatus(path); + assertEquals(content.length, stat.getLen()); + ContractTestUtils.assertNotErasureCoded(vfs, path); + assertTrue(path + " should have erasure coding unset in " + + "FileStatus#toString(): " + stat, + stat.toString().contains("isErasureCoded=false")); + + // check serialization/deserialization + DataOutputBuffer dob = new DataOutputBuffer(); + stat.write(dob); + DataInputBuffer dib = new DataInputBuffer(); + dib.reset(dob.getData(), 0, dob.getLength()); + FileStatus deSer = new FileStatus(); + deSer.readFields(dib); + assertEquals(content.length, deSer.getLen()); + assertFalse(deSer.isErasureCoded()); + } + } + + /** + * Tests the ACL returned from getFileStatus for directories and files. 
+ * @throws IOException + */ + @Test + public void testListStatusACL() throws IOException { + String testfilename = "testFileACL"; + String childDirectoryName = "testDirectoryACL"; + TEST_DIR.mkdirs(); + File infile = new File(TEST_DIR, testfilename); + final byte[] content = "dingos".getBytes(); + + try (FileOutputStream fos = new FileOutputStream(infile)) { + fos.write(content); + } + assertEquals(content.length, infile.length()); + File childDir = new File(TEST_DIR, childDirectoryName); + childDir.mkdirs(); + + Configuration conf = new Configuration(); + ConfigUtil.addLink(conf, "/file", infile.toURI()); + ConfigUtil.addLink(conf, "/dir", childDir.toURI()); + conf.setBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, false); + try (FileSystem vfs = FileSystem.get(FsConstants.VIEWFS_URI, conf)) { + assertEquals(ViewFileSystem.class, vfs.getClass()); + FileStatus[] statuses = vfs.listStatus(new Path("/")); + + FileSystem localFs = FileSystem.getLocal(conf); + FileStatus fileStat = localFs.getFileStatus(new Path(infile.getPath())); + FileStatus dirStat = localFs.getFileStatus(new Path(childDir.getPath())); + + for (FileStatus status : statuses) { + if (status.getPath().getName().equals("file")) { + assertEquals(fileStat.getPermission(), status.getPermission()); + } else { + assertEquals(dirStat.getPermission(), status.getPermission()); + } + } + + localFs.setPermission(new Path(infile.getPath()), + FsPermission.valueOf("-rwxr--r--")); + localFs.setPermission(new Path(childDir.getPath()), + FsPermission.valueOf("-r--rwxr--")); + + statuses = vfs.listStatus(new Path("/")); + for (FileStatus status : statuses) { + if (status.getPath().getName().equals("file")) { + assertEquals(FsPermission.valueOf("-rwxr--r--"), + status.getPermission()); + assertFalse(status.isDirectory()); + } else { + assertEquals(FsPermission.valueOf("-r--rwxr--"), + status.getPermission()); + assertTrue(status.isDirectory()); + } + } + } } // Tests that ViewFileSystem.getFileChecksum calls 
res.targetFileSystem @@ -97,8 +165,8 @@ public void testGetFileChecksum() throws IOException { final Path path = new Path("/tmp/someFile"); FileSystem mockFS = Mockito.mock(FileSystem.class); InodeTree.ResolveResult res = - new InodeTree.ResolveResult(null, mockFS , null, - new Path("someFile")); + new InodeTree.ResolveResult(null, mockFS, null, + new Path("someFile"), true); @SuppressWarnings("unchecked") InodeTree fsState = Mockito.mock(InodeTree.class); Mockito.when(fsState.resolve(path.toString(), true)).thenReturn(res); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java index 4902d733e954b..afa98b0de8e18 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.fs.viewfs; +import java.io.File; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; import java.security.PrivilegedExceptionAction; @@ -32,6 +34,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStoragePolicySpi; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; @@ -57,10 +61,15 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Assume; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.hadoop.fs.FileSystemTestHelper.*; +import static 
org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE; import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT; +import static org.apache.hadoop.fs.FileSystem.TRASH_PREFIX; import org.junit.After; import org.junit.Assert; @@ -109,6 +118,9 @@ protected FileSystemTestHelper createFileSystemHelper() { return new FileSystemTestHelper(); } + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + @Before public void setUp() throws Exception { initializeTargetTestRoot(); @@ -1091,6 +1103,176 @@ public void testTrashRoot() throws IOException { Assert.assertTrue("", fsView.getTrashRoots(true).size() > 0); } + // Default implementation of getTrashRoot for a fallback FS mounted at root: + // e.g., fallbackFS.uri.getPath = '/' + Path getTrashRootInFallBackFS() throws IOException { + return new Path(fsTarget.getHomeDirectory().toUri().getPath(), + TRASH_PREFIX); + } + + /** + * Test TRASH_FORCE_INSIDE_MOUNT_POINT feature for getTrashRoot. + */ + @Test + public void testTrashRootForceInsideMountPoint() throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + Configuration conf2 = new Configuration(conf); + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, true); + ConfigUtil.addLinkFallback(conf2, targetTestRoot.toUri()); + FileSystem fsView2 = FileSystem.get(FsConstants.VIEWFS_URI, conf2); + + // Case 1: path p in the /data mount point. + // Return a trash root within the /data mount point. + Path dataTestPath = new Path("/data/dir/file"); + Path dataTrashRoot = fsView2.makeQualified( + new Path("/data/" + TRASH_PREFIX + "/" + ugi.getShortUserName())); + Assert.assertEquals(dataTrashRoot, fsView2.getTrashRoot(dataTestPath)); + + // Case 2: path p not found in mount table. + // Return a trash root in fallback FS. 
+ Path nonExistentPath = new Path("/nonExistentDir/nonExistentFile"); + Path expectedTrash = + fsView2.makeQualified(getTrashRootInFallBackFS()); + Assert.assertEquals(expectedTrash, fsView2.getTrashRoot(nonExistentPath)); + + // Case 3: turn off the CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT flag. + // Return a trash root in user home dir. + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, false); + fsView2 = FileSystem.get(FsConstants.VIEWFS_URI, conf2); + Path targetFSUserHomeTrashRoot = fsTarget.makeQualified( + new Path(fsTarget.getHomeDirectory(), TRASH_PREFIX)); + Assert.assertEquals(targetFSUserHomeTrashRoot, + fsView2.getTrashRoot(dataTestPath)); + + // Case 4: viewFS without fallback. Expect exception for a nonExistent path + conf2 = new Configuration(conf); + fsView2 = FileSystem.get(FsConstants.VIEWFS_URI, conf2); + try { + fsView2.getTrashRoot(nonExistentPath); + } catch (NotInMountpointException ignored) { + } + } + + /** + * A mocked FileSystem which returns a deep trash dir. + */ + static class DeepTrashRootMockFS extends MockFileSystem { + public static final Path TRASH = + new Path("/vol/very/deep/deep/trash/dir/.Trash"); + + @Override + public Path getTrashRoot(Path path) { + return TRASH; + } + } + + /** + * Test getTrashRoot that is very deep inside a mount point. 
+ */ + @Test + public void testTrashRootDeepTrashDir() throws IOException { + + Configuration conf2 = ViewFileSystemTestSetup.createConfig(); + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, true); + conf2.setClass("fs.mocktrashfs.impl", DeepTrashRootMockFS.class, + FileSystem.class); + ConfigUtil.addLink(conf2, "/mnt/datavol1", + URI.create("mocktrashfs://localhost/vol")); + Path testPath = new Path("/mnt/datavol1/projs/proj"); + FileSystem fsView2 = FileSystem.get(FsConstants.VIEWFS_URI, conf2); + Path expectedTrash = fsView2.makeQualified( + new Path("/mnt/datavol1/very/deep/deep/trash/dir/.Trash")); + Assert.assertEquals(expectedTrash, fsView2.getTrashRoot(testPath)); + } + + /** + * Test getTrashRoots() for all users. + */ + @Test + public void testTrashRootsAllUsers() throws IOException { + Configuration conf2 = new Configuration(conf); + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, true); + FileSystem fsView2 = FileSystem.get(FsConstants.VIEWFS_URI, conf2); + + // Case 1: verify correct trash roots from fsView and fsView2 + int beforeTrashRootNum = fsView.getTrashRoots(true).size(); + int beforeTrashRootNum2 = fsView2.getTrashRoots(true).size(); + Assert.assertEquals(beforeTrashRootNum, beforeTrashRootNum2); + + fsView.mkdirs(new Path("/data/" + TRASH_PREFIX + "/user1")); + fsView.mkdirs(new Path("/data/" + TRASH_PREFIX + "/user2")); + fsView.mkdirs(new Path("/user/" + TRASH_PREFIX + "/user3")); + fsView.mkdirs(new Path("/user/" + TRASH_PREFIX + "/user4")); + fsView.mkdirs(new Path("/user2/" + TRASH_PREFIX + "/user5")); + int afterTrashRootsNum = fsView.getTrashRoots(true).size(); + int afterTrashRootsNum2 = fsView2.getTrashRoots(true).size(); + Assert.assertEquals(beforeTrashRootNum, afterTrashRootsNum); + Assert.assertEquals(beforeTrashRootNum2 + 5, afterTrashRootsNum2); + + // Case 2: per-user mount point + fsTarget.mkdirs(new Path(targetTestRoot, "Users/userA/.Trash/userA")); + Configuration conf3 = new 
Configuration(conf2); + ConfigUtil.addLink(conf3, "/Users/userA", + new Path(targetTestRoot, "Users/userA").toUri()); + FileSystem fsView3 = FileSystem.get(FsConstants.VIEWFS_URI, conf3); + int trashRootsNum3 = fsView3.getTrashRoots(true).size(); + Assert.assertEquals(afterTrashRootsNum2 + 1, trashRootsNum3); + + // Case 3: single /Users mount point for all users + fsTarget.mkdirs(new Path(targetTestRoot, "Users/.Trash/user1")); + fsTarget.mkdirs(new Path(targetTestRoot, "Users/.Trash/user2")); + Configuration conf4 = new Configuration(conf2); + ConfigUtil.addLink(conf4, "/Users", + new Path(targetTestRoot, "Users").toUri()); + FileSystem fsView4 = FileSystem.get(FsConstants.VIEWFS_URI, conf4); + int trashRootsNum4 = fsView4.getTrashRoots(true).size(); + Assert.assertEquals(afterTrashRootsNum2 + 2, trashRootsNum4); + + // Case 4: test trash roots in fallback FS + fsTarget.mkdirs(new Path(targetTestRoot, ".Trash/user10")); + fsTarget.mkdirs(new Path(targetTestRoot, ".Trash/user11")); + fsTarget.mkdirs(new Path(targetTestRoot, ".Trash/user12")); + Configuration conf5 = new Configuration(conf2); + ConfigUtil.addLinkFallback(conf5, targetTestRoot.toUri()); + FileSystem fsView5 = FileSystem.get(FsConstants.VIEWFS_URI, conf5); + int trashRootsNum5 = fsView5.getTrashRoots(true).size(); + Assert.assertEquals(afterTrashRootsNum2 + 3, trashRootsNum5); + } + + /** + * Test getTrashRoots() for current user. 
+ */ + @Test + public void testTrashRootsCurrentUser() throws IOException { + String currentUser = + UserGroupInformation.getCurrentUser().getShortUserName(); + Configuration conf2 = new Configuration(conf); + conf2.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, true); + FileSystem fsView2 = FileSystem.get(FsConstants.VIEWFS_URI, conf2); + + int beforeTrashRootNum = fsView.getTrashRoots(false).size(); + int beforeTrashRootNum2 = fsView2.getTrashRoots(false).size(); + Assert.assertEquals(beforeTrashRootNum, beforeTrashRootNum2); + + fsView.mkdirs(new Path("/data/" + TRASH_PREFIX + "/" + currentUser)); + fsView.mkdirs(new Path("/data/" + TRASH_PREFIX + "/user2")); + fsView.mkdirs(new Path("/user/" + TRASH_PREFIX + "/" + currentUser)); + fsView.mkdirs(new Path("/user/" + TRASH_PREFIX + "/user4")); + fsView.mkdirs(new Path("/user2/" + TRASH_PREFIX + "/user5")); + int afterTrashRootsNum = fsView.getTrashRoots(false).size(); + int afterTrashRootsNum2 = fsView2.getTrashRoots(false).size(); + Assert.assertEquals(beforeTrashRootNum, afterTrashRootsNum); + Assert.assertEquals(beforeTrashRootNum2 + 2, afterTrashRootsNum2); + + // Test trash roots in fallback FS + Configuration conf3 = new Configuration(conf2); + fsTarget.mkdirs(new Path(targetTestRoot, TRASH_PREFIX + "/" + currentUser)); + ConfigUtil.addLinkFallback(conf3, targetTestRoot.toUri()); + FileSystem fsView3 = FileSystem.get(FsConstants.VIEWFS_URI, conf3); + int trashRootsNum3 = fsView3.getTrashRoots(false).size(); + Assert.assertEquals(afterTrashRootsNum2 + 1, trashRootsNum3); + } + @Test(expected = NotInMountpointException.class) public void testViewFileSystemUtil() throws Exception { Configuration newConf = new Configuration(conf); @@ -1279,7 +1461,8 @@ public void testLinkTarget() throws Exception { @Test public void testViewFileSystemInnerCache() throws Exception { - ViewFileSystem.InnerCache cache = new ViewFileSystem.InnerCache(); + ViewFileSystem.InnerCache cache = + new 
ViewFileSystem.InnerCache(new FsGetter()); FileSystem fs = cache.get(fsTarget.getUri(), conf); // InnerCache caches filesystem. @@ -1344,6 +1527,8 @@ public void testChildrenFileSystemLeak() throws Exception { final int cacheSize = TestFileUtil.getCacheSize(); ViewFileSystem viewFs = (ViewFileSystem) FileSystem .get(new URI("viewfs://" + clusterName + "/"), config); + viewFs.resolvePath( + new Path(String.format("viewfs://%s/%s", clusterName, "/user"))); assertEquals(cacheSize + 1, TestFileUtil.getCacheSize()); viewFs.close(); assertEquals(cacheSize, TestFileUtil.getCacheSize()); @@ -1368,4 +1553,158 @@ public void testDeleteOnExit() throws Exception { viewFs.close(); assertFalse(fsTarget.exists(realTestPath)); } + + @Test + public void testGetContentSummary() throws IOException { + ContentSummary summaryBefore = + fsView.getContentSummary(new Path("/internalDir")); + String expected = "GET CONTENT SUMMARY"; + Path filePath = + new Path("/internalDir/internalDir2/linkToDir3", "foo"); + + try (FSDataOutputStream outputStream = fsView.create(filePath)) { + outputStream.write(expected.getBytes()); + } + + Path newDirPath = new Path("/internalDir/linkToDir2", "bar"); + fsView.mkdirs(newDirPath); + + ContentSummary summaryAfter = + fsView.getContentSummary(new Path("/internalDir")); + assertEquals("The file count didn't match", + summaryBefore.getFileCount() + 1, + summaryAfter.getFileCount()); + assertEquals("The size didn't match", + summaryBefore.getLength() + expected.length(), + summaryAfter.getLength()); + assertEquals("The directory count didn't match", + summaryBefore.getDirectoryCount() + 1, + summaryAfter.getDirectoryCount()); + } + + @Test + public void testGetContentSummaryWithFileInLocalFS() throws Exception { + ContentSummary summaryBefore = + fsView.getContentSummary(new Path("/internalDir")); + String expected = "GET CONTENT SUMMARY"; + File localFile = temporaryFolder.newFile("localFile"); + try (FileOutputStream fos = new FileOutputStream(localFile)) { 
+ fos.write(expected.getBytes()); + } + ConfigUtil.addLink(conf, + "/internalDir/internalDir2/linkToLocalFile", localFile.toURI()); + + try (FileSystem fs = FileSystem.get(FsConstants.VIEWFS_URI, conf)) { + ContentSummary summaryAfter = + fs.getContentSummary(new Path("/internalDir")); + assertEquals("The file count didn't match", + summaryBefore.getFileCount() + 1, + summaryAfter.getFileCount()); + assertEquals("The directory count didn't match", + summaryBefore.getLength() + expected.length(), + summaryAfter.getLength()); + } + } + + @Test + public void testTargetFileSystemLazyInitialization() throws Exception { + final String clusterName = "cluster" + new Random().nextInt(); + Configuration config = new Configuration(conf); + config.setBoolean(CONFIG_VIEWFS_ENABLE_INNER_CACHE, false); + config.setClass("fs.mockfs.impl", + TestChRootedFileSystem.MockFileSystem.class, FileSystem.class); + ConfigUtil.addLink(config, clusterName, "/user", + URI.create("mockfs://mockauth1/mockpath")); + ConfigUtil.addLink(config, clusterName, + "/mock", URI.create("mockfs://mockauth/mockpath")); + + final int cacheSize = TestFileUtil.getCacheSize(); + ViewFileSystem viewFs = (ViewFileSystem) FileSystem + .get(new URI("viewfs://" + clusterName + "/"), config); + + // As no inner file system instance has been initialized, + // cache size will remain the same + // cache is disabled for viewfs scheme, so the viewfs:// instance won't + // go in the cache even after the initialization + assertEquals(cacheSize, TestFileUtil.getCacheSize()); + + // This resolve path will initialize the file system corresponding + // to the mount table entry of the path "/user" + viewFs.resolvePath( + new Path(String.format("viewfs://%s/%s", clusterName, "/user"))); + + // Cache size will increase by 1. 
+ assertEquals(cacheSize + 1, TestFileUtil.getCacheSize()); + // This resolve path will initialize the file system corresponding + // to the mount table entry of the path "/mock" + viewFs.resolvePath(new Path(String.format("viewfs://%s/%s", clusterName, + "/mock"))); + // One more file system instance will get initialized. + assertEquals(cacheSize + 2, TestFileUtil.getCacheSize()); + viewFs.close(); + // Initialized FileSystem instances will not be removed from cache as + // viewfs inner cache is disabled + assertEquals(cacheSize + 2, TestFileUtil.getCacheSize()); + } + + @Test + public void testTargetFileSystemLazyInitializationForChecksumMethods() + throws Exception { + final String clusterName = "cluster" + new Random().nextInt(); + Configuration config = new Configuration(conf); + config.setBoolean(CONFIG_VIEWFS_ENABLE_INNER_CACHE, false); + config.setClass("fs.othermockfs.impl", + TestChRootedFileSystem.MockFileSystem.class, FileSystem.class); + ConfigUtil.addLink(config, clusterName, "/user", + URI.create("othermockfs://mockauth1/mockpath")); + ConfigUtil.addLink(config, clusterName, + "/mock", URI.create("othermockfs://mockauth/mockpath")); + + final int cacheSize = TestFileUtil.getCacheSize(); + ViewFileSystem viewFs = (ViewFileSystem) FileSystem.get( + new URI("viewfs://" + clusterName + "/"), config); + + // As no inner file system instance has been initialized, + // cache size will remain the same + // cache is disabled for viewfs scheme, so the viewfs:// instance won't + // go in the cache even after the initialization + assertEquals(cacheSize, TestFileUtil.getCacheSize()); + + // This is not going to initialize any filesystem instance + viewFs.setVerifyChecksum(true); + + // Cache size will remain the same + assertEquals(cacheSize, TestFileUtil.getCacheSize()); + + // This resolve path will initialize the file system corresponding + // to the mount table entry of the path "/user" + viewFs.getFileChecksum( + new Path(String.format("viewfs://%s/%s", 
clusterName, "/user"))); + + // Cache size will increase by 1. + assertEquals(cacheSize + 1, TestFileUtil.getCacheSize()); + + viewFs.close(); + // Initialized FileSystem instances will not be removed from cache as + // viewfs inner cache is disabled + assertEquals(cacheSize + 1, TestFileUtil.getCacheSize()); + } + + @Test + public void testInvalidMountPoints() throws Exception { + final String clusterName = "cluster" + new Random().nextInt(); + Configuration config = new Configuration(conf); + config.set(ConfigUtil.getConfigViewFsPrefix(clusterName) + "." + + Constants.CONFIG_VIEWFS_LINK + "." + "/invalidPath", + "othermockfs:|mockauth/mockpath"); + + try { + FileSystem viewFs = FileSystem.get( + new URI("viewfs://" + clusterName + "/"), config); + fail("FileSystem should not initialize. Should fail with IOException"); + } catch (IOException ex) { + assertTrue("Should get URISyntax Exception", + ex.getMessage().startsWith("URISyntax exception")); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java index d96cdb172b702..1d855ab442600 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java @@ -56,6 +56,7 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.FileContextTestHelper.fileType; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; @@ -69,6 +70,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import 
org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -95,6 +97,8 @@ *

      */ abstract public class ViewFsBaseTest { + protected static final String MOUNT_TABLE_NAME = "mycluster"; + FileContext fcView; // the view file system - the mounts are here FileContext fcTarget; // the target file system - the mount will point here Path targetTestRoot; @@ -128,6 +132,9 @@ public void setUp() throws Exception { // Set up the defaultMT in the config with our mount point links conf = new Configuration(); + conf.set( + Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE_NAME_KEY, + MOUNT_TABLE_NAME); ConfigUtil.addLink(conf, "/targetRoot", targetTestRoot.toUri()); ConfigUtil.addLink(conf, "/user", new Path(targetTestRoot,"user").toUri()); @@ -515,7 +522,7 @@ public void testListOnInternalDirsOfMountTable() throws IOException { Assert.assertTrue("A mount should appear as symlink", fs.isSymlink()); } - @Test + @Test(expected = FileNotFoundException.class) public void testFileStatusOnMountLink() throws IOException { Assert.assertTrue("Slash should appear as dir", fcView.getFileStatus(new Path("/")).isDirectory()); @@ -527,12 +534,7 @@ public void testFileStatusOnMountLink() throws IOException { checkFileStatus(fcView, "/internalDir/internalDir2/linkToDir3", fileType.isDir); checkFileStatus(fcView, "/linkToAFile", fileType.isFile); - try { - fcView.getFileStatus(new Path("/danglingLink")); - Assert.fail("Excepted a not found exception here"); - } catch ( FileNotFoundException e) { - // as excepted - } + fcView.getFileStatus(new Path("/danglingLink")); } @Test @@ -540,8 +542,8 @@ public void testGetFileChecksum() throws AccessControlException, UnresolvedLinkException, IOException, URISyntaxException { AbstractFileSystem mockAFS = mock(AbstractFileSystem.class); InodeTree.ResolveResult res = - new InodeTree.ResolveResult(null, mockAFS , null, - new Path("someFile")); + new InodeTree.ResolveResult(null, mockAFS, null, + new Path("someFile"), true); @SuppressWarnings("unchecked") InodeTree fsState = mock(InodeTree.class); when(fsState.resolve(anyString(), 
anyBoolean())).thenReturn(res); @@ -1001,4 +1003,23 @@ static AbstractFileSystem getMockFs(URI uri) { return mockFs; } } + + @Test + public void testListStatusWithNoGroups() throws Exception { + final UserGroupInformation userUgi = UserGroupInformation + .createUserForTesting("user@HADOOP.COM", new String[] {}); + userUgi.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + URI viewFsUri = new URI( + FsConstants.VIEWFS_SCHEME, MOUNT_TABLE_NAME, "/", null, null); + FileSystem vfs = FileSystem.get(viewFsUri, conf); + LambdaTestUtils.intercept(IOException.class, + "There is no primary group for UGI", () -> vfs + .listStatus(new Path(viewFsUri.toString() + "internalDir"))); + return null; + } + }); + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java index 9b7e17f4a601a..b2d7416aa7675 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java @@ -17,16 +17,21 @@ */ package org.apache.hadoop.fs.viewfs; +import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContextTestHelper; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.viewfs.ConfigUtil; +import org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme.ChildFsGetter; import org.apache.hadoop.util.Shell; import org.eclipse.jetty.util.log.Log; +import org.junit.Assert; /** @@ -132,4 +137,84 @@ static void linkUpFirstComponents(Configuration conf, String path, + 
firstComponent + "->" + linkTarget); } + /** + * Adds the given mount links to the given Hadoop compatible file system path. + * Mount link mappings are in sources, targets at their respective index + * locations. + */ + static void addMountLinksToFile(String mountTable, String[] sources, + String[] targets, Path mountTableConfPath, Configuration conf) + throws IOException, URISyntaxException { + ChildFsGetter cfs = new ViewFileSystemOverloadScheme.ChildFsGetter( + mountTableConfPath.toUri().getScheme()); + try (FileSystem fs = cfs.getNewInstance(mountTableConfPath.toUri(), + conf)) { + try (FSDataOutputStream out = fs.create(mountTableConfPath)) { + String prefix = + new StringBuilder(Constants.CONFIG_VIEWFS_PREFIX).append(".") + .append((mountTable == null + ? ConfigUtil.getDefaultMountTableName(conf) + : mountTable)) + .append(".").toString(); + out.writeBytes(""); + for (int i = 0; i < sources.length; i++) { + String src = sources[i]; + String target = targets[i]; + boolean isNfly = src.startsWith(Constants.CONFIG_VIEWFS_LINK_NFLY); + out.writeBytes(""); + if (isNfly) { + String[] srcParts = src.split("[.]"); + Assert.assertEquals("Invalid NFlyLink format", 3, srcParts.length); + String actualSrc = srcParts[srcParts.length - 1]; + String params = srcParts[srcParts.length - 2]; + out.writeBytes(prefix + Constants.CONFIG_VIEWFS_LINK_NFLY + "." + + params + "." + actualSrc); + } else if (Constants.CONFIG_VIEWFS_LINK_FALLBACK.equals(src)) { + out.writeBytes(prefix + Constants.CONFIG_VIEWFS_LINK_FALLBACK); + } else if (Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH.equals(src)) { + out.writeBytes(prefix + Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH); + } else { + out.writeBytes(prefix + Constants.CONFIG_VIEWFS_LINK + "." + src); + } + out.writeBytes(""); + out.writeBytes(""); + out.writeBytes(target); + out.writeBytes(""); + out.flush(); + } + out.writeBytes(("")); + out.flush(); + } + } + } + + /** + * Adds the given mount links to the configuration. 
Mount link mappings are + * in sources, targets at their respective index locations. + */ + public static void addMountLinksToConf(String mountTable, String[] sources, + String[] targets, Configuration config) throws URISyntaxException { + for (int i = 0; i < sources.length; i++) { + String src = sources[i]; + String target = targets[i]; + String mountTableName = mountTable == null ? + Constants.CONFIG_VIEWFS_DEFAULT_MOUNT_TABLE : mountTable; + boolean isNfly = src.startsWith(Constants.CONFIG_VIEWFS_LINK_NFLY); + if (isNfly) { + String[] srcParts = src.split("[.]"); + Assert.assertEquals("Invalid NFlyLink format", 3, srcParts.length); + String actualSrc = srcParts[srcParts.length - 1]; + String params = srcParts[srcParts.length - 2]; + ConfigUtil.addLinkNfly(config, mountTableName, actualSrc, params, + target); + } else if (src.equals(Constants.CONFIG_VIEWFS_LINK_FALLBACK)) { + ConfigUtil.addLinkFallback(config, mountTableName, new URI(target)); + } else if (src.equals(Constants.CONFIG_VIEWFS_LINK_MERGE_SLASH)) { + ConfigUtil.addLinkMergeSlash(config, mountTableName, new URI(target)); + } else { + ConfigUtil.addLink(config, mountTableName, src, new URI(target)); + } + } + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java index be6181157c0dd..2863a39f14226 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java @@ -41,7 +41,6 @@ import org.apache.zookeeper.ZKTestCase; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.server.ServerCnxnFactory; -import org.apache.zookeeper.server.ServerCnxnFactoryAccessor; import org.apache.zookeeper.server.ZKDatabase; import org.apache.zookeeper.server.ZooKeeperServer; import 
org.apache.zookeeper.server.persistence.FileTxnLog; @@ -51,7 +50,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Copy-paste of ClientBase from ZooKeeper, but without any of the @@ -60,10 +59,10 @@ * we run these tests with the upstream ClientBase. */ public abstract class ClientBaseWithFixes extends ZKTestCase { - protected static final Logger LOG = LoggerFactory.getLogger(ClientBaseWithFixes.class); + protected static final Logger LOG = LoggerFactory.getLogger(ClientBaseWithFixes.class); - public static int CONNECTION_TIMEOUT = 30000; - static final File BASETEST = GenericTestUtils.getTestDir(); + public static int CONNECTION_TIMEOUT = 30000; + static final File BASETEST = GenericTestUtils.getTestDir(); static { // The 4-letter-words commands are simple diagnostics telnet commands in @@ -74,411 +73,409 @@ public abstract class ClientBaseWithFixes extends ZKTestCase { System.setProperty("zookeeper.4lw.commands.whitelist", "*"); } - protected final String hostPort = initHostPort(); - protected int maxCnxns = 0; - protected ServerCnxnFactory serverFactory = null; - protected File tmpDir = null; + protected final String hostPort = initHostPort(); + protected int maxCnxns = 0; + protected ServerCnxnFactory serverFactory = null; + protected File tmpDir = null; + + long initialFdCount; - long initialFdCount; - - /** - * In general don't use this. Only use in the special case that you - * want to ignore results (for whatever reason) in your test. Don't - * use empty watchers in real code! - * - */ - protected class NullWatcher implements Watcher { - @Override - public void process(WatchedEvent event) { /* nada */ } - } - - protected static class CountdownWatcher implements Watcher { - // XXX this doesn't need to be volatile! 
(Should probably be final) - volatile CountDownLatch clientConnected; - volatile boolean connected; - protected ZooKeeper client; - - public void initializeWatchedClient(ZooKeeper zk) { - if (client != null) { - throw new RuntimeException("Watched Client was already set"); - } - client = zk; - } + /** + * In general don't use this. Only use in the special case that you + * want to ignore results (for whatever reason) in your test. Don't + * use empty watchers in real code! + * + */ + protected class NullWatcher implements Watcher { + @Override + public void process(WatchedEvent event) { /* nada */ } + } - public CountdownWatcher() { - reset(); - } - synchronized public void reset() { - clientConnected = new CountDownLatch(1); - connected = false; - } - @Override - synchronized public void process(WatchedEvent event) { - if (event.getState() == KeeperState.SyncConnected || - event.getState() == KeeperState.ConnectedReadOnly) { - connected = true; - notifyAll(); - clientConnected.countDown(); - } else { - connected = false; - notifyAll(); - } - } - synchronized boolean isConnected() { - return connected; - } - @VisibleForTesting - public synchronized void waitForConnected(long timeout) - throws InterruptedException, TimeoutException { - long expire = Time.now() + timeout; - long left = timeout; - while(!connected && left > 0) { - wait(left); - left = expire - Time.now(); - } - if (!connected) { - throw new TimeoutException("Did not connect"); - - } - } - @VisibleForTesting - public synchronized void waitForDisconnected(long timeout) - throws InterruptedException, TimeoutException { - long expire = Time.now() + timeout; - long left = timeout; - while(connected && left > 0) { - wait(left); - left = expire - Time.now(); - } - if (connected) { - throw new TimeoutException("Did not disconnect"); - - } - } + protected static class CountdownWatcher implements Watcher { + // XXX this doesn't need to be volatile! 
(Should probably be final) + volatile CountDownLatch clientConnected; + volatile boolean connected; + protected ZooKeeper client; + + public void initializeWatchedClient(ZooKeeper zk) { + if (client != null) { + throw new RuntimeException("Watched Client was already set"); + } + client = zk; } - protected TestableZooKeeper createClient() - throws IOException, InterruptedException - { - return createClient(hostPort); + public CountdownWatcher() { + reset(); } - - protected TestableZooKeeper createClient(String hp) - throws IOException, InterruptedException - { - CountdownWatcher watcher = new CountdownWatcher(); - return createClient(watcher, hp); + synchronized public void reset() { + clientConnected = new CountDownLatch(1); + connected = false; } - - private LinkedList allClients; - private boolean allClientsSetup = false; - - protected TestableZooKeeper createClient(CountdownWatcher watcher, String hp) - throws IOException, InterruptedException - { - return createClient(watcher, hp, CONNECTION_TIMEOUT); + @Override + synchronized public void process(WatchedEvent event) { + if (event.getState() == KeeperState.SyncConnected || + event.getState() == KeeperState.ConnectedReadOnly) { + connected = true; + notifyAll(); + clientConnected.countDown(); + } else { + connected = false; + notifyAll(); + } } - - protected TestableZooKeeper createClient(CountdownWatcher watcher, - String hp, int timeout) - throws IOException, InterruptedException - { - watcher.reset(); - TestableZooKeeper zk = new TestableZooKeeper(hp, timeout, watcher); - if (!watcher.clientConnected.await(timeout, TimeUnit.MILLISECONDS)) - { - Assert.fail("Unable to connect to server"); - } - synchronized(this) { - if (!allClientsSetup) { - LOG.error("allClients never setup"); - Assert.fail("allClients never setup"); - } - if (allClients != null) { - allClients.add(zk); - } else { - // test done - close the zk, not needed - zk.close(); - } - } - watcher.initializeWatchedClient(zk); - return zk; + 
synchronized boolean isConnected() { + return connected; } - - public static class HostPort { - String host; - int port; - public HostPort(String host, int port) { - this.host = host; - this.port = port; - } + @VisibleForTesting + public synchronized void waitForConnected(long timeout) + throws InterruptedException, TimeoutException { + long expire = Time.now() + timeout; + long left = timeout; + while(!connected && left > 0) { + wait(left); + left = expire - Time.now(); + } + if (!connected) { + throw new TimeoutException("Did not connect"); + + } } - public static List parseHostPortList(String hplist) { - ArrayList alist = new ArrayList(); - for (String hp: hplist.split(",")) { - int idx = hp.lastIndexOf(':'); - String host = hp.substring(0, idx); - int port; - try { - port = Integer.parseInt(hp.substring(idx + 1)); - } catch(RuntimeException e) { - throw new RuntimeException("Problem parsing " + hp + e.toString()); - } - alist.add(new HostPort(host,port)); - } - return alist; + @VisibleForTesting + public synchronized void waitForDisconnected(long timeout) + throws InterruptedException, TimeoutException { + long expire = Time.now() + timeout; + long left = timeout; + while(connected && left > 0) { + wait(left); + left = expire - Time.now(); + } + if (connected) { + throw new TimeoutException("Did not disconnect"); + + } } + } + + protected TestableZooKeeper createClient() + throws IOException, InterruptedException + { + return createClient(hostPort); + } + + protected TestableZooKeeper createClient(String hp) + throws IOException, InterruptedException + { + CountdownWatcher watcher = new CountdownWatcher(); + return createClient(watcher, hp); + } + + private LinkedList allClients; + private boolean allClientsSetup = false; - /** - * Send the 4letterword - * @param host the destination host - * @param port the destination port - * @param cmd the 4letterword - * @return - * @throws IOException - */ - public static String send4LetterWord(String host, int port, 
String cmd) - throws IOException + protected TestableZooKeeper createClient(CountdownWatcher watcher, String hp) + throws IOException, InterruptedException + { + return createClient(watcher, hp, CONNECTION_TIMEOUT); + } + + protected TestableZooKeeper createClient(CountdownWatcher watcher, + String hp, int timeout) + throws IOException, InterruptedException + { + watcher.reset(); + TestableZooKeeper zk = new TestableZooKeeper(hp, timeout, watcher); + if (!watcher.clientConnected.await(timeout, TimeUnit.MILLISECONDS)) { - LOG.info("connecting to " + host + " " + port); - Socket sock = new Socket(host, port); - BufferedReader reader = null; - try { - OutputStream outstream = sock.getOutputStream(); - outstream.write(cmd.getBytes()); - outstream.flush(); - // this replicates NC - close the output stream before reading - sock.shutdownOutput(); - - reader = - new BufferedReader( - new InputStreamReader(sock.getInputStream())); - StringBuilder sb = new StringBuilder(); - String line; - while((line = reader.readLine()) != null) { - sb.append(line + "\n"); - } - return sb.toString(); - } finally { - sock.close(); - if (reader != null) { - reader.close(); - } - } + Assert.fail("Unable to connect to server"); + } + synchronized(this) { + if (!allClientsSetup) { + LOG.error("allClients never setup"); + Assert.fail("allClients never setup"); + } + if (allClients != null) { + allClients.add(zk); + } else { + // test done - close the zk, not needed + zk.close(); + } } + watcher.initializeWatchedClient(zk); + return zk; + } - public static boolean waitForServerUp(String hp, long timeout) { - long start = Time.now(); - while (true) { - try { - // if there are multiple hostports, just take the first one - HostPort hpobj = parseHostPortList(hp).get(0); - String result = send4LetterWord(hpobj.host, hpobj.port, "stat"); - if (result.startsWith("Zookeeper version:") && - !result.contains("READ-ONLY")) { - return true; - } - } catch (IOException e) { - // ignore as this is expected - 
LOG.info("server " + hp + " not up " + e); - } - - if (Time.now() > start + timeout) { - break; - } - try { - Thread.sleep(250); - } catch (InterruptedException e) { - // ignore - } - } - return false; + public static class HostPort { + String host; + int port; + public HostPort(String host, int port) { + this.host = host; + this.port = port; } - public static boolean waitForServerDown(String hp, long timeout) { - long start = Time.now(); - while (true) { - try { - HostPort hpobj = parseHostPortList(hp).get(0); - send4LetterWord(hpobj.host, hpobj.port, "stat"); - } catch (IOException e) { - return true; - } - - if (Time.now() > start + timeout) { - break; - } - try { - Thread.sleep(250); - } catch (InterruptedException e) { - // ignore - } - } - return false; + } + public static List parseHostPortList(String hplist) { + ArrayList alist = new ArrayList(); + for (String hp: hplist.split(",")) { + int idx = hp.lastIndexOf(':'); + String host = hp.substring(0, idx); + int port; + try { + port = Integer.parseInt(hp.substring(idx + 1)); + } catch(RuntimeException e) { + throw new RuntimeException("Problem parsing " + hp + e.toString()); + } + alist.add(new HostPort(host,port)); } + return alist; + } - public static File createTmpDir() throws IOException { - return createTmpDir(BASETEST); - } - static File createTmpDir(File parentDir) throws IOException { - File tmpFile = File.createTempFile("test", ".junit", parentDir); - // don't delete tmpFile - this ensures we don't attempt to create - // a tmpDir with a duplicate name - File tmpDir = new File(tmpFile + ".dir"); - Assert.assertFalse(tmpDir.exists()); // never true if tmpfile does it's job - Assert.assertTrue(tmpDir.mkdirs()); - - return tmpDir; + /** + * Send the 4letterword + * @param host the destination host + * @param port the destination port + * @param cmd the 4letterword + * @return + * @throws IOException + */ + public static String send4LetterWord(String host, int port, String cmd) + throws IOException + { + 
LOG.info("connecting to " + host + " " + port); + Socket sock = new Socket(host, port); + BufferedReader reader = null; + try { + OutputStream outstream = sock.getOutputStream(); + outstream.write(cmd.getBytes()); + outstream.flush(); + // this replicates NC - close the output stream before reading + sock.shutdownOutput(); + + reader = + new BufferedReader( + new InputStreamReader(sock.getInputStream())); + StringBuilder sb = new StringBuilder(); + String line; + while((line = reader.readLine()) != null) { + sb.append(line + "\n"); + } + return sb.toString(); + } finally { + sock.close(); + if (reader != null) { + reader.close(); + } } + } - private static int getPort(String hostPort) { - String[] split = hostPort.split(":"); - String portstr = split[split.length-1]; - String[] pc = portstr.split("/"); - if (pc.length > 1) { - portstr = pc[0]; - } - return Integer.parseInt(portstr); + public static boolean waitForServerUp(String hp, long timeout) { + long start = Time.now(); + while (true) { + try { + // if there are multiple hostports, just take the first one + HostPort hpobj = parseHostPortList(hp).get(0); + String result = send4LetterWord(hpobj.host, hpobj.port, "stat"); + if (result.startsWith("Zookeeper version:") && + !result.contains("READ-ONLY")) { + return true; + } + } catch (IOException e) { + // ignore as this is expected + LOG.info("server " + hp + " not up " + e); + } + + if (Time.now() > start + timeout) { + break; + } + try { + Thread.sleep(250); + } catch (InterruptedException e) { + // ignore + } } + return false; + } + public static boolean waitForServerDown(String hp, long timeout) { + long start = Time.now(); + while (true) { + try { + HostPort hpobj = parseHostPortList(hp).get(0); + send4LetterWord(hpobj.host, hpobj.port, "stat"); + } catch (IOException e) { + return true; + } + + if (Time.now() > start + timeout) { + break; + } + try { + Thread.sleep(250); + } catch (InterruptedException e) { + // ignore + } + } + return false; + } - static 
ServerCnxnFactory createNewServerInstance(File dataDir, - ServerCnxnFactory factory, String hostPort, int maxCnxns) - throws IOException, InterruptedException - { - ZooKeeperServer zks = new ZooKeeperServer(dataDir, dataDir, 3000); - final int PORT = getPort(hostPort); - if (factory == null) { - factory = ServerCnxnFactory.createFactory(PORT, maxCnxns); - } - factory.startup(zks); - Assert.assertTrue("waiting for server up", - ClientBaseWithFixes.waitForServerUp("127.0.0.1:" + PORT, - CONNECTION_TIMEOUT)); + public static File createTmpDir() throws IOException { + return createTmpDir(BASETEST); + } + static File createTmpDir(File parentDir) throws IOException { + File tmpFile = File.createTempFile("test", ".junit", parentDir); + // don't delete tmpFile - this ensures we don't attempt to create + // a tmpDir with a duplicate name + File tmpDir = new File(tmpFile + ".dir"); + Assert.assertFalse(tmpDir.exists()); // never true if tmpfile does it's job + Assert.assertTrue(tmpDir.mkdirs()); + + return tmpDir; + } - return factory; + private static int getPort(String hostPort) { + String[] split = hostPort.split(":"); + String portstr = split[split.length-1]; + String[] pc = portstr.split("/"); + if (pc.length > 1) { + portstr = pc[0]; } + return Integer.parseInt(portstr); + } - static void shutdownServerInstance(ServerCnxnFactory factory, - String hostPort) - { - if (factory != null) { - ZKDatabase zkDb; - { - ZooKeeperServer zs = getServer(factory); - - zkDb = zs.getZKDatabase(); - } - factory.shutdown(); - try { - zkDb.close(); - } catch (IOException ie) { - LOG.warn("Error closing logs ", ie); - } - final int PORT = getPort(hostPort); - - Assert.assertTrue("waiting for server down", - ClientBaseWithFixes.waitForServerDown("127.0.0.1:" + PORT, - CONNECTION_TIMEOUT)); - } + static ServerCnxnFactory createNewServerInstance(File dataDir, + ServerCnxnFactory factory, String hostPort, int maxCnxns) + throws IOException, InterruptedException + { + ZooKeeperServer zks = new 
ZooKeeperServer(dataDir, dataDir, 3000); + final int PORT = getPort(hostPort); + if (factory == null) { + factory = ServerCnxnFactory.createFactory(PORT, maxCnxns); } + factory.startup(zks); + Assert.assertTrue("waiting for server up", + ClientBaseWithFixes.waitForServerUp("127.0.0.1:" + PORT, + CONNECTION_TIMEOUT)); - /** - * Test specific setup - */ - public static void setupTestEnv() { - // during the tests we run with 100K prealloc in the logs. - // on windows systems prealloc of 64M was seen to take ~15seconds - // resulting in test Assert.failure (client timeout on first session). - // set env and directly in order to handle static init/gc issues - System.setProperty("zookeeper.preAllocSize", "100"); - FileTxnLog.setPreallocSize(100 * 1024); - } + return factory; + } - protected void setUpAll() throws Exception { - allClients = new LinkedList(); - allClientsSetup = true; + static void shutdownServerInstance(ServerCnxnFactory factory, + String hostPort) + { + if (factory != null) { + ZKDatabase zkDb; + { + ZooKeeperServer zs = getServer(factory); + + zkDb = zs.getZKDatabase(); + } + factory.shutdown(); + try { + zkDb.close(); + } catch (IOException ie) { + LOG.warn("Error closing logs ", ie); + } + final int PORT = getPort(hostPort); + + Assert.assertTrue("waiting for server down", + ClientBaseWithFixes.waitForServerDown("127.0.0.1:" + PORT, + CONNECTION_TIMEOUT)); } + } - @Before - public void setUp() throws Exception { - BASETEST.mkdirs(); + /** + * Test specific setup + */ + public static void setupTestEnv() { + // during the tests we run with 100K prealloc in the logs. + // on windows systems prealloc of 64M was seen to take ~15seconds + // resulting in test Assert.failure (client timeout on first session). 
+ // set env and directly in order to handle static init/gc issues + System.setProperty("zookeeper.preAllocSize", "100"); + FileTxnLog.setPreallocSize(100 * 1024); + } - setupTestEnv(); + protected void setUpAll() throws Exception { + allClients = new LinkedList(); + allClientsSetup = true; + } - setUpAll(); + @Before + public void setUp() throws Exception { + BASETEST.mkdirs(); - tmpDir = createTmpDir(BASETEST); + setupTestEnv(); - startServer(); + setUpAll(); - LOG.info("Client test setup finished"); - } + tmpDir = createTmpDir(BASETEST); - private String initHostPort() { - BASETEST.mkdirs(); - int port = 0; - try { - port = ServerSocketUtil.getPort(port, 100); - } catch (IOException e) { - throw new RuntimeException(e); - } - return "127.0.0.1:" + port; - } + startServer(); - protected void startServer() throws Exception { - LOG.info("STARTING server"); - serverFactory = createNewServerInstance(tmpDir, serverFactory, hostPort, maxCnxns); - } + LOG.info("Client test setup finished"); + } - protected void stopServer() throws Exception { - LOG.info("STOPPING server"); - shutdownServerInstance(serverFactory, hostPort); - serverFactory = null; + private String initHostPort() { + BASETEST.mkdirs(); + int port = 0; + try { + port = ServerSocketUtil.getPort(port, 100); + } catch (IOException e) { + throw new RuntimeException(e); } + return "127.0.0.1:" + port; + } + protected void startServer() throws Exception { + LOG.info("STARTING server"); + serverFactory = createNewServerInstance(tmpDir, serverFactory, hostPort, maxCnxns); + } - protected static ZooKeeperServer getServer(ServerCnxnFactory fac) { - ZooKeeperServer zs = ServerCnxnFactoryAccessor.getZkServer(fac); + protected void stopServer() throws Exception { + LOG.info("STOPPING server"); + shutdownServerInstance(serverFactory, hostPort); + serverFactory = null; + } - return zs; - } - protected void tearDownAll() throws Exception { - synchronized (this) { - if (allClients != null) for (ZooKeeper zk : allClients) { 
- try { - if (zk != null) - zk.close(); - } catch (InterruptedException e) { - LOG.warn("ignoring interrupt", e); - } - } - allClients = null; + protected static ZooKeeperServer getServer(ServerCnxnFactory fac) { + return fac.getZooKeeperServer(); + } + + protected void tearDownAll() throws Exception { + synchronized (this) { + if (allClients != null) for (ZooKeeper zk : allClients) { + try { + if (zk != null) + zk.close(); + } catch (InterruptedException e) { + LOG.warn("ignoring interrupt", e); } + } + allClients = null; } + } - @After - public void tearDown() throws Exception { - LOG.info("tearDown starting"); - - tearDownAll(); + @After + public void tearDown() throws Exception { + LOG.info("tearDown starting"); - stopServer(); + tearDownAll(); - if (tmpDir != null) { - Assert.assertTrue("delete " + tmpDir.toString(), recursiveDelete(tmpDir)); - } + stopServer(); - // This has to be set to null when the same instance of this class is reused between test cases - serverFactory = null; + if (tmpDir != null) { + Assert.assertTrue("delete " + tmpDir.toString(), recursiveDelete(tmpDir)); } - public static boolean recursiveDelete(File d) { - if (d.isDirectory()) { - File children[] = d.listFiles(); - for (File f : children) { - Assert.assertTrue("delete " + f.toString(), recursiveDelete(f)); - } - } - return d.delete(); + // This has to be set to null when the same instance of this class is reused between test cases + serverFactory = null; + } + + public static boolean recursiveDelete(File d) { + if (d.isDirectory()) { + File children[] = d.listFiles(); + for (File f : children) { + Assert.assertTrue("delete " + f.toString(), recursiveDelete(f)); + } } + return d.delete(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java index 6505fbb8224f8..b5739f7935ed7 100644 --- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java @@ -28,14 +28,14 @@ import org.apache.hadoop.ha.protocolPB.HAServiceProtocolPB; import org.apache.hadoop.ha.protocolPB.HAServiceProtocolServerSideTranslatorPB; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceProtocolService; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.mockito.Mockito; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -119,7 +119,7 @@ private InetSocketAddress startAndGetRPCServerAddress(InetSocketAddress serverAd try { RPC.setProtocolEngine(conf, - HAServiceProtocolPB.class, ProtobufRpcEngine.class); + HAServiceProtocolPB.class, ProtobufRpcEngine2.class); HAServiceProtocolServerSideTranslatorPB haServiceProtocolXlator = new HAServiceProtocolServerSideTranslatorPB(new MockHAProtocolImpl()); BlockingService haPbService = HAServiceProtocolService diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java index f63d267f291c0..3c9713bf5fa1d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java @@ -37,8 +37,8 @@ import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.server.ZooKeeperServer; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Ints; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java index bbef9ef65b326..badd5afc5e91b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java @@ -37,7 +37,7 @@ import org.mockito.AdditionalMatchers; import org.mockito.Mockito; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.slf4j.event.Level; /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java index 791aaad59e990..3f027fa1c598a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -177,7 +177,7 @@ public void testFailoverFromFaultyServiceSucceeds() throws Exception { } // svc1 still thinks it's active, that's OK, it was fenced - assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals(2, AlwaysSucceedFencer.fenceCalled); assertSame(svc1, AlwaysSucceedFencer.fencedSvc); assertEquals(HAServiceState.ACTIVE, svc1.state); assertEquals(HAServiceState.ACTIVE, svc2.state); @@ -201,7 +201,7 @@ public void testFailoverFromFaultyServiceFencingFailure() throws Exception { } assertEquals(1, AlwaysFailFencer.fenceCalled); - assertSame(svc1, AlwaysFailFencer.fencedSvc); + 
assertSame(svc2, AlwaysFailFencer.fencedSvc); assertEquals(HAServiceState.ACTIVE, svc1.state); assertEquals(HAServiceState.STANDBY, svc2.state); } @@ -223,7 +223,7 @@ public void testFencingFailureDuringFailover() throws Exception { // If fencing was requested and it failed we don't try to make // svc2 active anyway, and we don't failback to svc1. assertEquals(1, AlwaysFailFencer.fenceCalled); - assertSame(svc1, AlwaysFailFencer.fencedSvc); + assertSame(svc2, AlwaysFailFencer.fencedSvc); assertEquals(HAServiceState.STANDBY, svc1.state); assertEquals(HAServiceState.STANDBY, svc2.state); } @@ -344,7 +344,7 @@ public void testWeFenceOnFailbackIfTransitionToActiveFails() throws Exception { // and we didn't force it, so we failed back to svc1 and fenced svc2. // Note svc2 still thinks it's active, that's OK, we fenced it. assertEquals(HAServiceState.ACTIVE, svc1.state); - assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals(2, AlwaysSucceedFencer.fenceCalled); assertSame(svc2, AlwaysSucceedFencer.fencedSvc); } @@ -373,7 +373,7 @@ public void testFailureToFenceOnFailbackFailsTheFailback() throws Exception { // so we did not failback to svc1, ie it's still standby. 
assertEquals(HAServiceState.STANDBY, svc1.state); assertEquals(1, AlwaysFailFencer.fenceCalled); - assertSame(svc2, AlwaysFailFencer.fencedSvc); + assertSame(svc1, AlwaysFailFencer.fencedSvc); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index 63b9c63646d8b..13f7eccd55aea 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -30,8 +30,8 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.base.Charsets; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java index b929bcb2490b0..972113eefa91f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java @@ -29,7 +29,7 @@ import org.junit.Test; import org.mockito.Mockito; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; public class TestNodeFencer { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java index 3a2cf052a60a8..dcff9e30cdba2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java @@ -23,7 +23,7 @@ import java.net.InetSocketAddress; import java.util.List; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.util.Shell; @@ -163,6 +163,37 @@ public void testTargetAsEnvironment() { } } + /** + * Test if fencing target has peer set, the failover can trigger different + * commands on source and destination respectively. + */ + @Test + public void testEnvironmentWithPeer() { + HAServiceTarget target = new DummyHAService(HAServiceState.ACTIVE, + new InetSocketAddress("dummytarget", 1111)); + HAServiceTarget source = new DummyHAService(HAServiceState.STANDBY, + new InetSocketAddress("dummysource", 2222)); + target.setTransitionTargetHAStatus(HAServiceState.ACTIVE); + source.setTransitionTargetHAStatus(HAServiceState.STANDBY); + String cmd = "echo $target_host $target_port," + + "echo $source_host $source_port"; + if (!Shell.WINDOWS) { + fencer.tryFence(target, cmd); + Mockito.verify(ShellCommandFencer.LOG).info( + Mockito.contains("echo $ta...rget_port: dummytarget 1111")); + fencer.tryFence(source, cmd); + Mockito.verify(ShellCommandFencer.LOG).info( + Mockito.contains("echo $so...urce_port: dummysource 2222")); + } else { + fencer.tryFence(target, cmd); + Mockito.verify(ShellCommandFencer.LOG).info( + Mockito.contains("echo %ta...get_port%: dummytarget 1111")); + fencer.tryFence(source, cmd); + Mockito.verify(ShellCommandFencer.LOG).info( + Mockito.contains("echo %so...urce_port%: dummysource 2222")); + } + } + /** * Test that we properly close off our input to the subprocess diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java index cc1174b2d2c72..e0169f5fc70d3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java @@ -21,7 +21,7 @@ import java.security.NoSuchAlgorithmException; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java index bdbf1d9c2c286..4dcc74b86d151 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java @@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.util.Time; +import org.apache.zookeeper.server.ServerCnxn; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -131,7 +132,7 @@ public void testRandomHealthAndDisconnects() throws Exception { long st = Time.now(); while (Time.now() - st < runFor) { cluster.getTestContext().checkException(); - serverFactory.closeAll(); + serverFactory.closeAll(ServerCnxn.DisconnectReason.SERVER_SHUTDOWN); Thread.sleep(50); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServerConfigs.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServerConfigs.java index 
e88eba342874c..039fae0195730 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServerConfigs.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServerConfigs.java @@ -18,7 +18,7 @@ package org.apache.hadoop.http; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import java.io.File; import java.io.IOException; import java.net.URI; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestDefaultStringifier.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestDefaultStringifier.java index b70e011f6aa13..c15ec8caa4f6c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestDefaultStringifier.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestDefaultStringifier.java @@ -26,6 +26,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.assertEquals; public class TestDefaultStringifier { @@ -98,7 +99,7 @@ public void testStoreLoad() throws IOException { } @Test - public void testStoreLoadArray() throws IOException { + public void testStoreLoadArray() throws Exception { LOG.info("Testing DefaultStringifier#storeArray() and #loadArray()"); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization"); @@ -107,6 +108,8 @@ public void testStoreLoadArray() throws IOException { Integer[] array = new Integer[] {1,2,3,4,5}; + intercept(IndexOutOfBoundsException.class, () -> + DefaultStringifier.storeArray(conf, new Integer[] {}, keyName)); DefaultStringifier.storeArray(conf, array, keyName); Integer[] claimedArray = DefaultStringifier.loadArray(conf, keyName, Integer.class); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestMoreWeakReferencedElasticByteBufferPool.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestMoreWeakReferencedElasticByteBufferPool.java new file mode 100644 index 0000000000000..6ca380ef0e46b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestMoreWeakReferencedElasticByteBufferPool.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.nio.BufferOverflowException; +import java.nio.ByteBuffer; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.test.HadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Non parameterized tests for {@code WeakReferencedElasticByteBufferPool}. 
+ */ +public class TestMoreWeakReferencedElasticByteBufferPool + extends HadoopTestBase { + + @Test + public void testMixedBuffersInPool() { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + ByteBuffer buffer1 = pool.getBuffer(true, 5); + ByteBuffer buffer2 = pool.getBuffer(true, 10); + ByteBuffer buffer3 = pool.getBuffer(false, 5); + ByteBuffer buffer4 = pool.getBuffer(false, 10); + ByteBuffer buffer5 = pool.getBuffer(true, 15); + + assertBufferCounts(pool, 0, 0); + pool.putBuffer(buffer1); + pool.putBuffer(buffer2); + assertBufferCounts(pool, 2, 0); + pool.putBuffer(buffer3); + assertBufferCounts(pool, 2, 1); + pool.putBuffer(buffer5); + assertBufferCounts(pool, 3, 1); + pool.putBuffer(buffer4); + assertBufferCounts(pool, 3, 2); + pool.release(); + assertBufferCounts(pool, 0, 0); + + } + + @Test + public void testUnexpectedBufferSizes() throws Exception { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + ByteBuffer buffer1 = pool.getBuffer(true, 0); + + // try writing a random byte in a 0 length buffer. + // Expected exception as buffer requested is of size 0. + intercept(BufferOverflowException.class, + () -> buffer1.put(new byte[1])); + + // Expected IllegalArgumentException as negative length buffer is requested. + intercept(IllegalArgumentException.class, + () -> pool.getBuffer(true, -5)); + + // test returning null buffer to the pool. + intercept(NullPointerException.class, + () -> pool.putBuffer(null)); + } + + /** + * Utility method to assert counts of direct and heap buffers in + * the given buffer pool. + * @param pool buffer pool. + * @param numDirectBuffersExpected expected number of direct buffers. + * @param numHeapBuffersExpected expected number of heap buffers. 
+ */ + private void assertBufferCounts(WeakReferencedElasticByteBufferPool pool, + int numDirectBuffersExpected, + int numHeapBuffersExpected) { + Assertions.assertThat(pool.getCurrentBuffersCount(true)) + .describedAs("Number of direct buffers in pool") + .isEqualTo(numDirectBuffersExpected); + Assertions.assertThat(pool.getCurrentBuffersCount(false)) + .describedAs("Number of heap buffers in pool") + .isEqualTo(numHeapBuffersExpected); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java index 044824356ed30..d0dc73bacddfa 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java @@ -26,10 +26,14 @@ import org.apache.hadoop.io.SequenceFile.Metadata; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.Serializer; import org.apache.hadoop.io.serializer.avro.AvroReflectSerialization; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.conf.*; +import org.assertj.core.api.Assertions; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -724,6 +728,147 @@ public void testSerializationAvailability() throws IOException { } } + @Test + public void testSequenceFileWriter() throws Exception { + Configuration conf = new Configuration(); + // This test only works with Raw File System and not Local File System + FileSystem fs = FileSystem.getLocal(conf).getRaw(); + Path p = new Path(GenericTestUtils + .getTempPath("testSequenceFileWriter.seq")); + 
try(SequenceFile.Writer writer = SequenceFile.createWriter( + fs, conf, p, LongWritable.class, Text.class)) { + Assertions.assertThat(writer.hasCapability + (StreamCapabilities.HSYNC)).isEqualTo(true); + Assertions.assertThat(writer.hasCapability( + StreamCapabilities.HFLUSH)).isEqualTo(true); + LongWritable key = new LongWritable(); + key.set(1); + Text value = new Text(); + value.set("somevalue"); + writer.append(key, value); + writer.flush(); + writer.hflush(); + writer.hsync(); + Assertions.assertThat(fs.getFileStatus(p).getLen()).isGreaterThan(0); + } + } + + @Test + public void testSerializationUsingWritableNameAlias() throws IOException { + Configuration config = new Configuration(); + config.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, SimpleSerializer.class.getName()); + Path path = new Path(System.getProperty("test.build.data", "."), + "SerializationUsingWritableNameAlias"); + + // write with the original serializable class + SequenceFile.Writer writer = SequenceFile.createWriter( + config, + SequenceFile.Writer.file(path), + SequenceFile.Writer.keyClass(SimpleSerializable.class), + SequenceFile.Writer.valueClass(SimpleSerializable.class)); + + int max = 10; + try { + SimpleSerializable val = new SimpleSerializable(); + val.setId(-1); + for (int i = 0; i < max; i++) { + SimpleSerializable key = new SimpleSerializable(); + key.setId(i); + writer.append(key, val); + } + } finally { + writer.close(); + } + + // override name so it gets forced to the new serializable + WritableName.setName(AnotherSimpleSerializable.class, SimpleSerializable.class.getName()); + + // read and expect our new serializable, and all the correct values read + SequenceFile.Reader reader = new SequenceFile.Reader( + config, + SequenceFile.Reader.file(path)); + + AnotherSimpleSerializable key = new AnotherSimpleSerializable(); + int count = 0; + while (true) { + key = (AnotherSimpleSerializable) reader.next(key); + if (key == null) { + // make sure we exhausted all the ints we 
wrote + assertEquals(count, max); + break; + } + assertEquals(count++, key.getId()); + } + } + + public static class SimpleSerializable implements Serializable { + + private int id; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + } + + public static class AnotherSimpleSerializable extends SimpleSerializable { + } + + public static class SimpleSerializer implements Serialization { + + @Override + public boolean accept(Class c) { + return SimpleSerializable.class.isAssignableFrom(c); + } + + @Override + public Serializer getSerializer(Class c) { + return new Serializer() { + private DataOutputStream out; + @Override + public void open(OutputStream out) throws IOException { + this.out = new DataOutputStream(out); + } + + @Override + public void serialize(SimpleSerializable simpleSerializable) throws IOException { + out.writeInt(simpleSerializable.getId()); + } + + @Override + public void close() throws IOException { + out.close(); + } + }; + } + + @Override + public Deserializer getDeserializer(Class c) { + return new Deserializer() { + private DataInputStream dis; + @Override + public void open(InputStream in) throws IOException { + dis = new DataInputStream(in); + } + + @Override + public SimpleSerializable deserialize(SimpleSerializable simpleSerializable) + throws IOException { + simpleSerializable.setId(dis.readInt()); + return simpleSerializable; + } + + @Override + public void close() throws IOException { + dis.close(); + } + }; + } + } + /** For debugging and testing. 
*/ public static void main(String[] args) throws Exception { int count = 1024 * 1024; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java index 59856a4de11f9..0fb20acf4c8e9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java @@ -23,8 +23,8 @@ import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.util.Random; -import com.google.common.base.Charsets; -import com.google.common.primitives.Bytes; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Bytes; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWeakReferencedElasticByteBufferPool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWeakReferencedElasticByteBufferPool.java new file mode 100644 index 0000000000000..1434010ffa652 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWeakReferencedElasticByteBufferPool.java @@ -0,0 +1,232 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.hadoop.test.HadoopTestBase; + +/** + * Unit tests for {@code WeakReferencedElasticByteBufferPool}. + */ +@RunWith(Parameterized.class) +public class TestWeakReferencedElasticByteBufferPool + extends HadoopTestBase { + + private final boolean isDirect; + + private final String type; + + @Parameterized.Parameters(name = "Buffer type : {0}") + public static List params() { + return Arrays.asList("direct", "array"); + } + + public TestWeakReferencedElasticByteBufferPool(String type) { + this.type = type; + this.isDirect = !"array".equals(type); + } + + @Test + public void testGetAndPutBasic() { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + int bufferSize = 5; + ByteBuffer buffer = pool.getBuffer(isDirect, bufferSize); + Assertions.assertThat(buffer.isDirect()) + .describedAs("Buffered returned should be of correct type {}", type) + .isEqualTo(isDirect); + Assertions.assertThat(buffer.capacity()) + .describedAs("Initial capacity of returned buffer from pool") + .isEqualTo(bufferSize); + Assertions.assertThat(buffer.position()) + .describedAs("Initial position of returned buffer from pool") + .isEqualTo(0); + + byte[] arr = createByteArray(bufferSize); + buffer.put(arr, 0, arr.length); + buffer.flip(); + 
validateBufferContent(buffer, arr); + Assertions.assertThat(buffer.position()) + .describedAs("Buffer's position after filling bytes in it") + .isEqualTo(bufferSize); + // releasing buffer to the pool. + pool.putBuffer(buffer); + Assertions.assertThat(buffer.position()) + .describedAs("Position should be reset to 0 after returning buffer to the pool") + .isEqualTo(0); + + } + + @Test + public void testPoolingWithDifferentSizes() { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + ByteBuffer buffer = pool.getBuffer(isDirect, 5); + ByteBuffer buffer1 = pool.getBuffer(isDirect, 10); + ByteBuffer buffer2 = pool.getBuffer(isDirect, 15); + + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(0); + + pool.putBuffer(buffer1); + pool.putBuffer(buffer2); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(2); + ByteBuffer buffer3 = pool.getBuffer(isDirect, 12); + Assertions.assertThat(buffer3.capacity()) + .describedAs("Pooled buffer should have older capacity") + .isEqualTo(15); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(1); + pool.putBuffer(buffer); + ByteBuffer buffer4 = pool.getBuffer(isDirect, 6); + Assertions.assertThat(buffer4.capacity()) + .describedAs("Pooled buffer should have older capacity") + .isEqualTo(10); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(1); + + pool.release(); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool post release") + .isEqualTo(0); + } + + @Test + public void testPoolingWithDifferentInsertionTime() { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + ByteBuffer buffer = pool.getBuffer(isDirect, 10); + 
ByteBuffer buffer1 = pool.getBuffer(isDirect, 10); + ByteBuffer buffer2 = pool.getBuffer(isDirect, 10); + + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(0); + + pool.putBuffer(buffer1); + pool.putBuffer(buffer2); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(2); + ByteBuffer buffer3 = pool.getBuffer(isDirect, 10); + // As buffer1 is returned to the pool before buffer2, it should + // be returned when buffer of same size is asked again from + // the pool. Memory references must match not just content + // that is why {@code Assertions.isSameAs} is used here rather + // than usual {@code Assertions.isEqualTo}. + Assertions.assertThat(buffer3) + .describedAs("Buffers should be returned in order of their " + + "insertion time") + .isSameAs(buffer1); + pool.putBuffer(buffer); + ByteBuffer buffer4 = pool.getBuffer(isDirect, 10); + Assertions.assertThat(buffer4) + .describedAs("Buffers should be returned in order of their " + + "insertion time") + .isSameAs(buffer2); + } + + @Test + public void testGarbageCollection() { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + ByteBuffer buffer = pool.getBuffer(isDirect, 5); + ByteBuffer buffer1 = pool.getBuffer(isDirect, 10); + ByteBuffer buffer2 = pool.getBuffer(isDirect, 15); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(0); + pool.putBuffer(buffer1); + pool.putBuffer(buffer2); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(2); + // Before GC. 
+ ByteBuffer buffer4 = pool.getBuffer(isDirect, 12); + Assertions.assertThat(buffer4.capacity()) + .describedAs("Pooled buffer should have older capacity") + .isEqualTo(15); + pool.putBuffer(buffer4); + // Removing the references + buffer1 = null; + buffer2 = null; + buffer4 = null; + System.gc(); + ByteBuffer buffer3 = pool.getBuffer(isDirect, 12); + Assertions.assertThat(buffer3.capacity()) + .describedAs("After garbage collection new buffer should be " + + "returned with fixed capacity") + .isEqualTo(12); + } + + @Test + public void testWeakReferencesPruning() { + WeakReferencedElasticByteBufferPool pool = new WeakReferencedElasticByteBufferPool(); + ByteBuffer buffer1 = pool.getBuffer(isDirect, 5); + ByteBuffer buffer2 = pool.getBuffer(isDirect, 10); + ByteBuffer buffer3 = pool.getBuffer(isDirect, 15); + + pool.putBuffer(buffer2); + pool.putBuffer(buffer3); + Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(2); + + // marking only buffer2 to be garbage collected. + buffer2 = null; + System.gc(); + ByteBuffer buffer4 = pool.getBuffer(isDirect, 10); + // Number of buffers in the pool is 0 as one got garbage + // collected and other got returned in above call. 
+ Assertions.assertThat(pool.getCurrentBuffersCount(isDirect)) + .describedAs("Number of buffers in the pool") + .isEqualTo(0); + Assertions.assertThat(buffer4.capacity()) + .describedAs("After gc, pool should return next greater than " + + "available buffer") + .isEqualTo(15); + + } + + private void validateBufferContent(ByteBuffer buffer, byte[] arr) { + for (int i=0; i { + + @Override + public boolean accept(Class c) { + return c.equals(SimpleSerializable.class); + } + + @Override + public Serializer getSerializer(Class c) { + return null; + } + + @Override + public Deserializer getDeserializer(Class c) { + return null; + } + } + private static final String testName = "mystring"; @Test @@ -95,7 +123,27 @@ public void testAddName() throws Exception { // check original name still works test = WritableName.getClass(testName, conf); assertTrue(test.equals(SimpleWritable.class)); + } + + @Test + public void testAddNameSerializable() throws Exception { + Configuration conf = new Configuration(); + conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, SimpleSerializer.class.getName()); + SerializationFactory serializationFactory = + new SerializationFactory(conf); + String altName = testName + ".alt"; + + WritableName.addName(SimpleSerializable.class, altName); + + Class test = WritableName.getClass(altName, conf); + assertEquals(test, SimpleSerializable.class); + assertNotNull(serializationFactory.getSerialization(test)); + + // check original name still works + test = WritableName.getClass(SimpleSerializable.class.getName(), conf); + assertEquals(test, SimpleSerializable.class); + assertNotNull(serializationFactory.getSerialization(test)); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 35f84b950e427..c016ff0378957 100644 --- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -39,10 +39,10 @@ import org.apache.log4j.Logger; import org.junit.Assert; -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; import static org.junit.Assert.*; public class CompressDecompressTester { @@ -79,27 +79,6 @@ public ImmutableList> filterOnAssumeWhat( }; } - private static boolean isNativeSnappyLoadable() { - boolean snappyAvailable = false; - boolean loaded = false; - try { - System.loadLibrary("snappy"); - logger.warn("Snappy native library is available"); - snappyAvailable = true; - boolean hadoopNativeAvailable = NativeCodeLoader.isNativeCodeLoaded(); - loaded = snappyAvailable && hadoopNativeAvailable; - if (loaded) { - logger.info("Snappy native library loaded"); - } else { - logger.warn("Snappy native library not loaded"); - } - } catch (Throwable t) { - logger.warn("Failed to load snappy: ", t); - return false; - } - return loaded; - } - public static CompressDecompressTester of( byte[] rawData) { return new CompressDecompressTester(rawData); @@ -126,7 +105,7 @@ private void addPair(T compressor, E decompressor, String name) { builder.add(new TesterPair(name, compressor, decompressor)); } - public void test() throws InstantiationException, IllegalAccessException { + public void test() throws Exception { pairs = builder.build(); pairs = assertionDelegate.filterOnAssumeWhat(pairs); @@ -287,47 +266,45 
@@ private boolean checkSetInputArrayIndexOutOfBoundsException( @Override public void assertCompression(String name, Compressor compressor, - Decompressor decompressor, byte[] rawData) { + Decompressor decompressor, byte[] rawData) throws Exception { int cSize = 0; int decompressedSize = 0; - byte[] compressedResult = new byte[rawData.length]; + // Snappy compression can increase data size + int maxCompressedLength = 32 + rawData.length + rawData.length/6; + byte[] compressedResult = new byte[maxCompressedLength]; byte[] decompressedBytes = new byte[rawData.length]; - try { - assertTrue( - joiner.join(name, "compressor.needsInput before error !!!"), - compressor.needsInput()); - assertTrue( + assertTrue( + joiner.join(name, "compressor.needsInput before error !!!"), + compressor.needsInput()); + assertEquals( joiner.join(name, "compressor.getBytesWritten before error !!!"), - compressor.getBytesWritten() == 0); - compressor.setInput(rawData, 0, rawData.length); - compressor.finish(); - while (!compressor.finished()) { - cSize += compressor.compress(compressedResult, 0, - compressedResult.length); - } - compressor.reset(); - - assertTrue( - joiner.join(name, "decompressor.needsInput() before error !!!"), - decompressor.needsInput()); - decompressor.setInput(compressedResult, 0, cSize); - assertFalse( - joiner.join(name, "decompressor.needsInput() after error !!!"), - decompressor.needsInput()); - while (!decompressor.finished()) { - decompressedSize = decompressor.decompress(decompressedBytes, 0, - decompressedBytes.length); - } - decompressor.reset(); - assertTrue(joiner.join(name, " byte size not equals error !!!"), - decompressedSize == rawData.length); - assertArrayEquals( - joiner.join(name, " byte arrays not equals error !!!"), rawData, - decompressedBytes); - } catch (Exception ex) { - fail(joiner.join(name, ex.getMessage())); + 0, compressor.getBytesWritten()); + compressor.setInput(rawData, 0, rawData.length); + compressor.finish(); + while 
(!compressor.finished()) { + cSize += compressor.compress(compressedResult, 0, + compressedResult.length); + } + compressor.reset(); + + assertTrue( + joiner.join(name, "decompressor.needsInput() before error !!!"), + decompressor.needsInput()); + decompressor.setInput(compressedResult, 0, cSize); + assertFalse( + joiner.join(name, "decompressor.needsInput() after error !!!"), + decompressor.needsInput()); + while (!decompressor.finished()) { + decompressedSize = decompressor.decompress(decompressedBytes, 0, + decompressedBytes.length); } + decompressor.reset(); + assertEquals(joiner.join(name, " byte size not equals error !!!"), + rawData.length, decompressedSize); + assertArrayEquals( + joiner.join(name, " byte arrays not equals error !!!"), rawData, + decompressedBytes); } }), @@ -434,7 +411,7 @@ public void assertCompression(String name, Compressor compressor, joiner.join(name, "byte arrays not equals error !!!"), originalRawData, decompressOut.toByteArray()); } catch (Exception ex) { - fail(joiner.join(name, ex.getMessage())); + throw new AssertionError(name + ex, ex); } finally { try { compressedOut.close(); @@ -496,8 +473,7 @@ public String getName() { private static boolean isAvailable(TesterPair pair) { Compressor compressor = pair.compressor; - if (compressor.getClass().isAssignableFrom(Lz4Compressor.class) - && (NativeCodeLoader.isNativeCodeLoaded())) + if (compressor.getClass().isAssignableFrom(Lz4Compressor.class)) return true; else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) @@ -506,11 +482,10 @@ else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { return ZlibFactory.isNativeZlibLoaded(new Configuration()); - } - else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class) - && isNativeSnappyLoadable()) + } else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)) { return true; - + } + return false; } @@ 
-519,6 +494,6 @@ abstract static class TesterCompressionStrategy { protected final Logger logger = Logger.getLogger(getClass()); abstract void assertCompression(String name, Compressor compressor, - Decompressor decompressor, byte[] originalRawData); + Decompressor decompressor, byte[] originalRawData) throws Exception; } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestBZip2Codec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestBZip2Codec.java new file mode 100644 index 0000000000000..9dd3215f90d5e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestBZip2Codec.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import org.apache.hadoop.thirdparty.com.google.common.primitives.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE; +import org.apache.hadoop.io.compress.bzip2.BZip2TextFileWriter; +import org.apache.hadoop.io.compress.bzip2.BZip2Utils; + +import static org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE.BYBLOCK; +import static org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE.CONTINUOUS; +import static org.apache.hadoop.io.compress.bzip2.BZip2TextFileWriter.BLOCK_SIZE; +import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.assertj.core.api.Assertions.assertThatNullPointerException; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatExceptionOfType; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public final class TestBZip2Codec { + + private static final long HEADER_LEN = 2; + + private Configuration conf; + private FileSystem fs; + private BZip2Codec codec; + private Decompressor decompressor; + private Path tempFile; + + @Before + public void setUp() throws Exception { + conf = new Configuration(); + + Path workDir = new Path(System.getProperty("test.build.data", "target"), + "data/" + getClass().getSimpleName()); + + Path inputDir = new Path(workDir, "input"); + tempFile = new Path(inputDir, "test.txt.bz2"); + + fs = workDir.getFileSystem(conf); + + codec = new BZip2Codec(); + codec.setConf(new Configuration(/* loadDefaults */ false)); + decompressor = 
CodecPool.getDecompressor(codec); + } + + @After + public void tearDown() throws Exception { + CodecPool.returnDecompressor(decompressor); + fs.delete(tempFile, /* recursive */ false); + } + + @Test + public void createInputStreamWithStartAndEnd() throws Exception { + byte[] data1 = newAlternatingByteArray(BLOCK_SIZE, 'a', 'b'); + byte[] data2 = newAlternatingByteArray(BLOCK_SIZE, 'c', 'd'); + byte[] data3 = newAlternatingByteArray(BLOCK_SIZE, 'e', 'f'); + + try (BZip2TextFileWriter writer = new BZip2TextFileWriter(tempFile, conf)) { + writer.write(data1); + writer.write(data2); + writer.write(data3); + } + long fileSize = fs.getFileStatus(tempFile).getLen(); + + List nextBlockOffsets = BZip2Utils.getNextBlockMarkerOffsets(tempFile, conf); + long block2Start = nextBlockOffsets.get(0); + long block3Start = nextBlockOffsets.get(1); + + try (SplitCompressionInputStream stream = newCompressionStream(tempFile, 0, fileSize, + BYBLOCK)) { + assertEquals(0, stream.getPos()); + assertCasesWhereReadDoesNotAdvanceStream(stream); + assertReadingAtPositionZero(stream, data1); + assertCasesWhereReadDoesNotAdvanceStream(stream); + assertReadingPastEndOfBlock(stream, block2Start, data2); + assertReadingPastEndOfBlock(stream, block3Start, data3); + assertEquals(-1, stream.read()); + } + + try (SplitCompressionInputStream stream = newCompressionStream(tempFile, 1, fileSize - 1, + BYBLOCK)) { + assertEquals(block2Start, stream.getPos()); + assertCasesWhereReadDoesNotAdvanceStream(stream); + assertReadingPastEndOfBlock(stream, block2Start, data2); + assertCasesWhereReadDoesNotAdvanceStream(stream); + assertReadingPastEndOfBlock(stream, block3Start, data3); + assertEquals(-1, stream.read()); + } + + // With continuous mode, only starting at or after the stream header is + // supported. 
+ byte[] allData = Bytes.concat(data1, data2, data3); + assertReadingWithContinuousMode(tempFile, 0, fileSize, allData); + assertReadingWithContinuousMode(tempFile, HEADER_LEN, fileSize - HEADER_LEN, allData); + } + + private void assertReadingWithContinuousMode(Path file, long start, long length, + byte[] expectedData) throws IOException { + try (SplitCompressionInputStream stream = newCompressionStream(file, start, length, + CONTINUOUS)) { + assertEquals(HEADER_LEN, stream.getPos()); + + assertRead(stream, expectedData); + assertEquals(-1, stream.read()); + + // When specifying CONTINUOUS read mode, the position ends up not being + // updated at all. + assertEquals(HEADER_LEN, stream.getPos()); + } + } + + private SplitCompressionInputStream newCompressionStream(Path file, long start, long length, + READ_MODE readMode) throws IOException { + FSDataInputStream rawIn = fs.open(file); + rawIn.seek(start); + long end = start + length; + return codec.createInputStream(rawIn, decompressor, start, end, readMode); + } + + private static byte[] newAlternatingByteArray(int size, int... 
choices) { + checkArgument(choices.length > 1); + byte[] result = new byte[size]; + for (int i = 0; i < size; i++) { + result[i] = (byte) choices[i % choices.length]; + } + return result; + } + + private static void assertCasesWhereReadDoesNotAdvanceStream(SplitCompressionInputStream in) + throws IOException { + long initialPos = in.getPos(); + + assertEquals(0, in.read(new byte[0])); + + assertThatNullPointerException().isThrownBy(() -> in.read(null, 0, 1)); + assertThatExceptionOfType(IndexOutOfBoundsException.class).isThrownBy( + () -> in.read(new byte[5], -1, 2)); + assertThatExceptionOfType(IndexOutOfBoundsException.class).isThrownBy( + () -> in.read(new byte[5], 0, -1)); + assertThatExceptionOfType(IndexOutOfBoundsException.class).isThrownBy( + () -> in.read(new byte[5], 1, 5)); + + assertEquals(initialPos, in.getPos()); + } + + private static void assertReadingAtPositionZero(SplitCompressionInputStream in, + byte[] expectedData) throws IOException { + byte[] buffer = new byte[expectedData.length]; + assertEquals(1, in.read(buffer, 0, 1)); + assertEquals(expectedData[0], buffer[0]); + assertEquals(0, in.getPos()); + + IOUtils.readFully(in, buffer, 1, expectedData.length - 1); + assertArrayEquals(expectedData, buffer); + assertEquals(0, in.getPos()); + } + + private static void assertReadingPastEndOfBlock(SplitCompressionInputStream in, + long endOfBlockPos, byte[] expectedData) throws IOException { + byte[] buffer = new byte[expectedData.length]; + assertEquals(1, in.read(buffer)); + assertEquals(expectedData[0], buffer[0]); + assertEquals(endOfBlockPos + 1, in.getPos()); + + IOUtils.readFully(in, buffer, 1, expectedData.length - 1); + assertArrayEquals(expectedData, buffer); + assertEquals(endOfBlockPos + 1, in.getPos()); + } + + private static void assertRead(InputStream in, byte[] expectedData) throws IOException { + byte[] buffer = new byte[expectedData.length]; + IOUtils.readFully(in, buffer); + assertArrayEquals(expectedData, buffer); + } +} \ No 
newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java index 94ff7a88493c7..02785a3da030e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java @@ -76,8 +76,6 @@ import org.apache.hadoop.util.NativeCodeLoader; import org.apache.hadoop.util.ReflectionUtils; import org.junit.After; -import org.junit.Assert; -import org.junit.Assume; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -135,30 +133,22 @@ public void testBZip2NativeCodec() throws IOException { @Test public void testSnappyCodec() throws IOException { - if (SnappyCodec.isNativeCodeLoaded()) { - codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.SnappyCodec"); - codecTest(conf, seed, count, "org.apache.hadoop.io.compress.SnappyCodec"); - } + codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.SnappyCodec"); + codecTest(conf, seed, count, "org.apache.hadoop.io.compress.SnappyCodec"); } @Test public void testLz4Codec() throws IOException { - if (NativeCodeLoader.isNativeCodeLoaded()) { - if (Lz4Codec.isNativeCodeLoaded()) { - conf.setBoolean( - CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, - false); - codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); - codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); - conf.setBoolean( - CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, - true); - codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); - codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); - } else { - Assert.fail("Native hadoop library available but lz4 not"); - } - } + conf.setBoolean( + 
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, + false); + codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); + codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); + conf.setBoolean( + CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, + true); + codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); + codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); } @Test @@ -614,7 +604,6 @@ private static void sequenceFileCodecTest(Configuration conf, int lines, */ @Test public void testSnappyMapFile() throws Exception { - Assume.assumeTrue(SnappyCodec.isNativeCodeLoaded()); codecTestMapFile(SnappyCodec.class, CompressionType.BLOCK, 100); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java index 1fb25cb9087c2..367b85862e838 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java @@ -19,6 +19,10 @@ import static org.junit.Assert.assertEquals; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; +import java.util.Random; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -26,6 +30,8 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.Before; import org.junit.Test; @@ -189,4 +195,36 @@ public void testDecompressorNotReturnSameInstance() { CodecPool.returnDecompressor(decompressor); } } + + @Test(timeout = 10000) + public void testDoNotPoolDecompressorNotUseableAfterReturn() 
throws Exception { + + final GzipCodec gzipCodec = new GzipCodec(); + gzipCodec.setConf(new Configuration()); + + final Random random = new Random(); + final byte[] bytes = new byte[1024]; + random.nextBytes(bytes); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (OutputStream outputStream = gzipCodec.createOutputStream(baos)) { + outputStream.write(bytes); + } + + final byte[] gzipBytes = baos.toByteArray(); + final ByteArrayInputStream bais = new ByteArrayInputStream(gzipBytes); + + // BuiltInGzipDecompressor is an explicit example of a Decompressor + // with the @DoNotPool annotation + final Decompressor decompressor = new BuiltInGzipDecompressor(); + CodecPool.returnDecompressor(decompressor); + + final CompressionInputStream inputStream = gzipCodec.createInputStream(bais, decompressor); + LambdaTestUtils.intercept( + AlreadyClosedException.class, + "decompress called on closed decompressor", + "Decompressor from Codec with @DoNotPool should not be " + + "useable after returning to CodecPool", + () -> inputStream.read()); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java index 1f035974883cf..43cb4df1105b2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java @@ -27,7 +27,7 @@ import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Test; -import com.google.common.collect.ImmutableSet; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; /** * Test for pairs: @@ -72,20 +72,23 @@ public void testCompressorDecompressor() { } @Test - public void 
testCompressorDecompressorWithExeedBufferLimit() { - int BYTE_SIZE = 100 * 1024; - byte[] rawData = generate(BYTE_SIZE); + public void testCompressorDecompressorWithExceedBufferLimit() { + // input data size greater than internal buffer size. + final int byteSize = 100 * 1024; + final int bufferSize = 64 * 1024; + byte[] rawData = generate(byteSize); try { CompressDecompressTester.of(rawData) .withCompressDecompressPair( - new SnappyCompressor(BYTE_SIZE + BYTE_SIZE / 2), - new SnappyDecompressor(BYTE_SIZE + BYTE_SIZE / 2)) - .withCompressDecompressPair(new Lz4Compressor(BYTE_SIZE), - new Lz4Decompressor(BYTE_SIZE)) - .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, - CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, - CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, - CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + new SnappyCompressor(bufferSize), + new SnappyDecompressor(bufferSize)) + .withCompressDecompressPair( + new Lz4Compressor(bufferSize), + new Lz4Decompressor(bufferSize)) + .withTestCases(ImmutableSet.of( + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) .test(); } catch (Exception ex) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/BZip2TextFileWriter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/BZip2TextFileWriter.java new file mode 100644 index 0000000000000..5ca99fd02dfbf --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/BZip2TextFileWriter.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.io.compress.bzip2; + +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.compress.BZip2Codec; + +import static org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream.MIN_BLOCKSIZE; +import static org.apache.hadoop.util.Preconditions.checkArgument; + +/** + * A writer that simplifies creating BZip2 compressed text data for testing + * purposes. + */ +public final class BZip2TextFileWriter implements Closeable { + + // Use minimum block size to reduce the amount of data that must be written + // to CBZip2OutputStream before a new block is created. + private static final int BLOCK_SIZE_100K = MIN_BLOCKSIZE; + + /** + * The number of bytes of run-length encoded data that needs to be written + * to this writer in order for the next byte written to start a new BZip2 block. + */ + public static final int BLOCK_SIZE = + // The + 1 is needed because of how CBZip2OutputStream checks whether the + // last offset written is less than the allowable block size. Because the last + // offset is one less than the number of bytes written to the block, we need + // to write an extra byte to trigger writing a new block.
+ CBZip2OutputStream.getAllowableBlockSize(BLOCK_SIZE_100K) + 1; + + private final CBZip2OutputStream out; + + public BZip2TextFileWriter(Path path, Configuration conf) throws IOException { + this(path.getFileSystem(conf).create(path)); + } + + public BZip2TextFileWriter(OutputStream rawOut) throws IOException { + try { + BZip2Codec.writeHeader(rawOut); + out = new CBZip2OutputStream(rawOut, BLOCK_SIZE_100K); + } catch (Throwable e) { + rawOut.close(); + throw e; + } + } + + public void writeManyRecords(int totalSize, int numRecords, byte[] delimiter) + throws IOException { + checkArgument(numRecords > 0); + checkArgument(delimiter.length > 0); + + int minRecordSize = totalSize / numRecords; + checkArgument(minRecordSize >= delimiter.length); + + int lastRecordExtraSize = totalSize % numRecords; + + for (int i = 0; i < numRecords - 1; i++) { + writeRecord(minRecordSize, delimiter); + } + writeRecord(minRecordSize + lastRecordExtraSize, delimiter); + } + + public void writeRecord(int totalSize, byte[] delimiter) throws IOException { + checkArgument(delimiter.length > 0); + checkArgument(totalSize >= delimiter.length); + + int contentSize = totalSize - delimiter.length; + for (int i = 0; i < contentSize; i++) { + // Alternate between characters so that internals of CBZip2OutputStream + // cannot condense the written bytes using run-length encoding. This + // allows the caller to use #BLOCK_SIZE in order to know whether the next + // write will end just before the end of the current block, or exceed it, + // and by how much. + out.write(i % 2 == 0 ? 
'a' : 'b'); + } + write(delimiter); + } + + public void write(String bytes) throws IOException { + write(bytes.getBytes(StandardCharsets.UTF_8)); + } + + public void write(byte[] bytes) throws IOException { + out.write(bytes); + } + + @Override + public void close() throws IOException { + out.close(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/BZip2Utils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/BZip2Utils.java new file mode 100644 index 0000000000000..d597ed0e7e0b8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/BZip2Utils.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io.compress.bzip2; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import static org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE.BYBLOCK; + +public final class BZip2Utils { + + private BZip2Utils() { + } + + /** + * Returns the start offsets of blocks that follow the first block in the + * BZip2 compressed file at the given path. The first offset corresponds to + * the first byte containing the BZip2 block marker of the second block. The + * i-th offset corresponds to the block marker of the (i + 1)-th block. + */ + public static List getNextBlockMarkerOffsets( + Path path, Configuration conf) throws IOException { + FileSystem fs = path.getFileSystem(conf); + try (InputStream fileIn = fs.open(path)) { + return getNextBlockMarkerOffsets(fileIn); + } + } + + /** + * Returns the start offsets of blocks that follow the first block in the + * BZip2 compressed input stream. The first offset corresponds to + * the first byte containing the BZip2 block marker of the second block. The + * i-th offset corresponds to the block marker of the (i + 1)-th block. 
+ */ + public static List getNextBlockMarkerOffsets(InputStream rawIn) + throws IOException { + try (CBZip2InputStream in = new CBZip2InputStream(rawIn, BYBLOCK)) { + ArrayList offsets = new ArrayList<>(); + while (in.skipToNextBlockMarker()) { + offsets.add(in.getProcessedByteCount()); + } + return offsets; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/TestBZip2TextFileWriter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/TestBZip2TextFileWriter.java new file mode 100644 index 0000000000000..7d92e07f01b6a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/bzip2/TestBZip2TextFileWriter.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.hadoop.io.compress.bzip2.BZip2TextFileWriter.BLOCK_SIZE; +import static org.junit.Assert.assertEquals; + +public final class TestBZip2TextFileWriter { + + private static final byte[] DELIMITER = new byte[] {'\0'}; + + private ByteArrayOutputStream rawOut; + private BZip2TextFileWriter writer; + + @Before + public void setUp() throws Exception { + rawOut = new ByteArrayOutputStream(); + writer = new BZip2TextFileWriter(rawOut); + } + + @After + public void tearDown() throws Exception { + rawOut = null; + writer.close(); + } + + @Test + public void writingSingleBlockSizeOfData() throws Exception { + writer.writeRecord(BLOCK_SIZE, DELIMITER); + writer.close(); + + List nextBlocks = getNextBlockMarkerOffsets(); + assertEquals(0, nextBlocks.size()); + } + + @Test + public void justExceedingBeyondBlockSize() throws Exception { + writer.writeRecord(BLOCK_SIZE + 1, DELIMITER); + writer.close(); + + List nextBlocks = getNextBlockMarkerOffsets(); + assertEquals(1, nextBlocks.size()); + } + + @Test + public void writingTwoBlockSizesOfData() throws Exception { + writer.writeRecord(2 * BLOCK_SIZE, DELIMITER); + writer.close(); + + List nextBlocks = getNextBlockMarkerOffsets(); + assertEquals(1, nextBlocks.size()); + } + + @Test + public void justExceedingBeyondTwoBlocks() throws Exception { + writer.writeRecord(2 * BLOCK_SIZE + 1, DELIMITER); + writer.close(); + + List nextBlocks = getNextBlockMarkerOffsets(); + assertEquals(2, nextBlocks.size()); + } + + private List getNextBlockMarkerOffsets() throws IOException { + ByteArrayInputStream in = new ByteArrayInputStream(rawOut.toByteArray()); + return BZip2Utils.getNextBlockMarkerOffsets(in); + } +} \ No newline at end of file diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java index 6f3b076097aee..8be5ec3d3f78f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java @@ -27,17 +27,20 @@ import java.io.IOException; import java.util.Random; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.compress.BlockCompressorStream; import org.apache.hadoop.io.compress.BlockDecompressorStream; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.CompressionOutputStream; -import org.apache.hadoop.io.compress.Lz4Codec; import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; import org.apache.hadoop.test.MultithreadedTestUtil; -import org.junit.Before; import org.junit.Test; import static org.junit.Assume.*; @@ -45,12 +48,7 @@ public class TestLz4CompressorDecompressor { private static final Random rnd = new Random(12345l); - @Before - public void before() { - assumeTrue(Lz4Codec.isNativeCodeLoaded()); - } - - //test on NullPointerException in {@code compressor.setInput()} + //test on NullPointerException in {@code compressor.setInput()} @Test public void testCompressorSetInputNullPointerException() { try { @@ -330,4 +328,36 @@ public void doWork() throws Exception { ctx.waitFor(60000); } + + @Test + public void testLz4Compatibility() throws Exception 
{ + // The sequence file was created using native Lz4 codec before HADOOP-17292. + // After we use lz4-java for lz4 compression, this test makes sure we can + // decompress the sequence file correctly. + Path filePath = new Path(TestLz4CompressorDecompressor.class + .getResource("/lz4/sequencefile").toURI()); + + Configuration conf = new Configuration(); + conf.setInt("io.seqfile.compress.blocksize", 1000); + FileSystem fs = FileSystem.get(conf); + + int lines = 2000; + + SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf); + + Writable key = (Writable)reader.getKeyClass().newInstance(); + Writable value = (Writable)reader.getValueClass().newInstance(); + + int lc = 0; + try { + while (reader.next(key, value)) { + assertEquals("key" + lc, key.toString()); + assertEquals("value" + lc, value.toString()); + lc++; + } + } finally { + reader.close(); + } + assertEquals(lines, lc); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index cc986c7e0aea4..93c24835f2206 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.io.compress.snappy; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -32,26 +33,28 @@ import java.nio.ByteBuffer; import java.util.Random; +import org.apache.commons.codec.binary.Hex; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import 
org.apache.hadoop.io.compress.BlockCompressorStream; import org.apache.hadoop.io.compress.BlockDecompressorStream; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.CompressionOutputStream; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor; import org.apache.hadoop.test.MultithreadedTestUtil; import org.junit.Assert; import org.junit.Before; import org.junit.Test; - -import static org.junit.Assume.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class TestSnappyCompressorDecompressor { + public static final Logger LOG = + LoggerFactory.getLogger(TestSnappyCompressorDecompressor.class); + @Before public void before() { - assumeTrue(SnappyCodec.isNativeCodeLoaded()); } @Test @@ -167,40 +170,41 @@ public void testSnappyDecompressorCompressAIOBException() { } @Test - public void testSnappyCompressDecompress() { + public void testSnappyCompressDecompress() throws Exception { int BYTE_SIZE = 1024 * 54; byte[] bytes = BytesGenerator.get(BYTE_SIZE); SnappyCompressor compressor = new SnappyCompressor(); - try { - compressor.setInput(bytes, 0, bytes.length); - assertTrue("SnappyCompressDecompress getBytesRead error !!!", - compressor.getBytesRead() > 0); - assertTrue( - "SnappyCompressDecompress getBytesWritten before compress error !!!", - compressor.getBytesWritten() == 0); - - byte[] compressed = new byte[BYTE_SIZE]; - int cSize = compressor.compress(compressed, 0, compressed.length); - assertTrue( - "SnappyCompressDecompress getBytesWritten after compress error !!!", - compressor.getBytesWritten() > 0); - - SnappyDecompressor decompressor = new SnappyDecompressor(BYTE_SIZE); - // set as input for decompressor only compressed data indicated with cSize - decompressor.setInput(compressed, 0, cSize); - byte[] decompressed = new byte[BYTE_SIZE]; - decompressor.decompress(decompressed, 0, decompressed.length); - - 
assertTrue("testSnappyCompressDecompress finished error !!!", - decompressor.finished()); - Assert.assertArrayEquals(bytes, decompressed); - compressor.reset(); - decompressor.reset(); - assertTrue("decompressor getRemaining error !!!", - decompressor.getRemaining() == 0); - } catch (Exception e) { - fail("testSnappyCompressDecompress ex error!!!"); - } + compressor.setInput(bytes, 0, bytes.length); + assertTrue("SnappyCompressDecompress getBytesRead error !!!", + compressor.getBytesRead() > 0); + assertEquals( + "SnappyCompressDecompress getBytesWritten before compress error !!!", + 0, compressor.getBytesWritten()); + + // snappy compression may increase data size. + // This calculation comes from "Snappy::MaxCompressedLength(size_t)" + int maxSize = 32 + BYTE_SIZE + BYTE_SIZE / 6; + byte[] compressed = new byte[maxSize]; + int cSize = compressor.compress(compressed, 0, compressed.length); + LOG.info("input size: {}", BYTE_SIZE); + LOG.info("compressed size: {}", cSize); + assertTrue( + "SnappyCompressDecompress getBytesWritten after compress error !!!", + compressor.getBytesWritten() > 0); + + SnappyDecompressor decompressor = new SnappyDecompressor(); + // set as input for decompressor only compressed data indicated with cSize + decompressor.setInput(compressed, 0, cSize); + byte[] decompressed = new byte[BYTE_SIZE]; + decompressor.decompress(decompressed, 0, decompressed.length); + + assertTrue("testSnappyCompressDecompress finished error !!!", + decompressor.finished()); + Assert.assertArrayEquals(bytes, decompressed); + compressor.reset(); + decompressor.reset(); + assertEquals("decompressor getRemaining error !!!", + 0, decompressor.getRemaining()); } @Test @@ -278,7 +282,38 @@ public void testSnappyBlockCompression() { fail("testSnappyBlockCompression ex error !!!"); } } - + + @Test + // The buffer size is smaller than the input. 
+ public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { + int inputSize = 1024 * 50; + int bufferSize = 512; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + byte[] buffer = new byte[bufferSize]; + byte[] input = BytesGenerator.get(inputSize); + + SnappyCompressor compressor = new SnappyCompressor(); + compressor.setInput(input, 0, inputSize); + compressor.finish(); + while (!compressor.finished()) { + int len = compressor.compress(buffer, 0, buffer.length); + out.write(buffer, 0, len); + } + byte[] compressed = out.toByteArray(); + assertThat(compressed).hasSizeGreaterThan(0); + out.reset(); + + SnappyDecompressor decompressor = new SnappyDecompressor(); + decompressor.setInput(compressed, 0, compressed.length); + while (!decompressor.finished()) { + int len = decompressor.decompress(buffer, 0, buffer.length); + out.write(buffer, 0, len); + } + byte[] decompressed = out.toByteArray(); + + assertThat(decompressed).isEqualTo(input); + } + private void compressDecompressLoop(int rawDataSize) throws IOException { byte[] rawData = BytesGenerator.get(rawDataSize); byte[] compressedResult = new byte[rawDataSize+20]; @@ -318,8 +353,9 @@ private void compressDecompressLoop(int rawDataSize) throws IOException { @Test public void testSnappyDirectBlockCompression() { - int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 }; - assumeTrue(SnappyCodec.isNativeCodeLoaded()); + int[] size = new int[] { + 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 + }; try { for (int i = 0; i < size.length; i++) { compressDecompressLoop(size[i]); @@ -408,4 +444,52 @@ public void doWork() throws Exception { ctx.waitFor(60000); } + + @Test + public void testSnappyCompatibility() throws Exception { + // HADOOP-17125. Using snappy-java in SnappyCodec. These strings are raw + // data and compressed data using previous native Snappy codec. We use + // updated Snappy codec to decode it and check if it matches. 
+ String rawData = "010a06030a040a0c0109020c0a010204020d02000b010701080605" + + "080b090902060a080502060a0d06070908080a0c0105030904090d050908000" + + "40c090c0d0d0804000d00040b0b0d010d060907020a030a0c09000409050801" + + "07040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0" + + "a07050d06050d"; + String compressed = "8001f07f010a06030a040a0c0109020c0a010204020d02000b0" + + "10701080605080b090902060a080502060a0d06070908080a0c010503090409" + + "0d05090800040c090c0d0d0804000d00040b0b0d010d060907020a030a0c090" + + "0040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b0605" + + "0b060e030e0a07050d06050d"; + + byte[] rawDataBytes = Hex.decodeHex(rawData); + byte[] compressedBytes = Hex.decodeHex(compressed); + + ByteBuffer inBuf = ByteBuffer.allocateDirect(compressedBytes.length); + inBuf.put(compressedBytes, 0, compressedBytes.length); + inBuf.flip(); + + ByteBuffer outBuf = ByteBuffer.allocateDirect(rawDataBytes.length); + ByteBuffer expected = ByteBuffer.wrap(rawDataBytes); + + SnappyDecompressor.SnappyDirectDecompressor decompressor = + new SnappyDecompressor.SnappyDirectDecompressor(); + + outBuf.clear(); + while(!decompressor.finished()) { + decompressor.decompress(inBuf, outBuf); + if (outBuf.remaining() == 0) { + outBuf.flip(); + while (outBuf.remaining() > 0) { + assertEquals(expected.get(), outBuf.get()); + } + outBuf.clear(); + } + } + outBuf.flip(); + while (outBuf.remaining() > 0) { + assertEquals(expected.get(), outBuf.get()); + } + outBuf.clear(); + assertEquals(0, expected.remaining()); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java index 7e2ab241e195d..ac9ea5e8a8468 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java @@ -42,7 +42,7 @@ import org.apache.hadoop.util.NativeCodeLoader; import org.junit.Before; import org.junit.Test; -import com.google.common.collect.ImmutableSet; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; public class TestZlibCompressorDecompressor { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zstd/TestZStandardCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zstd/TestZStandardCompressorDecompressor.java index dcfb7e9e32df1..4c629be3f4751 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zstd/TestZStandardCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zstd/TestZStandardCompressorDecompressor.java @@ -234,6 +234,65 @@ public void testCompressorDecompressorLogicWithCompressionStreams() } } + /** + * Verify that input compressed in three finish()/resetState() rounds is decompressed back to the original bytes. + */ + @Test + public void testCompressorDecompressorWithFinish() throws Exception { + DataOutputStream deflateOut = null; + DataInputStream inflateIn = null; + int byteSize = 1024 * 100; + byte[] bytes = generate(byteSize); + int firstLength = 1024 * 30; + + int bufferSize = IO_FILE_BUFFER_SIZE_DEFAULT; + try { + DataOutputBuffer compressedDataBuffer = new DataOutputBuffer(); + CompressionOutputStream deflateFilter = + new CompressorStream(compressedDataBuffer, new ZStandardCompressor(), + bufferSize); + + deflateOut = + new DataOutputStream(new BufferedOutputStream(deflateFilter)); + + // Write some data and finish. + deflateOut.write(bytes, 0, firstLength); + deflateFilter.finish(); + deflateOut.flush(); + + // ResetState then write some data and finish. 
+ deflateFilter.resetState(); + deflateOut.write(bytes, firstLength, firstLength); + deflateFilter.finish(); + deflateOut.flush(); + + // ResetState then write some data and finish. + deflateFilter.resetState(); + deflateOut.write(bytes, firstLength * 2, byteSize - firstLength * 2); + deflateFilter.finish(); + deflateOut.flush(); + + DataInputBuffer deCompressedDataBuffer = new DataInputBuffer(); + deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, + compressedDataBuffer.getLength()); + + CompressionInputStream inflateFilter = + new DecompressorStream(deCompressedDataBuffer, + new ZStandardDecompressor(bufferSize), bufferSize); + + inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter)); + + byte[] result = new byte[byteSize]; + inflateIn.readFully(result); + assertArrayEquals( + "original array not equals compress/decompressed array", bytes, + result); + } finally { + IOUtils.closeQuietly(deflateOut); + IOUtils.closeQuietly(inflateIn); + } + } + @Test public void testZStandardCompressDecompressInMultiThreads() throws Exception { MultithreadedTestUtil.TestContext ctx = diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java index 6d14de8a521c5..331cecb4c00e0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java @@ -527,4 +527,16 @@ protected void corruptSomeChunk(ECChunk[] chunks) { buffer.position(buffer.position() + 1); } } + + /** + * Pollute some chunk. 
+ * @param chunks chunks to choose from; one is picked at random and the byte at its buffer's current position is incremented (the position is restored afterwards) + */ + protected void polluteSomeChunk(ECChunk[] chunks) { + int idx = new Random().nextInt(chunks.length); + ByteBuffer buffer = chunks[idx].getBuffer(); + buffer.mark(); + buffer.put((byte) ((buffer.get(buffer.position()) + 1))); + buffer.reset(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoderBenchmark.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoderBenchmark.java index 362bde9806327..631991a03cf9c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoderBenchmark.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoderBenchmark.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.io.erasurecode.rawcoder; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.io.erasurecode.ErasureCoderOptions; import org.apache.hadoop.util.StopWatch; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestDecodingValidator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestDecodingValidator.java new file mode 100644 index 0000000000000..06744cccc0a54 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestDecodingValidator.java @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.io.erasurecode.ECChunk; +import org.apache.hadoop.io.erasurecode.ErasureCodeNative; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.junit.Assert.assertTrue; + +/** + * Test {@link DecodingValidator} under various decoders. 
+ */ +@RunWith(Parameterized.class) +public class TestDecodingValidator extends TestRawCoderBase { + + private DecodingValidator validator; + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {RSRawErasureCoderFactory.class, 6, 3, new int[]{1}, new int[]{}}, + {RSRawErasureCoderFactory.class, 6, 3, new int[]{3}, new int[]{0}}, + {RSRawErasureCoderFactory.class, 6, 3, new int[]{2, 4}, new int[]{1}}, + {NativeRSRawErasureCoderFactory.class, 6, 3, new int[]{0}, new int[]{}}, + {XORRawErasureCoderFactory.class, 10, 1, new int[]{0}, new int[]{}}, + {NativeXORRawErasureCoderFactory.class, 10, 1, new int[]{0}, + new int[]{}} + }); + } + + public TestDecodingValidator( + Class factoryClass, int numDataUnits, + int numParityUnits, int[] erasedDataIndexes, int[] erasedParityIndexes) { + this.encoderFactoryClass = factoryClass; + this.decoderFactoryClass = factoryClass; + this.numDataUnits = numDataUnits; + this.numParityUnits = numParityUnits; + this.erasedDataIndexes = erasedDataIndexes; + this.erasedParityIndexes = erasedParityIndexes; + } + + @Before + public void setup() { + if (encoderFactoryClass == NativeRSRawErasureCoderFactory.class + || encoderFactoryClass == NativeXORRawErasureCoderFactory.class) { + Assume.assumeTrue(ErasureCodeNative.isNativeCodeLoaded()); + } + setAllowDump(false); + } + + /** + * Test if the same validator can process direct and non-direct buffers. + */ + @Test + public void testValidate() { + prepare(null, numDataUnits, numParityUnits, erasedDataIndexes, + erasedParityIndexes); + testValidate(true); + testValidate(false); + } + + /** + * Test if the same validator can process variable width of data for + * inputs and outputs. 
+ */ + protected void testValidate(boolean usingDirectBuffer) { + this.usingDirectBuffer = usingDirectBuffer; + prepareCoders(false); + prepareValidator(false); + + performTestValidate(baseChunkSize); + performTestValidate(baseChunkSize - 17); + performTestValidate(baseChunkSize + 18); + } + + protected void prepareValidator(boolean recreate) { + if (validator == null || recreate) { + validator = new DecodingValidator(decoder); + } + } + + protected void performTestValidate(int chunkSize) { + setChunkSize(chunkSize); + prepareBufferAllocator(false); + + // encode + ECChunk[] dataChunks = prepareDataChunksForEncoding(); + ECChunk[] parityChunks = prepareParityChunksForEncoding(); + ECChunk[] clonedDataChunks = cloneChunksWithData(dataChunks); + try { + encoder.encode(dataChunks, parityChunks); + } catch (Exception e) { + Assert.fail("Should not get Exception: " + e.getMessage()); + } + + // decode + backupAndEraseChunks(clonedDataChunks, parityChunks); + ECChunk[] inputChunks = + prepareInputChunksForDecoding(clonedDataChunks, parityChunks); + markChunks(inputChunks); + ensureOnlyLeastRequiredChunks(inputChunks); + ECChunk[] recoveredChunks = prepareOutputChunksForDecoding(); + int[] erasedIndexes = getErasedIndexesForDecoding(); + try { + decoder.decode(inputChunks, erasedIndexes, recoveredChunks); + } catch (Exception e) { + Assert.fail("Should not get Exception: " + e.getMessage()); + } + + // validate + restoreChunksFromMark(inputChunks); + ECChunk[] clonedInputChunks = cloneChunksWithData(inputChunks); + ECChunk[] clonedRecoveredChunks = cloneChunksWithData(recoveredChunks); + int[] clonedErasedIndexes = erasedIndexes.clone(); + + try { + validator.validate(clonedInputChunks, clonedErasedIndexes, + clonedRecoveredChunks); + } catch (Exception e) { + Assert.fail("Should not get Exception: " + e.getMessage()); + } + + // Check if input buffers' positions are moved to the end + verifyBufferPositionAtEnd(clonedInputChunks); + + // Check if validator does not change 
recovered chunks and erased indexes + verifyChunksEqual(recoveredChunks, clonedRecoveredChunks); + Assert.assertArrayEquals("Erased indexes should not be changed", + erasedIndexes, clonedErasedIndexes); + + // Check if validator uses correct indexes for validation + List validIndexesList = + IntStream.of(CoderUtil.getValidIndexes(inputChunks)).boxed() + .collect(Collectors.toList()); + List newValidIndexesList = + IntStream.of(validator.getNewValidIndexes()).boxed() + .collect(Collectors.toList()); + List erasedIndexesList = + IntStream.of(erasedIndexes).boxed().collect(Collectors.toList()); + int newErasedIndex = validator.getNewErasedIndex(); + Assert.assertTrue( + "Valid indexes for validation should contain" + + " erased indexes for decoding", + newValidIndexesList.containsAll(erasedIndexesList)); + Assert.assertTrue( + "An erased index for validation should be contained" + + " in valid indexes for decoding", + validIndexesList.contains(newErasedIndex)); + Assert.assertFalse( + "An erased index for validation should not be contained" + + " in valid indexes for validation", + newValidIndexesList.contains(newErasedIndex)); + } + + private void verifyChunksEqual(ECChunk[] chunks1, ECChunk[] chunks2) { + boolean result = Arrays.deepEquals(toArrays(chunks1), toArrays(chunks2)); + assertTrue("Recovered chunks should not be changed", result); + } + + /** + * Test if validator throws {@link InvalidDecodingException} when + * a decoded output buffer is polluted. 
+ */ + @Test + public void testValidateWithBadDecoding() throws IOException { + prepare(null, numDataUnits, numParityUnits, erasedDataIndexes, + erasedParityIndexes); + this.usingDirectBuffer = true; + prepareCoders(true); + prepareValidator(true); + prepareBufferAllocator(false); + + // encode + ECChunk[] dataChunks = prepareDataChunksForEncoding(); + ECChunk[] parityChunks = prepareParityChunksForEncoding(); + ECChunk[] clonedDataChunks = cloneChunksWithData(dataChunks); + try { + encoder.encode(dataChunks, parityChunks); + } catch (Exception e) { + Assert.fail("Should not get Exception: " + e.getMessage()); + } + + // decode + backupAndEraseChunks(clonedDataChunks, parityChunks); + ECChunk[] inputChunks = + prepareInputChunksForDecoding(clonedDataChunks, parityChunks); + markChunks(inputChunks); + ensureOnlyLeastRequiredChunks(inputChunks); + ECChunk[] recoveredChunks = prepareOutputChunksForDecoding(); + int[] erasedIndexes = getErasedIndexesForDecoding(); + try { + decoder.decode(inputChunks, erasedIndexes, recoveredChunks); + } catch (Exception e) { + Assert.fail("Should not get Exception: " + e.getMessage()); + } + + // validate + restoreChunksFromMark(inputChunks); + polluteSomeChunk(recoveredChunks); + try { + validator.validate(inputChunks, erasedIndexes, recoveredChunks); + Assert.fail("Validation should fail due to bad decoding"); + } catch (InvalidDecodingException e) { + String expected = "Failed to validate decoding"; + GenericTestUtils.assertExceptionContains(expected, e); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java index 4519e357bd181..eb63494507eaf 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java @@ -334,7 +334,7 @@ protected void testInputPosition(boolean usingDirectBuffer) { verifyBufferPositionAtEnd(inputChunks); } - private void verifyBufferPositionAtEnd(ECChunk[] inputChunks) { + void verifyBufferPositionAtEnd(ECChunk[] inputChunks) { for (ECChunk chunk : inputChunks) { if (chunk != null) { Assert.assertEquals(0, chunk.getBuffer().remaining()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/RPCCallBenchmark.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/RPCCallBenchmark.java index bbb4ec21812e3..6742425e766e8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/RPCCallBenchmark.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/RPCCallBenchmark.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.ipc; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import org.apache.hadoop.thirdparty.protobuf.BlockingService; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -66,7 +66,7 @@ private static class MyOptions { public int secondsToRun = 15; private int msgSize = 1024; public Class rpcEngine = - ProtobufRpcEngine.class; + ProtobufRpcEngine2.class; private MyOptions(String args[]) { try { @@ -181,7 +181,7 @@ private void processOptions(CommandLine line, Options opts) if (line.hasOption('e')) { String eng = line.getOptionValue('e'); if ("protobuf".equals(eng)) { - rpcEngine = ProtobufRpcEngine.class; + rpcEngine = ProtobufRpcEngine2.class; } else { throw new ParseException("invalid engine: " + eng); } @@ -224,7 +224,7 @@ private Server startServer(MyOptions opts) throws IOException { RPC.Server server; // Get RPC server for server side implementation - if (opts.rpcEngine == ProtobufRpcEngine.class) { + if 
(opts.rpcEngine == ProtobufRpcEngine2.class) { // Create server side implementation PBServerImpl serverImpl = new PBServerImpl(); BlockingService service = TestProtobufRpcProto @@ -378,7 +378,7 @@ private interface RpcServiceWrapper { private RpcServiceWrapper createRpcClient(MyOptions opts) throws IOException { InetSocketAddress addr = NetUtils.createSocketAddr(opts.host, opts.getPort()); - if (opts.rpcEngine == ProtobufRpcEngine.class) { + if (opts.rpcEngine == ProtobufRpcEngine2.class) { final TestRpcService proxy = RPC.getProxy(TestRpcService.class, 0, addr, conf); return new RpcServiceWrapper() { @Override diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestCallerContext.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestCallerContext.java new file mode 100644 index 0000000000000..bb4a119e7db29 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestCallerContext.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ipc; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_KEY; + +public class TestCallerContext { + @Test + public void testBuilderAppend() { + Configuration conf = new Configuration(); + conf.set(HADOOP_CALLER_CONTEXT_SEPARATOR_KEY, "$"); + CallerContext.Builder builder = new CallerContext.Builder(null, conf); + CallerContext context = builder.append("context1") + .append("context2").append("key3", "value3").build(); + Assert.assertEquals(true, + context.getContext().contains("$")); + String[] items = context.getContext().split("\\$"); + Assert.assertEquals(3, items.length); + Assert.assertEquals("key3:value3", items[2]); + + builder.append("$$"); + Assert.assertEquals("context1$context2$key3:value3$$$", + builder.build().getContext()); + } + + @Test + public void testBuilderAppendIfAbsent() { + Configuration conf = new Configuration(); + conf.set(HADOOP_CALLER_CONTEXT_SEPARATOR_KEY, "$"); + CallerContext.Builder builder = new CallerContext.Builder(null, conf); + builder.append("key1", "value1"); + Assert.assertEquals("key1:value1", + builder.build().getContext()); + + // Append an existed key with different value. + builder.appendIfAbsent("key1", "value2"); + String[] items = builder.build().getContext().split("\\$"); + Assert.assertEquals(1, items.length); + Assert.assertEquals("key1:value1", + builder.build().getContext()); + + // Append an absent key. + builder.appendIfAbsent("key2", "value2"); + String[] items2 = builder.build().getContext().split("\\$"); + Assert.assertEquals(2, items2.length); + Assert.assertEquals("key1:value1$key2:value2", + builder.build().getContext()); + + // Append a key that is a substring of an existing key. 
+ builder.appendIfAbsent("key", "value"); + String[] items3 = builder.build().getContext().split("\\$"); + Assert.assertEquals(3, items3.length); + Assert.assertEquals("key1:value1$key2:value2$key:value", + builder.build().getContext()); + } + + @Test(expected = IllegalArgumentException.class) + public void testNewBuilder() { + Configuration conf = new Configuration(); + // Set illegal separator. + conf.set(HADOOP_CALLER_CONTEXT_SEPARATOR_KEY, "\t"); + CallerContext.Builder builder = new CallerContext.Builder(null, conf); + builder.build(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestDecayRpcScheduler.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestDecayRpcScheduler.java index 71723325e2c86..3b8c58c26d66e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestDecayRpcScheduler.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestDecayRpcScheduler.java @@ -42,9 +42,8 @@ public class TestDecayRpcScheduler { private Schedulable mockCall(String id) { Schedulable mockCall = mock(Schedulable.class); - UserGroupInformation ugi = mock(UserGroupInformation.class); + UserGroupInformation ugi = UserGroupInformation.createRemoteUser(id); - when(ugi.getUserName()).thenReturn(id); when(mockCall.getUserGroupInformation()).thenReturn(ugi); return mockCall; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index 82540637a2004..8950e1f613868 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ipc; +import static org.assertj.core.api.Assertions.assertThatExceptionOfType; import static org.junit.Assert.assertEquals; import 
static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -47,6 +48,7 @@ import java.net.SocketAddress; import java.net.SocketException; import java.net.SocketTimeoutException; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -54,6 +56,7 @@ import java.util.Random; import java.util.Set; import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; @@ -88,6 +91,7 @@ import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.test.Whitebox; import org.apache.hadoop.util.StringUtils; import org.junit.Assert; @@ -98,9 +102,9 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import com.google.common.base.Supplier; -import com.google.common.primitives.Bytes; -import com.google.common.primitives.Ints; +import java.util.function.Supplier; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Bytes; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; @@ -789,6 +793,130 @@ public Writable call(RPC.RpcKind rpcKind, String protocol, Writable param, } } + @Test(timeout=60000) + public void testIpcHostResolutionTimeout() throws Exception { + final InetSocketAddress addr = new InetSocketAddress("host.invalid", 80); + + // start client + Client.setConnectTimeout(conf, 100); + final Client client = new Client(LongWritable.class, conf); + // set the rpc timeout to twice the MIN_SLEEP_TIME + try { + LambdaTestUtils.intercept(UnknownHostException.class, + new 
Callable() { + @Override + public Void call() throws IOException { + TestIPC.this.call(client, new LongWritable(RANDOM.nextLong()), + addr, MIN_SLEEP_TIME * 2, conf); + return null; + } + }); + } finally { + client.stop(); + } + } + + /** + * The {@link ConnectionId#hashCode} has to be stable despite updates that occur as the the + * address evolves over time. The {@link ConnectionId} is used as a primary key in maps, so + * its hashCode can't change. + * + * @throws IOException if there is a client or server failure + */ + @Test + public void testStableHashCode() throws IOException { + Server server = new TestServer(5, false); + try { + server.start(); + + // Leave host unresolved to start. Use "localhost" as opposed + // to local IP from NetUtils.getConnectAddress(server) to force + // resolution later + InetSocketAddress unresolvedAddr = InetSocketAddress.createUnresolved( + "localhost", NetUtils.getConnectAddress(server).getPort()); + + // Setup: Create a ConnectionID using an unresolved address, and get it's hashCode to serve + // as a point of comparison. 
+ int rpcTimeout = MIN_SLEEP_TIME * 2; + final ConnectionId remoteId = getConnectionId(unresolvedAddr, rpcTimeout, conf); + int expected = remoteId.hashCode(); + + // Start client + Client.setConnectTimeout(conf, 100); + Client client = new Client(LongWritable.class, conf); + try { + // Test: Call should re-resolve host and succeed + LongWritable param = new LongWritable(RANDOM.nextLong()); + client.call(RPC.RpcKind.RPC_BUILTIN, param, remoteId, + RPC.RPC_SERVICE_CLASS_DEFAULT, null); + int actual = remoteId.hashCode(); + + // Verify: The hashCode should match, although the InetAddress is different since it has + // now been resolved + assertThat(remoteId.getAddress()).isNotEqualTo(unresolvedAddr); + assertThat(remoteId.getAddress().getHostName()).isEqualTo(unresolvedAddr.getHostName()); + assertThat(remoteId.hashCode()).isEqualTo(expected); + + // Test: Call should succeed without having to re-resolve + InetSocketAddress expectedSocketAddress = remoteId.getAddress(); + param = new LongWritable(RANDOM.nextLong()); + client.call(RPC.RpcKind.RPC_BUILTIN, param, remoteId, + RPC.RPC_SERVICE_CLASS_DEFAULT, null); + + // Verify: The same instance of the InetSocketAddress has been used to make the second + // call + assertThat(remoteId.getAddress()).isSameAs(expectedSocketAddress); + + // Verify: The hashCode is protected against updates to the host name + String hostName = InetAddress.getLocalHost().getHostName(); + InetSocketAddress mismatchedHostName = NetUtils.createSocketAddr( + InetAddress.getLocalHost().getHostName(), + remoteId.getAddress().getPort()); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> remoteId.setAddress(mismatchedHostName)) + .withMessageStartingWith("Hostname must match"); + + // Verify: The hashCode is protected against updates to the port + InetSocketAddress mismatchedPort = NetUtils.createSocketAddr( + remoteId.getAddress().getHostName(), + remoteId.getAddress().getPort() + 1); + 
assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> remoteId.setAddress(mismatchedPort)) + .withMessageStartingWith("Port must match"); + } finally { + client.stop(); + } + } finally { + server.stop(); + } + } + + @Test(timeout=60000) + public void testIpcFlakyHostResolution() throws IOException { + // start server + Server server = new TestServer(5, false); + server.start(); + + // Leave host unresolved to start. Use "localhost" as opposed + // to local IP from NetUtils.getConnectAddress(server) to force + // resolution later + InetSocketAddress unresolvedAddr = InetSocketAddress.createUnresolved( + "localhost", NetUtils.getConnectAddress(server).getPort()); + + // start client + Client.setConnectTimeout(conf, 100); + Client client = new Client(LongWritable.class, conf); + + try { + // Should re-resolve host and succeed + call(client, new LongWritable(RANDOM.nextLong()), unresolvedAddr, + MIN_SLEEP_TIME * 2, conf); + } finally { + client.stop(); + server.stop(); + } + } + /** * Check that reader queueing works * @throws BrokenBarrierException @@ -1042,6 +1170,10 @@ private static void callAndVerify(Server server, InetSocketAddress addr, call(client, addr, serviceClass, conf); Connection connection = server.getConnections()[0]; + LOG.info("Connection is from: {}", connection); + assertEquals( + "Connection string representation should include both IP address and Host name", 2, + connection.toString().split(" / ").length); int serviceClass2 = connection.getServiceClass(); assertFalse(noChanged ^ serviceClass == serviceClass2); client.stop(); @@ -1090,11 +1222,6 @@ public void testSocketLeak() throws IOException { @Test(timeout=30000) public void testInterrupted() { Client client = new Client(LongWritable.class, conf); - Client.getClientExecutor().submit(new Runnable() { - public void run() { - while(true); - } - }); Thread.currentThread().interrupt(); client.stop(); try { @@ -1215,7 +1342,7 @@ interface DummyProtocol { /** * Test the retry 
count while used in a retry proxy. */ - @Test(timeout=60000) + @Test(timeout=100000) public void testRetryProxy() throws IOException { final Client client = new Client(LongWritable.class, conf); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java index 263841246bf00..b528186ad26a5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java @@ -20,8 +20,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.Test; import java.util.List; @@ -33,7 +34,7 @@ import org.apache.hadoop.conf.Configuration; public class TestIdentityProviders { - public class FakeSchedulable implements Schedulable { + public static class FakeSchedulable implements Schedulable { public FakeSchedulable() { } @@ -61,7 +62,9 @@ public void testPluggableIdentityProvider() { CommonConfigurationKeys.IPC_IDENTITY_PROVIDER_KEY, IdentityProvider.class); - assertTrue(providers.size() == 1); + assertThat(providers) + .describedAs("provider list") + .hasSize(1); IdentityProvider ip = providers.get(0); assertNotNull(ip); @@ -69,14 +72,20 @@ public void testPluggableIdentityProvider() { } @Test - public void testUserIdentityProvider() throws IOException { + public void testUserIdentityProvider() throws Exception { UserIdentityProvider uip = new UserIdentityProvider(); - String identity = uip.makeIdentity(new FakeSchedulable()); + FakeSchedulable fakeSchedulable = new FakeSchedulable(); + String identity = uip.makeIdentity(fakeSchedulable); // Get our username UserGroupInformation ugi = 
UserGroupInformation.getCurrentUser(); String username = ugi.getUserName(); assertEquals(username, identity); + + // FakeSchedulable doesn't override getCallerContext() + // accessing it should throw an UnsupportedOperationException + LambdaTestUtils.intercept(UnsupportedOperationException.class, + "Invalid operation.", fakeSchedulable::getCallerContext); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestMultipleProtocolServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestMultipleProtocolServer.java index 10e23baefef9b..c1b0858697682 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestMultipleProtocolServer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestMultipleProtocolServer.java @@ -45,7 +45,7 @@ public void testPBService() throws Exception { // Set RPC engine to protobuf RPC engine Configuration conf2 = new Configuration(); RPC.setProtocolEngine(conf2, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); TestRpcService client = RPC.getProxy(TestRpcService.class, 0, addr, conf2); TestProtoBufRpc.testProtoBufRpc(client); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRPCCompatibility.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRPCCompatibility.java index dfb9e934f6055..d813c6b784f5d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRPCCompatibility.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRPCCompatibility.java @@ -25,8 +25,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.ipc.protobuf.TestProtos.EchoRequestProto; -import org.apache.hadoop.ipc.protobuf.TestProtos.EchoResponseProto; import 
org.apache.hadoop.ipc.protobuf.TestProtos.EmptyRequestProto; import org.apache.hadoop.ipc.protobuf.TestProtos.EmptyResponseProto; import org.apache.hadoop.ipc.protobuf.TestProtos.OptRequestProto; @@ -138,7 +136,7 @@ public void testProtocolVersionMismatch() throws IOException, ServiceException { conf = new Configuration(); conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, 1024); // Set RPC engine to protobuf RPC engine - RPC.setProtocolEngine(conf, NewRpcService.class, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, NewRpcService.class, ProtobufRpcEngine2.class); // Create server side implementation NewServerImpl serverImpl = new NewServerImpl(); @@ -151,7 +149,7 @@ public void testProtocolVersionMismatch() throws IOException, ServiceException { server.start(); - RPC.setProtocolEngine(conf, OldRpcService.class, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, OldRpcService.class, ProtobufRpcEngine2.class); OldRpcService proxy = RPC.getProxy(OldRpcService.class, 0, addr, conf); // Verify that exception is thrown if protocolVersion is mismatch between @@ -168,7 +166,8 @@ public void testProtocolVersionMismatch() throws IOException, ServiceException { } // Verify that missing of optional field is still compatible in RPC call. 
- RPC.setProtocolEngine(conf, NewerRpcService.class, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, NewerRpcService.class, + ProtobufRpcEngine2.class); NewerRpcService newProxy = RPC.getProxy(NewerRpcService.class, 0, addr, conf); newProxy.echo(null, emptyRequest); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpc.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpc.java index facb8fdd8b191..0740f056c8fc9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpc.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpc.java @@ -17,12 +17,10 @@ */ package org.apache.hadoop.ipc; -import org.apache.hadoop.thirdparty.protobuf.BlockingService; -import org.apache.hadoop.thirdparty.protobuf.RpcController; -import org.apache.hadoop.thirdparty.protobuf.ServiceException; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.ipc.RPC.RpcKind; import org.apache.hadoop.ipc.metrics.RpcMetrics; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto; import org.apache.hadoop.ipc.protobuf.TestProtos; @@ -30,38 +28,71 @@ import org.apache.hadoop.ipc.protobuf.TestProtos.EchoResponseProto; import org.apache.hadoop.ipc.protobuf.TestProtos.EmptyRequestProto; import org.apache.hadoop.ipc.protobuf.TestProtos.EmptyResponseProto; +import org.apache.hadoop.ipc.protobuf.TestProtosLegacy; import org.apache.hadoop.ipc.protobuf.TestRpcServiceProtos.TestProtobufRpc2Proto; import org.apache.hadoop.ipc.protobuf.TestRpcServiceProtos.TestProtobufRpcProto; +import org.apache.hadoop.ipc.protobuf.TestRpcServiceProtosLegacy; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.test.GenericTestUtils; 
+import org.apache.hadoop.thirdparty.protobuf.BlockingService; +import org.apache.hadoop.thirdparty.protobuf.RpcController; +import org.apache.hadoop.thirdparty.protobuf.ServiceException; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; import java.io.IOException; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; import java.util.concurrent.TimeoutException; -import static org.assertj.core.api.Assertions.assertThat; import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; +import static org.junit.Assume.assumeFalse; /** * Test for testing protocol buffer based RPC mechanism. * This test depends on test.proto definition of types in src/test/proto * and protobuf service definition from src/test/test_rpc_service.proto */ +@RunWith(Parameterized.class) public class TestProtoBufRpc extends TestRpcBase { private static RPC.Server server; private final static int SLEEP_DURATION = 1000; + /** + * Test with legacy protobuf implementation in same server. + */ + private boolean testWithLegacy; + /** + * Test with legacy protobuf implementation loaded first while creating the + * RPC server. + */ + private boolean testWithLegacyFirst; + + public TestProtoBufRpc(Boolean testWithLegacy, Boolean testWithLegacyFirst) { + this.testWithLegacy = testWithLegacy; + this.testWithLegacyFirst = testWithLegacyFirst; + } + @ProtocolInfo(protocolName = "testProto2", protocolVersion = 1) public interface TestRpcService2 extends TestProtobufRpc2Proto.BlockingInterface { } + @ProtocolInfo(protocolName="testProtoLegacy", protocolVersion = 1) + public interface TestRpcService2Legacy + extends TestRpcServiceProtosLegacy. 
+ TestProtobufRpc2Proto.BlockingInterface { + } + public static class PBServer2Impl implements TestRpcService2 { @Override @@ -88,23 +119,82 @@ public TestProtos.SleepResponseProto sleep(RpcController controller, } } + public static class PBServer2ImplLegacy implements TestRpcService2Legacy { + + @Override + public TestProtosLegacy.EmptyResponseProto ping2( + com.google.protobuf.RpcController unused, + TestProtosLegacy.EmptyRequestProto request) + throws com.google.protobuf.ServiceException { + return TestProtosLegacy.EmptyResponseProto.newBuilder().build(); + } + + @Override + public TestProtosLegacy.EchoResponseProto echo2( + com.google.protobuf.RpcController unused, + TestProtosLegacy.EchoRequestProto request) + throws com.google.protobuf.ServiceException { + return TestProtosLegacy.EchoResponseProto.newBuilder() + .setMessage(request.getMessage()).build(); + } + + @Override + public TestProtosLegacy.SleepResponseProto sleep( + com.google.protobuf.RpcController controller, + TestProtosLegacy.SleepRequestProto request) + throws com.google.protobuf.ServiceException { + try { + Thread.sleep(request.getMilliSeconds()); + } catch (InterruptedException ex) { + } + return TestProtosLegacy.SleepResponseProto.newBuilder().build(); + } + } + + @Parameters + public static Collection params() { + Collection params = new ArrayList(); + params.add(new Object[] {Boolean.TRUE, Boolean.TRUE }); + params.add(new Object[] {Boolean.TRUE, Boolean.FALSE }); + params.add(new Object[] {Boolean.FALSE, Boolean.FALSE }); + return params; + } + @Before + @SuppressWarnings("deprecation") public void setUp() throws IOException { // Setup server for both protocols conf = new Configuration(); conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, 1024); conf.setBoolean(CommonConfigurationKeys.IPC_SERVER_LOG_SLOW_RPC, true); // Set RPC engine to protobuf RPC engine - RPC.setProtocolEngine(conf, TestRpcService.class, ProtobufRpcEngine.class); - RPC.setProtocolEngine(conf, 
TestRpcService2.class, ProtobufRpcEngine.class); + if (testWithLegacy) { + RPC.setProtocolEngine(conf, TestRpcService2Legacy.class, + ProtobufRpcEngine.class); + } + RPC.setProtocolEngine(conf, TestRpcService.class, ProtobufRpcEngine2.class); + RPC.setProtocolEngine(conf, TestRpcService2.class, + ProtobufRpcEngine2.class); // Create server side implementation PBServerImpl serverImpl = new PBServerImpl(); BlockingService service = TestProtobufRpcProto .newReflectiveBlockingService(serverImpl); - // Get RPC server for server side implementation - server = new RPC.Builder(conf).setProtocol(TestRpcService.class) - .setInstance(service).setBindAddress(ADDRESS).setPort(PORT).build(); + if (testWithLegacy && testWithLegacyFirst) { + PBServer2ImplLegacy server2ImplLegacy = new PBServer2ImplLegacy(); + com.google.protobuf.BlockingService legacyService = + TestRpcServiceProtosLegacy.TestProtobufRpc2Proto + .newReflectiveBlockingService(server2ImplLegacy); + server = new RPC.Builder(conf).setProtocol(TestRpcService2Legacy.class) + .setInstance(legacyService).setBindAddress(ADDRESS).setPort(PORT) + .build(); + server.addProtocol(RpcKind.RPC_PROTOCOL_BUFFER, TestRpcService.class, + service); + } else { + // Get RPC server for server side implementation + server = new RPC.Builder(conf).setProtocol(TestRpcService.class) + .setInstance(service).setBindAddress(ADDRESS).setPort(PORT).build(); + } addr = NetUtils.getConnectAddress(server); // now the second protocol @@ -114,6 +204,16 @@ public void setUp() throws IOException { // Setup server for both protocols server.addProtocol(RPC.RpcKind.RPC_PROTOCOL_BUFFER, TestRpcService2.class, service2); + + if (testWithLegacy && !testWithLegacyFirst) { + PBServer2ImplLegacy server2ImplLegacy = new PBServer2ImplLegacy(); + com.google.protobuf.BlockingService legacyService = + TestRpcServiceProtosLegacy.TestProtobufRpc2Proto + .newReflectiveBlockingService(server2ImplLegacy); + server + .addProtocol(RpcKind.RPC_PROTOCOL_BUFFER, 
TestRpcService2Legacy.class, + legacyService); + } server.start(); } @@ -127,6 +227,10 @@ private TestRpcService2 getClient2() throws IOException { return RPC.getProxy(TestRpcService2.class, 0, addr, conf); } + private TestRpcService2Legacy getClientLegacy() throws IOException { + return RPC.getProxy(TestRpcService2Legacy.class, 0, addr, conf); + } + @Test (timeout=5000) public void testProtoBufRpc() throws Exception { TestRpcService client = getClient(addr, conf); @@ -178,10 +282,39 @@ public void testProtoBufRpc2() throws Exception { MetricsRecordBuilder rpcDetailedMetrics = getMetrics(server.getRpcDetailedMetrics().name()); assertCounterGt("Echo2NumOps", 0L, rpcDetailedMetrics); + + if (testWithLegacy) { + testProtobufLegacy(); + } + } + + private void testProtobufLegacy() + throws IOException, com.google.protobuf.ServiceException { + TestRpcService2Legacy client = getClientLegacy(); + + // Test ping method + client.ping2(null, TestProtosLegacy.EmptyRequestProto.newBuilder().build()); + + // Test echo method + TestProtosLegacy.EchoResponseProto echoResponse = client.echo2(null, + TestProtosLegacy.EchoRequestProto.newBuilder().setMessage("hello") + .build()); + assertThat(echoResponse.getMessage()).isEqualTo("hello"); + + // Ensure RPC metrics are updated + MetricsRecordBuilder rpcMetrics = getMetrics(server.getRpcMetrics().name()); + assertCounterGt("RpcQueueTimeNumOps", 0L, rpcMetrics); + assertCounterGt("RpcProcessingTimeNumOps", 0L, rpcMetrics); + + MetricsRecordBuilder rpcDetailedMetrics = + getMetrics(server.getRpcDetailedMetrics().name()); + assertCounterGt("Echo2NumOps", 0L, rpcDetailedMetrics); } @Test (timeout=5000) public void testProtoBufRandomException() throws Exception { + //No test with legacy + assumeFalse(testWithLegacy); TestRpcService client = getClient(addr, conf); try { @@ -199,6 +332,8 @@ public void testProtoBufRandomException() throws Exception { @Test(timeout=6000) public void testExtraLongRpc() throws Exception { + //No test with legacy 
+ assumeFalse(testWithLegacy); TestRpcService2 client = getClient2(); final String shortString = StringUtils.repeat("X", 4); // short message goes through @@ -218,6 +353,8 @@ public void testExtraLongRpc() throws Exception { @Test(timeout = 12000) public void testLogSlowRPC() throws IOException, ServiceException, TimeoutException, InterruptedException { + //No test with legacy + assumeFalse(testWithLegacy); TestRpcService2 client = getClient2(); // make 10 K fast calls for (int x = 0; x < 10000; x++) { @@ -243,6 +380,8 @@ public void testLogSlowRPC() throws IOException, ServiceException, @Test(timeout = 12000) public void testEnsureNoLogIfDisabled() throws IOException, ServiceException { + //No test with legacy + assumeFalse(testWithLegacy); // disable slow RPC logging server.setLogSlowRPC(false); TestRpcService2 client = getClient2(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpcServerHandoff.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpcServerHandoff.java index 32300d4f876e1..4328655270921 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpcServerHandoff.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestProtoBufRpcServerHandoff.java @@ -52,7 +52,7 @@ public void test() throws Exception { TestProtobufRpcHandoffProto.newReflectiveBlockingService(serverImpl); RPC.setProtocolEngine(conf, TestProtoBufRpcServerHandoffProtocol.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); RPC.Server server = new RPC.Builder(conf) .setProtocol(TestProtoBufRpcServerHandoffProtocol.class) .setInstance(blockingService) @@ -144,8 +144,8 @@ public static class TestProtoBufRpcServerHandoffServer TestProtos.SleepRequestProto2 request) throws ServiceException { final long startTime = System.currentTimeMillis(); - final ProtobufRpcEngineCallback callback = - 
ProtobufRpcEngine.Server.registerForDeferredResponse(); + final ProtobufRpcEngineCallback2 callback = + ProtobufRpcEngine2.Server.registerForDeferredResponse2(); final long sleepTime = request.getSleepTime(); new Thread() { @Override diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index 640ca3d2b89ed..fd1a8ed3f8cf2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -18,6 +18,11 @@ package org.apache.hadoop.ipc; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.io.retry.RetryUtils; +import org.apache.hadoop.ipc.metrics.RpcMetrics; + +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.thirdparty.protobuf.ServiceException; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; @@ -29,6 +34,7 @@ import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.Server.Call; import org.apache.hadoop.ipc.Server.Connection; +import org.apache.hadoop.ipc.metrics.RpcMetrics; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto; import org.apache.hadoop.ipc.protobuf.TestProtos; @@ -51,6 +57,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; @@ -58,13 +65,16 @@ import javax.net.SocketFactory; import java.io.Closeable; import java.io.IOException; +import java.io.InputStream; import java.io.InterruptedIOException; +import java.io.OutputStream; import 
java.lang.reflect.InvocationHandler; import java.lang.reflect.Method; import java.lang.reflect.Proxy; import java.net.ConnectException; import java.net.InetAddress; import java.net.InetSocketAddress; +import java.net.Socket; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; import java.security.PrivilegedAction; @@ -81,8 +91,11 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantLock; import static org.assertj.core.api.Assertions.assertThat; import static org.apache.hadoop.test.MetricsAsserts.assertCounter; @@ -284,6 +297,14 @@ public ProtocolProxy getProxy( rpcTimeout, connectionRetryPolicy, null, null); } + @Override + public ProtocolProxy getProxy(Class protocol, long clientVersion, + ConnectionId connId, Configuration conf, SocketFactory factory, + AlignmentContext alignmentContext) + throws IOException { + throw new UnsupportedOperationException("This proxy is not supported"); + } + @SuppressWarnings("unchecked") @Override public ProtocolProxy getProxy( @@ -359,7 +380,7 @@ public void testConfRpc() throws IOException { assertEquals(confReaders, server.getNumReaders()); server = newServerBuilder(conf) - .setNumHandlers(1).setnumReaders(3).setQueueSizePerHandler(200) + .setNumHandlers(1).setNumReaders(3).setQueueSizePerHandler(200) .setVerbose(false).build(); assertEquals(3, server.getNumReaders()); @@ -385,6 +406,53 @@ public void testProxyAddress() throws Exception { } } + @Test + public void testConnectionWithSocketFactory() throws IOException, ServiceException { + TestRpcService firstProxy = null; + TestRpcService secondProxy = null; + + Configuration newConf = new Configuration(conf); + newConf.set(CommonConfigurationKeysPublic. 
+ HADOOP_RPC_SOCKET_FACTORY_CLASS_DEFAULT_KEY, ""); + + RetryPolicy retryPolicy = RetryUtils.getDefaultRetryPolicy( + newConf, "Test.No.Such.Key", + true, + "Test.No.Such.Key", "10000,6", + null); + + // create a server with two handlers + Server server = setupTestServer(newConf, 2); + try { + // create the first client + firstProxy = getClient(addr, newConf); + // create the second client + secondProxy = getClient(addr, newConf); + + firstProxy.ping(null, newEmptyRequest()); + secondProxy.ping(null, newEmptyRequest()); + + Client client = ProtobufRpcEngine2.getClient(newConf); + assertEquals(1, client.getConnectionIds().size()); + + stop(null, firstProxy, secondProxy); + ProtobufRpcEngine2.clearClientCache(); + + // create the first client with index 1 + firstProxy = getMultipleClientWithIndex(addr, newConf, retryPolicy, 1); + // create the second client with index 2 + secondProxy = getMultipleClientWithIndex(addr, newConf, retryPolicy, 2); + firstProxy.ping(null, newEmptyRequest()); + secondProxy.ping(null, newEmptyRequest()); + + Client client2 = ProtobufRpcEngine2.getClient(newConf); + assertEquals(2, client2.getConnectionIds().size()); + } finally { + System.out.println("Down slow rpc testing"); + stop(server, firstProxy, secondProxy); + } + } + @Test public void testSlowRpc() throws IOException, ServiceException { Server server; @@ -932,6 +1000,196 @@ public void run() { } } + /** + * This tests the case where the server isn't receiving new data and + * multiple threads queue up to send rpc requests. Only one of the requests + * should be written and all of the calling threads should be interrupted. + * + * We use a mock SocketFactory so that we can control when the input and + * output streams are frozen. 
+ */ + @Test(timeout=30000) + public void testSlowConnection() throws Exception { + SocketFactory mockFactory = Mockito.mock(SocketFactory.class); + Socket mockSocket = Mockito.mock(Socket.class); + Mockito.when(mockFactory.createSocket()).thenReturn(mockSocket); + Mockito.when(mockSocket.getPort()).thenReturn(1234); + Mockito.when(mockSocket.getLocalPort()).thenReturn(2345); + MockOutputStream mockOutputStream = new MockOutputStream(); + Mockito.when(mockSocket.getOutputStream()).thenReturn(mockOutputStream); + // Use an input stream that always blocks + Mockito.when(mockSocket.getInputStream()).thenReturn(new InputStream() { + @Override + public int read() throws IOException { + // wait forever + while (true) { + try { + Thread.sleep(TimeUnit.DAYS.toMillis(1)); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new InterruptedIOException("test"); + } + } + } + }); + Configuration clientConf = new Configuration(); + // disable ping & timeout to minimize traffic + clientConf.setBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, false); + clientConf.setInt(CommonConfigurationKeys.IPC_CLIENT_RPC_TIMEOUT_KEY, 0); + RPC.setProtocolEngine(clientConf, TestRpcService.class, ProtobufRpcEngine.class); + // set async mode so that we don't need to implement the input stream + final boolean wasAsync = Client.isAsynchronousMode(); + TestRpcService client = null; + try { + Client.setAsynchronousMode(true); + client = RPC.getProtocolProxy( + TestRpcService.class, + 0, + new InetSocketAddress("localhost", 1234), + UserGroupInformation.getCurrentUser(), + clientConf, + mockFactory).getProxy(); + // The connection isn't actually made until the first call. 
+ client.ping(null, newEmptyRequest()); + mockOutputStream.waitForFlush(1); + final long headerAndFirst = mockOutputStream.getBytesWritten(); + client.ping(null, newEmptyRequest()); + mockOutputStream.waitForFlush(2); + final long second = mockOutputStream.getBytesWritten() - headerAndFirst; + // pause the writer thread + mockOutputStream.pause(); + // create a set of threads to create calls that will back up + ExecutorService pool = Executors.newCachedThreadPool(); + Future[] futures = new Future[numThreads]; + final AtomicInteger doneThreads = new AtomicInteger(0); + for(int thread = 0; thread < numThreads; ++thread) { + final TestRpcService finalClient = client; + futures[thread] = pool.submit(new Callable() { + @Override + public Void call() throws Exception { + finalClient.ping(null, newEmptyRequest()); + doneThreads.incrementAndGet(); + return null; + } + }); + } + // wait until the threads have started writing + mockOutputStream.waitForWriters(); + // interrupt all the threads + for(int thread=0; thread < numThreads; ++thread) { + assertTrue("cancel thread " + thread, + futures[thread].cancel(true)); + } + // wait until all the writers are cancelled + pool.shutdown(); + pool.awaitTermination(10, TimeUnit.SECONDS); + mockOutputStream.resume(); + // wait for the in flight rpc request to be flushed + mockOutputStream.waitForFlush(3); + // All the threads should have been interrupted + assertEquals(0, doneThreads.get()); + // make sure that only one additional rpc request was sent + assertEquals(headerAndFirst + second * 2, + mockOutputStream.getBytesWritten()); + } finally { + Client.setAsynchronousMode(wasAsync); + if (client != null) { + RPC.stopProxy(client); + } + } + } + + private static final class MockOutputStream extends OutputStream { + private long bytesWritten = 0; + private AtomicInteger flushCount = new AtomicInteger(0); + private ReentrantLock lock = new ReentrantLock(true); + + @Override + public synchronized void write(int b) throws IOException 
{ + lock.lock(); + bytesWritten += 1; + lock.unlock(); + } + + @Override + public void flush() { + flushCount.incrementAndGet(); + } + + public synchronized long getBytesWritten() { + return bytesWritten; + } + + public void pause() { + lock.lock(); + } + + public void resume() { + lock.unlock(); + } + + private static final int DELAY_MS = 250; + + /** + * Wait for the Nth flush, which we assume will happen exactly when the + * Nth RPC request is sent. + * @param flush the total flush count to wait for + * @throws InterruptedException + */ + public void waitForFlush(int flush) throws InterruptedException { + while (flushCount.get() < flush) { + Thread.sleep(DELAY_MS); + } + } + + public void waitForWriters() throws InterruptedException { + while (!lock.hasQueuedThreads()) { + Thread.sleep(DELAY_MS); + } + } + } + + /** + * This test causes an exception in the RPC connection setup to make + * sure that threads aren't leaked. + */ + @Test(timeout=30000) + public void testBadSetup() throws Exception { + SocketFactory mockFactory = Mockito.mock(SocketFactory.class); + Mockito.when(mockFactory.createSocket()) + .thenThrow(new IOException("can't connect")); + Configuration clientConf = new Configuration(); + // Set an illegal value to cause an exception in the constructor + clientConf.set(CommonConfigurationKeys.IPC_MAXIMUM_RESPONSE_LENGTH, + "xxx"); + RPC.setProtocolEngine(clientConf, TestRpcService.class, + ProtobufRpcEngine.class); + TestRpcService client = null; + int threadCount = Thread.getAllStackTraces().size(); + try { + try { + client = RPC.getProtocolProxy( + TestRpcService.class, + 0, + new InetSocketAddress("localhost", 1234), + UserGroupInformation.getCurrentUser(), + clientConf, + mockFactory).getProxy(); + client.ping(null, newEmptyRequest()); + assertTrue("Didn't throw exception!", false); + } catch (ServiceException nfe) { + // ensure no extra threads are running. 
+ assertEquals(threadCount, Thread.getAllStackTraces().size()); + } catch (Throwable t) { + assertTrue("wrong exception: " + t, false); + } + } finally { + if (client != null) { + RPC.stopProxy(client); + } + } + } + @Test public void testConnectionPing() throws Exception { Server server; @@ -1095,7 +1353,9 @@ public TestRpcService run() { proxy.lockAndSleep(null, newSleepRequest(5)); rpcMetrics = getMetrics(server.getRpcMetrics().name()); - assertGauge("RpcLockWaitTimeAvgTime", 10000.0, rpcMetrics); + assertGauge("RpcLockWaitTimeAvgTime", + (double)(server.getRpcMetrics().getMetricsTimeUnit().convert(10L, + TimeUnit.SECONDS)), rpcMetrics); } finally { if (proxy2 != null) { RPC.stopProxy(proxy2); @@ -1290,6 +1550,43 @@ public void testDecayRpcSchedulerMetrics() throws Exception { } } + @Test (timeout=30000) + public void testProtocolUserPriority() throws Exception { + final String ns = CommonConfigurationKeys.IPC_NAMESPACE + ".0"; + conf.set(CLIENT_PRINCIPAL_KEY, "clientForProtocol"); + Server server = null; + try { + server = setupDecayRpcSchedulerandTestServer(ns + "."); + + UserGroupInformation ugi = UserGroupInformation.createRemoteUser("user"); + // normal users start with priority 0. + Assert.assertEquals(0, server.getPriorityLevel(ugi)); + // calls for a protocol defined client will have priority of 0. + Assert.assertEquals(0, server.getPriorityLevel(newSchedulable(ugi))); + + // protocol defined client will have top priority of -1. + ugi = UserGroupInformation.createRemoteUser("clientForProtocol"); + Assert.assertEquals(-1, server.getPriorityLevel(ugi)); + // calls for a protocol defined client will have priority of 0. 
+ Assert.assertEquals(0, server.getPriorityLevel(newSchedulable(ugi))); + } finally { + stop(server, null); + } + } + + private static Schedulable newSchedulable(UserGroupInformation ugi) { + return new Schedulable(){ + @Override + public UserGroupInformation getUserGroupInformation() { + return ugi; + } + @Override + public int getPriorityLevel() { + return 0; // doesn't matter. + } + }; + } + private Server setupDecayRpcSchedulerandTestServer(String ns) throws Exception { final int queueSizePerHandler = 3; @@ -1530,6 +1827,11 @@ public RpcStatusProto getRpcStatusProto() { // if it wasn't fatal, verify there's only one open connection. Connection[] conns = server.getConnections(); assertEquals(reqName, 1, conns.length); + String connectionInfo = conns[0].toString(); + LOG.info("Connection is from: {}", connectionInfo); + assertEquals( + "Connection string representation should include both IP address and Host name", 2, + connectionInfo.split(" / ").length); // verify whether the connection should have been reused. if (isDisconnected) { assertNotSame(reqName, lastConn, conns[0]); @@ -1550,6 +1852,136 @@ public RpcStatusProto getRpcStatusProto() { } } + @Test + public void testSetProtocolEngine() { + Configuration conf = new Configuration(); + RPC.setProtocolEngine(conf, StoppedProtocol.class, StoppedRpcEngine.class); + RpcEngine rpcEngine = RPC.getProtocolEngine(StoppedProtocol.class, conf); + assertTrue(rpcEngine instanceof StoppedRpcEngine); + + RPC.setProtocolEngine(conf, StoppedProtocol.class, ProtobufRpcEngine.class); + rpcEngine = RPC.getProtocolEngine(StoppedProtocol.class, conf); + assertTrue(rpcEngine instanceof StoppedRpcEngine); + } + + @Test + public void testRpcMetricsInNanos() throws Exception { + final Server server; + TestRpcService proxy = null; + + final int interval = 1; + conf.setBoolean(CommonConfigurationKeys. + RPC_METRICS_QUANTILE_ENABLE, true); + conf.set(CommonConfigurationKeys. 
+ RPC_METRICS_PERCENTILES_INTERVALS_KEY, "" + interval); + conf.set(CommonConfigurationKeys.RPC_METRICS_TIME_UNIT, "NANOSECONDS"); + + server = setupTestServer(conf, 5); + String testUser = "testUserInNanos"; + UserGroupInformation anotherUser = + UserGroupInformation.createRemoteUser(testUser); + TestRpcService proxy2 = + anotherUser.doAs((PrivilegedAction) () -> { + try { + return RPC.getProxy(TestRpcService.class, 0, + server.getListenerAddress(), conf); + } catch (IOException e) { + LOG.error("Something went wrong.", e); + } + return null; + }); + try { + proxy = getClient(addr, conf); + for (int i = 0; i < 100; i++) { + proxy.ping(null, newEmptyRequest()); + proxy.echo(null, newEchoRequest("" + i)); + proxy2.echo(null, newEchoRequest("" + i)); + } + MetricsRecordBuilder rpcMetrics = + getMetrics(server.getRpcMetrics().name()); + assertEquals("Expected zero rpc lock wait time", + 0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001); + MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s", + rpcMetrics); + MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s", + rpcMetrics); + + proxy.lockAndSleep(null, newSleepRequest(5)); + rpcMetrics = getMetrics(server.getRpcMetrics().name()); + assertGauge("RpcLockWaitTimeAvgTime", + (double)(server.getRpcMetrics().getMetricsTimeUnit().convert(10L, + TimeUnit.SECONDS)), rpcMetrics); + LOG.info("RpcProcessingTimeAvgTime: {} , RpcQueueTimeAvgTime: {}", + getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics), + getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics)); + + assertTrue(getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics) + > 4000000D); + assertTrue(getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics) + > 4000D); + } finally { + if (proxy2 != null) { + RPC.stopProxy(proxy2); + } + stop(server, proxy); + } + } + + @Test + public void testNumTotalRequestsMetrics() throws Exception { + UserGroupInformation ugi = UserGroupInformation. 
+ createUserForTesting("userXyz", new String[0]); + + final Server server = setupTestServer(conf, 1); + + ExecutorService executorService = null; + try { + RpcMetrics rpcMetrics = server.getRpcMetrics(); + assertEquals(0, rpcMetrics.getTotalRequests()); + assertEquals(0, rpcMetrics.getTotalRequestsPerSecond()); + + List> externalCallList = new ArrayList<>(); + + executorService = Executors.newSingleThreadExecutor( + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("testNumTotalRequestsMetrics") + .build()); + AtomicInteger rps = new AtomicInteger(0); + CountDownLatch countDownLatch = new CountDownLatch(1); + executorService.submit(() -> { + while (true) { + int numRps = (int) rpcMetrics.getTotalRequestsPerSecond(); + rps.getAndSet(numRps); + if (rps.get() > 0) { + countDownLatch.countDown(); + break; + } + } + }); + + for (int i = 0; i < 100000; i++) { + externalCallList.add(newExtCall(ugi, () -> null)); + } + for (ExternalCall externalCall : externalCallList) { + server.queueCall(externalCall); + } + for (ExternalCall externalCall : externalCallList) { + externalCall.get(); + } + + assertEquals(100000, rpcMetrics.getTotalRequests()); + if (countDownLatch.await(10, TimeUnit.SECONDS)) { + assertTrue(rps.get() > 10); + } else { + throw new AssertionError("total requests per seconds are still 0"); + } + } finally { + if (executorService != null) { + executorService.shutdown(); + } + server.stop(); + } + } + public static void main(String[] args) throws Exception { new TestRPC().testCallsInternal(conf); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCCompatibility.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCCompatibility.java index ffee086fa9801..22fdcbbe14e65 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCCompatibility.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCCompatibility.java @@ 
-114,19 +114,19 @@ public void setUp() { ProtocolSignature.resetCache(); RPC.setProtocolEngine(conf, - TestProtocol0.class, ProtobufRpcEngine.class); + TestProtocol0.class, ProtobufRpcEngine2.class); RPC.setProtocolEngine(conf, - TestProtocol1.class, ProtobufRpcEngine.class); + TestProtocol1.class, ProtobufRpcEngine2.class); RPC.setProtocolEngine(conf, - TestProtocol2.class, ProtobufRpcEngine.class); + TestProtocol2.class, ProtobufRpcEngine2.class); RPC.setProtocolEngine(conf, - TestProtocol3.class, ProtobufRpcEngine.class); + TestProtocol3.class, ProtobufRpcEngine2.class); RPC.setProtocolEngine(conf, - TestProtocol4.class, ProtobufRpcEngine.class); + TestProtocol4.class, ProtobufRpcEngine2.class); } @After diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCWaitForProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCWaitForProxy.java index d810fe3c5a1e0..90973d2674c01 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCWaitForProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPCWaitForProxy.java @@ -44,7 +44,7 @@ public class TestRPCWaitForProxy extends TestRpcBase { @Before public void setupProtocolEngine() { RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestReuseRpcConnections.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestReuseRpcConnections.java index 2729dc3cd9daa..65558a7980a2d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestReuseRpcConnections.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestReuseRpcConnections.java @@ -26,7 +26,6 @@ import org.apache.hadoop.io.retry.RetryPolicy; import 
org.apache.hadoop.io.retry.TestConnectionRetryPolicy; import org.apache.hadoop.ipc.Client.ConnectionId; -import org.apache.hadoop.ipc.TestRpcBase.TestRpcService; import org.junit.Before; import org.junit.Test; @@ -129,7 +128,7 @@ private void verifyRetryPolicyReuseConnections( try { proxy1 = getClient(addr, newConf, retryPolicy1); proxy1.ping(null, newEmptyRequest()); - client = ProtobufRpcEngine.getClient(newConf); + client = ProtobufRpcEngine2.getClient(newConf); final Set conns = client.getConnectionIds(); assertEquals("number of connections in cache is wrong", 1, conns.size()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRpcBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRpcBase.java index bf24d680dde2e..5b5c8bbaa9b73 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRpcBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRpcBase.java @@ -18,13 +18,17 @@ package org.apache.hadoop.ipc; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.thirdparty.protobuf.BlockingService; import org.apache.hadoop.thirdparty.protobuf.RpcController; import org.apache.hadoop.thirdparty.protobuf.ServiceException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.protobuf.TestProtos; import org.apache.hadoop.ipc.protobuf.TestRpcServiceProtos; import org.apache.hadoop.net.NetUtils; @@ -62,6 +66,8 @@ public class TestRpcBase { protected final static String SERVER_PRINCIPAL_KEY = "test.ipc.server.principal"; + protected final static String CLIENT_PRINCIPAL_KEY = + "test.ipc.client.principal"; 
protected final static String ADDRESS = "0.0.0.0"; protected final static int PORT = 0; protected static InetSocketAddress addr; @@ -70,7 +76,7 @@ public class TestRpcBase { protected void setupConf() { conf = new Configuration(); // Set RPC engine to protobuf RPC engine - RPC.setProtocolEngine(conf, TestRpcService.class, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, TestRpcService.class, ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); } @@ -122,18 +128,19 @@ protected static RPC.Server setupTestServer( return server; } - protected static TestRpcService getClient(InetSocketAddress serverAddr, - Configuration clientConf) + protected static TestRpcService getClient(InetSocketAddress serverAddr, Configuration clientConf) throws ServiceException { - try { - return RPC.getProxy(TestRpcService.class, 0, serverAddr, clientConf); - } catch (IOException e) { - throw new ServiceException(e); - } + return getClient(serverAddr, clientConf, null); } protected static TestRpcService getClient(InetSocketAddress serverAddr, - Configuration clientConf, final RetryPolicy connectionRetryPolicy) + Configuration clientConf, RetryPolicy connectionRetryPolicy) throws ServiceException { + return getClient(serverAddr, clientConf, connectionRetryPolicy, null); + } + + protected static TestRpcService getClient(InetSocketAddress serverAddr, + Configuration clientConf, final RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth) throws ServiceException { try { return RPC.getProtocolProxy( @@ -144,17 +151,60 @@ protected static TestRpcService getClient(InetSocketAddress serverAddr, clientConf, NetUtils.getDefaultSocketFactory(clientConf), RPC.getRpcTimeout(clientConf), - connectionRetryPolicy, null).getProxy(); + connectionRetryPolicy, fallbackToSimpleAuth).getProxy(); } catch (IOException e) { throw new ServiceException(e); } } - protected static void stop(Server server, TestRpcService proxy) { - if (proxy != null) { - try { - 
RPC.stopProxy(proxy); - } catch (Exception ignored) {} + /** + * Try to obtain a proxy of TestRpcService with an index. + * @param serverAddr input server address + * @param clientConf input client configuration + * @param retryPolicy input retryPolicy + * @param index input index + * @return one proxy of TestRpcService + */ + protected static TestRpcService getMultipleClientWithIndex(InetSocketAddress serverAddr, + Configuration clientConf, RetryPolicy retryPolicy, int index) + throws ServiceException, IOException { + MockConnectionId connectionId = new MockConnectionId(serverAddr, + TestRpcService.class, UserGroupInformation.getCurrentUser(), + RPC.getRpcTimeout(clientConf), retryPolicy, clientConf, index); + return getClient(connectionId, clientConf); + } + + /** + * Obtain a TestRpcService Proxy by a connectionId. + * @param connId input connectionId + * @param clientConf input configuration + * @return a TestRpcService Proxy + * @throws ServiceException a ServiceException + */ + protected static TestRpcService getClient(ConnectionId connId, + Configuration clientConf) throws ServiceException { + try { + return RPC.getProtocolProxy( + TestRpcService.class, + 0, + connId, + clientConf, + NetUtils.getDefaultSocketFactory(clientConf), + null).getProxy(); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + protected static void stop(Server server, TestRpcService... 
proxies) { + if (proxies != null) { + for (TestRpcService proxy : proxies) { + if (proxy != null) { + try { + RPC.stopProxy(proxy); + } catch (Exception ignored) {} + } + } } if (server != null) { @@ -185,6 +235,40 @@ protected static int countThreads(String search) { return count; } + public static class MockConnectionId extends ConnectionId { + private static final int PRIME = 16777619; + private final int index; + + public MockConnectionId(InetSocketAddress address, Class protocol, + UserGroupInformation ticket, int rpcTimeout, RetryPolicy connectionRetryPolicy, + Configuration conf, int index) { + super(address, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf); + this.index = index; + } + + @Override + public int hashCode() { + return new HashCodeBuilder() + .append(PRIME * super.hashCode()) + .append(this.index) + .toHashCode(); + } + + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + if (obj instanceof MockConnectionId) { + MockConnectionId other = (MockConnectionId)obj; + return new EqualsBuilder() + .append(this.index, other.index) + .isEquals(); + } + return false; + } + } + public static class TestTokenIdentifier extends TokenIdentifier { private Text tokenid; private Text realUser; @@ -271,7 +355,8 @@ public Token selectToken(Text service, } } - @KerberosInfo(serverPrincipal = SERVER_PRINCIPAL_KEY) + @KerberosInfo(serverPrincipal = SERVER_PRINCIPAL_KEY, + clientPrincipal = CLIENT_PRINCIPAL_KEY) @TokenInfo(TestTokenSelector.class) @ProtocolInfo(protocolName = "org.apache.hadoop.ipc.TestRpcBase$TestRpcService", protocolVersion = 1) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java index 72f73822b6fd0..662faea599648 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java @@ -72,6 +72,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; @@ -169,7 +170,7 @@ public void setup() { clientFallBackToSimpleAllowed = true; // Set RPC engine to protobuf RPC engine - RPC.setProtocolEngine(conf, TestRpcService.class, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, TestRpcService.class, ProtobufRpcEngine2.class); } static String getQOPNames (QualityOfProtection[] qops){ @@ -356,7 +357,7 @@ public void testPerConnectionConf() throws Exception { newConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, timeouts[0]); proxy1 = getClient(addr, newConf); proxy1.getAuthMethod(null, newEmptyRequest()); - client = ProtobufRpcEngine.getClient(newConf); + client = ProtobufRpcEngine2.getClient(newConf); Set conns = client.getConnectionIds(); assertEquals("number of connections in cache is wrong", 1, conns.size()); // same conf, connection should be re-used @@ -533,13 +534,16 @@ public void handle(Callback[] callbacks) } private static Pattern BadToken = - Pattern.compile(".*DIGEST-MD5: digest response format violation.*"); + Pattern.compile("^" + RemoteException.class.getName() + + "\\("+ SaslException.class.getName() + "\\): " + + "DIGEST-MD5: digest response format violation.*"); private static Pattern KrbFailed = Pattern.compile(".*Failed on local exception:.* " + "Failed to specify server's Kerberos principal name.*"); private static Pattern Denied(AuthMethod method) { - return Pattern.compile(".*RemoteException.*AccessControlException.*: " - + method + " authentication is not enabled.*"); + return Pattern.compile("^" + RemoteException.class.getName() + + "\\(" + AccessControlException.class.getName() + "\\): " + + method + " 
authentication is not enabled.*"); } private static Pattern No(AuthMethod ... method) { String methods = StringUtils.join(method, ",\\s*"); @@ -547,10 +551,10 @@ private static Pattern No(AuthMethod ... method) { "Client cannot authenticate via:\\[" + methods + "\\].*"); } private static Pattern NoTokenAuth = - Pattern.compile(".*IllegalArgumentException: " + + Pattern.compile("^" + IllegalArgumentException.class.getName() + ": " + "TOKEN authentication requires a secret manager"); private static Pattern NoFallback = - Pattern.compile(".*Failed on local exception:.* " + + Pattern.compile("^" + AccessControlException.class.getName() + ":.* " + "Server asks us to fall back to SIMPLE auth, " + "but this client is configured to only allow secure connections.*"); @@ -566,6 +570,72 @@ public void testSimpleServer() throws Exception { assertAuthEquals(SIMPLE, getAuthMethod(KERBEROS, SIMPLE, UseToken.OTHER)); } + /** + * In DfsClient there is a fallback mechanism to simple auth, which passes in an atomic boolean + * to the ipc Client, which then sets it during setupIOStreams. + * SetupIOStreams were running only once per connection, so if two separate DfsClient was + * instantiated, then due to the connection caching inside the ipc client, the second DfsClient + * did not have the passed in atomic boolean set properly if the first client was not yet closed, + * as setupIOStreams was yielding to set up new streams as it has reused the already existing + * connection. + * This test mimics this behaviour, and asserts the fallback whether it is set correctly. 
+ * @see HADOOP-17975 + */ + @Test + public void testClientFallbackToSimpleAuthForASecondClient() throws Exception { + Configuration serverConf = createConfForAuth(SIMPLE); + Server server = startServer(serverConf, + setupServerUgi(SIMPLE, serverConf), + createServerSecretManager(SIMPLE, new TestTokenSecretManager())); + final InetSocketAddress serverAddress = NetUtils.getConnectAddress(server); + + clientFallBackToSimpleAllowed = true; + Configuration clientConf = createConfForAuth(KERBEROS); + UserGroupInformation clientUgi = setupClientUgi(KERBEROS, clientConf); + + AtomicBoolean fallbackToSimpleAuth1 = new AtomicBoolean(); + AtomicBoolean fallbackToSimpleAuth2 = new AtomicBoolean(); + try { + LOG.info("trying ugi:"+ clientUgi +" tokens:"+ clientUgi.getTokens()); + clientUgi.doAs((PrivilegedExceptionAction) () -> { + TestRpcService proxy1 = null; + TestRpcService proxy2 = null; + try { + proxy1 = getClient(serverAddress, clientConf, null, fallbackToSimpleAuth1); + proxy1.ping(null, newEmptyRequest()); + // make sure the other side thinks we are who we said we are!!! + assertEquals(clientUgi.getUserName(), + proxy1.getAuthUser(null, newEmptyRequest()).getUser()); + AuthMethod authMethod = + convert(proxy1.getAuthMethod(null, newEmptyRequest())); + assertAuthEquals(SIMPLE, authMethod.toString()); + + proxy2 = getClient(serverAddress, clientConf, null, fallbackToSimpleAuth2); + proxy2.ping(null, newEmptyRequest()); + // make sure the other side thinks we are who we said we are!!! 
+ assertEquals(clientUgi.getUserName(), + proxy2.getAuthUser(null, newEmptyRequest()).getUser()); + AuthMethod authMethod2 = + convert(proxy2.getAuthMethod(null, newEmptyRequest())); + assertAuthEquals(SIMPLE, authMethod2.toString()); + } finally { + if (proxy1 != null) { + RPC.stopProxy(proxy1); + } + if (proxy2 != null) { + RPC.stopProxy(proxy2); + } + } + return null; + }); + } finally { + server.stop(); + } + + assertTrue("First client does not set to fall back properly.", fallbackToSimpleAuth1.get()); + assertTrue("Second client does not set to fall back properly.", fallbackToSimpleAuth2.get()); + } + @Test public void testNoClientFallbackToSimple() throws Exception { @@ -812,22 +882,44 @@ private String getAuthMethod( return e.toString(); } } - + private String internalGetAuthMethod( final AuthMethod clientAuth, final AuthMethod serverAuth, final UseToken tokenType) throws Exception { - - final Configuration serverConf = new Configuration(conf); - serverConf.set(HADOOP_SECURITY_AUTHENTICATION, serverAuth.toString()); - UserGroupInformation.setConfiguration(serverConf); - - final UserGroupInformation serverUgi = (serverAuth == KERBEROS) - ? 
UserGroupInformation.createRemoteUser("server/localhost@NONE") - : UserGroupInformation.createRemoteUser("server"); - serverUgi.setAuthenticationMethod(serverAuth); final TestTokenSecretManager sm = new TestTokenSecretManager(); + + Configuration serverConf = createConfForAuth(serverAuth); + Server server = startServer( + serverConf, + setupServerUgi(serverAuth, serverConf), + createServerSecretManager(serverAuth, sm)); + final InetSocketAddress serverAddress = NetUtils.getConnectAddress(server); + + final Configuration clientConf = createConfForAuth(clientAuth); + final UserGroupInformation clientUgi = setupClientUgi(clientAuth, clientConf); + + setupTokenIfNeeded(tokenType, sm, clientUgi, serverAddress); + + try { + return createClientAndQueryAuthMethod(serverAddress, clientConf, clientUgi, null); + } finally { + server.stop(); + } + } + + private Configuration createConfForAuth(AuthMethod clientAuth) { + final Configuration clientConf = new Configuration(conf); + clientConf.set(HADOOP_SECURITY_AUTHENTICATION, clientAuth.toString()); + clientConf.setBoolean( + CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, + clientFallBackToSimpleAllowed); + return clientConf; + } + + private SecretManager createServerSecretManager( + AuthMethod serverAuth, TestTokenSecretManager sm) { boolean useSecretManager = (serverAuth != SIMPLE); if (enableSecretManager != null) { useSecretManager &= enableSecretManager; @@ -836,26 +928,43 @@ private String internalGetAuthMethod( useSecretManager |= forceSecretManager; } final SecretManager serverSm = useSecretManager ? 
sm : null; + return serverSm; + } + private Server startServer(Configuration serverConf, UserGroupInformation serverUgi, + SecretManager serverSm) throws IOException, InterruptedException { Server server = serverUgi.doAs(new PrivilegedExceptionAction() { @Override public Server run() throws IOException { return setupTestServer(serverConf, 5, serverSm); } }); + return server; + } - final Configuration clientConf = new Configuration(conf); - clientConf.set(HADOOP_SECURITY_AUTHENTICATION, clientAuth.toString()); - clientConf.setBoolean( - CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, - clientFallBackToSimpleAllowed); + private UserGroupInformation setupServerUgi(AuthMethod serverAuth, + Configuration serverConf) { + UserGroupInformation.setConfiguration(serverConf); + + final UserGroupInformation serverUgi = (serverAuth == KERBEROS) + ? UserGroupInformation.createRemoteUser("server/localhost@NONE") + : UserGroupInformation.createRemoteUser("server"); + serverUgi.setAuthenticationMethod(serverAuth); + return serverUgi; + } + + private UserGroupInformation setupClientUgi(AuthMethod clientAuth, + Configuration clientConf) { UserGroupInformation.setConfiguration(clientConf); - + final UserGroupInformation clientUgi = UserGroupInformation.createRemoteUser("client"); - clientUgi.setAuthenticationMethod(clientAuth); + clientUgi.setAuthenticationMethod(clientAuth); + return clientUgi; + } - final InetSocketAddress addr = NetUtils.getConnectAddress(server); + private void setupTokenIfNeeded(UseToken tokenType, TestTokenSecretManager sm, + UserGroupInformation clientUgi, InetSocketAddress addr) { if (tokenType != UseToken.NONE) { TestTokenIdentifier tokenId = new TestTokenIdentifier( new Text(clientUgi.getUserName())); @@ -878,44 +987,44 @@ public Server run() throws IOException { } clientUgi.addToken(token); } + } - try { - LOG.info("trying ugi:"+clientUgi+" tokens:"+clientUgi.getTokens()); - return clientUgi.doAs(new PrivilegedExceptionAction() { - 
@Override - public String run() throws IOException { - TestRpcService proxy = null; - try { - proxy = getClient(addr, clientConf); - - proxy.ping(null, newEmptyRequest()); - // make sure the other side thinks we are who we said we are!!! - assertEquals(clientUgi.getUserName(), - proxy.getAuthUser(null, newEmptyRequest()).getUser()); - AuthMethod authMethod = - convert(proxy.getAuthMethod(null, newEmptyRequest())); - // verify sasl completed with correct QOP - assertEquals((authMethod != SIMPLE) ? expectedQop.saslQop : null, - RPC.getConnectionIdForProxy(proxy).getSaslQop()); - return authMethod != null ? authMethod.toString() : null; - } catch (ServiceException se) { - if (se.getCause() instanceof RemoteException) { - throw (RemoteException) se.getCause(); - } else if (se.getCause() instanceof IOException) { - throw (IOException) se.getCause(); - } else { - throw new RuntimeException(se.getCause()); - } - } finally { - if (proxy != null) { - RPC.stopProxy(proxy); - } + private String createClientAndQueryAuthMethod(InetSocketAddress serverAddress, + Configuration clientConf, UserGroupInformation clientUgi, AtomicBoolean fallbackToSimpleAuth) + throws IOException, InterruptedException { + LOG.info("trying ugi:"+ clientUgi +" tokens:"+ clientUgi.getTokens()); + return clientUgi.doAs(new PrivilegedExceptionAction() { + @Override + public String run() throws IOException { + TestRpcService proxy = null; + try { + proxy = getClient(serverAddress, clientConf, null, fallbackToSimpleAuth); + + proxy.ping(null, newEmptyRequest()); + // make sure the other side thinks we are who we said we are!!! + assertEquals(clientUgi.getUserName(), + proxy.getAuthUser(null, newEmptyRequest()).getUser()); + AuthMethod authMethod = + convert(proxy.getAuthMethod(null, newEmptyRequest())); + // verify sasl completed with correct QOP + assertEquals((authMethod != SIMPLE) ? expectedQop.saslQop : null, + RPC.getConnectionIdForProxy(proxy).getSaslQop()); + return authMethod != null ? 
authMethod.toString() : null; + } catch (ServiceException se) { + if (se.getCause() instanceof RemoteException) { + throw (RemoteException) se.getCause(); + } else if (se.getCause() instanceof IOException) { + throw (IOException) se.getCause(); + } else { + throw new RuntimeException(se.getCause()); + } + } finally { + if (proxy != null) { + RPC.stopProxy(proxy); } } - }); - } finally { - server.stop(); - } + } + }); } private static void assertAuthEquals(AuthMethod expect, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestServer.java index fbaa75bd43efd..748d99e2a0d34 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestServer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestServer.java @@ -25,8 +25,10 @@ import java.net.BindException; import java.net.InetSocketAddress; import java.net.ServerSocket; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.ipc.Server.Call; @@ -184,4 +186,22 @@ public void testExceptionsHandlerSuppressed() { assertTrue(handler.isSuppressedLog(IpcException.class)); assertFalse(handler.isSuppressedLog(RpcClientException.class)); } + + @Test (timeout=300000) + public void testPurgeIntervalNanosConf() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(CommonConfigurationKeysPublic. 
+ IPC_SERVER_PURGE_INTERVAL_MINUTES_KEY, 3); + Server server = new Server("0.0.0.0", 0, LongWritable.class, + 1, conf) { + @Override + public Writable call( + RPC.RpcKind rpcKind, String protocol, Writable param, + long receiveTime) throws Exception { + return null; + } + }; + long purgeInterval = TimeUnit.NANOSECONDS.convert(3, TimeUnit.MINUTES); + assertEquals(server.getPurgeIntervalNanos(), purgeInterval); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLogThrottlingHelper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLogThrottlingHelper.java index d0eeea3e51393..6c627116f8cb9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLogThrottlingHelper.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLogThrottlingHelper.java @@ -142,6 +142,18 @@ public void testPrimaryAndDependentLoggers() { assertTrue(helper.record("bar", 0).shouldLog()); } + @Test + public void testInfrequentPrimaryAndDependentLoggers() { + helper = new LogThrottlingHelper(LOG_PERIOD, "foo", timer); + + assertTrue(helper.record("foo", 0).shouldLog()); + assertTrue(helper.record("bar", 0).shouldLog()); + + // Both should log once the period has elapsed + assertTrue(helper.record("foo", LOG_PERIOD).shouldLog()); + assertTrue(helper.record("bar", LOG_PERIOD).shouldLog()); + } + @Test public void testMultipleLoggersWithValues() { helper = new LogThrottlingHelper(LOG_PERIOD, "foo", timer); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/MetricsRecords.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/MetricsRecords.java index 5d52cad66bb90..786571441fd1b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/MetricsRecords.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/MetricsRecords.java @@ -18,8 +18,8 @@ package org.apache.hadoop.metrics2.impl; -import com.google.common.base.Predicate; -import com.google.common.collect.Iterables; +import java.util.function.Predicate; +import java.util.stream.StreamSupport; import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricsRecord; import org.apache.hadoop.metrics2.MetricsTag; @@ -65,16 +65,22 @@ public static void assertMetricNotNull(MetricsRecord record, resourceLimitMetric); } - private static MetricsTag getFirstTagByName(MetricsRecord record, String name) { - return Iterables.getFirst(Iterables.filter(record.tags(), - new MetricsTagPredicate(name)), null); + private static MetricsTag getFirstTagByName(MetricsRecord record, + String name) { + if (record.tags() == null) { + return null; + } + return record.tags().stream().filter( + new MetricsTagPredicate(name)).findFirst().orElse(null); } private static AbstractMetric getFirstMetricByName( MetricsRecord record, String name) { - return Iterables.getFirst( - Iterables.filter(record.metrics(), new AbstractMetricPredicate(name)), - null); + if (record.metrics() == null) { + return null; + } + return StreamSupport.stream(record.metrics().spliterator(), false) + .filter(new AbstractMetricPredicate(name)).findFirst().orElse(null); } private static class MetricsTagPredicate implements Predicate { @@ -86,7 +92,7 @@ public MetricsTagPredicate(String tagName) { } @Override - public boolean apply(MetricsTag input) { + public boolean test(MetricsTag input) { return input.name().equals(tagName); } } @@ -101,7 +107,7 @@ public AbstractMetricPredicate( } @Override - public boolean apply(AbstractMetric input) { + public boolean test(AbstractMetric input) { return input.name().equals(metricName); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsConfig.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsConfig.java index b53be4d73599a..2ca1c8ad2cc35 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsConfig.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsConfig.java @@ -133,6 +133,22 @@ private void testInstances(MetricsConfig c) throws Exception { assertEq(expected, mc2); } + /** + * Test the config value separated by delimiter + */ + @Test public void testDelimiterConf() { + String filename = getTestFilename("test-metrics2-delimiter"); + new ConfigBuilder().add("p1.foo", "p1foo1,p1foo2,p1foo3").save(filename); + + MetricsConfig mc = MetricsConfig.create("p1", filename); + Configuration expected = new ConfigBuilder() + .add("foo", "p1foo1") + .add("foo", "p1foo2") + .add("foo", "p1foo3") + .config; + assertEq(expected, mc); + } + /** * Return a test filename in the class path * @param basename diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSourceAdapter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSourceAdapter.java index 3fdf445d66447..0dabe468e49e3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSourceAdapter.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSourceAdapter.java @@ -28,7 +28,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsSource; diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java index 47a3b4cdc092b..5a1f1d1376d4a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java @@ -23,9 +23,7 @@ import java.util.*; import java.util.concurrent.*; import java.util.concurrent.atomic.*; - -import javax.annotation.Nullable; - +import java.util.stream.StreamSupport; import org.junit.Test; import org.junit.runner.RunWith; @@ -38,9 +36,8 @@ import static org.junit.Assert.*; import static org.mockito.Mockito.*; -import com.google.common.base.Predicate; -import com.google.common.base.Supplier; -import com.google.common.collect.Iterables; +import java.util.function.Supplier; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; import org.apache.commons.configuration2.SubsetConfiguration; import org.apache.hadoop.metrics2.MetricsException; @@ -59,7 +56,6 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; -import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -246,13 +242,9 @@ public void run() { for (Thread t : threads) t.join(); assertEquals(0L, ms.droppedPubAll.value()); - assertTrue(StringUtils.join("\n", Arrays.asList(results)), - Iterables.all(Arrays.asList(results), new Predicate() { - @Override - public boolean apply(@Nullable String input) { - return input.equalsIgnoreCase("Passed"); - } - })); + assertTrue(String.join("\n", Arrays.asList(results)), + Arrays.asList(results).stream().allMatch( + input -> 
input.equalsIgnoreCase("Passed"))); ms.stop(); ms.shutdown(); } @@ -482,14 +474,12 @@ public Object answer(InvocationOnMock invocation) throws Throwable { ms.onTimerEvent(); verify(dataSink, timeout(500).times(2)).putMetrics(r1.capture()); List mr = r1.getAllValues(); - Number qSize = Iterables.find(mr.get(1).metrics(), - new Predicate() { - @Override - public boolean apply(@Nullable AbstractMetric input) { - assert input != null; - return input.name().equals("Sink_slowSinkQsize"); - } - }).value(); + Number qSize = StreamSupport.stream(mr.get(1).metrics().spliterator(), + false).filter( + input -> { + assert input != null; + return input.name().equals("Sink_slowSinkQsize"); + }).findFirst().get().value(); assertEquals(1, qSize); } finally { proceedSignal.countDown(); @@ -639,4 +629,25 @@ private static class TestSource2 { private static String getPluginUrlsAsString() { return "file:metrics2-test-plugin.jar"; } + + @Test + public void testMetricSystemRestart() { + MetricsSystemImpl ms = new MetricsSystemImpl("msRestartTestSystem"); + TestSink ts = new TestSink(); + String sinkName = "restartTestSink"; + + try { + ms.start(); + ms.register(sinkName, "", ts); + assertNotNull("no adapter exists for " + sinkName, + ms.getSinkAdapter(sinkName)); + ms.stop(); + + ms.start(); + assertNotNull("no adapter exists for " + sinkName, + ms.getSinkAdapter(sinkName)); + } finally { + ms.stop(); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableRollingAverages.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableRollingAverages.java index 9bfdd73bee832..ad90c1860514a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableRollingAverages.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableRollingAverages.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.metrics2.lib; 
-import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.test.GenericTestUtils; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestFileSink.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestFileSink.java index b20653e6b204b..67889405c1068 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestFileSink.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestFileSink.java @@ -115,7 +115,7 @@ public void testFileSink() throws IOException { IOUtils.copyBytes(is, baos, 1024, true); outFileContent = new String(baos.toByteArray(), "UTF-8"); } finally { - IOUtils.cleanup(null, baos, is); + IOUtils.cleanupWithLogger(null, baos, is); } // Check the out file content. Should be something like the following: diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestPrometheusMetricsSink.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestPrometheusMetricsSink.java index 3fc4aa4cc3430..50c77e135ec40 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestPrometheusMetricsSink.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/sink/TestPrometheusMetricsSink.java @@ -21,16 +21,25 @@ import java.io.IOException; import java.io.OutputStreamWriter; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.MetricsSystem; import 
org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.annotation.Metric.Type; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.Interns; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.junit.Assert; import org.junit.Test; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.assertThat; /** * Test prometheus Sink. @@ -48,7 +57,6 @@ public void testPublish() throws IOException { TestMetrics testMetrics = metrics .register("TestMetrics", "Testing metrics", new TestMetrics()); - metrics.start(); testMetrics.numBucketCreateFails.incr(); metrics.publishMetricsNow(); ByteArrayOutputStream stream = new ByteArrayOutputStream(); @@ -67,6 +75,104 @@ public void testPublish() throws IOException { "test_metrics_num_bucket_create_fails{context=\"dfs\"") ); + metrics.unregisterSource("TestMetrics"); + metrics.stop(); + metrics.shutdown(); + } + + /** + * Fix for HADOOP-17804, make sure Prometheus metrics get deduped based on metric + * and tags, not just the metric. 
+ */ + @Test + public void testPublishMultiple() throws IOException { + //GIVEN + MetricsSystem metrics = DefaultMetricsSystem.instance(); + + metrics.init("test"); + PrometheusMetricsSink sink = new PrometheusMetricsSink(); + metrics.register("Prometheus", "Prometheus", sink); + TestMetrics testMetrics1 = metrics + .register("TestMetrics1", "Testing metrics", new TestMetrics("1")); + TestMetrics testMetrics2 = metrics + .register("TestMetrics2", "Testing metrics", new TestMetrics("2")); + + testMetrics1.numBucketCreateFails.incr(); + testMetrics2.numBucketCreateFails.incr(); + metrics.publishMetricsNow(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + OutputStreamWriter writer = new OutputStreamWriter(stream, UTF_8); + + //WHEN + sink.writeMetrics(writer); + writer.flush(); + + //THEN + String writtenMetrics = stream.toString(UTF_8.name()); + System.out.println(writtenMetrics); + Assert.assertTrue( + "The expected first metric line is missing from prometheus metrics output", + writtenMetrics.contains( + "test_metrics_num_bucket_create_fails{context=\"dfs\",testtag=\"testTagValue1\"") + ); + Assert.assertTrue( + "The expected second metric line is missing from prometheus metrics output", + writtenMetrics.contains( + "test_metrics_num_bucket_create_fails{context=\"dfs\",testtag=\"testTagValue2\"") + ); + + metrics.unregisterSource("TestMetrics1"); + metrics.unregisterSource("TestMetrics2"); + metrics.stop(); + metrics.shutdown(); + } + + /** + * Fix for HADOOP-17804, make sure Prometheus metrics start fresh after each flush. 
+ */ + @Test + public void testPublishFlush() throws IOException { + //GIVEN + MetricsSystem metrics = DefaultMetricsSystem.instance(); + + metrics.init("test"); + PrometheusMetricsSink sink = new PrometheusMetricsSink(); + metrics.register("Prometheus", "Prometheus", sink); + TestMetrics testMetrics = metrics + .register("TestMetrics", "Testing metrics", new TestMetrics("1")); + + testMetrics.numBucketCreateFails.incr(); + metrics.publishMetricsNow(); + + metrics.unregisterSource("TestMetrics"); + testMetrics = metrics + .register("TestMetrics", "Testing metrics", new TestMetrics("2")); + + testMetrics.numBucketCreateFails.incr(); + metrics.publishMetricsNow(); + + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + OutputStreamWriter writer = new OutputStreamWriter(stream, UTF_8); + + //WHEN + sink.writeMetrics(writer); + writer.flush(); + + //THEN + String writtenMetrics = stream.toString(UTF_8.name()); + System.out.println(writtenMetrics); + Assert.assertFalse( + "The first metric should not exist after flushing", + writtenMetrics.contains( + "test_metrics_num_bucket_create_fails{context=\"dfs\",testtag=\"testTagValue1\"") + ); + Assert.assertTrue( + "The expected metric line is missing from prometheus metrics output", + writtenMetrics.contains( + "test_metrics_num_bucket_create_fails{context=\"dfs\",testtag=\"testTagValue2\"") + ); + + metrics.unregisterSource("TestMetrics"); metrics.stop(); metrics.shutdown(); } @@ -121,13 +227,108 @@ public void testNamingWhitespaces() { sink.prometheusName(recordName, metricName)); } + /** + * testTopMetricsPublish. 
+ */ + @Test + public void testTopMetricsPublish() throws IOException { + MetricsSystem metrics = DefaultMetricsSystem.instance(); + + metrics.init("test"); + + //GIVEN + PrometheusMetricsSink sink = new PrometheusMetricsSink(); + + metrics.register("prometheus", "prometheus", sink); + TestTopMetrics topMetrics = new TestTopMetrics(); + topMetrics.add("60000"); + topMetrics.add("1500000"); + metrics.register(TestTopMetrics.TOPMETRICS_METRICS_SOURCE_NAME, + "Top N operations by user", topMetrics); + + metrics.start(); + + metrics.publishMetricsNow(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + OutputStreamWriter writer = new OutputStreamWriter(stream, UTF_8); + + //WHEN + sink.writeMetrics(writer); + writer.flush(); + + //THEN + String writtenMetrics = stream.toString(UTF_8.name()); + System.out.println(writtenMetrics); + + assertThat(writtenMetrics) + .contains( + "nn_top_user_op_counts_window_ms_60000_total_count{context=\"dfs\"") + .contains( + "nn_top_user_op_counts_window_ms_60000_count{") + .contains( + "nn_top_user_op_counts_window_ms_1500000_count{") + .contains( + "op=\"rename\",user=\"hadoop/TEST_HOSTNAME.com@HOSTNAME.COM\""); + + metrics.stop(); + metrics.shutdown(); + } + /** * Example metric pojo. */ @Metrics(about = "Test Metrics", context = "dfs") private static class TestMetrics { + private String id; + + TestMetrics() { + this("1"); + } + + TestMetrics(String id) { + this.id = id; + } + + @Metric(value={"testTag", ""}, type=Type.TAG) + String testTag1() { + return "testTagValue" + id; + } @Metric private MutableCounterLong numBucketCreateFails; } + + /** + * Example metric TopMetrics. 
+ */ + private class TestTopMetrics implements MetricsSource { + + public static final String TOPMETRICS_METRICS_SOURCE_NAME = + "NNTopUserOpCounts"; + private final List windowMsNames = new ArrayList<>(); + + public void add(String windowMs) { + windowMsNames.add(String.format(".windowMs=%s", windowMs)); + } + + @Override + public void getMetrics(MetricsCollector collector, boolean all) { + for (String windowMs : windowMsNames) { + MetricsRecordBuilder rb = collector + .addRecord(TOPMETRICS_METRICS_SOURCE_NAME + windowMs) + .setContext("dfs"); + rb.addCounter( + Interns.info("op=" + StringUtils.deleteWhitespace("rename") + + ".TotalCount", "Total operation count"), 2); + rb.addCounter( + Interns.info("op=" + StringUtils.deleteWhitespace("rename") + + ".user=" + "hadoop/TEST_HOSTNAME.com@HOSTNAME.COM" + + ".count", "Total operations performed by user"), 3); + rb.addCounter( + Interns.info("op=" + StringUtils.deleteWhitespace("delete") + + ".user=" + "test_user2" + + ".count", "Total operations performed by user"), 4); + } + } + } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/MockDomainNameResolver.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/MockDomainNameResolver.java index aa9370933722f..3e3bdb7b413b1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/MockDomainNameResolver.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/MockDomainNameResolver.java @@ -23,7 +23,7 @@ import java.util.Map; import java.util.TreeMap; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This mock resolver class returns the predefined resolving/reverse lookup @@ -39,8 +39,8 @@ public class MockDomainNameResolver implements DomainNameResolver { public static final byte[] BYTE_ADDR_2 = new byte[]{10, 1, 1, 2}; public static 
final String ADDR_1 = "10.1.1.1"; public static final String ADDR_2 = "10.1.1.2"; - public static final String FQDN_1 = "host01.com"; - public static final String FQDN_2 = "host02.com"; + public static final String FQDN_1 = "host01.test"; + public static final String FQDN_2 = "host02.test"; /** Internal mapping of domain names and IP addresses. */ private Map addrs = new TreeMap<>(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java index 80f2ebc98ced8..872791d1ff7bf 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java @@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetAddress; import java.net.ServerSocket; import java.util.Random; @@ -49,7 +50,8 @@ public static int getPort(int port, int retries) throws IOException { if (tryPort == 0) { continue; } - try (ServerSocket s = new ServerSocket(tryPort)) { + try (ServerSocket s = new ServerSocket(tryPort, 50, + InetAddress.getLoopbackAddress())) { LOG.info("Using port " + tryPort); return tryPort; } catch (IOException e) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java index fbed6052a5c03..6b07d4a455d9d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Arrays; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.math3.stat.inference.ChiSquareTest; import 
org.apache.hadoop.conf.Configuration; import org.junit.Assert; @@ -234,4 +235,41 @@ private NodeElement getNewNode(String name, String rackLocation) { node.setNetworkLocation(rackLocation); return node; } + + private NodeElement getNewNode(NetworkTopology cluster, + String name, String rackLocation) { + NodeElement node = getNewNode(name, rackLocation); + cluster.add(node); + return node; + } + + @Test + @SuppressWarnings("unchecked") + public void testWeights() { + // create the topology + NetworkTopology cluster = NetworkTopology.getInstance(new Configuration()); + NodeElement node1 = getNewNode(cluster, "node1", "/r1"); + NodeElement node2 = getNewNode(cluster, "node2", "/r1"); + NodeElement node3 = getNewNode(cluster, "node3", "/r2"); + for (Pair test: new Pair[]{Pair.of(0, node1), + Pair.of(2, node2), Pair.of(4, node3)}) { + int expect = test.getLeft(); + assertEquals(test.toString(), expect, cluster.getWeight(node1, test.getRight())); + assertEquals(test.toString(), expect, + cluster.getWeightUsingNetworkLocation(node1, test.getRight())); + } + // Reset so that we can have 2 levels + cluster = NetworkTopology.getInstance(new Configuration()); + NodeElement node5 = getNewNode(cluster, "node5", "/pod1/r1"); + NodeElement node6 = getNewNode(cluster, "node6", "/pod1/r1"); + NodeElement node7 = getNewNode(cluster, "node7", "/pod1/r2"); + NodeElement node8 = getNewNode(cluster, "node8", "/pod2/r3"); + for (Pair test: new Pair[]{Pair.of(0, node5), + Pair.of(2, node6), Pair.of(4, node7), Pair.of(6, node8)}) { + int expect = test.getLeft(); + assertEquals(test.toString(), expect, cluster.getWeight(node5, test.getRight())); + assertEquals(test.toString(), expect, + cluster.getWeightUsingNetworkLocation(node5, test.getRight())); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestDNSDomainNameResolver.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestDNSDomainNameResolver.java new file mode 
100644 index 0000000000000..4729cee118818 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestDNSDomainNameResolver.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import org.junit.Test; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Objects; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assume.assumeFalse; + + +public class TestDNSDomainNameResolver { + + static DNSDomainNameResolver DNR = new DNSDomainNameResolver(); + + @Test + public void testGetHostNameByIP() throws UnknownHostException { + InetAddress localhost = InetAddress.getLocalHost(); + assumeFalse("IP lookup support required", + Objects.equals(localhost.getCanonicalHostName(), localhost.getHostAddress())); + + // Precondition: host name and canonical host name for unresolved returns an IP address. 
+ InetAddress unresolved = InetAddress.getByAddress(localhost.getHostAddress(), + localhost.getAddress()); + assertEquals(localhost.getHostAddress(), unresolved.getHostName()); + + // Test: Get the canonical name despite InetAddress caching + String canonicalHostName = DNR.getHostnameByIP(unresolved); + + // Verify: The canonical host name doesn't match the host address but does match the localhost. + assertNotEquals(localhost.getHostAddress(), canonicalHostName); + assertEquals(localhost.getCanonicalHostName(), canonicalHostName); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java index b11b1e96ded59..0bf2c4473a76e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java @@ -43,6 +43,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.KerberosAuthException; import org.apache.hadoop.security.NetUtilsTestResolver; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.Assume; import org.junit.Before; import org.junit.BeforeClass; @@ -95,7 +96,26 @@ public void testAvoidLoopbackTcpSockets() throws Throwable { assertInException(se, "Invalid argument"); } } - + + @Test + public void testInvalidAddress() throws Throwable { + Configuration conf = new Configuration(); + + Socket socket = NetUtils.getDefaultSocketFactory(conf) + .createSocket(); + socket.bind(new InetSocketAddress("127.0.0.1", 0)); + try { + NetUtils.connect(socket, + new InetSocketAddress("invalid-test-host", + 0), 20000); + socket.close(); + fail("Should not have connected"); + } catch (UnknownHostException uhe) { + LOG.info("Got exception: ", uhe); + GenericTestUtils.assertExceptionContains("invalid-test-host:0", uhe); + } + } + @Test public void 
testSocketReadTimeoutWithChannel() throws Exception { doSocketReadTimeoutTest(true); @@ -334,7 +354,7 @@ public void testCreateSocketAddress() throws Throwable { assertEquals(1000, addr.getPort()); try { - addr = NetUtils.createSocketAddr( + NetUtils.createSocketAddr( "127.0.0.1:blahblah", 1000, "myconfig"); fail("Should have failed to parse bad port"); } catch (IllegalArgumentException iae) { @@ -342,6 +362,49 @@ public void testCreateSocketAddress() throws Throwable { } } + @Test + public void testCreateSocketAddressWithURICache() throws Throwable { + InetSocketAddress addr = NetUtils.createSocketAddr( + "127.0.0.1:12345", 1000, "myconfig", true); + assertEquals("127.0.0.1", addr.getAddress().getHostAddress()); + assertEquals(12345, addr.getPort()); + + addr = NetUtils.createSocketAddr( + "127.0.0.1:12345", 1000, "myconfig", true); + assertEquals("127.0.0.1", addr.getAddress().getHostAddress()); + assertEquals(12345, addr.getPort()); + + // ---------------------------------------------------- + + addr = NetUtils.createSocketAddr( + "127.0.0.1", 1000, "myconfig", true); + assertEquals("127.0.0.1", addr.getAddress().getHostAddress()); + assertEquals(1000, addr.getPort()); + + addr = NetUtils.createSocketAddr( + "127.0.0.1", 1000, "myconfig", true); + assertEquals("127.0.0.1", addr.getAddress().getHostAddress()); + assertEquals(1000, addr.getPort()); + + // ---------------------------------------------------- + + try { + NetUtils.createSocketAddr( + "127.0.0.1:blahblah", 1000, "myconfig", true); + fail("Should have failed to parse bad port"); + } catch (IllegalArgumentException iae) { + assertInException(iae, "myconfig"); + } + + try { + NetUtils.createSocketAddr( + "127.0.0.1:blahblah", 1000, "myconfig", true); + fail("Should have failed to parse bad port"); + } catch (IllegalArgumentException iae) { + assertInException(iae, "myconfig"); + } + } + private void assertRemoteDetailsIncluded(IOException wrapped) throws Throwable { assertInException(wrapped, 
"desthost"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSocketIOWithTimeout.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSocketIOWithTimeout.java index 272eae70bb62e..c55f020e65ecb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSocketIOWithTimeout.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSocketIOWithTimeout.java @@ -24,6 +24,11 @@ import java.net.SocketTimeoutException; import java.nio.channels.Pipe; import java.util.Arrays; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil; @@ -186,6 +191,46 @@ public void doWork() throws Exception { } } + @Test + public void testSocketIOWithTimeoutByMultiThread() throws Exception { + CountDownLatch latch = new CountDownLatch(1); + Runnable ioTask = () -> { + try { + Pipe pipe = Pipe.open(); + try (Pipe.SourceChannel source = pipe.source(); + InputStream in = new SocketInputStream(source, TIMEOUT); + Pipe.SinkChannel sink = pipe.sink(); + OutputStream out = new SocketOutputStream(sink, TIMEOUT)) { + + byte[] writeBytes = TEST_STRING.getBytes(); + byte[] readBytes = new byte[writeBytes.length]; + latch.await(); + + out.write(writeBytes); + doIO(null, out, TIMEOUT); + + in.read(readBytes); + assertArrayEquals(writeBytes, readBytes); + doIO(in, null, TIMEOUT); + } + } catch (Exception e) { + fail(e.getMessage()); + } + }; + + int threadCnt = 64; + ExecutorService threadPool = Executors.newFixedThreadPool(threadCnt); + for (int i = 0; i < threadCnt; ++i) { + threadPool.submit(ioTask); + } + + Thread.sleep(1000); + latch.countDown(); + + threadPool.shutdown(); + assertTrue(threadPool.awaitTermination(3, 
TimeUnit.SECONDS)); + } + @Test public void testSocketIOWithTimeoutInterrupted() throws Exception { Pipe pipe = Pipe.open(); @@ -223,4 +268,38 @@ public void doWork() throws Exception { ctx.stop(); } } + + @Test + public void testSocketIOWithTimeoutInterruptedByMultiThread() + throws Exception { + final int timeout = TIMEOUT * 10; + AtomicLong readCount = new AtomicLong(); + AtomicLong exceptionCount = new AtomicLong(); + Runnable ioTask = () -> { + try { + Pipe pipe = Pipe.open(); + try (Pipe.SourceChannel source = pipe.source(); + InputStream in = new SocketInputStream(source, timeout)) { + in.read(); + readCount.incrementAndGet(); + } catch (InterruptedIOException ste) { + exceptionCount.incrementAndGet(); + } + } catch (Exception e) { + fail(e.getMessage()); + } + }; + + int threadCnt = 64; + ExecutorService threadPool = Executors.newFixedThreadPool(threadCnt); + for (int i = 0; i < threadCnt; ++i) { + threadPool.submit(ioTask); + } + Thread.sleep(1000); + threadPool.shutdownNow(); + threadPool.awaitTermination(1, TimeUnit.SECONDS); + + assertEquals(0, readCount.get()); + assertEquals(threadCnt, exceptionCount.get()); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java index 4b8b02f0171d4..86870e1257119 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java @@ -21,8 +21,8 @@ import static org.junit.Assert.assertEquals; -import com.google.common.base.Charsets; -import com.google.common.io.Files; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.io.Files; import java.io.File; import java.io.IOException; diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index c0d204f86aa82..61cbd85f8d69f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -49,7 +49,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; -import com.google.common.io.Files; +import org.apache.hadoop.thirdparty.com.google.common.io.Files; public class TestDomainSocket { private static TemporarySocketDirectory sockDir; @@ -759,6 +759,6 @@ public void run() { readerThread.join(); Assert.assertFalse(failed.get()); Assert.assertEquals(3, bytesRead.get()); - IOUtils.cleanup(null, socks); + IOUtils.cleanupWithLogger(null, socks); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java index aa522f266014b..ca801dac2c247 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java @@ -31,7 +31,7 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestDoAsEffectiveUser.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestDoAsEffectiveUser.java index c86b9ae344195..edd537011c4a8 100644 --- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestDoAsEffectiveUser.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestDoAsEffectiveUser.java @@ -21,7 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.Text; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.TestRpcBase; @@ -151,7 +151,7 @@ public void testRealUserSetup() throws IOException { configureSuperUserIPAddresses(conf, REAL_USER_SHORT_NAME); // Set RPC engine to protobuf RPC engine RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 5); @@ -181,7 +181,7 @@ public void testRealUserAuthorizationSuccess() throws IOException { getProxySuperuserGroupConfKey(REAL_USER_SHORT_NAME), "group1"); RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 5); @@ -215,7 +215,7 @@ public void testRealUserIPAuthorizationFailure() throws IOException { getProxySuperuserGroupConfKey(REAL_USER_SHORT_NAME), "group1"); RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 5); @@ -251,7 +251,7 @@ public void testRealUserIPNotSpecified() throws IOException { conf.setStrings(DefaultImpersonationProvider.getTestProvider(). 
getProxySuperuserGroupConfKey(REAL_USER_SHORT_NAME), "group1"); RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 2); @@ -286,7 +286,7 @@ public void testRealUserGroupNotSpecified() throws IOException { final Configuration conf = new Configuration(); configureSuperUserIPAddresses(conf, REAL_USER_SHORT_NAME); RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 2); @@ -322,7 +322,7 @@ public void testRealUserGroupAuthorizationFailure() throws IOException { getProxySuperuserGroupConfKey(REAL_USER_SHORT_NAME), "group3"); RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 2); @@ -363,7 +363,7 @@ public void testProxyWithToken() throws Exception { TestTokenSecretManager sm = new TestTokenSecretManager(); SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf); RPC.setProtocolEngine(conf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(conf); final Server server = setupTestServer(conf, 5, sm); @@ -411,7 +411,7 @@ public void testTokenBySuperUser() throws Exception { SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, newConf); // Set RPC engine to protobuf RPC engine RPC.setProtocolEngine(newConf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation.setConfiguration(newConf); final Server server = setupTestServer(newConf, 5, sm); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java index 46e9f92258502..ebff93d50d5e1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java @@ -34,7 +34,7 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestKDiag.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestKDiag.java index e395566dae739..706701f67bd50 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestKDiag.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestKDiag.java @@ -36,6 +36,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Properties; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; @@ -234,7 +235,7 @@ public void testKeytabUnknownPrincipal() throws Throwable { */ private void dump(File file) throws IOException { try (FileInputStream in = new FileInputStream(file)) { - for (String line : IOUtils.readLines(in)) { + for (String line : IOUtils.readLines(in, StandardCharsets.UTF_8)) { LOG.info(line); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMapping.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMapping.java index dba2b086f897f..aba3997187747 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMapping.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMapping.java @@ -389,8 +389,9 @@ public void run() { fail("The LDAP query should have timed out!"); } catch (NamingException ne) { LOG.debug("Got the exception while LDAP querying: ", ne); - assertExceptionContains("LDAP response read timed out, timeout used:" + - connectionTimeoutMs + "ms", ne); + assertExceptionContains("LDAP response read timed out, timeout used", + ne); + assertExceptionContains("" + connectionTimeoutMs, ne); assertFalse(ne.getMessage().contains("remaining name")); } finally { finLatch.countDown(); @@ -444,8 +445,9 @@ public void run() { fail("The LDAP query should have timed out!"); } catch (NamingException ne) { LOG.debug("Got the exception while LDAP querying: ", ne); - assertExceptionContains("LDAP response read timed out, timeout used:" + - readTimeoutMs + "ms", ne); + assertExceptionContains("LDAP response read timed out, timeout used", + ne); + assertExceptionContains(""+ readTimeoutMs, ne); assertExceptionContains("remaining name", ne); } finally { finLatch.countDown(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithBindUserSwitch.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithBindUserSwitch.java index 4f080b3fede96..3b4c77d9e4ab9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithBindUserSwitch.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithBindUserSwitch.java @@ -18,7 +18,7 @@ package org.apache.hadoop.security; -import com.google.common.collect.Iterators; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterators; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.alias.CredentialProvider; diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithOneQuery.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithOneQuery.java index 7ae802e26d36d..c86f1768b7f01 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithOneQuery.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestLdapGroupsMappingWithOneQuery.java @@ -18,19 +18,21 @@ package org.apache.hadoop.security; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import javax.naming.NamingEnumeration; import javax.naming.NamingException; import javax.naming.directory.Attribute; +import javax.naming.directory.DirContext; import javax.naming.directory.SearchControls; import javax.naming.directory.SearchResult; import org.apache.hadoop.conf.Configuration; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.mockito.stubbing.Stubber; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; @@ -49,48 +51,121 @@ public class TestLdapGroupsMappingWithOneQuery extends TestLdapGroupsMappingBase { - @Before - public void setupMocks() throws NamingException { + public void setupMocks(List listOfDNs) throws NamingException { Attribute groupDN = mock(Attribute.class); NamingEnumeration groupNames = getGroupNames(); doReturn(groupNames).when(groupDN).getAll(); - String groupName1 = "CN=abc,DC=foo,DC=bar,DC=com"; - String groupName2 = "CN=xyz,DC=foo,DC=bar,DC=com"; - String groupName3 = "CN=sss,CN=foo,DC=bar,DC=com"; - doReturn(groupName1).doReturn(groupName2).doReturn(groupName3). - when(groupNames).next(); - when(groupNames.hasMore()).thenReturn(true).thenReturn(true). - thenReturn(true).thenReturn(false); + buildListOfGroupDNs(listOfDNs).when(groupNames).next(); + when(groupNames.hasMore()). 
+ thenReturn(true).thenReturn(true). + thenReturn(true).thenReturn(false); when(getAttributes().get(eq("memberOf"))).thenReturn(groupDN); } + /** + * Build and return a list of individually added group DNs such + * that calls to .next() will result in a single value each time. + * + * @param listOfDNs + * @return the stubber to use for the .when().next() call + */ + private Stubber buildListOfGroupDNs(List listOfDNs) { + Stubber stubber = null; + for (String s : listOfDNs) { + if (stubber != null) { + stubber.doReturn(s); + } else { + stubber = doReturn(s); + } + } + return stubber; + } + @Test public void testGetGroups() throws NamingException { // given a user whose ldap query returns a user object with three "memberOf" // properties, return an array of strings representing its groups. String[] testGroups = new String[] {"abc", "xyz", "sss"}; doTestGetGroups(Arrays.asList(testGroups)); + + // test fallback triggered by NamingException + doTestGetGroupsWithFallback(); } private void doTestGetGroups(List expectedGroups) throws NamingException { + List groupDns = new ArrayList<>(); + groupDns.add("CN=abc,DC=foo,DC=bar,DC=com"); + groupDns.add("CN=xyz,DC=foo,DC=bar,DC=com"); + groupDns.add("CN=sss,DC=foo,DC=bar,DC=com"); + + setupMocks(groupDns); String ldapUrl = "ldap://test"; Configuration conf = getBaseConf(ldapUrl); // enable single-query lookup conf.set(LdapGroupsMapping.MEMBEROF_ATTR_KEY, "memberOf"); - LdapGroupsMapping groupsMapping = getGroupsMapping(); + TestLdapGroupsMapping groupsMapping = new TestLdapGroupsMapping(); groupsMapping.setConf(conf); // Username is arbitrary, since the spy is mocked to respond the same, // regardless of input List groups = groupsMapping.getGroups("some_user"); Assert.assertEquals(expectedGroups, groups); + Assert.assertFalse("Second LDAP query should NOT have been called.", + groupsMapping.isSecondaryQueryCalled()); // We should have only made one query because single-query lookup is enabled verify(getContext(), 
times(1)).search(anyString(), anyString(), any(Object[].class), any(SearchControls.class)); } -} \ No newline at end of file + + private void doTestGetGroupsWithFallback() + throws NamingException { + List groupDns = new ArrayList<>(); + groupDns.add("CN=abc,DC=foo,DC=bar,DC=com"); + groupDns.add("CN=xyz,DC=foo,DC=bar,DC=com"); + groupDns.add("ipaUniqueID=e4a9a634-bb24-11ec-aec1-06ede52b5fe1," + + "CN=sudo,DC=foo,DC=bar,DC=com"); + setupMocks(groupDns); + String ldapUrl = "ldap://test"; + Configuration conf = getBaseConf(ldapUrl); + // enable single-query lookup + conf.set(LdapGroupsMapping.MEMBEROF_ATTR_KEY, "memberOf"); + conf.set(LdapGroupsMapping.LDAP_NUM_ATTEMPTS_KEY, "1"); + + TestLdapGroupsMapping groupsMapping = new TestLdapGroupsMapping(); + groupsMapping.setConf(conf); + // Username is arbitrary, since the spy is mocked to respond the same, + // regardless of input + List groups = groupsMapping.getGroups("some_user"); + + // expected to be empty due to invalid memberOf + Assert.assertEquals(0, groups.size()); + + // expect secondary query to be called: getGroups() + Assert.assertTrue("Second LDAP query should have been called.", + groupsMapping.isSecondaryQueryCalled()); + + // We should have fallen back to the second query because first threw + // NamingException expected count is 3 since testGetGroups calls + // doTestGetGroups and doTestGetGroupsWithFallback in succession and + // the count is across both test scenarios. 
+ verify(getContext(), times(3)).search(anyString(), anyString(), + any(Object[].class), any(SearchControls.class)); + } + + private static final class TestLdapGroupsMapping extends LdapGroupsMapping { + private boolean secondaryQueryCalled = false; + public boolean isSecondaryQueryCalled() { + return secondaryQueryCalled; + } + List lookupGroup(SearchResult result, DirContext c, + int goUpHierarchy) throws NamingException { + secondaryQueryCalled = true; + return super.lookupGroup(result, c, goUpHierarchy); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java index 016c589ae3a24..b6b9684445342 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java @@ -46,7 +46,7 @@ import org.junit.Test; import org.mockito.Mockito; -import com.google.common.io.Files; +import org.apache.hadoop.thirdparty.com.google.common.io.Files; public class TestSecurityUtil { @@ -370,6 +370,16 @@ public void testSocketAddrWithNameToStaticIP() { verifyServiceAddr(staticHost, "255.255.255.255"); } + @Test + public void testSocketAddrWithChangeIP() { + String staticHost = "host4"; + NetUtils.addStaticResolution(staticHost, "255.255.255.255"); + verifyServiceAddr(staticHost, "255.255.255.255"); + + NetUtils.addStaticResolution(staticHost, "255.255.255.254"); + verifyServiceAddr(staticHost, "255.255.255.254"); + } + // this is a bizarre case, but it's if a test tries to remap an ip address @Test public void testSocketAddrWithIPToStaticIP() { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestShellBasedIdMapping.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestShellBasedIdMapping.java index 
e6fdc2bcdfbd4..939209d267b50 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestShellBasedIdMapping.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestShellBasedIdMapping.java @@ -32,8 +32,8 @@ import org.apache.hadoop.security.ShellBasedIdMapping.StaticMapping; import org.junit.Test; -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.BiMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashBiMap; import static org.assertj.core.api.Assertions.assertThat; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUGILoginFromKeytab.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUGILoginFromKeytab.java index 8ede451db964c..db0095f2171e2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUGILoginFromKeytab.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUGILoginFromKeytab.java @@ -23,6 +23,7 @@ import org.apache.hadoop.minikdc.MiniKdc; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.Time; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -101,12 +102,35 @@ public void stopMiniKdc() { } } + /** + * Login from keytab using the MiniKDC. 
+ */ + @Test + public void testUGILoginFromKeytab() throws Exception { + long beforeLogin = Time.now(); + String principal = "foo"; + File keytab = new File(workDir, "foo.keytab"); + kdc.createPrincipal(keytab, principal); + + UserGroupInformation.loginUserFromKeytab(principal, keytab.getPath()); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + Assert.assertTrue("UGI should be configured to login from keytab", + ugi.isFromKeytab()); + + User user = getUser(ugi.getSubject()); + Assert.assertNotNull(user.getLogin()); + + Assert.assertTrue("User login time is less than before login time, " + + "beforeLoginTime:" + beforeLogin + " userLoginTime:" + user.getLastLogin(), + user.getLastLogin() > beforeLogin); + } + /** * Login from keytab using the MiniKDC and verify the UGI can successfully * relogin from keytab as well. This will catch regressions like HADOOP-10786. */ @Test - public void testUGILoginFromKeytab() throws Exception { + public void testUGIReLoginFromKeytab() throws Exception { String principal = "foo"; File keytab = new File(workDir, "foo.keytab"); kdc.createPrincipal(keytab, principal); @@ -122,6 +146,9 @@ public void testUGILoginFromKeytab() throws Exception { final LoginContext login1 = user.getLogin(); Assert.assertNotNull(login1); + // Sleep for 2 secs to have a difference between first and second login + Thread.sleep(2000); + ugi.reloginFromKeytab(); final long secondLogin = user.getLastLogin(); final LoginContext login2 = user.getLogin(); @@ -131,6 +158,42 @@ public void testUGILoginFromKeytab() throws Exception { Assert.assertNotSame(login1, login2); } + /** + * Force re-login from keytab using the MiniKDC and verify the UGI can + * successfully relogin from keytab as well. 
+ */ + @Test + public void testUGIForceReLoginFromKeytab() throws Exception { + // Set this to false as we are testing force re-login anyways + UserGroupInformation.setShouldRenewImmediatelyForTests(false); + String principal = "foo"; + File keytab = new File(workDir, "foo.keytab"); + kdc.createPrincipal(keytab, principal); + + UserGroupInformation.loginUserFromKeytab(principal, keytab.getPath()); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + Assert.assertTrue("UGI should be configured to login from keytab", + ugi.isFromKeytab()); + + // Verify relogin from keytab. + User user = getUser(ugi.getSubject()); + final long firstLogin = user.getLastLogin(); + final LoginContext login1 = user.getLogin(); + Assert.assertNotNull(login1); + + // Sleep for 2 secs to have a difference between first and second login + Thread.sleep(2000); + + // Force relogin from keytab + ugi.forceReloginFromKeytab(); + final long secondLogin = user.getLastLogin(); + final LoginContext login2 = user.getLogin(); + Assert.assertTrue("User should have been able to relogin from keytab", + secondLogin > firstLogin); + Assert.assertNotNull(login2); + Assert.assertNotSame(login1, login2); + } + @Test public void testGetUGIFromKnownSubject() throws Exception { KerberosPrincipal principal = new KerberosPrincipal("user"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java index ee7e42cb1daf0..d786750e5dd25 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java @@ -42,6 +42,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; public class TestCredentialProviderFactory { @@ -245,6 +246,19 @@ public void testLocalJksProvider() throws Exception { checkPermissionRetention(conf, ourUrl, path); } + @Test + public void testLocalBCFKSProvider() throws Exception { + Configuration conf = new Configuration(); + final Path ksPath = new Path(tmpDir.toString(), "test.bcfks"); + final String ourUrl = LocalBouncyCastleFipsKeyStoreProvider.SCHEME_NAME + + "://file" + ksPath.toUri(); + conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, ourUrl); + + IOException e = assertThrows(IOException.class, + () -> CredentialProviderFactory.getProviders(conf)); + assertTrue(e.getMessage().contains("Can't create keystore")); + } + public void checkPermissionRetention(Configuration conf, String ourUrl, Path path) throws Exception { CredentialProvider provider = CredentialProviderFactory.getProviders(conf).get(0); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authentication/server/TestProxyUserAuthenticationFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authentication/server/TestProxyUserAuthenticationFilter.java index 16c0e1eb112ac..978c15d8f2a0f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authentication/server/TestProxyUserAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authentication/server/TestProxyUserAuthenticationFilter.java @@ -18,21 +18,27 @@ package org.apache.hadoop.security.authentication.server; +import java.io.IOException; +import java.io.PrintWriter; import java.security.Principal; +import java.util.Collection; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import javax.servlet.FilterConfig; import javax.servlet.FilterChain; import 
javax.servlet.ServletContext; +import javax.servlet.ServletOutputStream; import javax.servlet.ServletResponse; import javax.servlet.ServletRequest; +import javax.servlet.http.Cookie; import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; import static org.assertj.core.api.Assertions.assertThat; -import org.glassfish.grizzly.servlet.HttpServletResponseImpl; import org.junit.Test; import org.mockito.Mockito; @@ -76,8 +82,192 @@ public ServletContext getServletContext() { } } - private class HttpServletResponseForTest extends HttpServletResponseImpl { + private class HttpServletResponseForTest implements HttpServletResponse { + @Override + public void addCookie(Cookie cookie) { + + } + + @Override + public boolean containsHeader(String name) { + return false; + } + + @Override + public String encodeURL(String url) { + return null; + } + + @Override + public String encodeRedirectURL(String url) { + return null; + } + + @Override + public String encodeUrl(String url) { + return null; + } + + @Override + public String encodeRedirectUrl(String url) { + return null; + } + + @Override + public void sendError(int sc, String msg) throws IOException { + + } + + @Override + public void sendError(int sc) throws IOException { + + } + + @Override + public void sendRedirect(String location) throws IOException { + + } + + @Override + public void setDateHeader(String name, long date) { + + } + + @Override + public void addDateHeader(String name, long date) { + + } + + @Override + public void setHeader(String name, String value) { + + } + + @Override + public void addHeader(String name, String value) { + + } + + @Override + public void setIntHeader(String name, int value) { + + } + + @Override + public void addIntHeader(String name, int value) { + + } + + @Override + public void setStatus(int sc) { + + } + + @Override + public void setStatus(int sc, String sm) { + + } + + @Override + public int getStatus() { + return 0; + } + + @Override + public 
String getHeader(String name) { + return null; + } + + @Override + public Collection getHeaders(String name) { + return null; + } + + @Override + public Collection getHeaderNames() { + return null; + } + + @Override + public String getCharacterEncoding() { + return null; + } + + @Override + public String getContentType() { + return null; + } + + @Override + public ServletOutputStream getOutputStream() throws IOException { + return null; + } + + @Override + public PrintWriter getWriter() throws IOException { + return null; + } + + @Override + public void setCharacterEncoding(String charset) { + + } + + @Override + public void setContentLength(int len) { + + } + + @Override + public void setContentLengthLong(long len) { + + } + + @Override + public void setContentType(String type) { + + } + + @Override + public void setBufferSize(int size) { + + } + + @Override + public int getBufferSize() { + return 0; + } + + @Override + public void flushBuffer() throws IOException { + + } + + @Override + public void resetBuffer() { + + } + + @Override + public boolean isCommitted() { + return false; + } + + @Override + public void reset() { + + } + + @Override + public void setLocale(Locale loc) { + + } + + @Override + public Locale getLocale() { + return null; + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestAccessControlList.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestAccessControlList.java index 8e1b82bea9605..53ab275b664fb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestAccessControlList.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestAccessControlList.java @@ -471,4 +471,22 @@ private void assertUserNotAllowed(UserGroupInformation ugi, + " is incorrectly granted the access-control!!", acl.isUserAllowed(ugi)); } + + @Test + public void 
testUseRealUserAclsForProxiedUser() { + String realUser = "realUser"; + AccessControlList acl = new AccessControlList(realUser); + UserGroupInformation realUserUgi = + UserGroupInformation.createRemoteUser(realUser); + UserGroupInformation user1 = + UserGroupInformation.createProxyUserForTesting("regularJane", + realUserUgi, new String [] {"group1"}); + assertFalse("User " + user1 + " should not have been granted access.", + acl.isUserAllowed(user1)); + + acl = new AccessControlList(AccessControlList.USE_REAL_ACLS + realUser); + + assertTrue("User " + user1 + " should have access but was denied.", + acl.isUserAllowed(user1)); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java index 9061fe752c88e..ab9de2d308ac0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java @@ -21,6 +21,8 @@ import static org.junit.Assert.fail; import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.security.SecureRandom; import java.util.Arrays; import java.util.Collection; @@ -370,7 +372,7 @@ public void testNullIpAddress() throws Exception { PROXY_USER_NAME, realUserUgi, GROUP_NAMES); // remote address is null - ProxyUsers.authorize(proxyUserUgi, null); + ProxyUsers.authorize(proxyUserUgi, (InetAddress) null); } @Test @@ -533,9 +535,21 @@ public void testNoHostsForUsers() throws Exception { assertNotAuthorized(proxyUserUgi, "1.2.3.4"); } + private static InetAddress toFakeAddress(String ip) { + try { + InetAddress addr = InetAddress.getByName(ip); + return InetAddress.getByAddress(ip.replace('.', '-'), + addr.getAddress()); + } catch (UnknownHostException e) { + throw new 
IllegalArgumentException(e); + } + } + private void assertNotAuthorized(UserGroupInformation proxyUgi, String host) { try { + // test both APIs. ProxyUsers.authorize(proxyUgi, host); + ProxyUsers.authorize(proxyUgi, toFakeAddress(host)); fail("Allowed authorization of " + proxyUgi + " from " + host); } catch (AuthorizationException e) { // Expected @@ -544,7 +558,9 @@ private void assertNotAuthorized(UserGroupInformation proxyUgi, String host) { private void assertAuthorized(UserGroupInformation proxyUgi, String host) { try { + // test both APIs. ProxyUsers.authorize(proxyUgi, host); + ProxyUsers.authorize(proxyUgi, toFakeAddress(host)); } catch (AuthorizationException e) { fail("Did not allow authorization of " + proxyUgi + " from " + host); } @@ -560,9 +576,9 @@ public void init(String configurationPrefix) { * Authorize a user (superuser) to impersonate another user (user1) if the * superuser belongs to the group "sudo_user1" . */ - - public void authorize(UserGroupInformation user, - String remoteAddress) throws AuthorizationException{ + @Override + public void authorize(UserGroupInformation user, + InetAddress remoteAddress) throws AuthorizationException{ UserGroupInformation superUser = user.getRealUser(); String sudoGroupName = "sudo_" + user.getShortUserName(); @@ -572,6 +588,7 @@ public void authorize(UserGroupInformation user, } } + @Override public void setConf(Configuration conf) { @@ -597,7 +614,6 @@ public static void loadTest(String ipString, int testRange) { ); ProxyUsers.refreshSuperUserGroupsConfiguration(conf); - // First try proxying a group that's allowed UserGroupInformation realUserUgi = UserGroupInformation .createRemoteUser(REAL_USER_NAME); @@ -608,7 +624,8 @@ public static void loadTest(String ipString, int testRange) { SecureRandom sr = new SecureRandom(); for (int i=1; i < 1000000; i++){ try { - ProxyUsers.authorize(proxyUserUgi, "1.2.3."+ sr.nextInt(testRange)); + ProxyUsers.authorize(proxyUserUgi, + toFakeAddress("1.2.3."+ 
sr.nextInt(testRange))); } catch (AuthorizationException e) { } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509KeyManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509KeyManager.java new file mode 100644 index 0000000000000..bf0a6abdc676d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509KeyManager.java @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.security.ssl; + +import org.apache.hadoop.thirdparty.com.google.common.base.Supplier; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Paths; +import java.security.KeyPair; +import java.security.cert.X509Certificate; +import java.util.Timer; +import java.util.concurrent.TimeoutException; + +import static org.apache.hadoop.security.ssl.KeyStoreTestUtil.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +public class TestReloadingX509KeyManager { + + private static final String BASEDIR = GenericTestUtils.getTempPath( + TestReloadingX509TrustManager.class.getSimpleName()); + + private final GenericTestUtils.LogCapturer reloaderLog = GenericTestUtils.LogCapturer.captureLogs( + FileMonitoringTimerTask.LOG); + + @BeforeClass + public static void setUp() throws Exception { + File base = new File(BASEDIR); + FileUtil.fullyDelete(base); + base.mkdirs(); + } + + @Test(expected = IOException.class) + public void testLoadMissingKeyStore() throws Exception { + String keystoreLocation = BASEDIR + "/testmissing.jks"; + + ReloadingX509KeystoreManager tm = + new ReloadingX509KeystoreManager("jks", keystoreLocation, + "password", + "password"); + } + + @Test(expected = IOException.class) + public void testLoadCorruptKeyStore() throws Exception { + String keystoreLocation = BASEDIR + "/testcorrupt.jks"; + OutputStream os = new FileOutputStream(keystoreLocation); + os.write(1); + os.close(); + + ReloadingX509KeystoreManager tm = + new ReloadingX509KeystoreManager("jks", keystoreLocation, + "password", + "password"); + } + + @Test (timeout = 3000000) + public void testReload() throws Exception { + KeyPair kp = generateKeyPair("RSA"); + X509Certificate sCert = 
generateCertificate("CN=localhost, O=server", kp, 30, + "SHA1withRSA"); + String keystoreLocation = BASEDIR + "/testreload.jks"; + createKeyStore(keystoreLocation, "password", "cert1", kp.getPrivate(), sCert); + + long reloadInterval = 10; + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); + ReloadingX509KeystoreManager tm = + new ReloadingX509KeystoreManager("jks", keystoreLocation, + "password", + "password"); + try { + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + Paths.get(keystoreLocation), tm::loadFrom,null), reloadInterval, reloadInterval); + assertEquals(kp.getPrivate(), tm.getPrivateKey("cert1")); + + // Wait so that the file modification time is different + Thread.sleep((reloadInterval+ 1000)); + + // Change the certificate with a new keypair + final KeyPair anotherKP = generateKeyPair("RSA"); + sCert = KeyStoreTestUtil.generateCertificate("CN=localhost, O=server", anotherKP, 30, + "SHA1withRSA"); + createKeyStore(keystoreLocation, "password", "cert1", anotherKP.getPrivate(), sCert); + + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return tm.getPrivateKey("cert1").equals(kp.getPrivate()); + } + }, (int) reloadInterval, 100000); + } finally { + fileMonitoringTimer.cancel(); + } + } + + @Test (timeout = 30000) + public void testReloadMissingTrustStore() throws Exception { + KeyPair kp = generateKeyPair("RSA"); + X509Certificate cert1 = generateCertificate("CN=Cert1", kp, 30, "SHA1withRSA"); + String keystoreLocation = BASEDIR + "/testmissing.jks"; + createKeyStore(keystoreLocation, "password", "cert1", kp.getPrivate(), cert1); + + long reloadInterval = 10; + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); + ReloadingX509KeystoreManager tm = + new ReloadingX509KeystoreManager("jks", keystoreLocation, + "password", + "password"); + try { + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + 
Paths.get(keystoreLocation), tm::loadFrom,null), reloadInterval, reloadInterval); + assertEquals(kp.getPrivate(), tm.getPrivateKey("cert1")); + + assertFalse(reloaderLog.getOutput().contains( + FileMonitoringTimerTask.PROCESS_ERROR_MESSAGE)); + + // Wait for the first reload to happen so we actually detect a change after the delete + Thread.sleep((reloadInterval+ 1000)); + + new File(keystoreLocation).delete(); + + // Wait for the reload to happen and log to get written to + Thread.sleep((reloadInterval+ 1000)); + + waitForFailedReloadAtLeastOnce((int) reloadInterval); + + assertEquals(kp.getPrivate(), tm.getPrivateKey("cert1")); + } finally { + reloaderLog.stopCapturing(); + fileMonitoringTimer.cancel(); + } + } + + + @Test (timeout = 30000) + public void testReloadCorruptTrustStore() throws Exception { + KeyPair kp = generateKeyPair("RSA"); + X509Certificate cert1 = generateCertificate("CN=Cert1", kp, 30, "SHA1withRSA"); + String keystoreLocation = BASEDIR + "/testmissing.jks"; + createKeyStore(keystoreLocation, "password", "cert1", kp.getPrivate(), cert1); + + long reloadInterval = 10; + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); + ReloadingX509KeystoreManager tm = + new ReloadingX509KeystoreManager("jks", keystoreLocation, + "password", + "password"); + try { + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + Paths.get(keystoreLocation), tm::loadFrom,null), reloadInterval, reloadInterval); + assertEquals(kp.getPrivate(), tm.getPrivateKey("cert1")); + + // Wait so that the file modification time is different + Thread.sleep((reloadInterval + 1000)); + + assertFalse(reloaderLog.getOutput().contains( + FileMonitoringTimerTask.PROCESS_ERROR_MESSAGE)); + OutputStream os = new FileOutputStream(keystoreLocation); + os.write(1); + os.close(); + + waitForFailedReloadAtLeastOnce((int) reloadInterval); + + assertEquals(kp.getPrivate(), tm.getPrivateKey("cert1")); + } finally { + reloaderLog.stopCapturing(); 
+ fileMonitoringTimer.cancel(); + } + } + + /**Wait for the reloader thread to load the configurations at least once + * by probing the log of the thread if the reload fails. + */ + private void waitForFailedReloadAtLeastOnce(int reloadInterval) + throws InterruptedException, TimeoutException { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return reloaderLog.getOutput().contains( + FileMonitoringTimerTask.PROCESS_ERROR_MESSAGE); + } + }, reloadInterval, 10 * 1000); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509TrustManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509TrustManager.java index 3fb203ee2b93b..63589592f35dd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509TrustManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/TestReloadingX509TrustManager.java @@ -21,7 +21,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.LogCapturer; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.junit.BeforeClass; import org.junit.Test; @@ -30,10 +30,12 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Paths; import java.security.KeyPair; import java.security.cert.X509Certificate; import java.util.HashMap; import java.util.Map; +import java.util.Timer; import java.util.concurrent.TimeoutException; import static org.junit.Assert.assertEquals; @@ -50,7 +52,7 @@ public class TestReloadingX509TrustManager { private X509Certificate cert1; private X509Certificate cert2; private final LogCapturer reloaderLog = LogCapturer.captureLogs( - ReloadingX509TrustManager.LOG); + FileMonitoringTimerTask.LOG); @BeforeClass public static void setUp() throws Exception { @@ 
-64,12 +66,7 @@ public void testLoadMissingTrustStore() throws Exception { String truststoreLocation = BASEDIR + "/testmissing.jks"; ReloadingX509TrustManager tm = - new ReloadingX509TrustManager("jks", truststoreLocation, "password", 10); - try { - tm.init(); - } finally { - tm.destroy(); - } + new ReloadingX509TrustManager("jks", truststoreLocation, "password"); } @Test(expected = IOException.class) @@ -80,12 +77,7 @@ public void testLoadCorruptTrustStore() throws Exception { os.close(); ReloadingX509TrustManager tm = - new ReloadingX509TrustManager("jks", truststoreLocation, "password", 10); - try { - tm.init(); - } finally { - tm.destroy(); - } + new ReloadingX509TrustManager("jks", truststoreLocation, "password"); } @Test (timeout = 30000) @@ -96,14 +88,17 @@ public void testReload() throws Exception { String truststoreLocation = BASEDIR + "/testreload.jks"; createTrustStore(truststoreLocation, "password", "cert1", cert1); + long reloadInterval = 10; + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); final ReloadingX509TrustManager tm = - new ReloadingX509TrustManager("jks", truststoreLocation, "password", 10); + new ReloadingX509TrustManager("jks", truststoreLocation, "password"); try { - tm.init(); + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + Paths.get(truststoreLocation), tm::loadFrom,null), reloadInterval, reloadInterval); assertEquals(1, tm.getAcceptedIssuers().length); // Wait so that the file modification time is different - Thread.sleep((tm.getReloadInterval() + 1000)); + Thread.sleep((reloadInterval+ 1000)); // Add another cert Map certs = new HashMap(); @@ -116,9 +111,9 @@ public void testReload() throws Exception { public Boolean get() { return tm.getAcceptedIssuers().length == 2; } - }, (int) tm.getReloadInterval(), 10000); + }, (int) reloadInterval, 100000); } finally { - tm.destroy(); + fileMonitoringTimer.cancel(); } } @@ -130,27 +125,38 @@ public void 
testReloadMissingTrustStore() throws Exception { String truststoreLocation = BASEDIR + "/testmissing.jks"; createTrustStore(truststoreLocation, "password", "cert1", cert1); + long reloadInterval = 10; + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); ReloadingX509TrustManager tm = - new ReloadingX509TrustManager("jks", truststoreLocation, "password", 10); + new ReloadingX509TrustManager("jks", truststoreLocation, "password"); try { - tm.init(); + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + Paths.get(truststoreLocation), tm::loadFrom,null), reloadInterval, reloadInterval); assertEquals(1, tm.getAcceptedIssuers().length); X509Certificate cert = tm.getAcceptedIssuers()[0]; assertFalse(reloaderLog.getOutput().contains( - ReloadingX509TrustManager.RELOAD_ERROR_MESSAGE)); + FileMonitoringTimerTask.PROCESS_ERROR_MESSAGE)); + + // Wait for the first reload to happen so we actually detect a change after the delete + Thread.sleep((reloadInterval+ 1000)); + new File(truststoreLocation).delete(); - waitForFailedReloadAtLeastOnce((int) tm.getReloadInterval()); + // Wait for the reload to happen and log to get written to + Thread.sleep((reloadInterval+ 1000)); + + waitForFailedReloadAtLeastOnce((int) reloadInterval); assertEquals(1, tm.getAcceptedIssuers().length); assertEquals(cert, tm.getAcceptedIssuers()[0]); } finally { reloaderLog.stopCapturing(); - tm.destroy(); + fileMonitoringTimer.cancel(); } } + @Test (timeout = 30000) public void testReloadCorruptTrustStore() throws Exception { KeyPair kp = generateKeyPair("RSA"); @@ -159,29 +165,32 @@ public void testReloadCorruptTrustStore() throws Exception { String truststoreLocation = BASEDIR + "/testcorrupt.jks"; createTrustStore(truststoreLocation, "password", "cert1", cert1); + long reloadInterval = 10; + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); ReloadingX509TrustManager tm = - new 
ReloadingX509TrustManager("jks", truststoreLocation, "password", 10); + new ReloadingX509TrustManager("jks", truststoreLocation, "password"); try { - tm.init(); + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + Paths.get(truststoreLocation), tm::loadFrom,null), reloadInterval, reloadInterval); assertEquals(1, tm.getAcceptedIssuers().length); final X509Certificate cert = tm.getAcceptedIssuers()[0]; // Wait so that the file modification time is different - Thread.sleep((tm.getReloadInterval() + 1000)); + Thread.sleep((reloadInterval + 1000)); assertFalse(reloaderLog.getOutput().contains( - ReloadingX509TrustManager.RELOAD_ERROR_MESSAGE)); + FileMonitoringTimerTask.PROCESS_ERROR_MESSAGE)); OutputStream os = new FileOutputStream(truststoreLocation); os.write(1); os.close(); - waitForFailedReloadAtLeastOnce((int) tm.getReloadInterval()); + waitForFailedReloadAtLeastOnce((int) reloadInterval); assertEquals(1, tm.getAcceptedIssuers().length); assertEquals(cert, tm.getAcceptedIssuers()[0]); } finally { reloaderLog.stopCapturing(); - tm.destroy(); + fileMonitoringTimer.cancel(); } } @@ -194,7 +203,7 @@ private void waitForFailedReloadAtLeastOnce(int reloadInterval) @Override public Boolean get() { return reloaderLog.getOutput().contains( - ReloadingX509TrustManager.RELOAD_ERROR_MESSAGE); + FileMonitoringTimerTask.PROCESS_ERROR_MESSAGE); } }, reloadInterval, 10 * 1000); } @@ -208,13 +217,15 @@ public void testNoPassword() throws Exception { String truststoreLocation = BASEDIR + "/testreload.jks"; createTrustStore(truststoreLocation, "password", "cert1", cert1); + Timer fileMonitoringTimer = new Timer(FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME, true); final ReloadingX509TrustManager tm = - new ReloadingX509TrustManager("jks", truststoreLocation, null, 10); + new ReloadingX509TrustManager("jks", truststoreLocation, null); try { - tm.init(); + fileMonitoringTimer.schedule(new FileMonitoringTimerTask( + Paths.get(truststoreLocation), tm::loadFrom,null), 10, 
10); assertEquals(1, tm.getAcceptedIssuers().length); } finally { - tm.destroy(); + fileMonitoringTimer.cancel(); } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java index 8bc881ae5d1da..225cc658d39ba 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java @@ -30,6 +30,13 @@ import java.util.List; import java.util.Map; +import java.util.concurrent.Callable; +import org.apache.hadoop.fs.statistics.IOStatisticAssertions; +import org.apache.hadoop.fs.statistics.MeanStatistic; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableRate; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.Assert; import org.apache.hadoop.io.DataInputBuffer; @@ -155,6 +162,55 @@ public DelegationKey getKey(TestDelegationTokenIdentifier id) { return allKeys.get(id.getMasterKeyId()); } } + + public static class TestFailureDelegationTokenSecretManager + extends TestDelegationTokenSecretManager { + private boolean throwError = false; + private long errorSleepMillis; + + public TestFailureDelegationTokenSecretManager(long errorSleepMillis) { + super(24*60*60*1000, 10*1000, 1*1000, 60*60*1000); + this.errorSleepMillis = errorSleepMillis; + } + + public void setThrowError(boolean throwError) { + this.throwError = throwError; + } + + private void sleepAndThrow() throws IOException { + try { + Thread.sleep(errorSleepMillis); + throw new IOException("Test exception"); + } catch (InterruptedException e) { + } + } + + @Override + protected void 
storeNewToken(TestDelegationTokenIdentifier ident, long renewDate) + throws IOException { + if (throwError) { + sleepAndThrow(); + } + super.storeNewToken(ident, renewDate); + } + + @Override + protected void removeStoredToken(TestDelegationTokenIdentifier ident) throws IOException { + if (throwError) { + sleepAndThrow(); + } + super.removeStoredToken(ident); + } + + @Override + protected void updateStoredToken(TestDelegationTokenIdentifier ident, long renewDate) + throws IOException { + if (throwError) { + sleepAndThrow(); + } + super.updateStoredToken(ident, renewDate); + } + } public static class TokenSelector extends AbstractDelegationTokenSelector{ @@ -579,4 +635,102 @@ public void testEmptyToken() throws IOException { assertEquals(token1, token2); assertEquals(token1.encodeToUrlString(), token2.encodeToUrlString()); } + + @Test + public void testMultipleDelegationTokenSecretManagerMetrics() { + TestDelegationTokenSecretManager dtSecretManager1 = + new TestDelegationTokenSecretManager(0, 0, 0, 0); + assertNotNull(dtSecretManager1.getMetrics()); + + TestDelegationTokenSecretManager dtSecretManager2 = + new TestDelegationTokenSecretManager(0, 0, 0, 0); + assertNotNull(dtSecretManager2.getMetrics()); + + DefaultMetricsSystem.instance().init("test"); + + TestDelegationTokenSecretManager dtSecretManager3 = + new TestDelegationTokenSecretManager(0, 0, 0, 0); + assertNotNull(dtSecretManager3.getMetrics()); + } + + @Test + public void testDelegationTokenSecretManagerMetrics() throws Exception { + TestDelegationTokenSecretManager dtSecretManager = + new TestDelegationTokenSecretManager(24*60*60*1000, + 10*1000, 1*1000, 60*60*1000); + try { + dtSecretManager.startThreads(); + + final Token token = callAndValidateMetrics( + dtSecretManager, dtSecretManager.getMetrics().getStoreToken(), "storeToken", + () -> generateDelegationToken(dtSecretManager, "SomeUser", "JobTracker")); + + callAndValidateMetrics(dtSecretManager, dtSecretManager.getMetrics().getUpdateToken(), + 
"updateToken", () -> dtSecretManager.renewToken(token, "JobTracker")); + + callAndValidateMetrics(dtSecretManager, dtSecretManager.getMetrics().getRemoveToken(), + "removeToken", () -> dtSecretManager.cancelToken(token, "JobTracker")); + } finally { + dtSecretManager.stopThreads(); + } + } + + @Test + public void testDelegationTokenSecretManagerMetricsFailures() throws Exception { + int errorSleepMillis = 200; + TestFailureDelegationTokenSecretManager dtSecretManager = + new TestFailureDelegationTokenSecretManager(errorSleepMillis); + + try { + dtSecretManager.startThreads(); + + final Token token = + generateDelegationToken(dtSecretManager, "SomeUser", "JobTracker"); + + dtSecretManager.setThrowError(true); + + callAndValidateFailureMetrics(dtSecretManager, "storeToken", false, + errorSleepMillis, + () -> generateDelegationToken(dtSecretManager, "SomeUser", "JobTracker")); + + callAndValidateFailureMetrics(dtSecretManager, "updateToken", true, + errorSleepMillis, () -> dtSecretManager.renewToken(token, "JobTracker")); + + callAndValidateFailureMetrics(dtSecretManager, "removeToken", true, + errorSleepMillis, () -> dtSecretManager.cancelToken(token, "JobTracker")); + } finally { + dtSecretManager.stopThreads(); + } + } + + private T callAndValidateMetrics(TestDelegationTokenSecretManager dtSecretManager, + MutableRate metric, String statName, Callable callable) + throws Exception { + MeanStatistic stat = IOStatisticAssertions.lookupMeanStatistic( + dtSecretManager.getMetrics().getIoStatistics(), statName + ".mean"); + long metricBefore = metric.lastStat().numSamples(); + long statBefore = stat.getSamples(); + T returnedObject = callable.call(); + assertEquals(metricBefore + 1, metric.lastStat().numSamples()); + assertEquals(statBefore + 1, stat.getSamples()); + return returnedObject; + } + + private void callAndValidateFailureMetrics(TestDelegationTokenSecretManager dtSecretManager, + String statName, boolean expectError, int errorSleepMillis, Callable callable) + 
throws Exception { + MutableCounterLong counter = dtSecretManager.getMetrics().getTokenFailure(); + MeanStatistic failureStat = IOStatisticAssertions.lookupMeanStatistic( + dtSecretManager.getMetrics().getIoStatistics(), statName + ".failures.mean"); + long counterBefore = counter.value(); + long statBefore = failureStat.getSamples(); + if (expectError) { + LambdaTestUtils.intercept(IOException.class, callable); + } else { + callable.call(); + } + assertEquals(counterBefore + 1, counter.value()); + assertEquals(statBefore + 1, failureStat.getSamples()); + assertTrue(failureStat.getSum() >= errorSleepMillis); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java index c9571ff21e847..6dc8c59b25e40 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java @@ -21,14 +21,14 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.apache.curator.RetryPolicy; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.api.ACLProvider; +import org.apache.curator.framework.api.CreateBuilder; +import org.apache.curator.framework.api.ProtectACLCreateModeStatPathAndBytesable; import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; @@ -39,9 +39,12 
@@ import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Id; +import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.server.auth.DigestAuthenticationProvider; import org.junit.After; import org.junit.Assert; @@ -59,15 +62,15 @@ public class TestZKDelegationTokenSecretManager { private static final Logger LOG = LoggerFactory.getLogger(TestZKDelegationTokenSecretManager.class); - private static final int TEST_RETRIES = 2; + protected static final int TEST_RETRIES = 2; - private static final int RETRY_COUNT = 5; + protected static final int RETRY_COUNT = 5; - private static final int RETRY_WAIT = 1000; + protected static final int RETRY_WAIT = 1000; - private static final long DAY_IN_SECS = 86400; + protected static final long DAY_IN_SECS = 86400; - private TestingServer zkServer; + protected TestingServer zkServer; @Rule public Timeout globalTimeout = new Timeout(300000); @@ -86,17 +89,17 @@ public void tearDown() throws Exception { } protected Configuration getSecretConf(String connectString) { - Configuration conf = new Configuration(); - conf.setBoolean(DelegationTokenManager.ENABLE_ZK_KEY, true); - conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_CONNECTION_STRING, connectString); - conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH, "testPath"); - conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_AUTH_TYPE, "none"); - conf.setLong(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_SHUTDOWN_TIMEOUT, 100); - conf.setLong(DelegationTokenManager.UPDATE_INTERVAL, DAY_IN_SECS); - conf.setLong(DelegationTokenManager.MAX_LIFETIME, DAY_IN_SECS); - conf.setLong(DelegationTokenManager.RENEW_INTERVAL, 
DAY_IN_SECS); - conf.setLong(DelegationTokenManager.REMOVAL_SCAN_INTERVAL, DAY_IN_SECS); - return conf; + Configuration conf = new Configuration(); + conf.setBoolean(DelegationTokenManager.ENABLE_ZK_KEY, true); + conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_CONNECTION_STRING, connectString); + conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH, "testPath"); + conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_AUTH_TYPE, "none"); + conf.setLong(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_SHUTDOWN_TIMEOUT, 100); + conf.setLong(DelegationTokenManager.UPDATE_INTERVAL, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.MAX_LIFETIME, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.RENEW_INTERVAL, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.REMOVAL_SCAN_INTERVAL, DAY_IN_SECS); + return conf; } @SuppressWarnings("unchecked") @@ -217,6 +220,58 @@ public void testNodeUpAferAWhile() throws Exception { } } + @SuppressWarnings("unchecked") + @Test + public void testMultiNodeCompeteForSeqNum() throws Exception { + DelegationTokenManager tm1, tm2 = null; + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + conf.setInt( + ZKDelegationTokenSecretManager.ZK_DTSM_TOKEN_SEQNUM_BATCH_SIZE, 1000); + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.init(); + + Token token1 = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token1); + AbstractDelegationTokenIdentifier id1 = + tm1.getDelegationTokenSecretManager().decodeTokenIdentifier(token1); + Assert.assertEquals( + "Token seq should be the same", 1, id1.getSequenceNumber()); + Token token2 = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token2); + AbstractDelegationTokenIdentifier id2 = + tm1.getDelegationTokenSecretManager().decodeTokenIdentifier(token2); + Assert.assertEquals( + "Token seq should be the same", 2, 
id2.getSequenceNumber()); + + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.init(); + + Token token3 = + (Token) tm2.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token3); + AbstractDelegationTokenIdentifier id3 = + tm2.getDelegationTokenSecretManager().decodeTokenIdentifier(token3); + Assert.assertEquals( + "Token seq should be the same", 1001, id3.getSequenceNumber()); + Token token4 = + (Token) tm2.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token4); + AbstractDelegationTokenIdentifier id4 = + tm2.getDelegationTokenSecretManager().decodeTokenIdentifier(token4); + Assert.assertEquals( + "Token seq should be the same", 1002, id4.getSequenceNumber()); + + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + } + @SuppressWarnings("unchecked") @Test public void testRenewTokenSingleManager() throws Exception { @@ -265,19 +320,13 @@ public void testCancelTokenSingleManager() throws Exception { @SuppressWarnings("rawtypes") protected void verifyDestroy(DelegationTokenManager tm, Configuration conf) throws Exception { - AbstractDelegationTokenSecretManager sm = - tm.getDelegationTokenSecretManager(); - ZKDelegationTokenSecretManager zksm = (ZKDelegationTokenSecretManager) sm; - ExecutorService es = zksm.getListenerThreadPool(); tm.destroy(); - Assert.assertTrue(es.isShutdown()); // wait for the pool to terminate long timeout = conf.getLong( ZKDelegationTokenSecretManager.ZK_DTSM_ZK_SHUTDOWN_TIMEOUT, ZKDelegationTokenSecretManager.ZK_DTSM_ZK_SHUTDOWN_TIMEOUT_DEFAULT); Thread.sleep(timeout * 3); - Assert.assertTrue(es.isTerminated()); } @SuppressWarnings({ "unchecked", "rawtypes" }) @@ -301,20 +350,9 @@ public void testStopThreads() throws Exception { tm1.init(); Token token = - (Token) + (Token) tm1.createToken(UserGroupInformation.getCurrentUser(), "foo"); Assert.assertNotNull(token); - - AbstractDelegationTokenSecretManager sm = tm1.getDelegationTokenSecretManager(); - 
ZKDelegationTokenSecretManager zksm = (ZKDelegationTokenSecretManager)sm; - ExecutorService es = zksm.getListenerThreadPool(); - es.submit(new Callable() { - public Void call() throws Exception { - Thread.sleep(shutdownTimeoutMillis * 2); // force this to be shutdownNow - return null; - } - }); - tm1.destroy(); } @@ -326,7 +364,7 @@ public void testACLs() throws Exception { RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); String userPass = "myuser:mypass"; final ACL digestACL = new ACL(ZooDefs.Perms.ALL, new Id("digest", - DigestAuthenticationProvider.generateDigest(userPass))); + DigestAuthenticationProvider.generateDigest(userPass))); ACLProvider digestAclProvider = new ACLProvider() { @Override public List getAclForPath(String path) { return getDefaultAcl(); } @@ -340,12 +378,12 @@ public List getDefaultAcl() { }; CuratorFramework curatorFramework = - CuratorFrameworkFactory.builder() - .connectString(connectString) - .retryPolicy(retryPolicy) - .aclProvider(digestAclProvider) - .authorization("digest", userPass.getBytes("UTF-8")) - .build(); + CuratorFrameworkFactory.builder() + .connectString(connectString) + .retryPolicy(retryPolicy) + .aclProvider(digestAclProvider) + .authorization("digest", userPass.getBytes("UTF-8")) + .build(); curatorFramework.start(); ZKDelegationTokenSecretManager.setCurator(curatorFramework); tm1 = new DelegationTokenManager(conf, new Text("bla")); @@ -373,7 +411,7 @@ private void verifyACL(CuratorFramework curatorFramework, // cancelled but.. that would mean having to make an RPC call for every // verification request. // Thus, the eventual consistency tradef-off should be acceptable here... 
- private void verifyTokenFail(DelegationTokenManager tm, + protected void verifyTokenFail(DelegationTokenManager tm, Token token) throws IOException, InterruptedException { verifyTokenFailWithRetry(tm, token, RETRY_COUNT); @@ -473,4 +511,65 @@ public Boolean get() { } }, 1000, 5000); } + + @Test + public void testCreatingParentContainersIfNeeded() throws Exception { + + String connectString = zkServer.getConnectString(); + RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); + Configuration conf = getSecretConf(connectString); + CuratorFramework curatorFramework = + CuratorFrameworkFactory.builder() + .connectString(connectString) + .retryPolicy(retryPolicy) + .build(); + curatorFramework.start(); + ZKDelegationTokenSecretManager.setCurator(curatorFramework); + DelegationTokenManager tm1 = new DelegationTokenManager(conf, new Text("foo")); + + // When the init method is called, + // the ZKDelegationTokenSecretManager#startThread method will be called, + // and the creatingParentContainersIfNeeded will be called to create the nameSpace. + tm1.init(); + + String workingPath = "/" + conf.get(ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH, + ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH_DEAFULT) + "/ZKDTSMRoot"; + + // Check if the created NameSpace exists. + Stat stat = curatorFramework.checkExists().forPath(workingPath); + Assert.assertNotNull(stat); + + tm1.destroy(); + curatorFramework.close(); + } + + @Test + public void testCreateNameSpaceRepeatedly() throws Exception { + + String connectString = zkServer.getConnectString(); + RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); + Configuration conf = getSecretConf(connectString); + CuratorFramework curatorFramework = + CuratorFrameworkFactory.builder(). + connectString(connectString). + retryPolicy(retryPolicy). 
+ build(); + curatorFramework.start(); + + String workingPath = "/" + conf.get(ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH, + ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH_DEAFULT) + "/ZKDTSMRoot-Test"; + CreateBuilder createBuilder = curatorFramework.create(); + ProtectACLCreateModeStatPathAndBytesable createModeStat = + createBuilder.creatingParentContainersIfNeeded(); + createModeStat.forPath(workingPath); + + // Check if the created NameSpace exists. + Stat stat = curatorFramework.checkExists().forPath(workingPath); + Assert.assertNotNull(stat); + + // Repeated creation will throw NodeExists exception + LambdaTestUtils.intercept(KeeperException.class, + "KeeperErrorCode = NodeExists for "+workingPath, + () -> createModeStat.forPath(workingPath)); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestWebDelegationToken.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestWebDelegationToken.java index 9b5bd22dbe6ac..69e252222be84 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestWebDelegationToken.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestWebDelegationToken.java @@ -64,6 +64,7 @@ import java.io.Writer; import java.net.HttpURLConnection; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.security.Principal; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; @@ -554,7 +555,7 @@ public Void run() throws Exception { HttpURLConnection conn = aUrl.openConnection(url, token); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - List ret = IOUtils.readLines(conn.getInputStream()); + List ret = IOUtils.readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); 
Assert.assertEquals(FOO_USER, ret.get(0)); @@ -624,7 +625,7 @@ public Void run() throws Exception { HttpURLConnection conn = aUrl.openConnection(url, token); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - List ret = IOUtils.readLines(conn.getInputStream()); + List ret = IOUtils.readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals(FOO_USER, ret.get(0)); @@ -848,14 +849,14 @@ public void testProxyUser() throws Exception { HttpURLConnection conn = (HttpURLConnection) new URL(strUrl).openConnection(); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - List ret = IOUtils.readLines(conn.getInputStream()); + List ret = IOUtils.readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals(OK_USER, ret.get(0)); strUrl = String.format("%s?user.name=%s&DOAS=%s", url.toExternalForm(), FOO_USER, OK_USER); conn = (HttpURLConnection) new URL(strUrl).openConnection(); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - ret = IOUtils.readLines(conn.getInputStream()); + ret = IOUtils.readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals(OK_USER, ret.get(0)); @@ -872,7 +873,8 @@ public Void run() throws Exception { HttpURLConnection conn = aUrl.openConnection(url, token, OK_USER); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - List ret = IOUtils.readLines(conn.getInputStream()); + List ret = IOUtils + .readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals(OK_USER, ret.get(0)); @@ -892,7 +894,8 @@ public Void run() throws Exception { conn = aUrl.openConnection(url, token, OK_USER); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - ret = IOUtils.readLines(conn.getInputStream()); + ret = IOUtils + .readLines(conn.getInputStream(), 
StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals(FOO_USER, ret.get(0)); @@ -953,7 +956,8 @@ public Void run() throws Exception { HttpURLConnection conn = aUrl.openConnection(url, token); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - List ret = IOUtils.readLines(conn.getInputStream()); + List ret = IOUtils + .readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals("remoteuser=" + FOO_USER+ ":ugi=" + FOO_USER, ret.get(0)); @@ -962,7 +966,7 @@ public Void run() throws Exception { conn = aUrl.openConnection(url, token, OK_USER); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - ret = IOUtils.readLines(conn.getInputStream()); + ret = IOUtils.readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals("realugi=" + FOO_USER +":remoteuser=" + OK_USER + ":ugi=" + OK_USER, ret.get(0)); @@ -1014,7 +1018,7 @@ public Void run() throws Exception { HttpURLConnection conn = aUrl.openConnection(url, token, OK_USER); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - List ret = IOUtils.readLines(conn.getInputStream()); + List ret = IOUtils.readLines(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(1, ret.size()); Assert.assertEquals("realugi=" + FOO_USER +":remoteuser=" + OK_USER + ":ugi=" + OK_USER, ret.get(0)); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/AssertExtensions.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/AssertExtensions.java new file mode 100644 index 0000000000000..8c5e553f71ee3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/AssertExtensions.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.test; + +import java.util.concurrent.Callable; + +import org.assertj.core.description.Description; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Extra classes to work with AssertJ. + * These are kept separate from {@link LambdaTestUtils} so there's + * no requirement for AssertJ to be on the classpath in that broadly + * used class. + */ +public final class AssertExtensions { + + private static final Logger LOG = + LoggerFactory.getLogger(AssertExtensions.class); + + private AssertExtensions() { + } + + /** + * A description for AssertJ "describedAs" clauses which evaluates the + * lambda-expression only on failure. That must return a string + * or null/"" to be skipped. 
+ * @param eval lambda expression to invoke + * @return a description for AssertJ + */ + public static Description dynamicDescription(Callable eval) { + return new DynamicDescription(eval); + } + + private static final class DynamicDescription extends Description { + private final Callable eval; + + private DynamicDescription(final Callable eval) { + this.eval = eval; + } + + @Override + public String value() { + try { + return eval.call(); + } catch (Exception e) { + LOG.warn("Failed to evaluate description: " + e); + LOG.debug("Evaluation failure", e); + // return null so that the description evaluation chain + // will skip this one + return null; + } + } + } + + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 9e91634873607..03f8b359e7f8d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -30,22 +30,32 @@ import java.lang.management.ThreadInfo; import java.lang.management.ThreadMXBean; import java.lang.reflect.InvocationTargetException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Locale; +import java.util.Objects; import java.util.Random; import java.util.Set; import java.util.Enumeration; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.logging.Log; import 
org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.BlockingThreadPoolExecutorService; +import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.log4j.Appender; @@ -59,16 +69,27 @@ import org.junit.Assume; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import org.slf4j.LoggerFactory; -import com.google.common.base.Joiner; -import com.google.common.base.Supplier; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.util.functional.CommonCallableSupplier.submit; +import static org.apache.hadoop.util.functional.CommonCallableSupplier.waitForCompletion; /** * Test provides some very generic helpers which might be used across the tests */ public abstract class GenericTestUtils { + public static final int EXECUTOR_THREAD_COUNT = 64; + + private static final org.slf4j.Logger LOG = + LoggerFactory.getLogger(GenericTestUtils.class); + + public static final String PREFIX = "file-"; + private static final AtomicInteger sequence = new AtomicInteger(); /** @@ -88,7 +109,8 @@ public abstract class GenericTestUtils { public static final String DEFAULT_TEST_DATA_PATH = "target/test/data/"; /** - * Error string used in {@link GenericTestUtils#waitFor(Supplier, int, int)}. + * Error string used in + * {@link GenericTestUtils#waitFor(Supplier, long, long)}. 
*/ public static final String ERROR_MISSING_ARGUMENT = "Input supplier interface should be initailized"; @@ -227,6 +249,22 @@ public static int uniqueSequenceId() { return sequence.incrementAndGet(); } + /** + * Creates a directory for the data/logs of the unit test. + * It first deletes the directory if it exists. + * + * @param testClass the unit test class. + * @return the root directory as a File. + */ + public static File setupTestRootDir(Class testClass) { + File testRootDir = getTestDir(testClass.getSimpleName()); + if (testRootDir.exists()) { + FileUtil.fullyDelete(testRootDir); + } + testRootDir.mkdirs(); + return testRootDir; + } + /** * Get the (created) base directory for tests. * @return the absolute directory @@ -380,9 +418,29 @@ public static void assertExceptionContains(String expectedText, public static void waitFor(final Supplier check, final long checkEveryMillis, final long waitForMillis) throws TimeoutException, InterruptedException { - if (check == null) { - throw new NullPointerException(ERROR_MISSING_ARGUMENT); - } + waitFor(check, checkEveryMillis, waitForMillis, null); + } + + /** + * Wait for the specified test to return true. The test will be performed + * initially and then every {@code checkEveryMillis} until at least + * {@code waitForMillis} time has expired. A null {@code check} causes a + * {@link NullPointerException}; {@code waitForMillis} less than + * {@code checkEveryMillis} causes an {@link IllegalArgumentException}. + * + * @param check the test to perform. + * @param checkEveryMillis how often to perform the test. + * @param waitForMillis the amount of time after which no more tests will be + * performed. + * @param errorMsg error message to provide in TimeoutException. + * @throws TimeoutException if the test does not return true in the allotted + * time. + * @throws InterruptedException if the method is interrupted while waiting. 
+ */ + public static void waitFor(final Supplier check, + final long checkEveryMillis, final long waitForMillis, + final String errorMsg) throws TimeoutException, InterruptedException { + Objects.requireNonNull(check, ERROR_MISSING_ARGUMENT); if (waitForMillis < checkEveryMillis) { throw new IllegalArgumentException(ERROR_INVALID_ARGUMENT); } @@ -396,9 +454,12 @@ public static void waitFor(final Supplier check, } if (!result) { - throw new TimeoutException("Timed out waiting for condition. " + - "Thread diagnostics:\n" + - TimedOutTestsListener.buildThreadDiagnosticString()); + final String exceptionErrorMsg = "Timed out waiting for condition. " + + (org.apache.commons.lang3.StringUtils.isNotEmpty(errorMsg) + ? "Error Message: " + errorMsg : "") + + "\nThread diagnostics:\n" + + TimedOutTestsListener.buildThreadDiagnosticString(); + throw new TimeoutException(exceptionErrorMsg); } } @@ -880,5 +941,132 @@ public static int getTestsThreadCount() { } return threadCount; } + /** + * Write the text to a file asynchronously. Logs the operation duration. + * @param fs filesystem + * @param path path + * @return future to the path created. + */ + private static CompletableFuture put(FileSystem fs, + Path path, String text) { + return submit(EXECUTOR, () -> { + try (DurationInfo ignore = + new DurationInfo(LOG, false, "Creating %s", path)) { + createFile(fs, path, true, text.getBytes(StandardCharsets.UTF_8)); + return path; + } + }); + } + + /** + * Build a set of files in a directory tree. + * @param fs filesystem + * @param destDir destination + * @param depth file depth + * @param fileCount number of files to create. + * @param dirCount number of dirs to create at each level + * @return the list of files created. 
+ */ + public static List createFiles(final FileSystem fs, + final Path destDir, + final int depth, + final int fileCount, + final int dirCount) throws IOException { + return createDirsAndFiles(fs, destDir, depth, fileCount, dirCount, + new ArrayList<>(fileCount), + new ArrayList<>(dirCount)); + } + + /** + * Build a set of files in a directory tree. + * @param fs filesystem + * @param destDir destination + * @param depth file depth + * @param fileCount number of files to create. + * @param dirCount number of dirs to create at each level + * @param paths [out] list of file paths created + * @param dirs [out] list of directory paths created. + * @return the list of files created. + */ + public static List createDirsAndFiles(final FileSystem fs, + final Path destDir, + final int depth, + final int fileCount, + final int dirCount, + final List paths, + final List dirs) throws IOException { + buildPaths(paths, dirs, destDir, depth, fileCount, dirCount); + List> futures = new ArrayList<>(paths.size() + + dirs.size()); + + // create directories. With dir marker retention, that adds more entries + // to cause deletion issues + try (DurationInfo ignore = + new DurationInfo(LOG, "Creating %d directories", dirs.size())) { + for (Path path : dirs) { + futures.add(submit(EXECUTOR, () ->{ + fs.mkdirs(path); + return path; + })); + } + waitForCompletion(futures); + } + + try (DurationInfo ignore = + new DurationInfo(LOG, "Creating %d files", paths.size())) { + for (Path path : paths) { + futures.add(put(fs, path, path.getName())); + } + waitForCompletion(futures); + return paths; + } + } -} + /** + * Recursive method to build up lists of files and directories. + * @param filePaths list of file paths to add entries to. + * @param dirPaths list of directory paths to add entries to. + * @param destDir destination directory. + * @param depth depth of directories + * @param fileCount number of files. + * @param dirCount number of directories. 
+ */ + public static void buildPaths(final List filePaths, + final List dirPaths, final Path destDir, final int depth, + final int fileCount, final int dirCount) { + if (depth <= 0) { + return; + } + // create the file paths + for (int i = 0; i < fileCount; i++) { + String name = filenameOfIndex(i); + Path p = new Path(destDir, name); + filePaths.add(p); + } + for (int i = 0; i < dirCount; i++) { + String name = String.format("dir-%03d", i); + Path p = new Path(destDir, name); + dirPaths.add(p); + buildPaths(filePaths, dirPaths, p, depth - 1, fileCount, dirCount); + } + } + + /** + * Given an index, return a string to use as the filename. + * @param i index + * @return name + */ + public static String filenameOfIndex(final int i) { + return String.format("%s%03d", PREFIX, i); + } + + /** + * For submitting work. + */ + private static final BlockingThreadPoolExecutorService EXECUTOR = + BlockingThreadPoolExecutorService.newInstance( + EXECUTOR_THREAD_COUNT, + EXECUTOR_THREAD_COUNT * 2, + 30, TimeUnit.SECONDS, + "test-operations"); +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java index ad265afc3a022..3e0d31dc6a150 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java @@ -18,7 +18,7 @@ package org.apache.hadoop.test; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java index 
0cb5288e881ff..eb8d938994735 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java @@ -18,7 +18,7 @@ package org.apache.hadoop.test; -import static com.google.common.base.Preconditions.*; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.*; import org.junit.Assert; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MoreAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MoreAsserts.java index 142669b78682e..f6e6055d78e2c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MoreAsserts.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MoreAsserts.java @@ -19,6 +19,9 @@ package org.apache.hadoop.test; import java.util.Iterator; +import java.util.concurrent.CompletableFuture; + +import org.assertj.core.api.Assertions; import org.junit.Assert; /** @@ -28,17 +31,18 @@ public class MoreAsserts { /** * Assert equivalence for array and iterable - * @param the type of the elements - * @param s the name/message for the collection - * @param expected the expected array of elements - * @param actual the actual iterable of elements + * + * @param the type of the elements + * @param s the name/message for the collection + * @param expected the expected array of elements + * @param actual the actual iterable of elements */ public static void assertEquals(String s, T[] expected, Iterable actual) { Iterator it = actual.iterator(); int i = 0; for (; i < expected.length && it.hasNext(); ++i) { - Assert.assertEquals("Element "+ i +" for "+ s, expected[i], it.next()); + Assert.assertEquals("Element " + i + " for " + s, expected[i], it.next()); } Assert.assertTrue("Expected more elements", i == expected.length); Assert.assertTrue("Expected less elements", !it.hasNext()); 
@@ -46,7 +50,8 @@ public static void assertEquals(String s, T[] expected, /** * Assert equality for two iterables - * @param the type of the elements + * + * @param the type of the elements * @param s * @param expected * @param actual @@ -57,10 +62,40 @@ public static void assertEquals(String s, Iterable expected, Iterator ita = actual.iterator(); int i = 0; while (ite.hasNext() && ita.hasNext()) { - Assert.assertEquals("Element "+ i +" for "+s, ite.next(), ita.next()); + Assert.assertEquals("Element " + i + " for " + s, ite.next(), ita.next()); } Assert.assertTrue("Expected more elements", !ite.hasNext()); Assert.assertTrue("Expected less elements", !ita.hasNext()); } + + public static void assertFutureCompletedSuccessfully(CompletableFuture future) { + Assertions.assertThat(future.isDone()) + .describedAs("This future is supposed to be " + + "completed successfully") + .isTrue(); + Assertions.assertThat(future.isCompletedExceptionally()) + .describedAs("This future is supposed to be " + + "completed successfully") + .isFalse(); + } + + public static void assertFutureFailedExceptionally(CompletableFuture future) { + Assertions.assertThat(future.isCompletedExceptionally()) + .describedAs("This future is supposed to be " + + "completed exceptionally") + .isTrue(); + } + + /** + * Assert that two values of the same type are equal. + * @param actual actual value. + * @param expected expected value. + * @param message error message to print in case of mismatch. 
+ */ + public static void assertEqual(T actual, T expected, String message) { + Assertions.assertThat(actual) + .describedAs("Mismatch in %s", message) + .isEqualTo(expected); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/StatUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/StatUtils.java index fef35d0561cb1..8da6df88c2bec 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/StatUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/StatUtils.java @@ -22,7 +22,7 @@ import java.io.BufferedReader; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -113,10 +113,9 @@ private static String getPermissionStringFromProcess(String[] shellCommand, ExecutorService executorService = Executors.newSingleThreadExecutor(); executorService.awaitTermination(2000, TimeUnit.MILLISECONDS); try { - Future future = - executorService.submit(() -> new BufferedReader( - new InputStreamReader(process.getInputStream(), - Charset.defaultCharset())).lines().findFirst().orElse("")); + Future future = executorService.submit(() -> new BufferedReader( + new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)).lines() + .findFirst().orElse("")); return future.get(); } finally { process.destroy(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestGenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestGenericTestUtils.java index fb7bd22fedfc9..8489e3d24f368 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestGenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestGenericTestUtils.java @@ -23,7 +23,7 @@ import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.slf4j.event.Level; import static org.junit.Assert.assertEquals; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java deleted file mode 100644 index d4599b0ecc2d7..0000000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java +++ /dev/null @@ -1,110 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.tracing; - -import com.google.common.base.Supplier; -import org.apache.hadoop.test.GenericTestUtils; -import org.apache.htrace.core.Span; -import org.apache.htrace.core.SpanId; -import org.apache.htrace.core.SpanReceiver; -import org.apache.htrace.core.HTraceConfiguration; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeoutException; -import org.junit.Assert; - -/** - * Span receiver that puts all spans into a single set. - * This is useful for testing. - *

      - * We're not using HTrace's POJOReceiver here so as that doesn't - * push all the metrics to a static place, and would make testing - * SpanReceiverHost harder. - */ -public class SetSpanReceiver extends SpanReceiver { - - public SetSpanReceiver(HTraceConfiguration conf) { - } - - public void receiveSpan(Span span) { - SetHolder.spans.put(span.getSpanId(), span); - } - - public void close() { - } - - public static void clear() { - SetHolder.spans.clear(); - } - - public static int size() { - return SetHolder.spans.size(); - } - - public static Collection getSpans() { - return SetHolder.spans.values(); - } - - public static Map> getMap() { - return SetHolder.getMap(); - } - - public static class SetHolder { - public static ConcurrentHashMap spans = - new ConcurrentHashMap(); - - public static Map> getMap() { - Map> map = new HashMap>(); - - for (Span s : spans.values()) { - List l = map.get(s.getDescription()); - if (l == null) { - l = new LinkedList(); - map.put(s.getDescription(), l); - } - l.add(s); - } - return map; - } - } - - public static void assertSpanNamesFound(final String[] expectedSpanNames) { - try { - GenericTestUtils.waitFor(new Supplier() { - @Override - public Boolean get() { - Map> map = SetSpanReceiver.SetHolder.getMap(); - for (String spanName : expectedSpanNames) { - if (!map.containsKey(spanName)) { - return false; - } - } - return true; - } - }, 100, 1000); - } catch (TimeoutException e) { - Assert.fail("timed out to get expected spans: " + e.getMessage()); - } catch (InterruptedException e) { - Assert.fail("interrupted while waiting spans: " + e.getMessage()); - } - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java deleted file mode 100644 index fc0726e3eef20..0000000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java 
+++ /dev/null @@ -1,71 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.tracing; - -import static org.junit.Assert.assertEquals; - -import java.net.URI; -import java.util.LinkedList; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.htrace.core.HTraceConfiguration; -import org.junit.Test; - -public class TestTraceUtils { - private static String TEST_PREFIX = "test.prefix.htrace."; - - @Test - public void testWrappedHadoopConf() { - String key = "sampler"; - String value = "ProbabilitySampler"; - Configuration conf = new Configuration(); - conf.set(TEST_PREFIX + key, value); - HTraceConfiguration wrapped = TraceUtils.wrapHadoopConf(TEST_PREFIX, conf); - assertEquals(value, wrapped.get(key)); - } - - @Test - public void testExtraConfig() { - String key = "test.extra.config"; - String oldValue = "old value"; - String newValue = "new value"; - Configuration conf = new Configuration(); - conf.set(TEST_PREFIX + key, oldValue); - LinkedList extraConfig = - new LinkedList(); - 
extraConfig.add(new ConfigurationPair(TEST_PREFIX + key, newValue)); - HTraceConfiguration wrapped = TraceUtils.wrapHadoopConf(TEST_PREFIX, conf, extraConfig); - assertEquals(newValue, wrapped.get(key)); - } - - /** - * Test tracing the globber. This is a regression test for HDFS-9187. - */ - @Test - public void testTracingGlobber() throws Exception { - // Bypass the normal FileSystem object creation path by just creating an - // instance of a subclass. - FileSystem fs = new LocalFileSystem(); - fs.initialize(new URI("file:///"), new Configuration()); - fs.globStatus(new Path("/")); - fs.close(); - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/JarFinder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/JarFinder.java index 478a29b3317b4..85d95738b5ef5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/JarFinder.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/JarFinder.java @@ -13,7 +13,7 @@ */ package org.apache.hadoop.util; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; @@ -132,6 +132,10 @@ private static void createJar(File dir, File jarFile) throws IOException { * @return path to the Jar containing the class. 
*/ public static String getJar(Class klass) { + return getJar(klass, null); + } + + public static String getJar(Class klass, String testSubDir) { Preconditions.checkNotNull(klass, "klass"); ClassLoader loader = klass.getClassLoader(); if (loader != null) { @@ -154,15 +158,18 @@ else if ("file".equals(url.getProtocol())) { klassName = klassName.replace(".", "/") + ".class"; path = path.substring(0, path.length() - klassName.length()); File baseDir = new File(path); - File testDir = GenericTestUtils.getTestDir(); + File testDir = + testSubDir == null ? GenericTestUtils.getTestDir() + : GenericTestUtils.getTestDir(testSubDir); testDir = testDir.getAbsoluteFile(); if (!testDir.exists()) { testDir.mkdirs(); } - File tempJar = File.createTempFile("hadoop-", "", testDir); - tempJar = new File(tempJar.getAbsolutePath() + ".jar"); + File tempFile = File.createTempFile("hadoop-", "", testDir); + File tempJar = new File(tempFile.getAbsolutePath() + ".jar"); createJar(baseDir, tempJar); tempJar.deleteOnExit(); + tempFile.deleteOnExit(); return tempJar.getAbsolutePath(); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java index 570e54214ad54..ba20636891345 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.jar.JarOutputStream; import java.util.zip.ZipEntry; @@ -41,8 +42,8 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; public class TestApplicationClassLoader { @@ -135,7 +136,7 @@ public void testGetResource() throws IOException { InputStream in = appClassloader.getResourceAsStream("resource.txt"); assertNotNull("Resource should not be null for app classloader", in); - assertEquals("hello", IOUtils.toString(in)); + assertEquals("hello", IOUtils.toString(in, StandardCharsets.UTF_8)); } private File makeTestJar() throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestClassUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestClassUtil.java index 98e182236c94c..04337929abd9f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestClassUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestClassUtil.java @@ -35,6 +35,6 @@ public void testFindContainingJar() { Assert.assertTrue("Containing jar does not exist on file system ", jarFile.exists()); Assert.assertTrue("Incorrect jar file " + containingJar, - jarFile.getName().matches("log4j.*[.]jar")); + jarFile.getName().matches("reload4j.*[.]jar")); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDirectBufferPool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDirectBufferPool.java index c8fd754666c9e..d6da2f86cc3c4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDirectBufferPool.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDirectBufferPool.java @@ -26,7 +26,7 @@ import org.junit.Test; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; public class TestDirectBufferPool { final org.apache.hadoop.util.DirectBufferPool pool = new 
org.apache.hadoop.util.DirectBufferPool(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestExitUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestExitUtil.java new file mode 100644 index 0000000000000..58a1997e9bc59 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestExitUtil.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.util; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.util.ExitUtil.ExitException; +import org.apache.hadoop.util.ExitUtil.HaltException; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +public class TestExitUtil extends AbstractHadoopTestBase { + + @Before + public void before() { + ExitUtil.disableSystemExit(); + ExitUtil.disableSystemHalt(); + ExitUtil.resetFirstExitException(); + ExitUtil.resetFirstHaltException(); + } + + @After + public void after() { + ExitUtil.resetFirstExitException(); + ExitUtil.resetFirstHaltException(); + } + + @Test + public void testGetSetExitExceptions() throws Throwable { + // prepare states and exceptions + ExitException ee1 = new ExitException(1, "TestExitUtil forged 1st ExitException"); + ExitException ee2 = new ExitException(2, "TestExitUtil forged 2nd ExitException"); + // check proper initial settings + assertFalse("ExitUtil.terminateCalled initial value should be false", + ExitUtil.terminateCalled()); + assertNull("ExitUtil.getFirstExitException initial value should be null", + ExitUtil.getFirstExitException()); + + // simulate/check 1st call + ExitException ee = intercept(ExitException.class, ()->ExitUtil.terminate(ee1)); + assertSame("ExitUtil.terminate should have rethrown its ExitException argument but it " + + "had thrown something else", ee1, ee); + assertTrue("ExitUtil.terminateCalled should be true after 1st ExitUtil.terminate call", + ExitUtil.terminateCalled()); + assertSame("ExitUtil.terminate should store its 1st call's ExitException", + ee1, ExitUtil.getFirstExitException()); + + // simulate/check 2nd call not overwritting 1st one + ee = intercept(ExitException.class, 
()->ExitUtil.terminate(ee2)); + assertSame("ExitUtil.terminate should have rethrown its HaltException argument but it " + + "had thrown something else", ee2, ee); + assertTrue("ExitUtil.terminateCalled should still be true after 2nd ExitUtil.terminate call", + ExitUtil.terminateCalled()); + // 2nd call rethrown the 2nd ExitException yet only the 1st only should have been stored + assertSame("ExitUtil.terminate when called twice should only remember 1st call's " + + "ExitException", ee1, ExitUtil.getFirstExitException()); + + // simulate cleanup, also tries to make sure state is ok for all junit still has to do + ExitUtil.resetFirstExitException(); + assertFalse("ExitUtil.terminateCalled should be false after " + + "ExitUtil.resetFirstExitException call", ExitUtil.terminateCalled()); + assertNull("ExitUtil.getFirstExitException should be null after " + + "ExitUtil.resetFirstExitException call", ExitUtil.getFirstExitException()); + } + + @Test + public void testGetSetHaltExceptions() throws Throwable { + // prepare states and exceptions + ExitUtil.disableSystemHalt(); + ExitUtil.resetFirstHaltException(); + HaltException he1 = new HaltException(1, "TestExitUtil forged 1st HaltException"); + HaltException he2 = new HaltException(2, "TestExitUtil forged 2nd HaltException"); + + // check proper initial settings + assertFalse("ExitUtil.haltCalled initial value should be false", + ExitUtil.haltCalled()); + assertNull("ExitUtil.getFirstHaltException initial value should be null", + ExitUtil.getFirstHaltException()); + + // simulate/check 1st call + HaltException he = intercept(HaltException.class, ()->ExitUtil.halt(he1)); + assertSame("ExitUtil.halt should have rethrown its HaltException argument but it had " + +"thrown something else", he1, he); + assertTrue("ExitUtil.haltCalled should be true after 1st ExitUtil.halt call", + ExitUtil.haltCalled()); + assertSame("ExitUtil.halt should store its 1st call's HaltException", + he1, ExitUtil.getFirstHaltException()); + + // 
simulate/check 2nd call not overwritting 1st one + he = intercept(HaltException.class, ()->ExitUtil.halt(he2)); + assertSame("ExitUtil.halt should have rethrown its HaltException argument but it had " + +"thrown something else", he2, he); + assertTrue("ExitUtil.haltCalled should still be true after 2nd ExitUtil.halt call", + ExitUtil.haltCalled()); + assertSame("ExitUtil.halt when called twice should only remember 1st call's HaltException", + he1, ExitUtil.getFirstHaltException()); + + // simulate cleanup, also tries to make sure state is ok for all junit still has to do + ExitUtil.resetFirstHaltException(); + assertFalse("ExitUtil.haltCalled should be false after " + + "ExitUtil.resetFirstHaltException call", ExitUtil.haltCalled()); + assertNull("ExitUtil.getFirstHaltException should be null after " + + "ExitUtil.resetFirstHaltException call", ExitUtil.getFirstHaltException()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java index 0dbfe3d6cdd36..592b0bdc4483e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java @@ -50,7 +50,7 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; public class TestGenericOptionsParser { File testDir; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java index 991697d96bc95..4a106e8fdf1f3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java @@ -28,9 +28,11 @@ import org.junit.Test; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.test.HadoopTestBase; import org.apache.hadoop.test.LambdaTestUtils; @@ -151,6 +153,9 @@ public void testEmptyFile() throws Throwable { } } + /** + * round trip through both load APIs. + */ @Test public void testFileSystemRoundTrip() throws Throwable { File tempFile = File.createTempFile("Keyval", ".json"); @@ -159,19 +164,30 @@ public void testFileSystemRoundTrip() throws Throwable { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); try { serDeser.save(fs, tempPath, source, false); - assertEquals(source, serDeser.load(fs, tempPath)); + assertEquals("JSON loaded with load(fs, path)", + source, + serDeser.load(fs, tempPath)); + assertEquals("JSON loaded with load(fs, path, status)", + source, + serDeser.load(fs, tempPath, fs.getFileStatus(tempPath))); } finally { fs.delete(tempPath, false); } } + /** + * 0 byte file through the load(path) API will fail with a wrapped + * Parser exception. + * 0 byte file through the load(path, status) API will fail with a wrapped + * Parser exception. 
+ */ @Test public void testFileSystemEmptyPath() throws Throwable { File tempFile = File.createTempFile("Keyval", ".json"); Path tempPath = new Path(tempFile.toURI()); LocalFileSystem fs = FileSystem.getLocal(new Configuration()); try { - LambdaTestUtils.intercept(EOFException.class, + LambdaTestUtils.intercept(PathIOException.class, () -> serDeser.load(fs, tempPath)); fs.delete(tempPath, false); LambdaTestUtils.intercept(FileNotFoundException.class, @@ -181,5 +197,23 @@ public void testFileSystemEmptyPath() throws Throwable { } } + /** + * 0 byte file through the load(path, status) API will fail with an + * EOFException. + */ + @Test + public void testFileSystemEmptyStatus() throws Throwable { + File tempFile = File.createTempFile("Keyval", ".json"); + Path tempPath = new Path(tempFile.toURI()); + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + try { + final FileStatus st = fs.getFileStatus(tempPath); + LambdaTestUtils.intercept(EOFException.class, + () -> serDeser.load(fs, tempPath, st)); + } finally { + fs.delete(tempPath, false); + } + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLists.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLists.java new file mode 100644 index 0000000000000..53241da695c63 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLists.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.assertj.core.api.Assertions; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Simple tests for utility class Lists. + */ +public class TestLists { + + @Test + public void testAddToEmptyArrayList() { + List list = Lists.newArrayList(); + list.add("record1"); + Assert.assertEquals(1, list.size()); + Assert.assertEquals("record1", list.get(0)); + } + + @Test + public void testAddToEmptyLinkedList() { + List list = Lists.newLinkedList(); + list.add("record1"); + Assert.assertEquals(1, list.size()); + Assert.assertEquals("record1", list.get(0)); + } + + @Test + public void testVarArgArrayLists() { + List list = Lists.newArrayList("record1", "record2", "record3"); + list.add("record4"); + Assert.assertEquals(4, list.size()); + Assert.assertEquals("record1", list.get(0)); + Assert.assertEquals("record2", list.get(1)); + Assert.assertEquals("record3", list.get(2)); + Assert.assertEquals("record4", list.get(3)); + } + + @Test + public void testItrArrayLists() { + Set set = new HashSet<>(); + set.add("record1"); + set.add("record2"); + set.add("record3"); + List list = Lists.newArrayList(set); + list.add("record4"); + Assert.assertEquals(4, list.size()); + } + + @Test + public void testItrLinkedLists() { + Set set = new HashSet<>(); + set.add("record1"); + set.add("record2"); + set.add("record3"); + List list = Lists.newLinkedList(set); + list.add("record4"); + Assert.assertEquals(4, 
list.size()); + } + + @Test + public void testListsPartition() { + List list = new ArrayList<>(); + list.add("a"); + list.add("b"); + list.add("c"); + list.add("d"); + list.add("e"); + List> res = Lists. + partition(list, 2); + Assertions.assertThat(res) + .describedAs("Number of partitions post partition") + .hasSize(3); + Assertions.assertThat(res.get(0)) + .describedAs("Number of elements in first partition") + .hasSize(2); + Assertions.assertThat(res.get(2)) + .describedAs("Number of elements in last partition") + .hasSize(1); + + List> res2 = Lists. + partition(list, 1); + Assertions.assertThat(res2) + .describedAs("Number of partitions post partition") + .hasSize(5); + Assertions.assertThat(res2.get(0)) + .describedAs("Number of elements in first partition") + .hasSize(1); + Assertions.assertThat(res2.get(4)) + .describedAs("Number of elements in last partition") + .hasSize(1); + + List> res3 = Lists. + partition(list, 6); + Assertions.assertThat(res3) + .describedAs("Number of partitions post partition") + .hasSize(1); + Assertions.assertThat(res3.get(0)) + .describedAs("Number of elements in first partition") + .hasSize(5); + } + + @Test + public void testArrayListWithSize() { + List list = Lists.newArrayListWithCapacity(3); + list.add("record1"); + list.add("record2"); + list.add("record3"); + Assert.assertEquals(3, list.size()); + Assert.assertEquals("record1", list.get(0)); + Assert.assertEquals("record2", list.get(1)); + Assert.assertEquals("record3", list.get(2)); + list = Lists.newArrayListWithCapacity(3); + list.add("record1"); + list.add("record2"); + list.add("record3"); + Assert.assertEquals(3, list.size()); + Assert.assertEquals("record1", list.get(0)); + Assert.assertEquals("record2", list.get(1)); + Assert.assertEquals("record3", list.get(2)); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestMachineList.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestMachineList.java index d721c29530f17..4d26ac55e9cf8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestMachineList.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestMachineList.java @@ -25,9 +25,11 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses;; import org.junit.Test; -import org.mockito.Mockito; public class TestMachineList { private static String IP_LIST = "10.119.103.110,10.119.103.112,10.119.103.114"; @@ -43,10 +45,40 @@ public class TestMachineList { private static String HOSTNAME_IP_CIDR_LIST = "host1,10.222.0.0/16,10.119.103.110,10.119.103.112,10.119.103.114,10.241.23.0/24,host4,"; + class TestAddressFactory extends MachineList.InetAddressFactory { + private Map cache = new HashMap<>(); + InetAddress put(String ip) throws UnknownHostException { + return put(ip, ip); + } + InetAddress put(String ip, String... hosts) throws UnknownHostException { + InetAddress addr = InetAddress.getByName(ip); + for (String host : hosts) { + addr = InetAddress.getByAddress(host, addr.getAddress()); + cache.put(host, addr); + // last host wins the PTR lookup. + cache.put(ip, addr); + } + return addr; + } + @Override + public InetAddress getByName(String host) throws UnknownHostException { + InetAddress addr = cache.get(host); + if (addr == null) { + if (!InetAddresses.isInetAddress(host)) { + throw new UnknownHostException(host); + } + // ip resolves to itself to fake being unresolvable. 
+ addr = InetAddress.getByName(host); + addr = InetAddress.getByAddress(host, addr.getAddress()); + } + return addr; + } + } + @Test public void testWildCard() { //create MachineList with a list of of IPs - MachineList ml = new MachineList("*"); + MachineList ml = new MachineList("*", new TestAddressFactory()); //test for inclusion with any IP assertTrue(ml.includes("10.119.103.112")); @@ -56,7 +88,7 @@ public void testWildCard() { @Test public void testIPList() { //create MachineList with a list of of IPs - MachineList ml = new MachineList(IP_LIST); + MachineList ml = new MachineList(IP_LIST, new TestAddressFactory()); //test for inclusion with an known IP assertTrue(ml.includes("10.119.103.112")); @@ -68,7 +100,7 @@ public void testIPList() { @Test public void testIPListSpaces() { //create MachineList with a ip string which has duplicate ip and spaces - MachineList ml = new MachineList(IP_LIST_SPACES); + MachineList ml = new MachineList(IP_LIST_SPACES, new TestAddressFactory()); //test for inclusion with an known IP assertTrue(ml.includes("10.119.103.112")); @@ -79,42 +111,28 @@ public void testIPListSpaces() { @Test public void testStaticIPHostNameList()throws UnknownHostException { - //create MachineList with a list of of Hostnames - InetAddress addressHost1 = InetAddress.getByName("1.2.3.1"); - InetAddress addressHost4 = InetAddress.getByName("1.2.3.4"); - - MachineList.InetAddressFactory addressFactory = - Mockito.mock(MachineList.InetAddressFactory.class); - Mockito.when(addressFactory.getByName("host1")).thenReturn(addressHost1); - Mockito.when(addressFactory.getByName("host4")).thenReturn(addressHost4); + // create MachineList with a list of of Hostnames + TestAddressFactory addressFactory = new TestAddressFactory(); + addressFactory.put("1.2.3.1", "host1"); + addressFactory.put("1.2.3.4", "host4"); MachineList ml = new MachineList( StringUtils.getTrimmedStringCollection(HOST_LIST), addressFactory); - //test for inclusion with an known IP + // test for 
inclusion with an known IP assertTrue(ml.includes("1.2.3.4")); - //test for exclusion with an unknown IP + // test for exclusion with an unknown IP assertFalse(ml.includes("1.2.3.5")); } @Test public void testHostNames() throws UnknownHostException { - //create MachineList with a list of of Hostnames - InetAddress addressHost1 = InetAddress.getByName("1.2.3.1"); - InetAddress addressHost4 = InetAddress.getByName("1.2.3.4"); - InetAddress addressMockHost4 = Mockito.mock(InetAddress.class); - Mockito.when(addressMockHost4.getCanonicalHostName()).thenReturn("differentName"); - - InetAddress addressMockHost5 = Mockito.mock(InetAddress.class); - Mockito.when(addressMockHost5.getCanonicalHostName()).thenReturn("host5"); - - MachineList.InetAddressFactory addressFactory = - Mockito.mock(MachineList.InetAddressFactory.class); - Mockito.when(addressFactory.getByName("1.2.3.4")).thenReturn(addressMockHost4); - Mockito.when(addressFactory.getByName("1.2.3.5")).thenReturn(addressMockHost5); - Mockito.when(addressFactory.getByName("host1")).thenReturn(addressHost1); - Mockito.when(addressFactory.getByName("host4")).thenReturn(addressHost4); + // create MachineList with a list of of Hostnames + TestAddressFactory addressFactory = new TestAddressFactory(); + addressFactory.put("1.2.3.1", "host1"); + addressFactory.put("1.2.3.4", "host4", "differentname"); + addressFactory.put("1.2.3.5", "host5"); MachineList ml = new MachineList( StringUtils.getTrimmedStringCollection(HOST_LIST), addressFactory ); @@ -128,21 +146,11 @@ public void testHostNames() throws UnknownHostException { @Test public void testHostNamesReverserIpMatch() throws UnknownHostException { - //create MachineList with a list of of Hostnames - InetAddress addressHost1 = InetAddress.getByName("1.2.3.1"); - InetAddress addressHost4 = InetAddress.getByName("1.2.3.4"); - InetAddress addressMockHost4 = Mockito.mock(InetAddress.class); - Mockito.when(addressMockHost4.getCanonicalHostName()).thenReturn("host4"); - - 
InetAddress addressMockHost5 = Mockito.mock(InetAddress.class); - Mockito.when(addressMockHost5.getCanonicalHostName()).thenReturn("host5"); - - MachineList.InetAddressFactory addressFactory = - Mockito.mock(MachineList.InetAddressFactory.class); - Mockito.when(addressFactory.getByName("1.2.3.4")).thenReturn(addressMockHost4); - Mockito.when(addressFactory.getByName("1.2.3.5")).thenReturn(addressMockHost5); - Mockito.when(addressFactory.getByName("host1")).thenReturn(addressHost1); - Mockito.when(addressFactory.getByName("host4")).thenReturn(addressHost4); + // create MachineList with a list of of Hostnames + TestAddressFactory addressFactory = new TestAddressFactory(); + addressFactory.put("1.2.3.1", "host1"); + addressFactory.put("1.2.3.4", "host4"); + addressFactory.put("1.2.3.5", "host5"); MachineList ml = new MachineList( StringUtils.getTrimmedStringCollection(HOST_LIST), addressFactory ); @@ -157,7 +165,7 @@ public void testHostNamesReverserIpMatch() throws UnknownHostException { @Test public void testCIDRs() { //create MachineList with a list of of ip ranges specified in CIDR format - MachineList ml = new MachineList(CIDR_LIST); + MachineList ml = new MachineList(CIDR_LIST, new TestAddressFactory()); //test for inclusion/exclusion assertFalse(ml.includes("10.221.255.255")); @@ -181,16 +189,17 @@ public void testCIDRs() { @Test(expected = IllegalArgumentException.class) public void testNullIpAddress() { //create MachineList with a list of of ip ranges specified in CIDR format - MachineList ml = new MachineList(CIDR_LIST); + MachineList ml = new MachineList(CIDR_LIST, new TestAddressFactory()); //test for exclusion with a null IP - assertFalse(ml.includes(null)); + assertFalse(ml.includes((String) null)); + assertFalse(ml.includes((InetAddress) null)); } @Test public void testCIDRWith16bitmask() { //create MachineList with a list of of ip ranges specified in CIDR format - MachineList ml = new MachineList(CIDR_LIST1); + MachineList ml = new 
MachineList(CIDR_LIST1, new TestAddressFactory()); //test for inclusion/exclusion assertFalse(ml.includes("10.221.255.255")); @@ -209,7 +218,7 @@ public void testCIDRWith16bitmask() { @Test public void testCIDRWith8BitMask() { //create MachineList with a list of of ip ranges specified in CIDR format - MachineList ml = new MachineList(CIDR_LIST2); + MachineList ml = new MachineList(CIDR_LIST2, new TestAddressFactory()); //test for inclusion/exclusion assertFalse(ml.includes("10.241.22.255")); @@ -228,7 +237,7 @@ public void testCIDRWith8BitMask() { public void testInvalidCIDR() { //create MachineList with an Invalid CIDR try { - new MachineList(INVALID_CIDR); + MachineList ml = new MachineList(INVALID_CIDR, new TestAddressFactory()); fail("Expected IllegalArgumentException"); } catch (IllegalArgumentException e) { //expected Exception @@ -240,7 +249,7 @@ public void testInvalidCIDR() { @Test public void testIPandCIDRs() { //create MachineList with a list of of ip ranges and ip addresses - MachineList ml = new MachineList(IP_CIDR_LIST); + MachineList ml = new MachineList(IP_CIDR_LIST, new TestAddressFactory()); //test for inclusion with an known IP assertTrue(ml.includes("10.119.103.112")); @@ -263,7 +272,8 @@ public void testIPandCIDRs() { @Test public void testHostNameIPandCIDRs() { //create MachineList with a mix of ip addresses , hostnames and ip ranges - MachineList ml = new MachineList(HOSTNAME_IP_CIDR_LIST); + MachineList ml = new MachineList(HOSTNAME_IP_CIDR_LIST, + new TestAddressFactory()); //test for inclusion with an known IP assertTrue(ml.includes("10.119.103.112")); @@ -286,7 +296,8 @@ public void testHostNameIPandCIDRs() { @Test public void testGetCollection() { //create MachineList with a mix of ip addresses , hostnames and ip ranges - MachineList ml = new MachineList(HOSTNAME_IP_CIDR_LIST); + MachineList ml = + new MachineList(HOSTNAME_IP_CIDR_LIST, new TestAddressFactory()); Collection col = ml.getCollection(); //test getCollectionton to return the 
full collection diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java index 58874fdcdfba6..98b75bba4793a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java @@ -21,8 +21,6 @@ import static org.junit.Assert.*; import org.apache.hadoop.crypto.OpensslCipher; -import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; @@ -52,13 +50,9 @@ public void testNativeCodeLoaded() { // library names are depended on platform and build envs // so just check names are available assertFalse(ZlibFactory.getLibraryName().isEmpty()); - if (NativeCodeLoader.buildSupportsSnappy()) { - assertFalse(SnappyCodec.getLibraryName().isEmpty()); - } if (NativeCodeLoader.buildSupportsOpenssl()) { assertFalse(OpensslCipher.getLibraryName().isEmpty()); } - assertFalse(Lz4Codec.getLibraryName().isEmpty()); LOG.info("TestNativeCodeLoader: libhadoop.so is loaded."); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPreconditions.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPreconditions.java new file mode 100644 index 0000000000000..4a11555535515 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPreconditions.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.function.Supplier; + +import org.junit.Test; + +import org.apache.hadoop.test.LambdaTestUtils; + +public class TestPreconditions { + private static final String NON_NULL_STRING = "NON_NULL_OBJECT"; + private static final String NON_INT_STRING = "NOT_INT"; + private static final String EXPECTED_ERROR_MSG = "Expected-Error-MSG"; + private static final String EXPECTED_ERROR_MSG_ARGS = + EXPECTED_ERROR_MSG + " %s number %d"; + private static final String NULL_FORMATTER = null; + + private String errorMessage; + + @Test + public void testCheckNotNullSuccess() { + Preconditions.checkNotNull(NON_NULL_STRING); + // null supplier + Preconditions.checkNotNull(NON_NULL_STRING, null); + // ill-formated string supplier + Preconditions.checkNotNull(NON_NULL_STRING, ()-> String.format("%d", + NON_INT_STRING)); + // null pattern to string formatter + Preconditions.checkNotNull(NON_NULL_STRING, NULL_FORMATTER, null, 1); + // null arguments to string formatter + Preconditions.checkNotNull(NON_NULL_STRING, EXPECTED_ERROR_MSG_ARGS, + null, null); + // illegal format exception + Preconditions.checkNotNull(NON_NULL_STRING, "message %d %d", + NON_INT_STRING, 1); + // insufficient arguments + Preconditions.checkNotNull(NON_NULL_STRING, EXPECTED_ERROR_MSG_ARGS, + NON_INT_STRING); + // null format in string supplier + 
Preconditions.checkNotNull(NON_NULL_STRING, + () -> String.format(NULL_FORMATTER, NON_INT_STRING)); + } + + @Test + public void testCheckNotNullFailure() throws Exception { + // failure without message + LambdaTestUtils.intercept(NullPointerException.class, + Preconditions.getDefaultNullMSG(), + () -> Preconditions.checkNotNull(null)); + + // failure with message + errorMessage = EXPECTED_ERROR_MSG; + LambdaTestUtils.intercept(NullPointerException.class, + errorMessage, + () -> Preconditions.checkNotNull(null, errorMessage)); + + // failure with Null message + LambdaTestUtils.intercept(NullPointerException.class, + null, + () -> Preconditions.checkNotNull(null, errorMessage)); + + // failure with message format + errorMessage = EXPECTED_ERROR_MSG + " %s"; + String arg = "NPE"; + String expectedMSG = String.format(errorMessage, arg); + LambdaTestUtils.intercept(NullPointerException.class, + expectedMSG, + () -> Preconditions.checkNotNull(null, errorMessage, arg)); + + // failure with multiple arg message format + errorMessage = EXPECTED_ERROR_MSG_ARGS; + expectedMSG = + String.format(errorMessage, arg, 1); + LambdaTestUtils.intercept(NullPointerException.class, + expectedMSG, + () -> Preconditions.checkNotNull(null, errorMessage, arg, 1)); + + // illegal format will be thrown if the case is not handled correctly + LambdaTestUtils.intercept(NullPointerException.class, + Preconditions.getDefaultNullMSG(), + () -> Preconditions.checkNotNull(null, + errorMessage, 1, NON_INT_STRING)); + + // illegal format will be thrown for insufficient Insufficient Args + LambdaTestUtils.intercept(NullPointerException.class, + Preconditions.getDefaultNullMSG(), + () -> Preconditions.checkNotNull(null, errorMessage, NON_NULL_STRING)); + + // illegal format in supplier + LambdaTestUtils.intercept(NullPointerException.class, + Preconditions.getDefaultNullMSG(), + () -> Preconditions.checkNotNull(null, + () -> String.format(errorMessage, 1, NON_INT_STRING))); + + // insufficient arguments 
in string Supplier + LambdaTestUtils.intercept(NullPointerException.class, + Preconditions.getDefaultNullMSG(), + () -> Preconditions.checkNotNull(null, + () -> String.format(errorMessage, NON_NULL_STRING))); + + // null formatter + LambdaTestUtils.intercept(NullPointerException.class, + Preconditions.getDefaultNullMSG(), + () -> Preconditions.checkNotNull(null, + () -> String.format(NULL_FORMATTER, NON_NULL_STRING))); + } + + @Test + public void testCheckArgumentWithSuccess() throws Exception { + // success + Preconditions.checkArgument(true); + // null supplier + Preconditions.checkArgument(true, null); + // null message + Preconditions.checkArgument(true, (String) null); + Preconditions.checkArgument(true, NON_NULL_STRING); + // null in string format + Preconditions.checkArgument(true, EXPECTED_ERROR_MSG_ARGS, null, null); + // illegalformat + Preconditions.checkArgument(true, EXPECTED_ERROR_MSG_ARGS, 1, 2); + // ill-formated string supplier + Preconditions.checkArgument(true, ()-> String.format("%d", + NON_INT_STRING)); + // null pattern to string formatter + Preconditions.checkArgument(true, NULL_FORMATTER, null, 1); + // null arguments to string formatter + Preconditions.checkArgument(true, EXPECTED_ERROR_MSG_ARGS, + null, null); + // illegal format exception + Preconditions.checkArgument(true, "message %d %d", + NON_INT_STRING, 1); + // insufficient arguments + Preconditions.checkArgument(true, EXPECTED_ERROR_MSG_ARGS, + NON_INT_STRING); + // null format in string supplier + Preconditions.checkArgument(true, + () -> String.format(NULL_FORMATTER, NON_INT_STRING)); + } + + @Test + public void testCheckArgumentWithFailure() throws Exception { + // failure without message + LambdaTestUtils.intercept(IllegalArgumentException.class, + () -> Preconditions.checkArgument(false)); + errorMessage = null; + // failure with Null message + LambdaTestUtils.intercept(IllegalArgumentException.class, + null, + () -> Preconditions.checkArgument(false, errorMessage)); + // 
failure with message + errorMessage = EXPECTED_ERROR_MSG; + LambdaTestUtils.intercept(IllegalArgumentException.class, + errorMessage, + () -> Preconditions.checkArgument(false, errorMessage)); + + // failure with message format + errorMessage = EXPECTED_ERROR_MSG + " %s"; + String arg = "IllegalArgExcep"; + String expectedMSG = String.format(errorMessage, arg); + LambdaTestUtils.intercept(IllegalArgumentException.class, + expectedMSG, + () -> Preconditions.checkArgument(false, errorMessage, arg)); + + // failure with multiple arg message format + errorMessage = EXPECTED_ERROR_MSG_ARGS; + expectedMSG = + String.format(errorMessage, arg, 1); + LambdaTestUtils.intercept(IllegalArgumentException.class, + expectedMSG, + () -> Preconditions.checkArgument(false, errorMessage, arg, 1)); + + // ignore illegal format will be thrown if the case is not handled correctly + LambdaTestUtils.intercept(IllegalArgumentException.class, + Preconditions.getDefaultCheckArgumentMSG(), + () -> Preconditions.checkArgument(false, + errorMessage, 1, NON_INT_STRING)); + + // ignore illegal format will be thrown for insufficient Insufficient Args + LambdaTestUtils.intercept(IllegalArgumentException.class, + Preconditions.getDefaultCheckArgumentMSG(), + () -> Preconditions.checkArgument(false, errorMessage, NON_NULL_STRING)); + + // failure with Null supplier + final Supplier nullSupplier = null; + LambdaTestUtils.intercept(IllegalArgumentException.class, + null, + () -> Preconditions.checkArgument(false, nullSupplier)); + + // ignore illegal format in supplier + LambdaTestUtils.intercept(IllegalArgumentException.class, + Preconditions.getDefaultCheckArgumentMSG(), + () -> Preconditions.checkArgument(false, + () -> String.format(errorMessage, 1, NON_INT_STRING))); + + // ignore insufficient arguments in string Supplier + LambdaTestUtils.intercept(IllegalArgumentException.class, + Preconditions.getDefaultCheckArgumentMSG(), + () -> Preconditions.checkArgument(false, + () -> 
String.format(errorMessage, NON_NULL_STRING))); + + // ignore null formatter + LambdaTestUtils.intercept(IllegalArgumentException.class, + Preconditions.getDefaultCheckArgumentMSG(), + () -> Preconditions.checkArgument(false, + () -> String.format(NULL_FORMATTER, NON_NULL_STRING))); + } + + @Test + public void testCheckStateWithSuccess() throws Exception { + // success + Preconditions.checkState(true); + // null supplier + Preconditions.checkState(true, null); + // null message + Preconditions.checkState(true, (String) null); + Preconditions.checkState(true, NON_NULL_STRING); + // null in string format + Preconditions.checkState(true, EXPECTED_ERROR_MSG_ARGS, null, null); + // illegalformat + Preconditions.checkState(true, EXPECTED_ERROR_MSG_ARGS, 1, 2); + // ill-formated string supplier + Preconditions.checkState(true, ()-> String.format("%d", + NON_INT_STRING)); + // null pattern to string formatter + Preconditions.checkState(true, NULL_FORMATTER, null, 1); + // null arguments to string formatter + Preconditions.checkState(true, EXPECTED_ERROR_MSG_ARGS, + null, null); + // illegal format exception + Preconditions.checkState(true, "message %d %d", + NON_INT_STRING, 1); + // insufficient arguments + Preconditions.checkState(true, EXPECTED_ERROR_MSG_ARGS, + NON_INT_STRING); + // null format in string supplier + Preconditions.checkState(true, + () -> String.format(NULL_FORMATTER, NON_INT_STRING)); + } + + @Test + public void testCheckStateWithFailure() throws Exception { + // failure without message + LambdaTestUtils.intercept(IllegalStateException.class, + () -> Preconditions.checkState(false)); + errorMessage = null; + // failure with Null message + LambdaTestUtils.intercept(IllegalStateException.class, + null, + () -> Preconditions.checkState(false, errorMessage)); + // failure with message + errorMessage = EXPECTED_ERROR_MSG; + LambdaTestUtils.intercept(IllegalStateException.class, + errorMessage, + () -> Preconditions.checkState(false, errorMessage)); + + // 
failure with message format + errorMessage = EXPECTED_ERROR_MSG + " %s"; + String arg = "IllegalStaExcep"; + String expectedMSG = String.format(errorMessage, arg); + LambdaTestUtils.intercept(IllegalStateException.class, + expectedMSG, + () -> Preconditions.checkState(false, errorMessage, arg)); + + // failure with multiple arg message format + errorMessage = EXPECTED_ERROR_MSG_ARGS; + expectedMSG = + String.format(errorMessage, arg, 1); + LambdaTestUtils.intercept(IllegalStateException.class, + expectedMSG, + () -> Preconditions.checkState(false, errorMessage, arg, 1)); + + // ignore illegal format will be thrown if the case is not handled correctly + LambdaTestUtils.intercept(IllegalStateException.class, + Preconditions.getDefaultCheckStateMSG(), + () -> Preconditions.checkState(false, + errorMessage, 1, NON_INT_STRING)); + + // ignore illegal format will be thrown for insufficient Insufficient Args + LambdaTestUtils.intercept(IllegalStateException.class, + Preconditions.getDefaultCheckStateMSG(), + () -> Preconditions.checkState(false, errorMessage, NON_NULL_STRING)); + + // failure with Null supplier + final Supplier nullSupplier = null; + LambdaTestUtils.intercept(IllegalStateException.class, + null, + () -> Preconditions.checkState(false, nullSupplier)); + + // ignore illegal format in supplier + LambdaTestUtils.intercept(IllegalStateException.class, + Preconditions.getDefaultCheckStateMSG(), + () -> Preconditions.checkState(false, + () -> String.format(errorMessage, 1, NON_INT_STRING))); + + // ignore insufficient arguments in string Supplier + LambdaTestUtils.intercept(IllegalStateException.class, + Preconditions.getDefaultCheckStateMSG(), + () -> Preconditions.checkState(false, + () -> String.format(errorMessage, NON_NULL_STRING))); + + // ignore null formatter + LambdaTestUtils.intercept(IllegalStateException.class, + Preconditions.getDefaultCheckStateMSG(), + () -> Preconditions.checkState(false, + () -> String.format(NULL_FORMATTER, NON_NULL_STRING))); 
+ } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java index 578d267114128..153c1847ced82 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.util; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.apache.commons.io.FileUtils; import org.apache.hadoop.security.alias.AbstractJavaKeyStoreProvider; import org.junit.Assert; @@ -50,7 +50,7 @@ public class TestShell extends Assert { /** - * Set the timeout for every test + * Set the timeout for every test. */ @Rule public Timeout testTimeout = new Timeout(30000); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestWeakReferenceMap.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestWeakReferenceMap.java new file mode 100644 index 0000000000000..3203de8a96488 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestWeakReferenceMap.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.fs.impl.WeakReferenceThreadMap; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test {@link WeakReferenceMap} and {@link WeakReferenceThreadMap}. + * There's no attempt to force GC here, so the tests are + * more about the basic behavior not the handling of empty references. + */ +public class TestWeakReferenceMap extends AbstractHadoopTestBase { + + public static final String FACTORY_STRING = "recreated %d"; + + /** + * The map to test. + */ + private WeakReferenceMap referenceMap; + + /** + * List of references notified of loss. + */ + private List lostReferences; + + @Before + public void setup() { + lostReferences = new ArrayList<>(); + referenceMap = new WeakReferenceMap<>( + this::factory, + this::referenceLost); + } + + /** + * Reference lost callback. + * @param key key lost + */ + private void referenceLost(Integer key) { + lostReferences.add(key); + } + + + /** + * Basic insertions and lookups of those values. + */ + @Test + public void testBasicOperationsWithValidReferences() { + + referenceMap.put(1, "1"); + referenceMap.put(2, "2"); + assertMapSize(2); + assertMapContainsKey(1); + assertMapEntryEquals(1, "1"); + assertMapEntryEquals(2, "2"); + // overwrite + referenceMap.put(1, "3"); + assertMapEntryEquals(1, "3"); + + // remove an entry + referenceMap.remove(1); + assertMapDoesNotContainKey(1); + assertMapSize(1); + + // clear the map + referenceMap.clear(); + assertMapSize(0); + } + + /** + * pruning removes null entries, leaves the others alone. 
+ */ + @Test + public void testPruneNullEntries() { + referenceMap.put(1, "1"); + assertPruned(0); + referenceMap.put(2, null); + assertMapSize(2); + assertPruned(1); + assertMapSize(1); + assertMapDoesNotContainKey(2); + assertMapEntryEquals(1, "1"); + assertLostCount(1); + } + + /** + * Demand create entries. + */ + @Test + public void testDemandCreateEntries() { + + // ask for an unknown key and expect a generated value + assertMapEntryEquals(1, factory(1)); + assertMapSize(1); + assertMapContainsKey(1); + assertLostCount(0); + + // an empty ref has the same outcome + referenceMap.put(2, null); + assertMapEntryEquals(2, factory(2)); + // but the lost coun goes up + assertLostCount(1); + + } + + /** + * It is an error to have a factory which returns null. + */ + @Test + public void testFactoryReturningNull() throws Throwable { + referenceMap = new WeakReferenceMap<>( + (k) -> null, + null); + intercept(NullPointerException.class, () -> + referenceMap.get(0)); + } + + /** + * Test the WeakReferenceThreadMap extension. + */ + @Test + public void testWeakReferenceThreadMapAssignment() + throws Throwable { + + // counters foor the callbacks + final AtomicLong created = new AtomicLong(); + final AtomicLong lost = new AtomicLong(); + + WeakReferenceThreadMap threadMap = new WeakReferenceThreadMap<>( + id -> "Entry for thread ID " + id + " (" + created.incrementAndGet() + ")", + id -> lost.incrementAndGet()); + + Assertions.assertThat(threadMap.setForCurrentThread("hello")) + .describedAs("current thread map value on first set") + .isNull(); + + // second attempt returns itself + Assertions.assertThat(threadMap.setForCurrentThread("hello")) + .describedAs("current thread map value on second set") + .isEqualTo("hello"); + + // it is forbidden to explicitly set to null via the set() call. 
+ intercept(NullPointerException.class, () -> + threadMap.setForCurrentThread(null)); + + // the map is unchanged + Assertions.assertThat(threadMap.getForCurrentThread()) + .describedAs("current thread map value") + .isEqualTo("hello"); + + // remove the value and assert what the removed entry was + Assertions.assertThat(threadMap.removeForCurrentThread()) + .describedAs("removed thread map value") + .isEqualTo("hello"); + + // remove the value again; this time the removed value is null + Assertions.assertThat(threadMap.removeForCurrentThread()) + .describedAs("removed thread map value on second call") + .isNull(); + + // lookup will return a new instance created by the factory + long c1 = created.get(); + String dynamicValue = threadMap.getForCurrentThread(); + Assertions.assertThat(dynamicValue) + .describedAs("dynamically created thread map value") + .startsWith("Entry for thread ID") + .contains("(" + (c1 + 1) + ")"); + + // and we can overwrite that + Assertions.assertThat(threadMap.setForCurrentThread("hello2")) + .describedAs("value before the thread entry is changed") + .isEqualTo(dynamicValue); + + // simulate a weak gc + long threadId = threadMap.currentThreadId(); + threadMap.put(threadId, null); + String updated = threadMap.getForCurrentThread(); + Assertions.assertThat(lost.get()) + .describedAs("lost count") + .isEqualTo(1); + Assertions.assertThat(updated) + .describedAs("dynamically created thread map value") + .startsWith("Entry for thread ID") + .contains("(" + (c1 + 2) + ")"); + } + + /** + * Assert that the value of a map entry is as expected. + * Will trigger entry creation if the key is absent. + * @param key key + * @param val expected value + */ + private void assertMapEntryEquals(int key, String val) { + Assertions.assertThat(referenceMap.get(key)) + .describedAs("map enty of key %d", key) + .isEqualTo(val); + } + + /** + * Assert that a map entry is present. 
+ * @param key key + */ + private void assertMapContainsKey(int key) { + Assertions.assertThat(referenceMap.containsKey(key)) + .describedAs("map entry of key %d should be present", key) + .isTrue(); + } + + /** + * Assert that a map entry is not present. + * @param key key + */ + private void assertMapDoesNotContainKey(int key) { + Assertions.assertThat(referenceMap.containsKey(key)) + .describedAs("map enty of key %d should be absent", key) + .isFalse(); + } + + /** + * Assert map size. + * @param size expected size. + */ + private void assertMapSize(int size) { + Assertions.assertThat(referenceMap.size()) + .describedAs("size of map %s", referenceMap) + .isEqualTo(size); + } + + /** + * Assert prune returned the given count. + * @param count expected count. + */ + private void assertPruned(int count) { + Assertions.assertThat(referenceMap.prune()) + .describedAs("number of entries pruned from map %s", referenceMap) + .isEqualTo(count); + } + + /** + * Assert number of entries lost matches expected count. + * @param count expected count. + */ + private void assertLostCount(int count) { + Assertions.assertThat(lostReferences) + .describedAs("number of entries lost from map %s", referenceMap) + .hasSize(count); + } + + /** + * Factory operation. + * @param key map key + * @return a string + */ + private String factory(Integer key) { + return String.format(FACTORY_STRING, key); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java new file mode 100644 index 0000000000000..6db16b6c0c598 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.InputStream; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.concurrent.atomic.AtomicBoolean; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.SAXParser; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import org.assertj.core.api.Assertions; +import org.junit.Assert; +import org.junit.Test; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +public class TestXMLUtils extends AbstractHadoopTestBase { + + @Test + public void testSecureDocumentBuilderFactory() throws Exception { + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(""))); + Assertions.assertThat(doc).describedAs("parsed document").isNotNull(); + } + + @Test(expected = SAXException.class) + public void 
testExternalDtdWithSecureDocumentBuilderFactory() throws Exception { + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + try (InputStream stream = getResourceStream("/xml/external-dtd.xml")) { + Document doc = db.parse(stream); + } + } + + @Test(expected = SAXException.class) + public void testEntityDtdWithSecureDocumentBuilderFactory() throws Exception { + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + try (InputStream stream = getResourceStream("/xml/entity-dtd.xml")) { + Document doc = db.parse(stream); + } + } + + @Test + public void testSecureSAXParserFactory() throws Exception { + SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); + parser.parse(new InputSource(new StringReader("")), new DefaultHandler()); + } + + @Test(expected = SAXException.class) + public void testExternalDtdWithSecureSAXParserFactory() throws Exception { + SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); + try (InputStream stream = getResourceStream("/xml/external-dtd.xml")) { + parser.parse(stream, new DefaultHandler()); + } + } + + @Test(expected = SAXException.class) + public void testEntityDtdWithSecureSAXParserFactory() throws Exception { + SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); + try (InputStream stream = getResourceStream("/xml/entity-dtd.xml")) { + parser.parse(stream, new DefaultHandler()); + } + } + + @Test + public void testSecureTransformerFactory() throws Exception { + Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer(); + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(""))); + try (StringWriter stringWriter = new StringWriter()) { + transformer.transform(new DOMSource(doc), new StreamResult(stringWriter)); + Assertions.assertThat(stringWriter.toString()).contains(""))); + try (StringWriter 
stringWriter = new StringWriter()) { + transformer.transform(new DOMSource(doc), new StreamResult(stringWriter)); + Assertions.assertThat(stringWriter.toString()).contains(" { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java index fbbb6d81935cd..cfd9628885d4f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java @@ -31,9 +31,9 @@ import org.junit.Assert; import org.junit.Test; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; public class TestBloomFilters { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestChildReaper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestChildReaper.java deleted file mode 100644 index 960471841948c..0000000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestChildReaper.java +++ /dev/null @@ -1,209 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.util.curator; - -import org.apache.curator.framework.recipes.locks.Reaper; -import org.apache.curator.test.TestingServer; -import org.apache.curator.utils.CloseableUtils; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.retry.RetryOneTime; -import org.apache.curator.test.Timing; -import org.apache.zookeeper.data.Stat; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.net.BindException; -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - -/** - * This is a copy of Curator 2.7.1's TestChildReaper class, with minor - * modifications to make it work with JUnit (some setup code taken from - * Curator's BaseClassForTests). This is to ensure that the ChildReaper - * class we modified is still correct. 
- */ -public class TestChildReaper -{ - protected TestingServer server; - - @Before - public void setup() throws Exception { - while(this.server == null) { - try { - this.server = new TestingServer(); - } catch (BindException var2) { - System.err.println("Getting bind exception - retrying to allocate server"); - this.server = null; - } - } - } - - @After - public void teardown() throws Exception { - this.server.close(); - this.server = null; - } - - @Test - public void testSomeNodes() throws Exception - { - - Timing timing = new Timing(); - ChildReaper reaper = null; - CuratorFramework client = CuratorFrameworkFactory.newClient(server.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1)); - try - { - client.start(); - - Random r = new Random(); - int nonEmptyNodes = 0; - for ( int i = 0; i < 10; ++i ) - { - client.create().creatingParentsIfNeeded().forPath("/test/" + Integer.toString(i)); - if ( r.nextBoolean() ) - { - client.create().forPath("/test/" + Integer.toString(i) + "/foo"); - ++nonEmptyNodes; - } - } - - reaper = new ChildReaper(client, "/test", Reaper.Mode.REAP_UNTIL_DELETE, 1); - reaper.start(); - - timing.forWaiting().sleepABit(); - - Stat stat = client.checkExists().forPath("/test"); - assertThat(stat.getNumChildren()).isEqualTo(nonEmptyNodes); - } - finally - { - CloseableUtils.closeQuietly(reaper); - CloseableUtils.closeQuietly(client); - } - } - - @Test - public void testSimple() throws Exception - { - Timing timing = new Timing(); - ChildReaper reaper = null; - CuratorFramework client = CuratorFrameworkFactory.newClient(server.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1)); - try - { - client.start(); - - for ( int i = 0; i < 10; ++i ) - { - client.create().creatingParentsIfNeeded().forPath("/test/" + Integer.toString(i)); - } - - reaper = new ChildReaper(client, "/test", Reaper.Mode.REAP_UNTIL_DELETE, 1); - reaper.start(); - - timing.forWaiting().sleepABit(); - - Stat stat = 
client.checkExists().forPath("/test"); - assertThat(stat.getNumChildren()).isZero(); - } - finally - { - CloseableUtils.closeQuietly(reaper); - CloseableUtils.closeQuietly(client); - } - } - - @Test - public void testMultiPath() throws Exception - { - Timing timing = new Timing(); - ChildReaper reaper = null; - CuratorFramework client = CuratorFrameworkFactory.newClient(server.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1)); - try - { - client.start(); - - for ( int i = 0; i < 10; ++i ) - { - client.create().creatingParentsIfNeeded().forPath("/test1/" + Integer.toString(i)); - client.create().creatingParentsIfNeeded().forPath("/test2/" + Integer.toString(i)); - client.create().creatingParentsIfNeeded().forPath("/test3/" + Integer.toString(i)); - } - - reaper = new ChildReaper(client, "/test2", Reaper.Mode.REAP_UNTIL_DELETE, 1); - reaper.start(); - reaper.addPath("/test1"); - - timing.forWaiting().sleepABit(); - - Stat stat = client.checkExists().forPath("/test1"); - assertThat(stat.getNumChildren()).isZero(); - stat = client.checkExists().forPath("/test2"); - assertThat(stat.getNumChildren()).isZero(); - stat = client.checkExists().forPath("/test3"); - assertThat(stat.getNumChildren()).isEqualTo(10); - } - finally - { - CloseableUtils.closeQuietly(reaper); - CloseableUtils.closeQuietly(client); - } - } - - @Test - public void testNamespace() throws Exception - { - Timing timing = new Timing(); - ChildReaper reaper = null; - CuratorFramework client = CuratorFrameworkFactory.builder() - .connectString(server.getConnectString()) - .sessionTimeoutMs(timing.session()) - .connectionTimeoutMs(timing.connection()) - .retryPolicy(new RetryOneTime(1)) - .namespace("foo") - .build(); - try - { - client.start(); - - for ( int i = 0; i < 10; ++i ) - { - client.create().creatingParentsIfNeeded().forPath("/test/" + Integer.toString(i)); - } - - reaper = new ChildReaper(client, "/test", Reaper.Mode.REAP_UNTIL_DELETE, 1); - reaper.start(); - - 
timing.forWaiting().sleepABit(); - - Stat stat = client.checkExists().forPath("/test"); - assertThat(stat.getNumChildren()).isZero(); - - stat = client.usingNamespace(null).checkExists().forPath("/foo/test"); - assertThat(stat).isNotNull(); - assertThat(stat.getNumChildren()).isZero(); - } - finally - { - CloseableUtils.closeQuietly(reaper); - CloseableUtils.closeQuietly(client); - } - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java index a2156ee6d93af..fd15a0c2b1bf4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java @@ -25,11 +25,15 @@ import java.util.Arrays; import java.util.List; +import javax.security.auth.login.AppConfigurationEntry; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.security.authentication.util.JaasConfiguration; import org.apache.hadoop.util.ZKUtil; import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Stat; import org.junit.After; @@ -154,4 +158,51 @@ public void testTransaction() throws Exception { assertFalse(curator.exists(node2)); assertTrue(Arrays.equals(setData, curator.getData(node1))); } + + @Test + public void testJaasConfiguration() throws Exception { + // Validate that HadoopZooKeeperFactory will set ZKConfig with given principals + ZKCuratorManager.HadoopZookeeperFactory factory1 = + new ZKCuratorManager.HadoopZookeeperFactory("foo1", "bar1", "bar1.keytab"); + ZooKeeper zk1 = 
factory1.newZooKeeper("connString", 1000, null, false); + validateJaasConfiguration(ZKCuratorManager.HadoopZookeeperFactory.JAAS_CLIENT_ENTRY, + "bar1", "bar1.keytab", zk1); + + // Validate that a new HadoopZooKeeperFactory will use the new principals + ZKCuratorManager.HadoopZookeeperFactory factory2 = + new ZKCuratorManager.HadoopZookeeperFactory("foo2", "bar2", "bar2.keytab"); + ZooKeeper zk2 = factory2.newZooKeeper("connString", 1000, null, false); + validateJaasConfiguration(ZKCuratorManager.HadoopZookeeperFactory.JAAS_CLIENT_ENTRY, + "bar2", "bar2.keytab", zk2); + + try { + // Setting global configuration + String testClientConfig = "TestClientConfig"; + JaasConfiguration jconf = new JaasConfiguration(testClientConfig, "test", "test.keytab"); + javax.security.auth.login.Configuration.setConfiguration(jconf); + System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, testClientConfig); + + // Validate that a new HadoopZooKeeperFactory will use the global principals + ZKCuratorManager.HadoopZookeeperFactory factory3 = + new ZKCuratorManager.HadoopZookeeperFactory("foo3", "bar3", "bar3.keytab"); + ZooKeeper zk3 = factory3.newZooKeeper("connString", 1000, null, false); + validateJaasConfiguration(testClientConfig, "test", "test.keytab", zk3); + } finally { + // Remove global configuration + System.clearProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY); + } + } + + private void validateJaasConfiguration(String clientConfig, String principal, String keytab, + ZooKeeper zk) { + assertEquals("Validate that expected clientConfig is set in ZK config", clientConfig, + zk.getClientConfig().getProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY)); + + AppConfigurationEntry[] entries = javax.security.auth.login.Configuration.getConfiguration() + .getAppConfigurationEntry(clientConfig); + assertEquals("Validate that expected principal is set in Jaas config", principal, + entries[0].getOptions().get("principal")); + assertEquals("Validate that expected keytab is set in Jaas 
config", keytab, + entries[0].getOptions().get("keyTab")); + } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java new file mode 100644 index 0000000000000..373e1003ef728 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java @@ -0,0 +1,523 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util.functional; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.util.Preconditions; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.util.functional.RemoteIterators.*; +import static org.apache.hadoop.util.functional.RemoteIterators.haltableRemoteIterator; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Test for {@link RemoteIterators}. + * + */ +public class TestRemoteIterators extends AbstractHadoopTestBase { + + private static final Logger LOG = LoggerFactory.getLogger( + TestRemoteIterators.class); + + private static final String[] DATA = {"a", "b", "c"}; + + /** Counter for lambda-expressions. */ + private int counter; + + @Test + public void testIterateArray() throws Throwable { + verifyInvoked(remoteIteratorFromArray(DATA), DATA.length, + (s) -> LOG.info(s)); + } + + @Test + public void testIterateArrayMapped() throws Throwable { + verifyInvoked( + mappingRemoteIterator( + remoteIteratorFromArray(DATA), + (d) -> { + counter += d.length(); + return d; + }), + DATA.length, + this::log); + assertCounterValue(3); + } + + public void log(Object o) { + LOG.info("{}", o); + } + + /** + * Singleton is iterated through once. + * The toString() call is passed through. 
+ */ + @Test + public void testSingleton() throws Throwable { + StringBuffer result = new StringBuffer(); + String name = "singleton"; + RemoteIterator it = remoteIteratorFromSingleton(name); + assertStringValueContains(it, "SingletonIterator"); + assertStringValueContains(it, name); + verifyInvoked( + it, + 1, + (s) -> result.append(s)); + assertThat(result.toString()) + .isEqualTo(name); + } + + @Test + public void testSingletonNotClosed() throws Throwable { + CloseCounter closeCounter = new CloseCounter(); + RemoteIterator it = remoteIteratorFromSingleton(closeCounter); + verifyInvoked(it, 1, this::log); + close(it); + closeCounter.assertCloseCount(0); + } + + /** + * A null singleton is not an error. + */ + @Test + public void testNullSingleton() throws Throwable { + verifyInvoked(remoteIteratorFromSingleton(null), 0, this::log); + } + + + /** + * If you create a singleton iterator and it is an IOStatisticsSource, + * then that is the statistics which can be extracted from the + * iterator. + */ + @Test + public void testSingletonStats() throws Throwable { + IOStatsInstance singleton = new IOStatsInstance(); + RemoteIterator it + = remoteIteratorFromSingleton(singleton); + extractStatistics(it); + } + + /** + * The mapping remote iterator passes IOStatistics + * calls down. + */ + @Test + public void testMappedSingletonStats() throws Throwable { + IOStatsInstance singleton = new IOStatsInstance(); + RemoteIterator it + = mappingRemoteIterator(remoteIteratorFromSingleton(singleton), + Object::toString); + verifyInvoked(it, 1, this::log); + extractStatistics(it); + } + + /** + * Close() calls are passed through. 
+ */ + @Test + public void testClosePassthrough() throws Throwable { + CountdownRemoteIterator countdown = new CountdownRemoteIterator(0); + RemoteIterator it = mappingRemoteIterator( + countdown, + i -> i); + verifyInvoked(it, 0, this::log); + // the foreach() operation called close() + countdown.assertCloseCount(1); + extractStatistics(countdown); + ((Closeable)it).close(); + countdown.assertCloseCount(1); + } + + @Test + public void testMapping() throws Throwable { + CountdownRemoteIterator countdown = new CountdownRemoteIterator(100); + RemoteIterator it = mappingRemoteIterator( + countdown, + i -> i); + verifyInvoked(it, 100, c -> counter++); + assertCounterValue(100); + extractStatistics(it); + assertStringValueContains(it, "CountdownRemoteIterator"); + close(it); + countdown.assertCloseCount(1); + } + + @Test + public void testFiltering() throws Throwable { + CountdownRemoteIterator countdown = new CountdownRemoteIterator(100); + // only even numbers are passed through + RemoteIterator it = filteringRemoteIterator( + countdown, + i -> (i % 2) == 0); + verifyInvoked(it, 50, c -> counter++); + assertCounterValue(50); + extractStatistics(it); + close(it); + countdown.assertCloseCount(1); + } + + /** + * A filter which accepts nothing results in + * an empty iteration. 
+ */ + @Test + public void testFilterNoneAccepted() throws Throwable { + // nothing gets through + RemoteIterator it = filteringRemoteIterator( + new CountdownRemoteIterator(100), + i -> false); + verifyInvoked(it, 0, c -> counter++); + assertCounterValue(0); + extractStatistics(it); + } + + @Test + public void testFilterAllAccepted() throws Throwable { + // everything gets through + RemoteIterator it = filteringRemoteIterator( + new CountdownRemoteIterator(100), + i -> true); + verifyInvoked(it, 100, c -> counter++); + assertStringValueContains(it, "CountdownRemoteIterator"); + } + + @Test + public void testJavaIteratorSupport() throws Throwable { + CountdownIterator countdownIterator = new CountdownIterator(100); + RemoteIterator it = remoteIteratorFromIterator( + countdownIterator); + verifyInvoked(it, 100, c -> counter++); + assertStringValueContains(it, "CountdownIterator"); + extractStatistics(it); + close(it); + countdownIterator.assertCloseCount(1); + } + + @Test + public void testJavaIterableSupport() throws Throwable { + CountdownIterable countdown = new CountdownIterable(100); + RemoteIterator it = remoteIteratorFromIterable( + countdown); + verifyInvoked(it, 100, c -> counter++); + assertStringValueContains(it, "CountdownIterator"); + extractStatistics(it); + // close the iterator + close(it); + countdown.assertCloseCount(0); + // and a new iterator can be created + verifyInvoked(remoteIteratorFromIterable(countdown), + 100, c -> counter++); + } + + /** + * If a RemoteIterator is constructed from an iterable + * and that is to be closed, we close it. 
+ */ + @Test + public void testJavaIterableClose() throws Throwable { + CountdownIterable countdown = new CountdownIterable(100); + RemoteIterator it = closingRemoteIterator( + remoteIteratorFromIterable(countdown), + countdown); + verifyInvoked(it, 100, c -> counter++); + assertStringValueContains(it, "CountdownIterator"); + extractStatistics(it); + + // verify the iterator was self closed in hasNext() + countdown.assertCloseCount(1); + + // explicitly close the iterator + close(it); + countdown.assertCloseCount(1); + // and a new iterator cannot be created + intercept(IllegalStateException.class, () -> + remoteIteratorFromIterable(countdown)); + } + + /** + * If a RemoteIterator is constructed from an iterable + * and that is to be closed, we close it. + */ + @SuppressWarnings("InfiniteLoopStatement") + @Test + public void testJavaIterableCloseInNextLoop() throws Throwable { + CountdownIterable countdown = new CountdownIterable(100); + RemoteIterator it = closingRemoteIterator( + remoteIteratorFromIterable(countdown), + countdown); + try { + while(true) { + it.next(); + } + } catch (NoSuchElementException expected) { + + } + // verify the iterator was self closed in next() + countdown.assertCloseCount(1); + + } + + @Test + public void testHaltableIterator() throws Throwable { + final int limit = 4; + AtomicInteger count = new AtomicInteger(limit); + + // a countdown of 10, but the halting predicate will fail earlier + // if the value of "count" has dropped to zero + final RemoteIterator it = + haltableRemoteIterator( + rangeExcludingIterator(0, 10), + () -> count.get() > 0); + + verifyInvoked(it, limit, (v) -> count.decrementAndGet()); + } + + @Test + public void testHaltableIteratorNoHalt() throws Throwable { + + // a range of 10 values; the halting predicate always returns true, + // so the iteration is never halted and runs to completion + final int finish = 10; + final RemoteIterator it = + haltableRemoteIterator( + rangeExcludingIterator(0, finish), + () -> true); + + 
verifyInvoked(it, finish); + } + + @Test + public void testRangeExcludingIterator() throws Throwable { + verifyInvoked(rangeExcludingIterator(0, 0), 0); + verifyInvoked(rangeExcludingIterator(0, -1), 0); + verifyInvoked(rangeExcludingIterator(0, 100), 100); + intercept(NoSuchElementException.class, () -> + rangeExcludingIterator(0, 0).next()); + } + + /** + * assert that the string value of an object contains the + * expected text. + * @param o object + * @param expected expected text + */ + protected void assertStringValueContains( + final Object o, + final String expected) { + assertThat(o.toString()) + .describedAs("Object string value") + .contains(expected); + } + + /** + * Assert that the counter field is at a specific value. + * @param expected counter + */ + protected void assertCounterValue(final int expected) { + assertThat(counter) + .describedAs("Counter value") + .isEqualTo(expected); + } + + /** + * Verify that the iteration completes with a given size. + * @param it iterator + * @param type. + * @param length expected size + * @param consumer consumer + */ + protected void verifyInvoked(final RemoteIterator it, + int length, + ConsumerRaisingIOE consumer) + throws IOException { + assertThat(foreach(it, consumer)) + .describedAs("Scan through iterator %s", it) + .isEqualTo(length); + } + + /** + * Verify that the iteration completes with a given invocation count. + * @param it iterator + * @param type. + * @param length expected size + */ + protected void verifyInvoked( + final RemoteIterator it, + final int length) + throws IOException { + verifyInvoked(it, length, (t) -> { }); + + } + /** + * Close an iterator if it is closeable. + * @param it iterator + * @param type. + */ + private void close(final RemoteIterator it) throws IOException { + if (it instanceof Closeable) { + ((Closeable) it).close(); + } + } + + /** + * Class whose close() call increments a counter. 
+ */ + private static class CloseCounter extends + IOStatsInstance implements Closeable { + + private int closeCount; + + @Override + public void close() throws IOException { + closeCount++; + LOG.info("close ${}", closeCount); + } + + public int getCloseCount() { + return closeCount; + } + + public void reset() { + closeCount = 0; + } + + public void assertCloseCount(int expected) { + assertThat(closeCount) + .describedAs("Close count") + .isEqualTo(expected); + } + + } + + /** + * Simple class to implement IOStatistics. + */ + private static class IOStatsInstance implements IOStatisticsSource { + + private IOStatisticsSnapshot stats = new IOStatisticsSnapshot(); + + @Override + public IOStatistics getIOStatistics() { + return stats; + } + + } + + /** + * Iterator which counts down. + */ + private static final class CountdownRemoteIterator extends CloseCounter + implements RemoteIterator { + + private int limit; + + private CountdownRemoteIterator(final int limit) { + this.limit = limit; + } + + @Override + public boolean hasNext() throws IOException { + return limit > 0; + } + + @Override + public Integer next() throws IOException { + return limit--; + } + + @Override + public String toString() { + return "CountdownRemoteIterator{" + + "limit=" + limit + + '}'; + } + } + + /** + * Iterator which counts down. + */ + private static final class CountdownIterator extends CloseCounter + implements Iterator { + + private int limit; + + private CountdownIterator(final int limit) { + this.limit = limit; + } + + @Override + public boolean hasNext() { + return limit > 0; + } + + @Override + public Integer next() { + if (!hasNext()) { + throw new NoSuchElementException("limit reached"); + } + return limit--; + } + + @Override + public String toString() { + return "CountdownIterator{" + + "limit=" + limit + + '}'; + } + } + + /** + * Iterable for countdown iterators. + * Once closed, calls to iterator() raise an exception. 
+ */ + private static final class CountdownIterable extends CloseCounter + implements Iterable { + + private int limit; + + private CountdownIterable(final int limit) { + this.limit = limit; + } + + @Override + public Iterator iterator() { + Preconditions.checkState(getCloseCount() == 0); + + return new CountdownIterator(limit); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestTaskPool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestTaskPool.java new file mode 100644 index 0000000000000..dfee6fc75dcb3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestTaskPool.java @@ -0,0 +1,585 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.test.HadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test Task Pool class. + * This is pulled straight out of the S3A version. + */ +@RunWith(Parameterized.class) +public class TestTaskPool extends HadoopTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(TestTaskPool.class); + + public static final int ITEM_COUNT = 16; + + private static final int FAILPOINT = 8; + + private final int numThreads; + + /** + * Thread pool for task execution. + */ + private ExecutorService threadPool; + + /** + * Task submitter bonded to the thread pool, or + * null for the 0-thread case. + */ + private TaskPool.Submitter submitter; + + private final CounterTask failingTask + = new CounterTask("failing committer", FAILPOINT, Item::commit); + + private final FailureCounter failures + = new FailureCounter("failures", 0, null); + + private final CounterTask reverter + = new CounterTask("reverter", 0, Item::revert); + + private final CounterTask aborter + = new CounterTask("aborter", 0, Item::abort); + + /** + * Test array for parameterized test runs: how many threads and + * to use. 
Threading makes some of the assertions brittle; there are + * more checks on single thread than parallel ops. + * @return a list of parameter tuples. + */ + @Parameterized.Parameters(name = "threads={0}") + public static Collection params() { + return Arrays.asList(new Object[][]{ + {0}, + {1}, + {3}, + {8}, + {16}, + }); + } + + private List items; + + /** + * Construct the parameterized test. + * @param numThreads number of threads + */ + public TestTaskPool(int numThreads) { + this.numThreads = numThreads; + } + + /** + * In a parallel test run there is more than one thread doing the execution. + * @return true if the threadpool size is >1 + */ + public boolean isParallel() { + return numThreads > 1; + } + + @Before + public void setup() { + items = IntStream.rangeClosed(1, ITEM_COUNT) + .mapToObj(i -> new Item(i, + String.format("With %d threads", numThreads))) + .collect(Collectors.toList()); + + if (numThreads > 0) { + threadPool = Executors.newFixedThreadPool(numThreads, + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat(getMethodName() + "-pool-%d") + .build()); + submitter = new PoolSubmitter(); + } else { + submitter = null; + } + + } + + @After + public void teardown() { + if (threadPool != null) { + threadPool.shutdown(); + threadPool = null; + } + } + + private class PoolSubmitter implements TaskPool.Submitter { + + @Override + public Future submit(final Runnable task) { + return threadPool.submit(task); + } + + } + + /** + * create the builder. 
+ * @return pre-inited builder + */ + private TaskPool.Builder builder() { + return TaskPool.foreach(items).executeWith(submitter); + } + + private void assertRun(TaskPool.Builder builder, + CounterTask task) throws IOException { + boolean b = builder.run(task); + assertTrue("Run of " + task + " failed", b); + } + + private void assertFailed(TaskPool.Builder builder, + CounterTask task) throws IOException { + boolean b = builder.run(task); + assertFalse("Run of " + task + " unexpectedly succeeded", b); + } + + private String itemsToString() { + return "[" + items.stream().map(Item::toString) + .collect(Collectors.joining("\n")) + "]"; + } + + @Test + public void testSimpleInvocation() throws Throwable { + CounterTask t = new CounterTask("simple", 0, Item::commit); + assertRun(builder(), t); + t.assertInvoked("", ITEM_COUNT); + } + + @Test + public void testFailNoStoppingSuppressed() throws Throwable { + assertFailed(builder().suppressExceptions(), failingTask); + failingTask.assertInvoked("Continued through operations", ITEM_COUNT); + items.forEach(Item::assertCommittedOrFailed); + } + + @Test + public void testFailFastSuppressed() throws Throwable { + assertFailed(builder() + .suppressExceptions() + .stopOnFailure(), + failingTask); + if (isParallel()) { + failingTask.assertInvokedAtLeast("stop fast", FAILPOINT); + } else { + failingTask.assertInvoked("stop fast", FAILPOINT); + } + } + + @Test + public void testFailedCallAbortSuppressed() throws Throwable { + assertFailed(builder() + .stopOnFailure() + .suppressExceptions() + .abortWith(aborter), + failingTask); + failingTask.assertInvokedAtLeast("success", FAILPOINT); + if (!isParallel()) { + aborter.assertInvokedAtLeast("abort", 1); + // all uncommitted items were aborted + items.stream().filter(i -> !i.committed) + .map(Item::assertAborted); + items.stream().filter(i -> i.committed) + .forEach(i -> assertFalse(i.toString(), i.aborted)); + } + } + + @Test + public void testFailedCalledWhenNotStoppingSuppressed() 
throws Throwable { + assertFailed(builder() + .suppressExceptions() + .onFailure(failures), + failingTask); + failingTask.assertInvokedAtLeast("success", FAILPOINT); + // only one failure was triggered + failures.assertInvoked("failure event", 1); + } + + @Test + public void testFailFastCallRevertSuppressed() throws Throwable { + assertFailed(builder() + .stopOnFailure() + .revertWith(reverter) + .abortWith(aborter) + .suppressExceptions() + .onFailure(failures), + failingTask); + failingTask.assertInvokedAtLeast("success", FAILPOINT); + if (!isParallel()) { + aborter.assertInvokedAtLeast("abort", 1); + // all uncommitted items were aborted + items.stream().filter(i -> !i.committed) + .filter(i -> !i.failed) + .forEach(Item::assertAborted); + } + // all committed were reverted + items.stream().filter(i -> i.committed && !i.failed) + .forEach(Item::assertReverted); + // all reverted items are committed + items.stream().filter(i -> i.reverted) + .forEach(Item::assertCommitted); + + // only one failure was triggered + failures.assertInvoked("failure event", 1); + } + + @Test + public void testFailSlowCallRevertSuppressed() throws Throwable { + assertFailed(builder() + .suppressExceptions() + .revertWith(reverter) + .onFailure(failures), + failingTask); + failingTask.assertInvokedAtLeast("success", FAILPOINT); + // all committed were reverted + // identify which task failed from the set + int failing = failures.getItem().id; + items.stream() + .filter(i -> i.id != failing) + .filter(i -> i.committed) + .forEach(Item::assertReverted); + // all reverted items are committed + items.stream().filter(i -> i.reverted) + .forEach(Item::assertCommitted); + + // only one failure was triggered + failures.assertInvoked("failure event", 1); + } + + @Test + public void testFailFastExceptions() throws Throwable { + intercept(IOException.class, + () -> builder() + .stopOnFailure() + .run(failingTask)); + if (isParallel()) { + failingTask.assertInvokedAtLeast("stop fast", FAILPOINT); + 
} else { + failingTask.assertInvoked("stop fast", FAILPOINT); + } + } + + @Test + public void testFailSlowExceptions() throws Throwable { + intercept(IOException.class, + () -> builder() + .run(failingTask)); + failingTask.assertInvoked("continued through operations", ITEM_COUNT); + items.forEach(Item::assertCommittedOrFailed); + } + + @Test + public void testFailFastExceptionsWithAbortFailure() throws Throwable { + CounterTask failFirst = new CounterTask("task", 1, Item::commit); + CounterTask a = new CounterTask("aborter", 1, Item::abort); + intercept(IOException.class, + () -> builder() + .stopOnFailure() + .abortWith(a) + .run(failFirst)); + if (!isParallel()) { + // expect the other tasks to be aborted + a.assertInvokedAtLeast("abort", ITEM_COUNT - 1); + } + } + + @Test + public void testFailFastExceptionsWithAbortFailureStopped() throws Throwable { + CounterTask failFirst = new CounterTask("task", 1, Item::commit); + CounterTask a = new CounterTask("aborter", 1, Item::abort); + intercept(IOException.class, + () -> builder() + .stopOnFailure() + .stopAbortsOnFailure() + .abortWith(a) + .run(failFirst)); + if (!isParallel()) { + // aborts stop on the first abort failure: expect exactly one abort call + a.assertInvoked("abort", 1); + } + } + + /** + * Fail the last one committed, all the rest will be reverted. + * The actual ID of the last task has to be picked up from the + * failure callback, as in the pool it may be any of the items. 
+ */ + @Test + public void testRevertAllSuppressed() throws Throwable { + CounterTask failLast = new CounterTask("task", ITEM_COUNT, Item::commit); + + assertFailed(builder() + .suppressExceptions() + .stopOnFailure() + .revertWith(reverter) + .abortWith(aborter) + .onFailure(failures), + failLast); + failLast.assertInvoked("success", ITEM_COUNT); + int abCount = aborter.getCount(); + int revCount = reverter.getCount(); + assertEquals(ITEM_COUNT, 1 + abCount + revCount); + // identify which task failed from the set + int failing = failures.getItem().id; + // all committed were reverted + items.stream() + .filter(i -> i.id != failing) + .filter(i -> i.committed) + .forEach(Item::assertReverted); + items.stream() + .filter(i -> i.id != failing) + .filter(i -> !i.committed) + .forEach(Item::assertAborted); + // all reverted items are committed + items.stream().filter(i -> i.reverted) + .forEach(Item::assertCommitted); + + // only one failure was triggered + failures.assertInvoked("failure event", 1); + } + + + /** + * The Item which tasks process. 
+ */ + private final class Item { + + private final int id; + + private final String text; + + private volatile boolean committed, aborted, reverted, failed; + + private Item(int item, String text) { + this.id = item; + this.text = text; + } + + boolean commit() { + committed = true; + return true; + } + + boolean abort() { + aborted = true; + return true; + } + + boolean revert() { + reverted = true; + return true; + } + + boolean fail() { + failed = true; + return true; + } + + public Item assertCommitted() { + assertTrue(toString() + " was not committed in\n" + + itemsToString(), + committed); + return this; + } + + public Item assertCommittedOrFailed() { + assertTrue(toString() + " was not committed nor failed in\n" + + itemsToString(), + committed || failed); + return this; + } + + public Item assertAborted() { + assertTrue(toString() + " was not aborted in\n" + + itemsToString(), + aborted); + return this; + } + + public Item assertReverted() { + assertTrue(toString() + " was not reverted in\n" + + itemsToString(), + reverted); + return this; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Item{"); + sb.append(String.format("[%02d]", id)); + sb.append(", committed=").append(committed); + sb.append(", aborted=").append(aborted); + sb.append(", reverted=").append(reverted); + sb.append(", failed=").append(failed); + sb.append(", text=").append(text); + sb.append('}'); + return sb.toString(); + } + } + + /** + * Class which can count invocations and, if limit > 0, will raise + * an exception on the specific invocation of {@link #note(Object)} + * whose count == limit. + */ + private class BaseCounter { + + private final AtomicInteger counter = new AtomicInteger(0); + + private final int limit; + + private final String name; + + private Item item; + + private final Optional> action; + + /** + * Base counter, tracks items. + * @param name name for string/exception/logs. 
+ * @param limit limit at which an exception is raised, 0 == never + * @param action optional action to invoke after the increment, + * before limit check + */ + BaseCounter(String name, + int limit, + Function action) { + this.name = name; + this.limit = limit; + this.action = Optional.ofNullable(action); + } + + /** + * Apply the action to an item; log at info afterwards with both the + * before and after string values of the item. + * @param i item to process. + * @throws IOException failure in the action + */ + void process(Item i) throws IOException { + this.item = i; + int count = counter.incrementAndGet(); + if (limit == count) { + i.fail(); + LOG.info("{}: Failed {}", this, i); + throw new IOException(String.format("%s: Limit %d reached for %s", + this, limit, i)); + } + String before = i.toString(); + action.map(a -> a.apply(i)); + LOG.info("{}: {} -> {}", this, before, i); + } + + int getCount() { + return counter.get(); + } + + Item getItem() { + return item; + } + + void assertInvoked(String text, int expected) { + assertEquals(toString() + ": " + text, expected, getCount()); + } + + void assertInvokedAtLeast(String text, int expected) { + int actual = getCount(); + assertTrue(toString() + ": " + text + + "-expected " + expected + + " invocations, but got " + actual + + " in " + itemsToString(), + expected <= actual); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "BaseCounter{"); + sb.append("name='").append(name).append('\''); + sb.append(", count=").append(counter.get()); + sb.append(", limit=").append(limit); + sb.append(", item=").append(item); + sb.append('}'); + return sb.toString(); + } + } + + private final class CounterTask + extends BaseCounter implements TaskPool.Task { + + private CounterTask(String name, int limit, + Function action) { + super(name, limit, action); + } + + @Override + public void run(Item item) throws IOException { + process(item); + } + + } + + private final class 
FailureCounter + extends BaseCounter + implements TaskPool.FailureTask { + + private Exception exception; + + private FailureCounter(String name, int limit, + Function action) { + super(name, limit, action); + } + + @Override + public void run(Item item, Exception ex) throws IOException { + process(item); + this.exception = ex; + } + + private Exception getException() { + return exception; + } + + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/proto/test_legacy.proto b/hadoop-common-project/hadoop-common/src/test/proto/test_legacy.proto new file mode 100644 index 0000000000000..7d585e30c2bb7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/proto/test_legacy.proto @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +option java_package = "org.apache.hadoop.ipc.protobuf"; +option java_outer_classname = "TestProtosLegacy"; +option java_generate_equals_and_hash = true; +package hadoop.common; + +message EmptyRequestProto { +} + +message EmptyResponseProto { +} + +message EchoRequestProto { + required string message = 1; +} + +message EchoResponseProto { + required string message = 1; +} + +message OptRequestProto { + optional string message = 1; +} + +message OptResponseProto { + optional string message = 1; +} + +message SleepRequestProto{ + required int32 milliSeconds = 1; +} + +message SleepResponseProto{ +} + +message SlowPingRequestProto { + required bool shouldSlow = 1; +} + +message EchoRequestProto2 { + repeated string message = 1; +} + +message EchoResponseProto2 { + repeated string message = 1; +} + +message AddRequestProto { + required int32 param1 = 1; + required int32 param2 = 2; +} + +message AddRequestProto2 { + repeated int32 params = 1; +} + +message AddResponseProto { + required int32 result = 1; +} + +message ExchangeRequestProto { + repeated int32 values = 1; +} + +message ExchangeResponseProto { + repeated int32 values = 1; +} + +message AuthMethodResponseProto { + required int32 code = 1; + required string mechanismName = 2; +} + +message UserResponseProto { + required string user = 1; +} + +message SleepRequestProto2 { + optional int64 sleep_time = 1; +} + +message SleepResponseProto2 { + optional int64 receive_time = 1; + optional int64 response_time = 2; +} diff --git a/hadoop-common-project/hadoop-common/src/test/proto/test_rpc_service_legacy.proto b/hadoop-common-project/hadoop-common/src/test/proto/test_rpc_service_legacy.proto new file mode 100644 index 0000000000000..95fd6bbe59352 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/proto/test_rpc_service_legacy.proto @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +syntax = "proto2"; +option java_package = "org.apache.hadoop.ipc.protobuf"; +option java_outer_classname = "TestRpcServiceProtosLegacy"; +option java_generic_services = true; +option java_generate_equals_and_hash = true; +package hadoop.common; + +import "test_legacy.proto"; + + +/** + * A protobuf service for use in tests + */ +service TestProtobufRpcProto { + rpc ping(EmptyRequestProto) returns (EmptyResponseProto); + rpc echo(EchoRequestProto) returns (EchoResponseProto); + rpc error(EmptyRequestProto) returns (EmptyResponseProto); + rpc error2(EmptyRequestProto) returns (EmptyResponseProto); + rpc slowPing(SlowPingRequestProto) returns (EmptyResponseProto); + rpc echo2(EchoRequestProto2) returns (EchoResponseProto2); + rpc add(AddRequestProto) returns (AddResponseProto); + rpc add2(AddRequestProto2) returns (AddResponseProto); + rpc testServerGet(EmptyRequestProto) returns (EmptyResponseProto); + rpc exchange(ExchangeRequestProto) returns (ExchangeResponseProto); + rpc sleep(SleepRequestProto) returns (EmptyResponseProto); + rpc lockAndSleep(SleepRequestProto) returns (EmptyResponseProto); + rpc getAuthMethod(EmptyRequestProto) returns (AuthMethodResponseProto); + rpc getAuthUser(EmptyRequestProto) returns (UserResponseProto); + rpc echoPostponed(EchoRequestProto) 
returns (EchoResponseProto); + rpc sendPostponed(EmptyRequestProto) returns (EmptyResponseProto); + rpc getCurrentUser(EmptyRequestProto) returns (UserResponseProto); + rpc getServerRemoteUser(EmptyRequestProto) returns (UserResponseProto); +} + +service TestProtobufRpc2Proto { + rpc ping2(EmptyRequestProto) returns (EmptyResponseProto); + rpc echo2(EchoRequestProto) returns (EchoResponseProto); + rpc sleep(SleepRequestProto) returns (SleepResponseProto); +} + +service OldProtobufRpcProto { + rpc ping(EmptyRequestProto) returns (EmptyResponseProto); + rpc echo(EmptyRequestProto) returns (EmptyResponseProto); +} + +service NewProtobufRpcProto { + rpc ping(EmptyRequestProto) returns (EmptyResponseProto); + rpc echo(OptRequestProto) returns (OptResponseProto); +} + +service NewerProtobufRpcProto { + rpc ping(EmptyRequestProto) returns (EmptyResponseProto); + rpc echo(EmptyRequestProto) returns (EmptyResponseProto); +} + +service CustomProto { + rpc ping(EmptyRequestProto) returns (EmptyResponseProto); +} + +service TestProtobufRpcHandoffProto { + rpc sleep(SleepRequestProto2) returns (SleepResponseProto2); +} diff --git a/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml b/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml index b261a63be7df7..03bb3e800fba8 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml @@ -121,4 +121,14 @@ case sensitivity and permission options are determined at run time from OS type true + + fs.contract.supports-settimes + true + + + + fs.contract.supports-getfilestatus + true + + diff --git a/hadoop-common-project/hadoop-common/src/test/resources/contract/rawlocal.xml b/hadoop-common-project/hadoop-common/src/test/resources/contract/rawlocal.xml index 8cbd4a0abcf38..198ca566e25a7 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/contract/rawlocal.xml +++ 
b/hadoop-common-project/hadoop-common/src/test/resources/contract/rawlocal.xml @@ -127,4 +127,19 @@ true + + fs.contract.supports-hflush + true + + + + fs.contract.supports-hsync + true + + + + fs.contract.metadata_updated_on_hsync + true + + diff --git a/hadoop-common-project/hadoop-common/src/test/resources/contract/sftp.xml b/hadoop-common-project/hadoop-common/src/test/resources/contract/sftp.xml new file mode 100644 index 0000000000000..20a24b7e54061 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/resources/contract/sftp.xml @@ -0,0 +1,79 @@ + + + + + + + fs.contract.test.root-tests-enabled + false + + + + fs.contract.is-case-sensitive + true + + + + fs.contract.supports-append + false + + + + fs.contract.supports-atomic-directory-delete + true + + + + fs.contract.supports-atomic-rename + true + + + + fs.contract.supports-block-locality + false + + + + fs.contract.supports-concat + false + + + + fs.contract.supports-seek + true + + + + fs.contract.rejects-seek-past-eof + true + + + + fs.contract.supports-strict-exceptions + true + + + + fs.contract.supports-unix-permissions + false + + + diff --git a/hadoop-common-project/hadoop-common/src/test/resources/lz4/.sequencefile.crc b/hadoop-common-project/hadoop-common/src/test/resources/lz4/.sequencefile.crc new file mode 100644 index 0000000000000..b36bc54a7c599 Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/resources/lz4/.sequencefile.crc differ diff --git a/hadoop-common-project/hadoop-common/src/test/resources/lz4/sequencefile b/hadoop-common-project/hadoop-common/src/test/resources/lz4/sequencefile new file mode 100644 index 0000000000000..eca7cdea3b323 Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/resources/lz4/sequencefile differ diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml index 392d39170d5fe..574dfd2852277 100644 --- 
a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml @@ -162,38 +162,6 @@ - - help: help for get - - -help get - - - - - - - RegexpComparator - ^-get( )*\[-f\]( )*\[-p\]( )*\[-ignoreCrc\]( )*\[-crc\]( )*<src> \.\.\. <localdst> :\s* - - - RegexpComparator - \s*Copy files that match the file pattern <src> to the local name. <src> is kept.\s* - - - RegexpComparator - ^( |\t)*When copying multiple files, the destination must be a directory. Passing -f( )* - - - RegexpComparator - ^( |\t)*overwrites the destination if it already exists and -p preserves access and( )* - - - RegexpComparator - ^( |\t)*modification times, ownership and the mode.* - - - - help: help for du @@ -282,7 +250,7 @@ RegexpComparator - ^-count \[-q\] \[-h\] \[-v\] \[-t \[<storage type>\]\] \[-u\] \[-x\] \[-e\] <path> \.\.\. :( )* + ^-count \[-q\] \[-h\] \[-v\] \[-t \[<storage type>\]\] \[-u\] \[-x\] \[-e\] \[-s\] <path> \.\.\. :( )* RegexpComparator @@ -356,51 +324,72 @@ RegexpComparator - ^-cp \[-f\] \[-p \| -p\[topax\]\] \[-d\] <src> \.\.\. <dst> :\s* + ^-cp \[-f\] \[-p \| -p\[topax\]\] \[-d\] \[-t <thread count>\] \[-q <thread pool queue size>\] <src> \.\.\. <dst> :\s* RegexpComparator - ^\s*Copy files that match the file pattern <src> to a destination. When copying( )* + ^\s*Copy files that match the file pattern <src> to a destination. When copying( )* RegexpComparator - ^( |\t)*multiple files, the destination must be a directory.( )*Passing -p preserves status( )* + ^( |\t)*multiple files, the destination must be a directory.( )* RegexpComparator - ^( |\t)*\[topax\] \(timestamps, ownership, permission, ACLs, XAttr\). If -p is specified( )* + ^( |\t)*Flags :( )* RegexpComparator - ^( |\t)*with no <arg>, then preserves timestamps, ownership, permission. 
If -pa is( )* + ^( |\t)*-p\[topax\]\s+Preserve file attributes \[topx\] \(timestamps,( )* RegexpComparator - ^( |\t)*specified, then preserves permission also because ACL is a super-set of( )* + ^( |\t)*ownership, permission, ACL, XAttr\). If -p is( )* RegexpComparator - ^( |\t)*permission. Passing -f overwrites the destination if it already exists. raw( )* + ^( |\t)*specified with no arg, then preserves timestamps,( )* RegexpComparator - ^( |\t)*namespace extended attributes are preserved if \(1\) they are supported \(HDFS( )* + ^( |\t)*ownership, permission. If -pa is specified, then( )* RegexpComparator - ^( |\t)*only\) and, \(2\) all of the source and target pathnames are in the \/\.reserved\/raw( )* + ^( |\t)*preserves permission also because ACL is a( )* RegexpComparator - ^( |\t)*hierarchy. raw namespace xattr preservation is determined solely by the presence( )* + ^( |\t)*super-set of permission. Determination of whether( )* + - RegexpComparator - ^\s*\(or absence\) of the \/\.reserved\/raw prefix and not by the -p option\. Passing -d( )* + RegexpComparator + ^( |\t)*raw namespace extended attributes are preserved is( )* + + + RegexpComparator + ^( |\t)*independent of the -p flag.( )* RegexpComparator - ^\s*will skip creation of temporary file\(<dst>\._COPYING_\)\.( )* + ^\s*-f\s+Overwrite the destination if it already exists.( )* + + + RegexpComparator + ^\s*-d\s+Skip creation of temporary file\(<dst>\._COPYING_\).( )* + + + RegexpComparator + ^\s*-t <thread count>\s+Number of threads to be used, default is 1.( )* + + + RegexpComparator + ^\s*-q <thread pool queue size>\s+Thread pool queue size to be used, default is( )* + + + RegexpComparator + ^( |\t)*1024.\s* @@ -496,7 +485,10 @@ RegexpComparator - ^-put \[-f\] \[-p\] \[-l\] \[-d\] <localsrc> \.\.\. <dst> :( )* + + RegexpComparator + ^-put \[-f\] \[-p\] \[-l\] \[-d\] \[-t <thread count>\] \[-q <thread pool queue size>\] <localsrc> \.\.\. 
<dst> :\s* + RegexpComparator @@ -512,27 +504,39 @@ RegexpComparator - ^\s*-p Preserves access and modification times, ownership and the mode.( )* + ^\s*-p\s+Preserves timestamps, ownership and the mode.( )* + + + RegexpComparator + ^\s*-f\s+Overwrites the destination if it already exists.( )* + + + RegexpComparator + ^\s*-t <thread count>\s+Number of threads to be used, default is 1.( )* RegexpComparator - ^\s*-f Overwrites the destination if it already exists.( )* + ^\s*-q <thread pool queue size>\s+Thread pool queue size to be used, default is( )* RegexpComparator - ^\s*-l Allow DataNode to lazily persist the file to disk. Forces( )* + ^( |\t)*1024.\s* RegexpComparator - ^\s*replication factor of 1. This flag will result in reduced( )* + ^\s*-l\s+Allow DataNode to lazily persist the file to disk.( )* RegexpComparator - ^\s*durability. Use with care.( )* + ^\s*Forces replication factor of 1. This flag will( )* RegexpComparator - ^\s*-d Skip creation of temporary file\(<dst>\._COPYING_\).( )* + ^\s*result in reduced durability. Use with care.( )* + + + RegexpComparator + ^\s*-d\s+Skip creation of temporary file\(<dst>\._COPYING_\).( )* @@ -547,51 +551,11 @@ RegexpComparator - ^-copyFromLocal \[-f\] \[-p\] \[-l\] \[-d\] \[-t <thread count>\] <localsrc> \.\.\. <dst> :\s* - - - RegexpComparator - ^\s*Copy files from the local file system into fs.( )*Copying fails if the file already( )* - - - RegexpComparator - ^\s*exists, unless the -f flag is given.( )* - - - RegexpComparator - ^\s*Flags:( )* - - - RegexpComparator - ^\s*-p Preserves access and modification times, ownership and the( )* - - - RegexpComparator - ^\s*mode.( )* - - - RegexpComparator - ^\s*-f Overwrites the destination if it already exists.( )* - - - RegexpComparator - ^\s*-t <thread count> Number of threads to be used, default is 1.( )* - - - RegexpComparator - ^\s*-l Allow DataNode to lazily persist the file to disk. Forces( )* - - - RegexpComparator - ^\s*replication factor of 1. 
This flag will result in reduced( )* - - - RegexpComparator - ^\s*durability. Use with care.( )* + ^-copyFromLocal \[-f\] \[-p\] \[-l\] \[-d\] \[-t <thread count>\] \[-q <thread pool queue size>\] <localsrc> \.\.\. <dst> :\s* RegexpComparator - ^\s*-d Skip creation of temporary file\(<dst>\._COPYING_\).( )* + ^\s*Identical to the -put command\.\s* @@ -606,11 +570,14 @@ RegexpComparator - ^-moveFromLocal <localsrc> \.\.\. <dst> :\s* + ^-moveFromLocal \[-f\] \[-p\] \[-l\] \[-d\] <localsrc> \.\.\. <dst> :\s* RegexpComparator - ^( |\t)*Same as -put, except that the source is deleted after it's copied. + ^( |\t)*Same as -put, except that the source is deleted after it's copied + + RegexpComparator + ^\s* and -t option has not yet implemented. @@ -626,7 +593,7 @@ RegexpComparator - ^-get( )*\[-f\]( )*\[-p\]( )*\[-ignoreCrc\]( )*\[-crc\]( )*<src> \.\.\. <localdst> :\s* + ^-get \[-f\] \[-p\] \[-crc\] \[-ignoreCrc\] \[-t <thread count>\] \[-q <thread pool queue size>\] <src> \.\.\. <localdst> :\s* RegexpComparator @@ -634,15 +601,39 @@ RegexpComparator - ^( |\t)*When copying multiple files, the destination must be a directory. 
Passing -f( )* + ^( |\t)*When copying multiple files, the destination must be a directory.( )* + + + RegexpComparator + ^( |\t)*Flags:\s* + + + RegexpComparator + ^( |\t)*-p\s+Preserves timestamps, ownership and the mode.\s* + + + RegexpComparator + ^( |\t)*-f\s+Overwrites the destination if it already exists.\s* + + + RegexpComparator + ^( |\t)*-crc\s+ write CRC checksums for the files downloaded.\s* + + + RegexpComparator + ^( |\t)*-ignoreCrc\s+ Skip CRC checks on the file\(s\) downloaded.\s* + + + RegexpComparator + ^( |\t)*-t <thread count>\s+Number of threads to be used, default is 1.\s* RegexpComparator - ^( |\t)*overwrites the destination if it already exists and -p preserves access and( )* + ^( |\t)*-q <thread pool queue size>\s+Thread pool queue size to be used, default is\s* RegexpComparator - ^( |\t)*modification times, ownership and the mode.* + ^( |\t)*1024.\s* @@ -749,7 +740,7 @@ RegexpComparator - ^-copyToLocal \[-f\] \[-p\] \[-ignoreCrc\] \[-crc\] <src> \.\.\. <localdst> :\s* + ^-copyToLocal \[-f\] \[-p\] \[-crc\] \[-ignoreCrc\] \[-t <thread count>\] \[-q <thread pool queue size>\] <src> \.\.\. <localdst> :\s* RegexpComparator @@ -849,7 +840,7 @@ RegexpComparator - ^-touch \[-a\] \[-m\] \[-t TIMESTAMP \] \[-c\] <path> \.\.\. :( )* + ^-touch \[-a\] \[-m\] \[-t TIMESTAMP \(yyyyMMdd\:HHmmss\) \] \[-c\] <path> \.\.\. 
:( )* RegexpComparator @@ -877,11 +868,11 @@ RegexpComparator - ^\s*-t\s+TIMESTAMP\s+Use specified timestamp \(in format yyyyMMddHHmmss\) instead of + ^\s*-t\s+TIMESTAMP\s+Use specified timestamp instead of current time( )* RegexpComparator - ^\s*current time( )* + ^\s*TIMESTAMP format yyyyMMdd\:HHmmss RegexpComparator diff --git a/hadoop-common-project/hadoop-common/src/test/resources/xml/entity-dtd.xml b/hadoop-common-project/hadoop-common/src/test/resources/xml/entity-dtd.xml new file mode 100644 index 0000000000000..a3926bd67ad42 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/resources/xml/entity-dtd.xml @@ -0,0 +1,22 @@ + + + + + ]> +&lol; diff --git a/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml b/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml new file mode 100644 index 0000000000000..08a13938f5f76 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml @@ -0,0 +1,23 @@ + + + +

      + First Last + Acme + (555) 123-4567 +
      diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash index fa34bdfc4b5dd..9ba5b7e1bbbfc 100755 --- a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash @@ -16,6 +16,7 @@ setup() { + export LD_LIBRARY_PATH="" RELTMP="${BATS_TEST_DIRNAME}/../../../target/test-dir/bats.$$.${RANDOM}" mkdir -p ${RELTMP} TMP=$(cd -P -- "${RELTMP}" >/dev/null && pwd -P) diff --git a/hadoop-common-project/hadoop-kms/pom.xml b/hadoop-common-project/hadoop-kms/pom.xml index d1f2c226e509b..d99dae8a79b6d 100644 --- a/hadoop-common-project/hadoop-kms/pom.xml +++ b/hadoop-common-project/hadoop-kms/pom.xml @@ -22,11 +22,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-kms - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop KMS @@ -54,8 +54,8 @@ compile - com.google.guava - guava + org.apache.hadoop.thirdparty + hadoop-shaded-guava compile @@ -134,8 +134,8 @@ test-jar - log4j - log4j + ch.qos.reload4j + reload4j compile @@ -145,7 +145,7 @@ org.slf4j - slf4j-log4j12 + slf4j-reload4j runtime @@ -186,7 +186,6 @@ org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} 1 false 1 @@ -237,8 +236,8 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin ${basedir}/dev-support/findbugsExcludeFile.xml diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMS.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMS.java index b6b425443babc..be0f8d3fbc5d4 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMS.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMS.java @@ -17,7 
+17,7 @@ */ package org.apache.hadoop.crypto.key.kms.server; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.KMSUtil; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java index ba0fe825b4eb1..1b75f9fee0659 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java @@ -35,7 +35,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Provides access to the AccessControlLists used by KMS, diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java index 13a2d5c57a74e..4c64a37feabbd 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java @@ -20,8 +20,8 @@ import static org.apache.hadoop.crypto.key.kms.server.KMSAuditLogger.AuditEvent; import static org.apache.hadoop.crypto.key.kms.server.KMSAuditLogger.OpStatus; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.kms.server.KMSACLs.Type; import org.apache.hadoop.crypto.key.kms.server.KeyAuthorizationKeyProvider.KeyOpType; @@ -31,13 +31,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Strings; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalListener; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalNotification; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import java.util.HashSet; import java.util.concurrent.Callable; @@ -191,7 +191,7 @@ private void logEvent(final OpStatus status, AuditEvent event) { private void op(final OpStatus opStatus, final Object op, final UserGroupInformation ugi, final String key, final String remoteHost, final String extraMsg) { - final String user = ugi == null ? null: ugi.getShortUserName(); + final String user = ugi == null ? 
null: ugi.getUserName(); if (!Strings.isNullOrEmpty(user) && !Strings.isNullOrEmpty(key) && (op != null) && AGGREGATE_OPS_WHITELIST.contains(op)) { diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuditLogger.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuditLogger.java index 2e2ba1d6a1b8f..2534a44912c99 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuditLogger.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuditLogger.java @@ -77,7 +77,7 @@ class AuditEvent { this.user = null; this.impersonator = null; } else { - this.user = ugi.getShortUserName(); + this.user = ugi.getUserName(); if (ugi.getAuthenticationMethod() == UserGroupInformation.AuthenticationMethod.PROXY) { this.impersonator = ugi.getRealUser().getUserName(); diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java index da542ffb191e6..ead22e4686645 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.crypto.key.kms.server; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.kms.KMSDelegationToken; @@ -28,6 +28,7 @@ import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticationHandler; import 
org.apache.hadoop.security.token.delegation.web.KerberosDelegationTokenAuthenticationHandler; import org.apache.hadoop.security.token.delegation.web.PseudoDelegationTokenAuthenticationHandler; +import org.eclipse.jetty.server.Response; import javax.servlet.FilterChain; import javax.servlet.FilterConfig; @@ -113,6 +114,18 @@ public void setStatus(int sc) { public void sendError(int sc, String msg) throws IOException { statusCode = sc; this.msg = msg; + + ServletResponse response = getResponse(); + + // After Jetty 9.4.21, sendError() no longer allows a custom message. + // use setStatusWithReason() to set a custom message. + if (response instanceof Response) { + ((Response) response).setStatusWithReason(sc, msg); + } else { + KMS.LOG.warn("The wrapped response object is instance of {}" + + ", not org.eclipse.jetty.server.Response. Can't set custom error " + + "message", response.getClass()); + } super.sendError(sc, HtmlQuoting.quoteHtmlChars(msg)); } diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSMDCFilter.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSMDCFilter.java index f3c0bbdda6183..dc2ba3261cfd8 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSMDCFilter.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSMDCFilter.java @@ -21,7 +21,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.delegation.web.HttpUserGroupInformation; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import javax.servlet.Filter; import javax.servlet.FilterChain; diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java 
b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java index da597b4da5f81..a92dd1045c01a 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java @@ -26,7 +26,7 @@ import com.codahale.metrics.JmxReporter; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.CachingKeyProvider; diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java index 7cfc010ac2c76..639d85521c3ce 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java @@ -22,12 +22,16 @@ import java.net.MalformedURLException; import java.net.URI; import java.net.URL; +import java.util.LinkedHashSet; +import java.util.Set; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; +import org.apache.hadoop.security.AuthenticationFilterInitializer; +import org.apache.hadoop.security.authentication.server.ProxyUserAuthenticationFilterInitializer; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.ssl.SSLFactory; import 
org.apache.hadoop.util.JvmPauseMonitor; @@ -94,6 +98,22 @@ public class KMSWebServer { KMSConfiguration.HTTP_PORT_DEFAULT); URI endpoint = new URI(scheme, null, host, port, null, null, null); + String configuredInitializers = + conf.get(HttpServer2.FILTER_INITIALIZER_PROPERTY); + if (configuredInitializers != null) { + Set target = new LinkedHashSet(); + String[] initializers = configuredInitializers.split(","); + for (String init : initializers) { + if (!init.equals(AuthenticationFilterInitializer.class.getName()) && + !init.equals( + ProxyUserAuthenticationFilterInitializer.class.getName())) { + target.add(init); + } + } + String actualInitializers = StringUtils.join(",", target); + conf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY, actualInitializers); + } + httpServer = new HttpServer2.Builder() .setName(NAME) .setConf(conf) diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java index 101591b0310d2..fe3207b31c27a 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java @@ -32,9 +32,9 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AuthorizationException; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; /** * A {@link KeyProvider} proxy that checks whether the current user derived via diff --git 
a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/SimpleKMSAuditLogger.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/SimpleKMSAuditLogger.java index 4dcbe2c54f2f7..74825097f3038 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/SimpleKMSAuditLogger.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/SimpleKMSAuditLogger.java @@ -23,8 +23,8 @@ import java.util.LinkedList; import java.util.List; -import com.google.common.base.Joiner; -import com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml b/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml index 783f4e6c03b2a..134326f5312f3 100644 --- a/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml +++ b/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml @@ -103,7 +103,7 @@ hadoop.http.idle_timeout.ms - 1000 + 60000 KMS Server connection timeout in milliseconds. diff --git a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm index 5490219750020..6ea21d5cf407d 100644 --- a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm +++ b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm @@ -307,7 +307,7 @@ Configure `etc/hadoop/ssl-server.xml` with proper values, for example: ``` The SSL passwords can be secured by a credential provider. See -[Credential Provider API](../../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). 
+[Credential Provider API](../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). You need to create an SSL certificate for the KMS. As the `kms` Unix user, using the Java `keytool` command to create the SSL certificate: @@ -716,7 +716,7 @@ $H4 HTTP Kerberos Principals Configuration When KMS instances are behind a load-balancer or VIP, clients will use the hostname of the VIP. For Kerberos SPNEGO authentication, the hostname of the URL is used to construct the Kerberos service name of the server, `HTTP/#HOSTNAME#`. This means that all KMS instances must have a Kerberos service name with the load-balancer or VIP hostname. -In order to be able to access directly a specific KMS instance, the KMS instance must also have Keberos service name with its own hostname. This is required for monitoring and admin purposes. +In order to be able to access directly a specific KMS instance, the KMS instance must also have Kerberos service name with its own hostname. This is required for monitoring and admin purposes. Both Kerberos service principal credentials (for the load-balancer/VIP hostname and for the actual KMS instance hostname) must be in the keytab file configured for authentication. And the principal name specified in the configuration must be '\*'. For example: @@ -791,10 +791,62 @@ This secret sharing can be done using a Zookeeper service which is configured in $H4 Delegation Tokens Similar to HTTP authentication, KMS uses Hadoop Authentication for delegation tokens too. +Under HA, every KMS instance must verify the delegation token given by another KMS instance. +To do this, all the KMS instances must use ZKDelegationTokenSecretManager to retrieve +the TokenIdentifiers and DelegationKeys from ZooKeeper. -Under HA, A KMS instance must verify the delegation token given by another KMS instance, by checking the shared secret used to sign the delegation token. To do this, all KMS instances must be able to retrieve the shared secret from ZooKeeper. 
+Sample configuration in `etc/hadoop/kms-site.xml`: -Please see the examples given in HTTP Authentication section to configure ZooKeeper for secret sharing. +```xml + + hadoop.kms.authentication.zk-dt-secret-manager.enable + true + + If true, Hadoop KMS uses ZKDelegationTokenSecretManager to persist + TokenIdentifiers and DelegationKeys in ZooKeeper. + + + + hadoop.kms.authentication.zk-dt-secret-manager.zkConnectionString + #HOSTNAME#:#PORT#,... + + The ZooKeeper connection string, a comma-separated list of hostnames and port. + + + + hadoop.kms.authentication.zk-dt-secret-manager.znodeWorkingPath + /hadoop-kms/zkdtsm + + The ZooKeeper znode path where the KMS instances will store and retrieve + the secret from. All the KMS instances that need to coordinate should point to the same path. + + + + hadoop.kms.authentication.zk-dt-secret-manager.zkAuthType + sasl + + The ZooKeeper authentication type, 'none' (default) or 'sasl' (Kerberos). + + + + hadoop.kms.authentication.zk-dt-secret-manager.kerberos.keytab + /etc/hadoop/conf/kms.keytab + + The absolute path for the Kerberos keytab with the credentials to + connect to ZooKeeper. This parameter is effective only when + hadoop.kms.authentication.zk-dt-secret-manager.zkAuthType is set to 'sasl'. + + + + hadoop.kms.authentication.zk-dt-secret-manager.kerberos.principal + kms/#HOSTNAME# + + The Kerberos service principal used to connect to ZooKeeper. + This parameter is effective only when + hadoop.kms.authentication.zk-dt-secret-manager.zkAuthType is set to 'sasl'. + + +``` $H3 KMS HTTP REST API @@ -1055,7 +1107,8 @@ $H4 Get Key Version Content-Type: application/json { - "name" : "versionName", + "name" : "", + "versionName" : "", "material" : "", //base64 } @@ -1072,11 +1125,13 @@ $H4 Get Key Versions [ { - "name" : "versionName", + "name" : "", + "versionName" : "", "material" : "", //base64 }, { - "name" : "versionName", + "name" : "", + "versionName" : "", "material" : "", //base64 }, ... 
diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java index faa8fa280a9cf..bc4bbc3df70bd 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java @@ -26,7 +26,7 @@ import java.io.Writer; import java.net.URL; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index 3b511a1c5c488..a0a58ff3567f5 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -17,8 +17,7 @@ */ package org.apache.hadoop.crypto.key.kms.server; -import com.google.common.base.Supplier; -import com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProviderFactory; @@ -38,6 +37,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.MultipleIOException; import org.apache.hadoop.minikdc.MiniKdc; +import org.apache.hadoop.security.AuthenticationFilterInitializer; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -91,7 +91,6 @@ 
import java.util.List; import java.util.Map; import java.util.Properties; -import java.util.Set; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.LinkedBlockingQueue; @@ -112,9 +111,6 @@ public class TestKMS { private static final Logger LOG = LoggerFactory.getLogger(TestKMS.class); - private static final String SSL_RELOADER_THREAD_NAME = - "Truststore reloader thread"; - private SSLFactory sslFactory; // Keep track of all key providers created during a test case, so they can be @@ -539,34 +535,6 @@ public Void call() throws Exception { url.getProtocol().equals("https")); final URI uri = createKMSUri(getKMSUrl()); - if (ssl) { - KeyProvider testKp = createProvider(uri, conf); - ThreadGroup threadGroup = Thread.currentThread().getThreadGroup(); - while (threadGroup.getParent() != null) { - threadGroup = threadGroup.getParent(); - } - Thread[] threads = new Thread[threadGroup.activeCount()]; - threadGroup.enumerate(threads); - Thread reloaderThread = null; - for (Thread thread : threads) { - if ((thread.getName() != null) - && (thread.getName().contains(SSL_RELOADER_THREAD_NAME))) { - reloaderThread = thread; - } - } - Assert.assertTrue("Reloader is not alive", reloaderThread.isAlive()); - // Explicitly close the provider so we can verify the internal thread - // is shutdown - testKp.close(); - boolean reloaderStillAlive = true; - for (int i = 0; i < 10; i++) { - reloaderStillAlive = reloaderThread.isAlive(); - if (!reloaderStillAlive) break; - Thread.sleep(1000); - } - Assert.assertFalse("Reloader is still alive", reloaderStillAlive); - } - if (kerberos) { for (String user : new String[]{"client", "client/host"}) { doAs(user, new PrivilegedExceptionAction() { @@ -2362,8 +2330,7 @@ public Void run() throws Exception { return null; } }); - // Close the client provider. We will verify all providers' - // Truststore reloader threads are closed later. + // Close the client provider. 
kp.close(); return null; } finally { @@ -2374,22 +2341,6 @@ public Void run() throws Exception { return null; } }); - - // verify that providers created by KMSTokenRenewer are closed. - if (ssl) { - GenericTestUtils.waitFor(new Supplier() { - @Override - public Boolean get() { - final Set threadSet = Thread.getAllStackTraces().keySet(); - for (Thread t : threadSet) { - if (t.getName().contains(SSL_RELOADER_THREAD_NAME)) { - return false; - } - } - return true; - } - }, 1000, 10000); - } } @Test @@ -3079,4 +3030,45 @@ public Void call() throws Exception { } }); } + + @Test + public void testFilterInitializer() throws Exception { + Configuration conf = new Configuration(); + File testDir = getTestDir(); + conf = createBaseKMSConf(testDir, conf); + conf.set("hadoop.security.authentication", "kerberos"); + conf.set("hadoop.kms.authentication.token.validity", "1"); + conf.set("hadoop.kms.authentication.type", "kerberos"); + conf.set("hadoop.kms.authentication.kerberos.keytab", + keytab.getAbsolutePath()); + conf.set("hadoop.kms.authentication.kerberos.principal", "HTTP/localhost"); + conf.set("hadoop.kms.authentication.kerberos.name.rules", "DEFAULT"); + conf.set("hadoop.http.filter.initializers", + AuthenticationFilterInitializer.class.getName()); + conf.set("hadoop.http.authentication.type", "kerberos"); + conf.set("hadoop.http.authentication.kerberos.principal", "HTTP/localhost"); + conf.set("hadoop.http.authentication.kerberos.keytab", + keytab.getAbsolutePath()); + + writeConf(testDir, conf); + + runServer(null, null, testDir, new KMSCallable() { + @Override + public Void call() throws Exception { + final Configuration conf = new Configuration(); + URL url = getKMSUrl(); + final URI uri = createKMSUri(getKMSUrl()); + + doAs("client", new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + final KeyProvider kp = createProvider(uri, conf); + Assert.assertTrue(kp.getKeys().isEmpty()); + return null; + } + }); + return null; + } + }); + } 
} diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSAudit.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSAudit.java index 09145be28a0df..2f47ed794ac84 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSAudit.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSAudit.java @@ -40,7 +40,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; -import org.mockito.Mockito; public class TestKMSAudit { @@ -50,6 +49,8 @@ public class TestKMSAudit { private PrintStream capturedOut; private KMSAudit kmsAudit; + private UserGroupInformation luser = + UserGroupInformation.createUserForTesting("luser@REALM", new String[0]); private static class FilterOut extends FilterOutputStream { public FilterOut(OutputStream out) { @@ -95,10 +96,7 @@ private String getAndResetLogOutput() { } @Test - @SuppressWarnings("checkstyle:linelength") public void testAggregation() throws Exception { - UserGroupInformation luser = Mockito.mock(UserGroupInformation.class); - Mockito.when(luser.getShortUserName()).thenReturn("luser"); kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg"); kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg"); kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg"); @@ -120,27 +118,30 @@ public void testAggregation() throws Exception { kmsAudit.evictCacheForTesting(); String out = getAndResetLogOutput(); System.out.println(out); - Assert.assertTrue( - out.matches( - "OK\\[op=DECRYPT_EEK, key=k1, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" + boolean doesMatch = out.matches( + "OK\\[op=DECRYPT_EEK, key=k1, user=luser@REALM, accessCount=1, " + + "interval=[^m]{1,4}ms\\] testmsg" // Not aggregated !! 
- + "OK\\[op=DELETE_KEY, key=k1, user=luser\\] testmsg" - + "OK\\[op=ROLL_NEW_VERSION, key=k1, user=luser\\] testmsg" - + "OK\\[op=INVALIDATE_CACHE, key=k1, user=luser\\] testmsg" + + "OK\\[op=DELETE_KEY, key=k1, user=luser@REALM\\] testmsg" + + "OK\\[op=ROLL_NEW_VERSION, key=k1, user=luser@REALM\\] testmsg" + + "OK\\[op=INVALIDATE_CACHE, key=k1, user=luser@REALM\\] testmsg" // Aggregated - + "OK\\[op=DECRYPT_EEK, key=k1, user=luser, accessCount=6, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=DECRYPT_EEK, key=k1, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=REENCRYPT_EEK, key=k1, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=REENCRYPT_EEK, key=k1, user=luser, accessCount=3, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=REENCRYPT_EEK_BATCH, key=k1, user=luser\\] testmsg" - + "OK\\[op=REENCRYPT_EEK_BATCH, key=k1, user=luser\\] testmsg")); + + "OK\\[op=DECRYPT_EEK, key=k1, user=luser@REALM, accessCount=6, " + + "interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=DECRYPT_EEK, key=k1, user=luser@REALM, accessCount=1, " + + "interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=REENCRYPT_EEK, key=k1, user=luser@REALM, " + + "accessCount=1, interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=REENCRYPT_EEK, key=k1, user=luser@REALM, " + + "accessCount=3, interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=REENCRYPT_EEK_BATCH, key=k1, user=luser@REALM\\] testmsg" + + "OK\\[op=REENCRYPT_EEK_BATCH, key=k1, user=luser@REALM\\] " + + "testmsg"); + Assert.assertTrue(doesMatch); } @Test - @SuppressWarnings("checkstyle:linelength") public void testAggregationUnauth() throws Exception { - UserGroupInformation luser = Mockito.mock(UserGroupInformation.class); - Mockito.when(luser.getShortUserName()).thenReturn("luser"); kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k2"); kmsAudit.evictCacheForTesting(); kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg"); @@ -159,25 +160,29 @@ public void testAggregationUnauth() throws Exception { // The 
UNAUTHORIZED will trigger cache invalidation, which then triggers // the aggregated OK (accessCount=5). But the order of the UNAUTHORIZED and // the aggregated OK is arbitrary - no correctness concerns, but flaky here. - Assert.assertTrue(out.matches( - "UNAUTHORIZED\\[op=GENERATE_EEK, key=k2, user=luser\\] " - + "OK\\[op=GENERATE_EEK, key=k3, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=GENERATE_EEK, key=k3, user=luser, accessCount=5, interval=[^m]{1,4}ms\\] testmsg" - + "UNAUTHORIZED\\[op=GENERATE_EEK, key=k3, user=luser\\] " - + "OK\\[op=GENERATE_EEK, key=k3, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg") - || out.matches( - "UNAUTHORIZED\\[op=GENERATE_EEK, key=k2, user=luser\\] " - + "OK\\[op=GENERATE_EEK, key=k3, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" - + "UNAUTHORIZED\\[op=GENERATE_EEK, key=k3, user=luser\\] " - + "OK\\[op=GENERATE_EEK, key=k3, user=luser, accessCount=5, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=GENERATE_EEK, key=k3, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg")); + boolean doesMatch = out.matches( + "UNAUTHORIZED\\[op=GENERATE_EEK, key=k2, user=luser@REALM\\] " + + "OK\\[op=GENERATE_EEK, key=k3, user=luser@REALM, accessCount=1," + + " interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=GENERATE_EEK, key=k3, user=luser@REALM, accessCount=5," + + " interval=[^m]{1,4}ms\\] testmsg" + + "UNAUTHORIZED\\[op=GENERATE_EEK, key=k3, user=luser@REALM\\] " + + "OK\\[op=GENERATE_EEK, key=k3, user=luser@REALM, accessCount=1," + + " interval=[^m]{1,4}ms\\] testmsg"); + doesMatch = doesMatch || out.matches( + "UNAUTHORIZED\\[op=GENERATE_EEK, key=k2, user=luser@REALM\\] " + + "OK\\[op=GENERATE_EEK, key=k3, user=luser@REALM, accessCount=1," + + " interval=[^m]{1,4}ms\\] testmsg" + + "UNAUTHORIZED\\[op=GENERATE_EEK, key=k3, user=luser@REALM\\] " + + "OK\\[op=GENERATE_EEK, key=k3, user=luser@REALM, accessCount=5," + + " interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=GENERATE_EEK, key=k3, 
user=luser@REALM, accessCount=1," + + " interval=[^m]{1,4}ms\\] testmsg"); + Assert.assertTrue(doesMatch); } @Test - @SuppressWarnings("checkstyle:linelength") public void testAuditLogFormat() throws Exception { - UserGroupInformation luser = Mockito.mock(UserGroupInformation.class); - Mockito.when(luser.getShortUserName()).thenReturn("luser"); kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k4", "testmsg"); kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "testmsg"); kmsAudit.evictCacheForTesting(); @@ -187,12 +192,15 @@ public void testAuditLogFormat() throws Exception { String out = getAndResetLogOutput(); System.out.println(out); Assert.assertTrue(out.matches( - "OK\\[op=GENERATE_EEK, key=k4, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" - + "OK\\[op=GENERATE_EEK, user=luser\\] testmsg" - + "OK\\[op=GENERATE_EEK, key=k4, user=luser, accessCount=1, interval=[^m]{1,4}ms\\] testmsg" - + "UNAUTHORIZED\\[op=DECRYPT_EEK, key=k4, user=luser\\] " - + "ERROR\\[user=luser\\] Method:'method' Exception:'testmsg'" - + "UNAUTHENTICATED RemoteHost:remotehost Method:method URL:url ErrorMsg:'testmsg'")); + "OK\\[op=GENERATE_EEK, key=k4, user=luser@REALM, accessCount=1, " + + "interval=[^m]{1,4}ms\\] testmsg" + + "OK\\[op=GENERATE_EEK, user=luser@REALM\\] testmsg" + + "OK\\[op=GENERATE_EEK, key=k4, user=luser@REALM, accessCount=1," + + " interval=[^m]{1,4}ms\\] testmsg" + + "UNAUTHORIZED\\[op=DECRYPT_EEK, key=k4, user=luser@REALM\\] " + + "ERROR\\[user=luser@REALM\\] Method:'method' Exception:'testmsg'" + + "UNAUTHENTICATED RemoteHost:remotehost Method:method URL:url " + + "ErrorMsg:'testmsg'")); } @SuppressWarnings("unchecked") diff --git a/hadoop-common-project/hadoop-minikdc/pom.xml b/hadoop-common-project/hadoop-minikdc/pom.xml index adbd6e32bee58..487f29b62f74c 100644 --- a/hadoop-common-project/hadoop-minikdc/pom.xml +++ b/hadoop-common-project/hadoop-minikdc/pom.xml @@ -18,12 +18,12 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project 4.0.0 
hadoop-minikdc - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop MiniKDC Apache Hadoop MiniKDC jar @@ -40,7 +40,7 @@ org.slf4j - slf4j-log4j12 + slf4j-reload4j compile @@ -53,8 +53,8 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin ${basedir}/dev-support/findbugsExcludeFile.xml diff --git a/hadoop-common-project/hadoop-minikdc/src/test/java/org/apache/hadoop/minikdc/TestMiniKdc.java b/hadoop-common-project/hadoop-minikdc/src/test/java/org/apache/hadoop/minikdc/TestMiniKdc.java index 74130cff19b91..45684053a03ab 100644 --- a/hadoop-common-project/hadoop-minikdc/src/test/java/org/apache/hadoop/minikdc/TestMiniKdc.java +++ b/hadoop-common-project/hadoop-minikdc/src/test/java/org/apache/hadoop/minikdc/TestMiniKdc.java @@ -38,8 +38,35 @@ import java.util.Arrays; public class TestMiniKdc extends KerberosSecurityTestcase { - private static final boolean IBM_JAVA = System.getProperty("java.vendor") - .contains("IBM"); + private static final boolean IBM_JAVA = shouldUseIbmPackages(); + // duplicated to avoid cycles in the build + private static boolean shouldUseIbmPackages() { + final List ibmTechnologyEditionSecurityModules = Arrays.asList( + "com.ibm.security.auth.module.JAASLoginModule", + "com.ibm.security.auth.module.Win64LoginModule", + "com.ibm.security.auth.module.NTLoginModule", + "com.ibm.security.auth.module.AIX64LoginModule", + "com.ibm.security.auth.module.LinuxLoginModule", + "com.ibm.security.auth.module.Krb5LoginModule" + ); + + if (System.getProperty("java.vendor").contains("IBM")) { + return ibmTechnologyEditionSecurityModules + .stream().anyMatch((module) -> isSystemClassAvailable(module)); + } + + return false; + } + + private static boolean isSystemClassAvailable(String className) { + try { + Class.forName(className); + return true; + } catch (Exception ignored) { + return false; + } + } + @Test public void testMiniKdcStart() { MiniKdc kdc = getKdc(); @@ -117,9 +144,9 @@ public AppConfigurationEntry[] 
getAppConfigurationEntry(String name) { options.put("debug", "true"); return new AppConfigurationEntry[]{ - new AppConfigurationEntry(getKrb5LoginModuleName(), - AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, - options)}; + new AppConfigurationEntry(getKrb5LoginModuleName(), + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, + options)}; } } diff --git a/hadoop-common-project/hadoop-nfs/pom.xml b/hadoop-common-project/hadoop-nfs/pom.xml index e0fedaf1434e6..56c1d67d96446 100644 --- a/hadoop-common-project/hadoop-nfs/pom.xml +++ b/hadoop-common-project/hadoop-nfs/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-nfs - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop NFS @@ -79,23 +79,23 @@ compile - log4j - log4j + ch.qos.reload4j + reload4j runtime org.slf4j - slf4j-log4j12 + slf4j-reload4j runtime io.netty - netty + netty-all compile - com.google.guava - guava + org.apache.hadoop.thirdparty + hadoop-shaded-guava org.assertj @@ -107,8 +107,8 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin ${basedir}/dev-support/findbugsExcludeFile.xml diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/mount/MountdBase.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/mount/MountdBase.java index 0ff3084bf3eb9..58d3e51f2bdfb 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/mount/MountdBase.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/mount/MountdBase.java @@ -41,6 +41,8 @@ abstract public class MountdBase { private final RpcProgram rpcProgram; private int udpBoundPort; // Will set after server starts private int tcpBoundPort; // Will set after server starts + private SimpleUdpServer udpServer = null; + private SimpleTcpServer tcpServer = null; public RpcProgram getRpcProgram() { return rpcProgram; @@ -57,7 +59,7 @@ public MountdBase(RpcProgram program) throws 
IOException { /* Start UDP server */ private void startUDPServer() { - SimpleUdpServer udpServer = new SimpleUdpServer(rpcProgram.getPort(), + udpServer = new SimpleUdpServer(rpcProgram.getPort(), rpcProgram, 1); rpcProgram.startDaemons(); try { @@ -76,7 +78,7 @@ private void startUDPServer() { /* Start TCP server */ private void startTCPServer() { - SimpleTcpServer tcpServer = new SimpleTcpServer(rpcProgram.getPort(), + tcpServer = new SimpleTcpServer(rpcProgram.getPort(), rpcProgram, 1); rpcProgram.startDaemons(); try { @@ -118,6 +120,14 @@ public void stop() { rpcProgram.unregister(PortmapMapping.TRANSPORT_TCP, tcpBoundPort); tcpBoundPort = 0; } + if (udpServer != null) { + udpServer.shutdown(); + udpServer = null; + } + if (tcpServer != null) { + tcpServer.shutdown(); + tcpServer = null; + } } /** diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java index 3d5088d7006c4..97b8a444ac28a 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java @@ -32,7 +32,7 @@ import org.apache.hadoop.util.LightWeightGSet.LinkedElement; import org.apache.hadoop.util.StringUtils; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java index ff83a5f19bee1..e6ea29b42bff4 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java @@ -35,6 +35,7 @@ public abstract class Nfs3Base { 
public static final Logger LOG = LoggerFactory.getLogger(Nfs3Base.class); private final RpcProgram rpcProgram; private int nfsBoundPort; // Will set after server starts + private SimpleTcpServer tcpServer = null; public RpcProgram getRpcProgram() { return rpcProgram; @@ -61,7 +62,7 @@ public void start(boolean register) { } private void startTCPServer() { - SimpleTcpServer tcpServer = new SimpleTcpServer(rpcProgram.getPort(), + tcpServer = new SimpleTcpServer(rpcProgram.getPort(), rpcProgram, 0); rpcProgram.startDaemons(); try { @@ -84,6 +85,10 @@ public void stop() { nfsBoundPort = 0; } rpcProgram.stopDaemons(); + if (tcpServer != null) { + tcpServer.shutdown(); + tcpServer = null; + } } /** * Priority of the nfsd shutdown hook. diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/LOOKUP3Request.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/LOOKUP3Request.java index 4d31a8203f24e..46e4f7259d808 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/LOOKUP3Request.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/LOOKUP3Request.java @@ -23,7 +23,7 @@ import org.apache.hadoop.nfs.nfs3.FileHandle; import org.apache.hadoop.oncrpc.XDR; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * LOOKUP3 Request diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/READ3Request.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/READ3Request.java index 5898ec588ff31..e2f3e2ff8e03a 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/READ3Request.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/READ3Request.java @@ -22,7 +22,7 @@ import 
org.apache.hadoop.nfs.nfs3.FileHandle; import org.apache.hadoop.oncrpc.XDR; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * READ3 Request diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIR3Response.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIR3Response.java index 5bde2c0f69a8f..6fbfd5f0c6671 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIR3Response.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIR3Response.java @@ -28,7 +28,7 @@ import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.Verifier; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * READDIR3 Response diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIRPLUS3Response.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIRPLUS3Response.java index cf32bd1c87865..5e814c488e7cd 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIRPLUS3Response.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/response/READDIRPLUS3Response.java @@ -30,7 +30,7 @@ import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.Verifier; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * READDIRPLUS3 Response diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RegistrationClient.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RegistrationClient.java index 
c8528ba4d558f..c96f1d53bb4c5 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RegistrationClient.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RegistrationClient.java @@ -19,10 +19,9 @@ import java.util.Arrays; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; import org.apache.hadoop.oncrpc.RpcAcceptedReply.AcceptState; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.MessageEvent; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,10 +57,10 @@ private boolean validMessageLength(int len) { } @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) { - ChannelBuffer buf = (ChannelBuffer) e.getMessage(); // Read reply + public void channelRead(ChannelHandlerContext ctx, Object msg) { + ByteBuf buf = (ByteBuf) msg; // Read reply if (!validMessageLength(buf.readableBytes())) { - e.getChannel().close(); + ctx.channel().close(); return; } @@ -83,7 +82,7 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) { RpcDeniedReply deniedReply = (RpcDeniedReply) reply; handle(deniedReply); } - e.getChannel().close(); // shutdown now that request is complete + ctx.channel().close(); // shutdown now that request is complete } private void handle(RpcDeniedReply deniedReply) { diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java index 2d6f30ecb56cb..8632a387c6032 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java @@ -23,7 +23,7 @@ import java.util.Map; import java.util.Map.Entry; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class is used for handling the duplicate non-idempotenty Rpc diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java index b434d79285c6f..aba8e9ea2624e 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java @@ -19,9 +19,9 @@ import java.net.SocketAddress; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelHandlerContext; +import io.netty.buffer.ByteBuf; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; /** * RpcInfo records all contextual information of an RPC message. It contains @@ -29,11 +29,11 @@ */ public final class RpcInfo { private final RpcMessage header; - private final ChannelBuffer data; + private final ByteBuf data; private final Channel channel; private final SocketAddress remoteAddress; - public RpcInfo(RpcMessage header, ChannelBuffer data, + public RpcInfo(RpcMessage header, ByteBuf data, ChannelHandlerContext channelContext, Channel channel, SocketAddress remoteAddress) { this.header = header; @@ -46,7 +46,7 @@ public RpcMessage header() { return header; } - public ChannelBuffer data() { + public ByteBuf data() { return data; } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java index 5c059aa4550a4..cff6693f6aa86 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java @@ -22,17 +22,16 @@ import java.net.InetSocketAddress; 
import java.net.SocketAddress; -import com.google.common.annotations.VisibleForTesting; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.util.ReferenceCountUtil; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.oncrpc.RpcAcceptedReply.AcceptState; -import org.apache.hadoop.oncrpc.security.Verifier; import org.apache.hadoop.oncrpc.security.VerifierNone; import org.apache.hadoop.portmap.PortmapMapping; import org.apache.hadoop.portmap.PortmapRequest; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.SimpleChannelUpstreamHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,7 +39,7 @@ * Class for writing RPC server programs based on RFC 1050. Extend this class * and implement {@link #handleInternal} to handle the requests received. 
*/ -public abstract class RpcProgram extends SimpleChannelUpstreamHandler { +public abstract class RpcProgram extends ChannelInboundHandlerAdapter { static final Logger LOG = LoggerFactory.getLogger(RpcProgram.class); public static final int RPCB_PORT = 111; private final String program; @@ -162,11 +161,19 @@ public void startDaemons() {} public void stopDaemons() {} @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + public void channelRead(ChannelHandlerContext ctx, Object msg) + throws Exception { + RpcInfo info = (RpcInfo) msg; + try { + channelRead(ctx, info); + } finally { + ReferenceCountUtil.release(info.data()); + } + } + + private void channelRead(ChannelHandlerContext ctx, RpcInfo info) throws Exception { - RpcInfo info = (RpcInfo) e.getMessage(); RpcCall call = (RpcCall) info.header(); - SocketAddress remoteAddress = info.remoteAddress(); if (LOG.isTraceEnabled()) { LOG.trace(program + " procedure #" + call.getProcedure()); @@ -214,7 +221,7 @@ public boolean doPortMonitoring(SocketAddress remoteAddress) { private void sendAcceptedReply(RpcCall call, SocketAddress remoteAddress, AcceptState acceptState, ChannelHandlerContext ctx) { RpcAcceptedReply reply = RpcAcceptedReply.getInstance(call.getXid(), - acceptState, Verifier.VERIFIER_NONE); + acceptState, VerifierNone.INSTANCE); XDR out = new XDR(); reply.write(out); @@ -222,7 +229,7 @@ private void sendAcceptedReply(RpcCall call, SocketAddress remoteAddress, out.writeInt(lowProgVersion); out.writeInt(highProgVersion); } - ChannelBuffer b = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + ByteBuf b = Unpooled.wrappedBuffer(out.asReadOnlyWrap() .buffer()); RpcResponse rsp = new RpcResponse(b, remoteAddress); RpcUtil.sendRpcResponse(ctx, rsp); @@ -235,7 +242,7 @@ protected static void sendRejectedReply(RpcCall call, RpcReply.ReplyState.MSG_DENIED, RpcDeniedReply.RejectState.AUTH_ERROR, new VerifierNone()); reply.write(out); - ChannelBuffer buf = 
ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + ByteBuf buf = Unpooled.wrappedBuffer(out.asReadOnlyWrap() .buffer()); RpcResponse rsp = new RpcResponse(buf, remoteAddress); RpcUtil.sendRpcResponse(ctx, rsp); @@ -258,4 +265,4 @@ public int getPort() { public int getPortmapUdpTimeoutMillis() { return portmapUdpTimeoutMillis; } -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcReply.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcReply.java index e866a5c419cd3..985629e0285cb 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcReply.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcReply.java @@ -20,7 +20,7 @@ import org.apache.hadoop.oncrpc.security.RpcAuthInfo; import org.apache.hadoop.oncrpc.security.Verifier; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Represents an RPC message of type RPC reply as defined in RFC 1831 diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java index 2e45e6100b108..0d6431f68bd5a 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java @@ -19,27 +19,30 @@ import java.net.SocketAddress; -import org.jboss.netty.buffer.ChannelBuffer; +import io.netty.buffer.ByteBuf; +import io.netty.channel.DefaultAddressedEnvelope; /** * RpcResponse encapsulates a response to a RPC request. It contains the data * that is going to cross the wire, as well as the information of the remote * peer. 
*/ -public class RpcResponse { - private final ChannelBuffer data; - private final SocketAddress remoteAddress; +public class RpcResponse extends + DefaultAddressedEnvelope { + public RpcResponse(ByteBuf message, SocketAddress recipient) { + super(message, recipient, null); + } - public RpcResponse(ChannelBuffer data, SocketAddress remoteAddress) { - this.data = data; - this.remoteAddress = remoteAddress; + public RpcResponse(ByteBuf message, SocketAddress recipient, + SocketAddress sender) { + super(message, recipient, sender); } - public ChannelBuffer data() { - return data; + public ByteBuf data() { + return this.content(); } public SocketAddress remoteAddress() { - return remoteAddress; + return this.recipient(); } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java index cebebd27d0c4b..92354f6b86c85 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java @@ -17,16 +17,20 @@ */ package org.apache.hadoop.oncrpc; +import java.net.InetSocketAddress; +import java.net.SocketAddress; import java.nio.ByteBuffer; - -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.SimpleChannelUpstreamHandler; -import org.jboss.netty.handler.codec.frame.FrameDecoder; +import java.util.List; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelHandler; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.channel.SimpleChannelInboundHandler; +import 
io.netty.channel.socket.DatagramPacket; +import io.netty.handler.codec.ByteToMessageDecoder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,16 +47,16 @@ public static int getNewXid(String caller) { public static void sendRpcResponse(ChannelHandlerContext ctx, RpcResponse response) { - Channels.fireMessageReceived(ctx, response); + ctx.fireChannelRead(response); } - public static FrameDecoder constructRpcFrameDecoder() { + public static ByteToMessageDecoder constructRpcFrameDecoder() { return new RpcFrameDecoder(); } - public static final SimpleChannelUpstreamHandler STAGE_RPC_MESSAGE_PARSER = new RpcMessageParserStage(); - public static final SimpleChannelUpstreamHandler STAGE_RPC_TCP_RESPONSE = new RpcTcpResponseStage(); - public static final SimpleChannelUpstreamHandler STAGE_RPC_UDP_RESPONSE = new RpcUdpResponseStage(); + public static final ChannelInboundHandlerAdapter STAGE_RPC_MESSAGE_PARSER = new RpcMessageParserStage(); + public static final ChannelInboundHandlerAdapter STAGE_RPC_TCP_RESPONSE = new RpcTcpResponseStage(); + public static final ChannelInboundHandlerAdapter STAGE_RPC_UDP_RESPONSE = new RpcUdpResponseStage(); /** * An RPC client can separate a RPC message into several frames (i.e., @@ -62,44 +66,39 @@ public static FrameDecoder constructRpcFrameDecoder() { * RpcFrameDecoder is a stateful pipeline stage. It has to be constructed for * each RPC client. 
*/ - static class RpcFrameDecoder extends FrameDecoder { + static class RpcFrameDecoder extends ByteToMessageDecoder { public static final Logger LOG = LoggerFactory.getLogger(RpcFrameDecoder.class); - private ChannelBuffer currentFrame; + private volatile boolean isLast; @Override - protected Object decode(ChannelHandlerContext ctx, Channel channel, - ChannelBuffer buf) { + protected void decode(ChannelHandlerContext ctx, ByteBuf buf, + List out) { - if (buf.readableBytes() < 4) - return null; + if (buf.readableBytes() < 4) { + return; + } buf.markReaderIndex(); byte[] fragmentHeader = new byte[4]; buf.readBytes(fragmentHeader); int length = XDR.fragmentSize(fragmentHeader); - boolean isLast = XDR.isLastFragment(fragmentHeader); + isLast = XDR.isLastFragment(fragmentHeader); if (buf.readableBytes() < length) { buf.resetReaderIndex(); - return null; + return; } - ChannelBuffer newFragment = buf.readSlice(length); - if (currentFrame == null) { - currentFrame = newFragment; - } else { - currentFrame = ChannelBuffers.wrappedBuffer(currentFrame, newFragment); - } + ByteBuf newFragment = buf.readSlice(length); + newFragment.retain(); + out.add(newFragment); + } - if (isLast) { - ChannelBuffer completeFrame = currentFrame; - currentFrame = null; - return completeFrame; - } else { - return null; - } + @VisibleForTesting + public boolean isLast() { + return isLast; } } @@ -107,30 +106,46 @@ protected Object decode(ChannelHandlerContext ctx, Channel channel, * RpcMessageParserStage parses the network bytes and encapsulates the RPC * request into a RpcInfo instance. 
*/ - static final class RpcMessageParserStage extends SimpleChannelUpstreamHandler { + @ChannelHandler.Sharable + static final class RpcMessageParserStage extends ChannelInboundHandlerAdapter { private static final Logger LOG = LoggerFactory .getLogger(RpcMessageParserStage.class); @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception { - ChannelBuffer buf = (ChannelBuffer) e.getMessage(); - ByteBuffer b = buf.toByteBuffer().asReadOnlyBuffer(); + ByteBuf buf; + SocketAddress remoteAddress; + if (msg instanceof DatagramPacket) { + DatagramPacket packet = (DatagramPacket)msg; + buf = packet.content(); + remoteAddress = packet.sender(); + } else { + buf = (ByteBuf) msg; + remoteAddress = ctx.channel().remoteAddress(); + } + + ByteBuffer b = buf.nioBuffer().asReadOnlyBuffer(); XDR in = new XDR(b, XDR.State.READING); RpcInfo info = null; try { RpcCall callHeader = RpcCall.read(in); - ChannelBuffer dataBuffer = ChannelBuffers.wrappedBuffer(in.buffer() - .slice()); - info = new RpcInfo(callHeader, dataBuffer, ctx, e.getChannel(), - e.getRemoteAddress()); + ByteBuf dataBuffer = buf.slice(b.position(), b.remaining()); + + info = new RpcInfo(callHeader, dataBuffer, ctx, ctx.channel(), + remoteAddress); } catch (Exception exc) { - LOG.info("Malformed RPC request from " + e.getRemoteAddress()); + LOG.info("Malformed RPC request from " + remoteAddress); + } finally { + // only release buffer if it is not passed to downstream handler + if (info == null) { + buf.release(); + } } if (info != null) { - Channels.fireMessageReceived(ctx, info); + ctx.fireChannelRead(info); } } } @@ -139,16 +154,17 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) * RpcTcpResponseStage sends an RpcResponse across the wire with the * appropriate fragment header. 
*/ - private static class RpcTcpResponseStage extends SimpleChannelUpstreamHandler { + @ChannelHandler.Sharable + private static class RpcTcpResponseStage extends ChannelInboundHandlerAdapter { @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception { - RpcResponse r = (RpcResponse) e.getMessage(); + RpcResponse r = (RpcResponse) msg; byte[] fragmentHeader = XDR.recordMark(r.data().readableBytes(), true); - ChannelBuffer header = ChannelBuffers.wrappedBuffer(fragmentHeader); - ChannelBuffer d = ChannelBuffers.wrappedBuffer(header, r.data()); - e.getChannel().write(d); + ByteBuf header = Unpooled.wrappedBuffer(fragmentHeader); + ByteBuf d = Unpooled.wrappedBuffer(header, r.data()); + ctx.channel().writeAndFlush(d); } } @@ -156,14 +172,20 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) * RpcUdpResponseStage sends an RpcResponse as a UDP packet, which does not * require a fragment header. */ + @ChannelHandler.Sharable private static final class RpcUdpResponseStage extends - SimpleChannelUpstreamHandler { + SimpleChannelInboundHandler { + public RpcUdpResponseStage() { + // do not auto release the RpcResponse message. 
+ super(false); + } @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) - throws Exception { - RpcResponse r = (RpcResponse) e.getMessage(); - e.getChannel().write(r.data(), r.remoteAddress()); + protected void channelRead0(ChannelHandlerContext ctx, + RpcResponse response) throws Exception { + ByteBuf buf = Unpooled.wrappedBuffer(response.data()); + ctx.writeAndFlush(new DatagramPacket( + buf, (InetSocketAddress) response.recipient())); } } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClient.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClient.java index 32e1b4b839218..7cfef6439b059 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClient.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClient.java @@ -18,15 +18,16 @@ package org.apache.hadoop.oncrpc; import java.net.InetSocketAddress; -import java.util.concurrent.Executors; -import org.jboss.netty.bootstrap.ClientBootstrap; -import org.jboss.netty.channel.ChannelFactory; -import org.jboss.netty.channel.ChannelFuture; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory; +import io.netty.bootstrap.Bootstrap; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelOption; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.SocketChannel; +import io.netty.channel.socket.nio.NioSocketChannel; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A simple TCP based RPC client which just sends a request to a server. 
@@ -35,8 +36,9 @@ public class SimpleTcpClient { protected final String host; protected final int port; protected final XDR request; - protected ChannelPipelineFactory pipelineFactory; protected final boolean oneShot; + private NioEventLoopGroup workerGroup; + private ChannelFuture future; public SimpleTcpClient(String host, int port, XDR request) { this(host,port, request, true); @@ -48,40 +50,54 @@ public SimpleTcpClient(String host, int port, XDR request, Boolean oneShot) { this.request = request; this.oneShot = oneShot; } - - protected ChannelPipelineFactory setPipelineFactory() { - this.pipelineFactory = new ChannelPipelineFactory() { + + protected ChannelInitializer setChannelHandler() { + return new ChannelInitializer() { @Override - public ChannelPipeline getPipeline() { - return Channels.pipeline( + protected void initChannel(SocketChannel ch) throws Exception { + ChannelPipeline p = ch.pipeline(); + p.addLast( RpcUtil.constructRpcFrameDecoder(), - new SimpleTcpClientHandler(request)); + new SimpleTcpClientHandler(request) + ); } }; - return this.pipelineFactory; } + @VisibleForTesting public void run() { // Configure the client. - ChannelFactory factory = new NioClientSocketChannelFactory( - Executors.newCachedThreadPool(), Executors.newCachedThreadPool(), 1, 1); - ClientBootstrap bootstrap = new ClientBootstrap(factory); - - // Set up the pipeline factory. - bootstrap.setPipelineFactory(setPipelineFactory()); - - bootstrap.setOption("tcpNoDelay", true); - bootstrap.setOption("keepAlive", true); + workerGroup = new NioEventLoopGroup(); + Bootstrap bootstrap = new Bootstrap() + .group(workerGroup) + .channel(NioSocketChannel.class); - // Start the connection attempt. 
- ChannelFuture future = bootstrap.connect(new InetSocketAddress(host, port)); + try { + future = bootstrap.handler(setChannelHandler()) + .option(ChannelOption.TCP_NODELAY, true) + .option(ChannelOption.SO_KEEPALIVE, true) + .connect(new InetSocketAddress(host, port)).sync(); + } catch (InterruptedException e) { + e.printStackTrace(); + } finally { + if (oneShot) { + stop(); + } + } + } - if (oneShot) { - // Wait until the connection is closed or the connection attempt fails. - future.getChannel().getCloseFuture().awaitUninterruptibly(); + public void stop() { + try { + if (future != null) { + // Wait until the connection is closed or the connection attempt fails. + future.channel().closeFuture().sync(); + } + } catch (InterruptedException e) { + e.printStackTrace(); + } finally { // Shut down thread pools to exit. - bootstrap.releaseExternalResources(); + workerGroup.shutdownGracefully(); } } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClientHandler.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClientHandler.java index 23b6682361c9b..1acefc857f830 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClientHandler.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpClientHandler.java @@ -17,19 +17,19 @@ */ package org.apache.hadoop.oncrpc; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ChannelStateEvent; -import org.jboss.netty.channel.ExceptionEvent; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.SimpleChannelHandler; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInboundHandlerAdapter; +import 
io.netty.util.ReferenceCountUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A simple TCP based RPC client handler used by {@link SimpleTcpServer}. */ -public class SimpleTcpClientHandler extends SimpleChannelHandler { +public class SimpleTcpClientHandler extends ChannelInboundHandlerAdapter { public static final Logger LOG = LoggerFactory.getLogger(SimpleTcpClient.class); protected final XDR request; @@ -39,13 +39,13 @@ public SimpleTcpClientHandler(XDR request) { } @Override - public void channelConnected(ChannelHandlerContext ctx, ChannelStateEvent e) { + public void channelActive(ChannelHandlerContext ctx) throws Exception { // Send the request if (LOG.isDebugEnabled()) { LOG.debug("sending PRC request"); } - ChannelBuffer outBuf = XDR.writeMessageTcp(request, true); - e.getChannel().write(outBuf); + ByteBuf outBuf = XDR.writeMessageTcp(request, true); + ctx.channel().writeAndFlush(outBuf); } /** @@ -53,13 +53,13 @@ public void channelConnected(ChannelHandlerContext ctx, ChannelStateEvent e) { * more interaction with the server. 
*/ @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) { - e.getChannel().close(); + public void channelRead(ChannelHandlerContext ctx, Object msg) { + ctx.channel().close(); } @Override - public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) { - LOG.warn("Unexpected exception from downstream: ", e.getCause()); - e.getChannel().close(); + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { + LOG.warn("Unexpected exception from downstream: ", cause.getCause()); + ctx.channel().close(); } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java index 177fa3d80b1b4..29155c80b1846 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java @@ -20,14 +20,17 @@ import java.net.InetSocketAddress; import java.util.concurrent.Executors; -import org.jboss.netty.bootstrap.ServerBootstrap; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelFactory; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.SimpleChannelUpstreamHandler; -import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; +import io.netty.bootstrap.ServerBootstrap; +import io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelOption; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.EventLoopGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.SocketChannel; +import 
io.netty.channel.socket.nio.NioServerSocketChannel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,9 +42,11 @@ public class SimpleTcpServer { LoggerFactory.getLogger(SimpleTcpServer.class); protected final int port; protected int boundPort = -1; // Will be set after server starts - protected final SimpleChannelUpstreamHandler rpcProgram; + protected final ChannelInboundHandlerAdapter rpcProgram; private ServerBootstrap server; private Channel ch; + private EventLoopGroup bossGroup; + private EventLoopGroup workerGroup; /** The maximum number of I/O worker threads */ protected final int workerCount; @@ -57,37 +62,32 @@ public SimpleTcpServer(int port, RpcProgram program, int workercount) { this.workerCount = workercount; } - public void run() { + public void run() throws InterruptedException { // Configure the Server. - ChannelFactory factory; - if (workerCount == 0) { - // Use default workers: 2 * the number of available processors - factory = new NioServerSocketChannelFactory( - Executors.newCachedThreadPool(), Executors.newCachedThreadPool()); - } else { - factory = new NioServerSocketChannelFactory( - Executors.newCachedThreadPool(), Executors.newCachedThreadPool(), - workerCount); - } + bossGroup = new NioEventLoopGroup(); + workerGroup = new NioEventLoopGroup(workerCount, Executors.newCachedThreadPool()); - server = new ServerBootstrap(factory); - server.setPipelineFactory(new ChannelPipelineFactory() { + server = new ServerBootstrap(); + server.group(bossGroup, workerGroup) + .channel(NioServerSocketChannel.class) + .childHandler(new ChannelInitializer() { @Override - public ChannelPipeline getPipeline() throws Exception { - return Channels.pipeline(RpcUtil.constructRpcFrameDecoder(), + protected void initChannel(SocketChannel ch) throws Exception { + ChannelPipeline p = ch.pipeline(); + p.addLast(RpcUtil.constructRpcFrameDecoder(), RpcUtil.STAGE_RPC_MESSAGE_PARSER, rpcProgram, RpcUtil.STAGE_RPC_TCP_RESPONSE); - } - }); - 
server.setOption("child.tcpNoDelay", true); - server.setOption("child.keepAlive", true); - server.setOption("child.reuseAddress", true); - server.setOption("reuseAddress", true); + }}) + .childOption(ChannelOption.TCP_NODELAY, true) + .childOption(ChannelOption.SO_KEEPALIVE, true) + .childOption(ChannelOption.SO_REUSEADDR, true) + .option(ChannelOption.SO_REUSEADDR, true); // Listen to TCP port - ch = server.bind(new InetSocketAddress(port)); - InetSocketAddress socketAddr = (InetSocketAddress) ch.getLocalAddress(); + ChannelFuture f = server.bind(new InetSocketAddress(port)).sync(); + ch = f.channel(); + InetSocketAddress socketAddr = (InetSocketAddress) ch.localAddress(); boundPort = socketAddr.getPort(); LOG.info("Started listening to TCP requests at port " + boundPort + " for " @@ -102,9 +102,17 @@ public int getBoundPort() { public void shutdown() { if (ch != null) { ch.close().awaitUninterruptibly(); + ch = null; + } + + if (workerGroup != null) { + workerGroup.shutdownGracefully(); + workerGroup = null; } - if (server != null) { - server.releaseExternalResources(); + + if (bossGroup != null) { + bossGroup.shutdownGracefully(); + bossGroup = null; } } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java index e65003ca64beb..516503c323a08 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java @@ -20,12 +20,16 @@ import java.net.InetSocketAddress; import java.util.concurrent.Executors; -import org.jboss.netty.bootstrap.ConnectionlessBootstrap; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.SimpleChannelUpstreamHandler; -import org.jboss.netty.channel.socket.DatagramChannelFactory; -import 
org.jboss.netty.channel.socket.nio.NioDatagramChannelFactory; +import io.netty.bootstrap.Bootstrap; +import io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelOption; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.EventLoopGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.nio.NioDatagramChannel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,36 +43,45 @@ public class SimpleUdpServer { private final int RECEIVE_BUFFER_SIZE = 65536; protected final int port; - protected final SimpleChannelUpstreamHandler rpcProgram; + protected final ChannelInboundHandlerAdapter rpcProgram; protected final int workerCount; protected int boundPort = -1; // Will be set after server starts - private ConnectionlessBootstrap server; + private Bootstrap server; private Channel ch; + private EventLoopGroup workerGroup; - public SimpleUdpServer(int port, SimpleChannelUpstreamHandler program, + public SimpleUdpServer(int port, ChannelInboundHandlerAdapter program, int workerCount) { this.port = port; this.rpcProgram = program; this.workerCount = workerCount; } - public void run() { - // Configure the client. 
- DatagramChannelFactory f = new NioDatagramChannelFactory( - Executors.newCachedThreadPool(), workerCount); + public void run() throws InterruptedException { + workerGroup = new NioEventLoopGroup(workerCount, Executors.newCachedThreadPool()); - server = new ConnectionlessBootstrap(f); - server.setPipeline(Channels.pipeline(RpcUtil.STAGE_RPC_MESSAGE_PARSER, - rpcProgram, RpcUtil.STAGE_RPC_UDP_RESPONSE)); - - server.setOption("broadcast", "false"); - server.setOption("sendBufferSize", SEND_BUFFER_SIZE); - server.setOption("receiveBufferSize", RECEIVE_BUFFER_SIZE); - server.setOption("reuseAddress", true); + server = new Bootstrap(); + server.group(workerGroup) + .channel(NioDatagramChannel.class) + .option(ChannelOption.SO_BROADCAST, true) + .option(ChannelOption.SO_SNDBUF, SEND_BUFFER_SIZE) + .option(ChannelOption.SO_RCVBUF, RECEIVE_BUFFER_SIZE) + .option(ChannelOption.SO_REUSEADDR, true) + .handler(new ChannelInitializer() { + @Override protected void initChannel(NioDatagramChannel ch) + throws Exception { + ChannelPipeline p = ch.pipeline(); + p.addLast( + RpcUtil.STAGE_RPC_MESSAGE_PARSER, + rpcProgram, + RpcUtil.STAGE_RPC_UDP_RESPONSE); + } + }); // Listen to the UDP port - ch = server.bind(new InetSocketAddress(port)); - InetSocketAddress socketAddr = (InetSocketAddress) ch.getLocalAddress(); + ChannelFuture f = server.bind(new InetSocketAddress(port)).sync(); + ch = f.channel(); + InetSocketAddress socketAddr = (InetSocketAddress) ch.localAddress(); boundPort = socketAddr.getPort(); LOG.info("Started listening to UDP requests at port " + boundPort + " for " @@ -83,9 +96,11 @@ public int getBoundPort() { public void shutdown() { if (ch != null) { ch.close().awaitUninterruptibly(); + ch = null; } - if (server != null) { - server.releaseExternalResources(); + if (workerGroup != null) { + workerGroup.shutdownGracefully(); + workerGroup = null; } } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java 
b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java index acdc51c954521..6000fd57a1b65 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java @@ -20,11 +20,11 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Utility class for building XDR messages based on RFC 4506. @@ -242,7 +242,7 @@ static byte[] recordMark(int size, boolean last) { * @param last specifies last request or not * @return TCP buffer */ - public static ChannelBuffer writeMessageTcp(XDR request, boolean last) { + public static ByteBuf writeMessageTcp(XDR request, boolean last) { Preconditions.checkState(request.state == XDR.State.WRITING); ByteBuffer b = request.buf.duplicate(); b.flip(); @@ -250,7 +250,7 @@ public static ChannelBuffer writeMessageTcp(XDR request, boolean last) { ByteBuffer headerBuf = ByteBuffer.wrap(fragmentHeader); // TODO: Investigate whether making a copy of the buffer is necessary. - return ChannelBuffers.copiedBuffer(headerBuf, b); + return Unpooled.wrappedBuffer(headerBuf, b); } /** @@ -258,10 +258,10 @@ public static ChannelBuffer writeMessageTcp(XDR request, boolean last) { * @param response XDR response * @return UDP buffer */ - public static ChannelBuffer writeMessageUdp(XDR response) { + public static ByteBuf writeMessageUdp(XDR response) { Preconditions.checkState(response.state == XDR.State.READING); // TODO: Investigate whether making a copy of the buffer is necessary. 
- return ChannelBuffers.copiedBuffer(response.buf); + return Unpooled.copiedBuffer(response.buf); } public static int fragmentSize(byte[] mark) { diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Credentials.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Credentials.java index 64edf485b29f7..fd832c4ab2409 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Credentials.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Credentials.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.oncrpc.security; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.oncrpc.XDR; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsNone.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsNone.java index 753edba49fbda..f62dc6bd223b7 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsNone.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsNone.java @@ -19,7 +19,7 @@ import org.apache.hadoop.oncrpc.XDR; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** Credential used by AUTH_NONE */ public class CredentialsNone extends Credentials { diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsSys.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsSys.java index 19ba32022eef7..8713d210f4678 100644 --- 
a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsSys.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/CredentialsSys.java @@ -21,7 +21,7 @@ import java.net.UnknownHostException; import java.nio.charset.StandardCharsets; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.oncrpc.XDR; /** Credential used by AUTH_SYS */ diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java index 3c0e5fe36e2b8..585e9fb5f441d 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java @@ -27,8 +27,6 @@ */ public abstract class Verifier extends RpcAuthInfo { - public static final Verifier VERIFIER_NONE = new VerifierNone(); - protected Verifier(AuthFlavor flavor) { super(flavor); } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/VerifierNone.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/VerifierNone.java index 8bccd1b9be247..005fe838a3149 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/VerifierNone.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/VerifierNone.java @@ -19,11 +19,13 @@ import org.apache.hadoop.oncrpc.XDR; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** Verifier used by AUTH_NONE. 
*/ public class VerifierNone extends Verifier { + public static final Verifier INSTANCE = new VerifierNone(); + public VerifierNone() { super(AuthFlavor.AUTH_NONE); } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/Portmap.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/Portmap.java index 123999d5e14c0..23c7977e30d05 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/Portmap.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/Portmap.java @@ -22,23 +22,29 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import io.netty.bootstrap.Bootstrap; +import io.netty.bootstrap.ServerBootstrap; +import io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelOption; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.EventLoopGroup; +import io.netty.channel.group.ChannelGroup; +import io.netty.channel.group.DefaultChannelGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.SocketChannel; +import io.netty.channel.socket.nio.NioDatagramChannel; +import io.netty.channel.socket.nio.NioServerSocketChannel; +import io.netty.handler.logging.LogLevel; +import io.netty.handler.logging.LoggingHandler; +import io.netty.handler.timeout.IdleStateHandler; +import io.netty.util.concurrent.GlobalEventExecutor; import org.apache.hadoop.oncrpc.RpcProgram; import org.apache.hadoop.oncrpc.RpcUtil; import org.apache.hadoop.util.StringUtils; -import org.jboss.netty.bootstrap.ConnectionlessBootstrap; -import org.jboss.netty.bootstrap.ServerBootstrap; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.group.ChannelGroup; -import 
org.jboss.netty.channel.group.DefaultChannelGroup; -import org.jboss.netty.channel.socket.nio.NioDatagramChannelFactory; -import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; -import org.jboss.netty.handler.timeout.IdleStateHandler; -import org.jboss.netty.util.HashedWheelTimer; - -import com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,11 +55,17 @@ final class Portmap { private static final Logger LOG = LoggerFactory.getLogger(Portmap.class); private static final int DEFAULT_IDLE_TIME_MILLISECONDS = 5000; - private ConnectionlessBootstrap udpServer; + private Bootstrap udpServer; private ServerBootstrap tcpServer; - private ChannelGroup allChannels = new DefaultChannelGroup(); + private ChannelGroup allChannels = new DefaultChannelGroup( + GlobalEventExecutor.INSTANCE); private Channel udpChannel; private Channel tcpChannel; + + EventLoopGroup bossGroup; + EventLoopGroup workerGroup; + EventLoopGroup udpGroup; + private final RpcProgramPortmap handler = new RpcProgramPortmap(allChannels); public static void main(String[] args) { @@ -73,18 +85,19 @@ public static void main(String[] args) { void shutdown() { allChannels.close().awaitUninterruptibly(); - tcpServer.releaseExternalResources(); - udpServer.releaseExternalResources(); + bossGroup.shutdownGracefully(); + workerGroup.shutdownGracefully(); + udpGroup.shutdownGracefully(); } @VisibleForTesting SocketAddress getTcpServerLocalAddress() { - return tcpChannel.getLocalAddress(); + return tcpChannel.localAddress(); } @VisibleForTesting SocketAddress getUdpServerLoAddress() { - return udpChannel.getLocalAddress(); + return udpChannel.localAddress(); } @VisibleForTesting @@ -93,38 +106,53 @@ RpcProgramPortmap getHandler() { } void start(final int idleTimeMilliSeconds, final SocketAddress tcpAddress, - final SocketAddress udpAddress) { - - 
tcpServer = new ServerBootstrap(new NioServerSocketChannelFactory( - Executors.newCachedThreadPool(), Executors.newCachedThreadPool())); - tcpServer.setPipelineFactory(new ChannelPipelineFactory() { - private final HashedWheelTimer timer = new HashedWheelTimer(); - private final IdleStateHandler idleStateHandler = new IdleStateHandler( - timer, 0, 0, idleTimeMilliSeconds, TimeUnit.MILLISECONDS); - - @Override - public ChannelPipeline getPipeline() throws Exception { - return Channels.pipeline(RpcUtil.constructRpcFrameDecoder(), - RpcUtil.STAGE_RPC_MESSAGE_PARSER, idleStateHandler, handler, - RpcUtil.STAGE_RPC_TCP_RESPONSE); - } - }); - tcpServer.setOption("reuseAddress", true); - tcpServer.setOption("child.reuseAddress", true); - - udpServer = new ConnectionlessBootstrap(new NioDatagramChannelFactory( - Executors.newCachedThreadPool())); - - udpServer.setPipeline(Channels.pipeline(RpcUtil.STAGE_RPC_MESSAGE_PARSER, - handler, RpcUtil.STAGE_RPC_UDP_RESPONSE)); - udpServer.setOption("reuseAddress", true); - - tcpChannel = tcpServer.bind(tcpAddress); - udpChannel = udpServer.bind(udpAddress); + final SocketAddress udpAddress) throws InterruptedException { + + bossGroup = new NioEventLoopGroup(); + workerGroup = new NioEventLoopGroup(0, Executors.newCachedThreadPool()); + + tcpServer = new ServerBootstrap(); + tcpServer.group(bossGroup, workerGroup) + .option(ChannelOption.SO_REUSEADDR, true) + .childOption(ChannelOption.SO_REUSEADDR, true) + .channel(NioServerSocketChannel.class) + .childHandler(new ChannelInitializer() { + @Override + protected void initChannel(SocketChannel ch) throws Exception { + ChannelPipeline p = ch.pipeline(); + + p.addLast(RpcUtil.constructRpcFrameDecoder(), + RpcUtil.STAGE_RPC_MESSAGE_PARSER, new IdleStateHandler(0, 0, + idleTimeMilliSeconds, TimeUnit.MILLISECONDS), handler, + RpcUtil.STAGE_RPC_TCP_RESPONSE); + }}); + + udpGroup = new NioEventLoopGroup(0, Executors.newCachedThreadPool()); + + udpServer = new Bootstrap(); + 
udpServer.group(udpGroup) + .channel(NioDatagramChannel.class) + .handler(new ChannelInitializer() { + @Override protected void initChannel(NioDatagramChannel ch) + throws Exception { + ChannelPipeline p = ch.pipeline(); + p.addLast( + new LoggingHandler(LogLevel.DEBUG), + RpcUtil.STAGE_RPC_MESSAGE_PARSER, handler, RpcUtil.STAGE_RPC_UDP_RESPONSE); + } + }) + .option(ChannelOption.SO_REUSEADDR, true); + + ChannelFuture tcpChannelFuture = null; + tcpChannelFuture = tcpServer.bind(tcpAddress); + ChannelFuture udpChannelFuture = udpServer.bind(udpAddress); + tcpChannel = tcpChannelFuture.sync().channel(); + udpChannel = udpChannelFuture.sync().channel(); + allChannels.add(tcpChannel); allChannels.add(udpChannel); - LOG.info("Portmap server started at tcp://" + tcpChannel.getLocalAddress() - + ", udp://" + udpChannel.getLocalAddress()); + LOG.info("Portmap server started at tcp://" + tcpChannel.localAddress() + + ", udp://" + udpChannel.localAddress()); } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java index 0bc380f614c1c..7b33a644fbe76 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java @@ -19,6 +19,14 @@ import java.util.concurrent.ConcurrentHashMap; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelHandler; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.group.ChannelGroup; +import io.netty.handler.timeout.IdleState; +import io.netty.handler.timeout.IdleStateEvent; +import io.netty.handler.timeout.IdleStateHandler; import org.apache.hadoop.oncrpc.RpcAcceptedReply; import org.apache.hadoop.oncrpc.RpcCall; import org.apache.hadoop.oncrpc.RpcInfo; @@ -27,20 +35,12 @@ import 
org.apache.hadoop.oncrpc.RpcUtil; import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.VerifierNone; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ChannelStateEvent; -import org.jboss.netty.channel.ExceptionEvent; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.group.ChannelGroup; -import org.jboss.netty.handler.timeout.IdleState; -import org.jboss.netty.handler.timeout.IdleStateAwareChannelUpstreamHandler; -import org.jboss.netty.handler.timeout.IdleStateEvent; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -final class RpcProgramPortmap extends IdleStateAwareChannelUpstreamHandler { +@ChannelHandler.Sharable +final class RpcProgramPortmap extends IdleStateHandler { static final int PROGRAM = 100000; static final int VERSION = 2; @@ -60,6 +60,8 @@ final class RpcProgramPortmap extends IdleStateAwareChannelUpstreamHandler { private final ChannelGroup allChannels; RpcProgramPortmap(ChannelGroup allChannels) { + super(1, 1, 1); + // FIXME: set default idle timeout 1 second. 
this.allChannels = allChannels; PortmapMapping m = new PortmapMapping(PROGRAM, VERSION, PortmapMapping.TRANSPORT_TCP, RpcProgram.RPCB_PORT); @@ -151,14 +153,14 @@ private XDR dump(int xid, XDR in, XDR out) { } @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception { - RpcInfo info = (RpcInfo) e.getMessage(); + RpcInfo info = (RpcInfo) msg; RpcCall rpcCall = (RpcCall) info.header(); final int portmapProc = rpcCall.getProcedure(); int xid = rpcCall.getXid(); - XDR in = new XDR(info.data().toByteBuffer().asReadOnlyBuffer(), + XDR in = new XDR(info.data().nioBuffer().asReadOnlyBuffer(), XDR.State.READING); XDR out = new XDR(); @@ -181,29 +183,29 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) reply.write(out); } - ChannelBuffer buf = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + ByteBuf buf = Unpooled.wrappedBuffer(out.asReadOnlyWrap() .buffer()); RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); RpcUtil.sendRpcResponse(ctx, rsp); } @Override - public void channelOpen(ChannelHandlerContext ctx, ChannelStateEvent e) + public void channelActive(ChannelHandlerContext ctx) throws Exception { - allChannels.add(e.getChannel()); + allChannels.add(ctx.channel()); } @Override public void channelIdle(ChannelHandlerContext ctx, IdleStateEvent e) throws Exception { - if (e.getState() == IdleState.ALL_IDLE) { - e.getChannel().close(); + if (e.state() == IdleState.ALL_IDLE) { + ctx.channel().close(); } } @Override - public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) { - LOG.warn("Encountered ", e.getCause()); - e.getChannel().close(); + public void exceptionCaught(ChannelHandlerContext ctx, Throwable t) { + LOG.warn("Encountered ", t); + ctx.channel().close(); } } diff --git a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java 
b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java index 0e416b3738d20..6d103fdd781c6 100644 --- a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java +++ b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java @@ -22,19 +22,19 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import java.util.Random; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelException; +import io.netty.channel.ChannelHandler; +import io.netty.channel.ChannelHandlerContext; import org.apache.hadoop.oncrpc.RpcUtil.RpcFrameDecoder; import org.apache.hadoop.oncrpc.security.CredentialsNone; import org.apache.hadoop.oncrpc.security.VerifierNone; import org.apache.hadoop.test.GenericTestUtils; -import org.jboss.netty.buffer.ByteBufferBackedChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelException; -import org.jboss.netty.channel.ChannelHandlerContext; import org.junit.Test; import org.mockito.Mockito; import org.slf4j.event.Level; @@ -55,6 +55,7 @@ static void testRequest(XDR request, int serverPort) { tcpClient.run(); } + @ChannelHandler.Sharable static class TestRpcProgram extends RpcProgram { protected TestRpcProgram(String program, String host, int port, @@ -83,7 +84,7 @@ protected void handleInternal(ChannelHandlerContext ctx, RpcInfo info) { new VerifierNone()); XDR out = new XDR(); reply.write(out); - ChannelBuffer b = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap().buffer()); + ByteBuf b = Unpooled.wrappedBuffer(out.asReadOnlyWrap().buffer()); RpcResponse rsp = new RpcResponse(b, info.remoteAddress()); RpcUtil.sendRpcResponse(ctx, rsp); } @@ -99,13 +100,14 
@@ public void testSingleFrame() { RpcFrameDecoder decoder = new RpcFrameDecoder(); // Test "Length field is not received yet" - ByteBuffer buffer = ByteBuffer.allocate(1); - ChannelBuffer buf = new ByteBufferBackedChannelBuffer(buffer); - ChannelBuffer channelBuffer = (ChannelBuffer) decoder.decode( - Mockito.mock(ChannelHandlerContext.class), Mockito.mock(Channel.class), - buf); - assertTrue(channelBuffer == null); + ByteBuf buf = Unpooled.directBuffer(1); + List outputBufs = new ArrayList<>(); + decoder.decode( + Mockito.mock(ChannelHandlerContext.class), buf, + outputBufs); + assertTrue(outputBufs.isEmpty()); + decoder = new RpcFrameDecoder(); // Test all bytes are not received yet byte[] fragment = new byte[4 + 9]; fragment[0] = (byte) (1 << 7); // final fragment @@ -114,15 +116,16 @@ public void testSingleFrame() { fragment[3] = (byte) 10; // fragment size = 10 bytes assertTrue(XDR.isLastFragment(fragment)); assertTrue(XDR.fragmentSize(fragment)==10); + buf.release(); - buffer = ByteBuffer.allocate(4 + 9); - buffer.put(fragment); - buffer.flip(); - buf = new ByteBufferBackedChannelBuffer(buffer); - channelBuffer = (ChannelBuffer) decoder.decode( - Mockito.mock(ChannelHandlerContext.class), Mockito.mock(Channel.class), - buf); - assertTrue(channelBuffer == null); + buf = Unpooled.directBuffer(4 + 9); + buf.writeBytes(fragment); + outputBufs = new ArrayList<>(); + decoder.decode( + Mockito.mock(ChannelHandlerContext.class), buf, + outputBufs); + assertTrue(decoder.isLast()); + buf.release(); } @Test @@ -137,16 +140,15 @@ public void testMultipleFrames() { fragment1[3] = (byte) 10; // fragment size = 10 bytes assertFalse(XDR.isLastFragment(fragment1)); assertTrue(XDR.fragmentSize(fragment1)==10); + + List outputBufs = new ArrayList<>(); // decoder should wait for the final fragment - ByteBuffer buffer = ByteBuffer.allocate(4 + 10); - buffer.put(fragment1); - buffer.flip(); - ChannelBuffer buf = new ByteBufferBackedChannelBuffer(buffer); - ChannelBuffer 
channelBuffer = (ChannelBuffer) decoder.decode( - Mockito.mock(ChannelHandlerContext.class), Mockito.mock(Channel.class), - buf); - assertTrue(channelBuffer == null); + ByteBuf buf = Unpooled.directBuffer(4 + 10, 4 + 10); + buf.writeBytes(fragment1); + decoder.decode( + Mockito.mock(ChannelHandlerContext.class), buf, + outputBufs); byte[] fragment2 = new byte[4 + 10]; fragment2[0] = (byte) (1 << 7); // final fragment @@ -155,21 +157,22 @@ public void testMultipleFrames() { fragment2[3] = (byte) 10; // fragment size = 10 bytes assertTrue(XDR.isLastFragment(fragment2)); assertTrue(XDR.fragmentSize(fragment2)==10); + buf.release(); - buffer = ByteBuffer.allocate(4 + 10); - buffer.put(fragment2); - buffer.flip(); - buf = new ByteBufferBackedChannelBuffer(buffer); - channelBuffer = (ChannelBuffer) decoder.decode( - Mockito.mock(ChannelHandlerContext.class), Mockito.mock(Channel.class), - buf); - assertTrue(channelBuffer != null); - // Complete frame should have to total size 10+10=20 - assertEquals(20, channelBuffer.readableBytes()); + buf = Unpooled.directBuffer(4 + 10, 4 + 10); + buf.writeBytes(fragment2); + decoder.decode( + Mockito.mock(ChannelHandlerContext.class), buf, + outputBufs); + // Expect two completed frames each 10 bytes + decoder.isLast(); + assertEquals(2, outputBufs.size()); + outputBufs.forEach(b -> assertEquals(((ByteBuf)b).readableBytes(), 10)); + buf.release(); } @Test - public void testFrames() { + public void testFrames() throws InterruptedException { int serverPort = startRpcServer(true); XDR xdrOut = createGetportMount(); @@ -187,7 +190,7 @@ public void testFrames() { } @Test - public void testUnprivilegedPort() { + public void testUnprivilegedPort() throws InterruptedException { // Don't allow connections from unprivileged ports. Given that this test is // presumably not being run by root, this will be the case. 
int serverPort = startRpcServer(false); @@ -218,23 +221,28 @@ public void testUnprivilegedPort() { assertEquals(requestSize, resultSize); } - private static int startRpcServer(boolean allowInsecurePorts) { + private static int startRpcServer(boolean allowInsecurePorts) + throws InterruptedException { Random rand = new Random(); int serverPort = 30000 + rand.nextInt(10000); int retries = 10; // A few retries in case initial choice is in use. while (true) { + SimpleTcpServer tcpServer = null; try { RpcProgram program = new TestFrameDecoder.TestRpcProgram("TestRpcProgram", "localhost", serverPort, 100000, 1, 2, allowInsecurePorts); - SimpleTcpServer tcpServer = new SimpleTcpServer(serverPort, program, 1); + tcpServer = new SimpleTcpServer(serverPort, program, 1); tcpServer.run(); break; // Successfully bound a port, break out. - } catch (ChannelException ce) { + } catch (InterruptedException | ChannelException e) { + if (tcpServer != null) { + tcpServer.shutdown(); + } if (retries-- > 0) { serverPort += rand.nextInt(20); // Port in use? Try another. } else { - throw ce; // Out of retries. + throw e; // Out of retries. 
} } } diff --git a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/portmap/TestPortmap.java b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/portmap/TestPortmap.java index 6941c4a04e998..e2f7c03676c95 100644 --- a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/portmap/TestPortmap.java +++ b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/portmap/TestPortmap.java @@ -23,8 +23,10 @@ import java.net.DatagramSocket; import java.net.InetSocketAddress; import java.net.Socket; +import java.util.Arrays; import java.util.Map; +import org.apache.hadoop.oncrpc.RpcReply; import org.junit.Assert; import org.apache.hadoop.oncrpc.RpcCall; @@ -36,6 +38,8 @@ import org.junit.BeforeClass; import org.junit.Test; +import static org.junit.Assert.assertEquals; + public class TestPortmap { private static Portmap pm = new Portmap(); private static final int SHORT_TIMEOUT_MILLISECONDS = 10; @@ -43,7 +47,7 @@ public class TestPortmap { private int xid; @BeforeClass - public static void setup() { + public static void setup() throws InterruptedException { pm.start(SHORT_TIMEOUT_MILLISECONDS, new InetSocketAddress("localhost", 0), new InetSocketAddress("localhost", 0)); } @@ -93,6 +97,19 @@ public void testRegistration() throws IOException, InterruptedException { pm.getUdpServerLoAddress()); try { s.send(p); + + // verify that portmap server responds with a UDP packet back to the client + byte[] receiveData = new byte[65535]; + DatagramPacket receivePacket = new DatagramPacket(receiveData, + receiveData.length); + s.setSoTimeout(2000); + s.receive(receivePacket); + + // verify that the registration is accepted.
+ XDR xdr = new XDR(Arrays.copyOfRange(receiveData, 0, + receivePacket.getLength())); + RpcReply reply = RpcReply.read(xdr); + assertEquals(reply.getState(), RpcReply.ReplyState.MSG_ACCEPTED); } finally { s.close(); } diff --git a/hadoop-common-project/hadoop-registry/pom.xml b/hadoop-common-project/hadoop-registry/pom.xml index f3454e57b6d2f..b72273fbba1d7 100644 --- a/hadoop-common-project/hadoop-registry/pom.xml +++ b/hadoop-common-project/hadoop-registry/pom.xml @@ -19,12 +19,12 @@ hadoop-project org.apache.hadoop - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project 4.0.0 hadoop-registry - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Registry @@ -126,8 +126,8 @@ - com.google.guava - guava + org.apache.hadoop.thirdparty + hadoop-shaded-guava @@ -135,6 +135,17 @@ dnsjava + + io.dropwizard.metrics + metrics-core + + + + org.xerial.snappy + snappy-java + provided + + @@ -163,10 +174,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${project.basedir}/dev-support/findbugs-exclude.xml Max @@ -221,7 +231,6 @@ org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} false 900 -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/cli/RegistryCli.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/cli/RegistryCli.java index 480ce0ed5fb20..a1349f3e26f00 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/cli/RegistryCli.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/cli/RegistryCli.java @@ -27,7 +27,7 @@ import java.util.List; import java.util.Map; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; diff --git 
a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/DNSOperationsFactory.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/DNSOperationsFactory.java index 1a8bb3ec0262a..8a26b4b450def 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/DNSOperationsFactory.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/DNSOperationsFactory.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.client.api; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.registry.server.dns.RegistryDNS; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/RegistryOperationsFactory.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/RegistryOperationsFactory.java index 5f9c5f37508c1..786bec040b22d 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/RegistryOperationsFactory.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/api/RegistryOperationsFactory.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.client.api; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.ServiceStateException; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryPathUtils.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryPathUtils.java index b8e9ba1bd7fbe..09df00d083c3e 100644 --- 
a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryPathUtils.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryPathUtils.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.client.binding; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PathNotFoundException; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryTypeUtils.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryTypeUtils.java index 05df3255e3a76..9a4369cdda385 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryTypeUtils.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryTypeUtils.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.client.binding; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.registry.client.exceptions.InvalidRecordException; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryUtils.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryUtils.java index 1b839c253b14d..d862fe649b5ac 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryUtils.java +++ 
b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryUtils.java @@ -18,8 +18,8 @@ package org.apache.hadoop.registry.client.binding; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/FSRegistryOperationsService.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/FSRegistryOperationsService.java index 41884a984844f..6a08dcc074725 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/FSRegistryOperationsService.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/FSRegistryOperationsService.java @@ -47,8 +47,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Filesystem-based implementation of RegistryOperations. 
This class relies diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/CuratorService.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/CuratorService.java index 2eb7aa54f09bc..3457fa28634a6 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/CuratorService.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/CuratorService.java @@ -18,8 +18,11 @@ package org.apache.hadoop.registry.client.impl.zk; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.curator.framework.recipes.cache.CuratorCache; +import org.apache.curator.framework.recipes.cache.CuratorCacheBridge; +import org.apache.curator.framework.recipes.cache.CuratorCacheListener; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.curator.ensemble.EnsembleProvider; import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; import org.apache.curator.framework.CuratorFramework; @@ -28,9 +31,6 @@ import org.apache.curator.framework.api.CreateBuilder; import org.apache.curator.framework.api.DeleteBuilder; import org.apache.curator.framework.api.GetChildrenBuilder; -import org.apache.curator.framework.recipes.cache.TreeCache; -import org.apache.curator.framework.recipes.cache.TreeCacheEvent; -import org.apache.curator.framework.recipes.cache.TreeCacheListener; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -56,6 +56,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.List; /** @@ -109,9 +110,9 @@ public 
class CuratorService extends CompositeService private EnsembleProvider ensembleProvider; /** - * Registry tree cache. + * Registry Curator cache. */ - private TreeCache treeCache; + private CuratorCacheBridge curatorCacheBridge; /** * Construct the service. @@ -189,8 +190,8 @@ protected void serviceStart() throws Exception { protected void serviceStop() throws Exception { IOUtils.closeStream(curator); - if (treeCache != null) { - treeCache.close(); + if (curatorCacheBridge != null) { + curatorCacheBridge.close(); } super.serviceStop(); } @@ -824,73 +825,54 @@ protected String dumpRegistryRobustly(boolean verbose) { * * @param listener the listener. * @return a handle allowing for the management of the listener. - * @throws Exception if registration fails due to error. */ - public ListenerHandle registerPathListener(final PathListener listener) - throws Exception { - - final TreeCacheListener pathChildrenCacheListener = - new TreeCacheListener() { - - public void childEvent(CuratorFramework curatorFramework, - TreeCacheEvent event) - throws Exception { - String path = null; - if (event != null && event.getData() != null) { - path = event.getData().getPath(); - } - assert event != null; - switch (event.getType()) { - case NODE_ADDED: - LOG.info("Informing listener of added node {}", path); - listener.nodeAdded(path); - - break; - - case NODE_REMOVED: - LOG.info("Informing listener of removed node {}", path); - listener.nodeRemoved(path); - - break; - - case NODE_UPDATED: - LOG.info("Informing listener of updated node {}", path); - listener.nodeAdded(path); - - break; - - default: - // do nothing - break; - - } + public ListenerHandle registerPathListener(final PathListener listener) { + + CuratorCacheListener cacheListener = CuratorCacheListener.builder() + .forCreatesAndChanges((oldNode, node) -> { + final String path = node.getPath(); + LOG.info("Informing listener of added/updated node {}", path); + try { + listener.nodeAdded(path); + } catch (IOException e) { + 
LOG.error("Error while processing Curator listener " + + "NODE_CREATED / NODE_CHANGED event"); + throw new UncheckedIOException(e); } - }; - treeCache.getListenable().addListener(pathChildrenCacheListener); - - return new ListenerHandle() { - @Override - public void remove() { - treeCache.getListenable().removeListener(pathChildrenCacheListener); - } - }; + }) + .forDeletes(childData -> { + final String path = childData.getPath(); + LOG.info("Informing listener of removed node {}", path); + try { + listener.nodeRemoved(path); + } catch (IOException e) { + LOG.error("Error while processing Curator listener " + + "NODE_DELETED event"); + throw new UncheckedIOException(e); + } + }) + .build(); + curatorCacheBridge.listenable().addListener(cacheListener); + return () -> curatorCacheBridge.listenable().removeListener(cacheListener); } // TODO: should caches be stopped and then restarted if need be? /** - * Create the tree cache that monitors the registry for node addition, update, - * and deletion. - * - * @throws Exception if any issue arises during monitoring. + * Instantiate the Curator cache that monitors the registry for node + * addition, update and deletion. 
*/ - public void monitorRegistryEntries() - throws Exception { + public void instantiateCacheForRegistry() { String registryPath = getConfig().get(RegistryConstants.KEY_REGISTRY_ZK_ROOT, RegistryConstants.DEFAULT_ZK_REGISTRY_ROOT); - treeCache = new TreeCache(curator, registryPath); - treeCache.start(); + curatorCacheBridge = CuratorCache.bridgeBuilder(curator, registryPath) + .build(); + } + + public void startCache() { + curatorCacheBridge.start(); } + } diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistryOperationsService.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistryOperationsService.java index 4c911da156b8e..e46a016baa07d 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistryOperationsService.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistryOperationsService.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.client.impl.zk; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.registry.client.api.BindFlags; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java index c3cb021fb53f3..cd67620ba5612 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java @@ -18,12 +18,12 @@ package org.apache.hadoop.registry.client.impl.zk; 
-import com.google.common.base.Preconditions; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; +import org.apache.hadoop.security.authentication.util.JaasConfiguration; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.util.KerberosUtil; @@ -46,11 +46,9 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.ListIterator; import java.util.Locale; -import java.util.Map; import java.util.concurrent.CopyOnWriteArrayList; import static org.apache.hadoop.registry.client.impl.zk.ZookeeperConfigOptions.*; @@ -798,65 +796,6 @@ public void setKerberosPrincipalAndKeytab(String principal, String keytab) { this.keytab = keytab; } - /** - * Creates a programmatic version of a jaas.conf file. This can be used - * instead of writing a jaas.conf file and setting the system property, - * "java.security.auth.login.config", to point to that file. It is meant to be - * used for connecting to ZooKeeper. - */ - @InterfaceAudience.Private - public static class JaasConfiguration extends - javax.security.auth.login.Configuration { - - private final javax.security.auth.login.Configuration baseConfig = - javax.security.auth.login.Configuration.getConfiguration(); - private static AppConfigurationEntry[] entry; - private String entryName; - - /** - * Add an entry to the jaas configuration with the passed in name, - * principal, and keytab. 
The other necessary options will be set for you. - * - * @param entryName The name of the entry (e.g. "Client") - * @param principal The principal of the user - * @param keytab The location of the keytab - */ - public JaasConfiguration(String entryName, String principal, String keytab) { - this.entryName = entryName; - Map options = new HashMap(); - options.put("keyTab", keytab); - options.put("principal", principal); - options.put("useKeyTab", "true"); - options.put("storeKey", "true"); - options.put("useTicketCache", "false"); - options.put("refreshKrb5Config", "true"); - String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); - if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) { - options.put("debug", "true"); - } - entry = new AppConfigurationEntry[]{ - new AppConfigurationEntry(getKrb5LoginModuleName(), - AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, - options)}; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String name) { - return (entryName.equals(name)) ? entry : ((baseConfig != null) - ? baseConfig.getAppConfigurationEntry(name) : null); - } - - private String getKrb5LoginModuleName() { - String krb5LoginModuleName; - if (System.getProperty("java.vendor").contains("IBM")) { - krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule"; - } else { - krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule"; - } - return krb5LoginModuleName; - } - } - /** * Set the client properties. This forces the ZK client into * failing if it can't auth. 
diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZKPathDumper.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZKPathDumper.java index 3c4a730608f6e..e045c16e84b0a 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZKPathDumper.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZKPathDumper.java @@ -18,8 +18,8 @@ package org.apache.hadoop.registry.client.impl.zk; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.api.GetChildrenBuilder; import org.apache.zookeeper.data.ACL; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/Endpoint.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/Endpoint.java index 392884faf87f5..b92b93df7be55 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/Endpoint.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/Endpoint.java @@ -20,7 +20,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.registry.client.binding.JsonSerDeser; diff --git 
a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/ServiceRecord.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/ServiceRecord.java index 9bb02c3cc3816..1a85436ed17ef 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/ServiceRecord.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/types/ServiceRecord.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonAnyGetter; import com.fasterxml.jackson.annotation.JsonAnySetter; import com.fasterxml.jackson.annotation.JsonInclude; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java index be63d028f1fd5..eeee581540963 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java @@ -16,7 +16,7 @@ */ package org.apache.hadoop.registry.server.dns; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.IOFileFilter; import org.apache.commons.net.util.Base64; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java 
b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java index 826b02c5e513c..8d0a38cfd47f9 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNSServer.java @@ -16,7 +16,7 @@ */ package org.apache.hadoop.registry.server.dns; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.PathNotFoundException; import org.apache.hadoop.registry.client.api.DNSOperationsFactory; @@ -106,7 +106,7 @@ protected void serviceStart() throws Exception { private void manageRegistryDNS() { try { - registryOperations.monitorRegistryEntries(); + registryOperations.instantiateCacheForRegistry(); registryOperations.registerPathListener(new PathListener() { private String registryRoot = getConfig(). 
get(RegistryConstants.KEY_REGISTRY_ZK_ROOT, @@ -157,6 +157,7 @@ public void nodeRemoved(String path) throws IOException { } }); + registryOperations.startCache(); // create listener for record deletions diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/ReverseZoneUtils.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/ReverseZoneUtils.java index 796f46bae108e..bb375831d6eae 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/ReverseZoneUtils.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/ReverseZoneUtils.java @@ -16,7 +16,7 @@ */ package org.apache.hadoop.registry.server.dns; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.net.Inet6Address; import java.net.InetAddress; import java.net.UnknownHostException; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/integration/SelectByYarnPersistence.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/integration/SelectByYarnPersistence.java index 6a1993eafd9c1..8d395e4c5c763 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/integration/SelectByYarnPersistence.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/integration/SelectByYarnPersistence.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.server.integration; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.registry.client.types.RegistryPathStatus; import org.apache.hadoop.registry.client.types.ServiceRecord; diff --git 
a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java index a7e2611b3df9d..994a2565c309a 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.server.services; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; import org.apache.hadoop.classification.InterfaceStability; @@ -229,7 +229,7 @@ protected void serviceStart() throws Exception { setupSecurity(); FileTxnSnapLog ftxn = new FileTxnSnapLog(dataDir, dataDir); - ZooKeeperServer zkServer = new ZooKeeperServer(ftxn, tickTime); + ZooKeeperServer zkServer = new ZooKeeperServer(ftxn, tickTime, ""); LOG.info("Starting Local Zookeeper service"); factory = ServerCnxnFactory.createFactory(); diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/RegistryAdminService.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/RegistryAdminService.java index d60797e71ea02..3234088e01d84 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/RegistryAdminService.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/RegistryAdminService.java @@ -19,7 +19,7 @@ package org.apache.hadoop.registry.server.services; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.curator.framework.api.BackgroundCallback; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/AbstractSecureRegistryTest.java b/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/AbstractSecureRegistryTest.java index 75b6fb287d971..a510f84bd9458 100644 --- a/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/AbstractSecureRegistryTest.java +++ b/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/AbstractSecureRegistryTest.java @@ -49,7 +49,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.security.Principal; import java.util.HashSet; import java.util.Properties; @@ -220,7 +220,7 @@ public static void setupKDCAndPrincipals() throws Exception { BOB_LOCALHOST, keytab_bob)); jaasFile = new File(kdcWorkDir, "jaas.txt"); - FileUtils.write(jaasFile, jaas.toString(), Charset.defaultCharset()); + FileUtils.write(jaasFile, jaas.toString(), StandardCharsets.UTF_8); LOG.info("\n"+ jaas); RegistrySecurity.bindJVMtoJAASFile(jaasFile); } diff --git a/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/TestSecureLogins.java b/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/TestSecureLogins.java index 1cdc47d562d55..52d677e00a56c 100644 --- a/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/TestSecureLogins.java +++ b/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/secure/TestSecureLogins.java @@ -20,7 +20,7 @@ import java.io.File; import java.lang.reflect.Constructor; import 
java.lang.reflect.Method; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.security.Principal; import java.security.PrivilegedExceptionAction; import java.util.HashMap; @@ -93,8 +93,7 @@ public void testClientLogin() throws Throwable { logLoginDetails(ALICE_LOCALHOST, client); String confFilename = System.getProperty(Environment.JAAS_CONF_KEY); assertNotNull("Unset: "+ Environment.JAAS_CONF_KEY, confFilename); - String config = FileUtils.readFileToString(new File(confFilename), - Charset.defaultCharset()); + String config = FileUtils.readFileToString(new File(confFilename), StandardCharsets.UTF_8); LOG.info("{}=\n{}", confFilename, config); RegistrySecurity.setZKSaslClientProperties(ALICE, ALICE_CLIENT_CONTEXT); } finally { @@ -133,8 +132,7 @@ public LoginContext createLoginContextZookeeperLocalhost() throws @Test public void testKerberosAuth() throws Throwable { File krb5conf = getKdc().getKrb5conf(); - String krbConfig = FileUtils.readFileToString(krb5conf, - Charset.defaultCharset()); + String krbConfig = FileUtils.readFileToString(krb5conf, StandardCharsets.UTF_8); LOG.info("krb5.conf at {}:\n{}", krb5conf, krbConfig); Subject subject = new Subject(); Class kerb5LoginClass = diff --git a/hadoop-common-project/pom.xml b/hadoop-common-project/pom.xml index 8be2593c21ffd..4dea4fe8073da 100644 --- a/hadoop-common-project/pom.xml +++ b/hadoop-common-project/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../hadoop-project hadoop-common-project - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Common Project Apache Hadoop Common Project pom @@ -56,5 +56,4 @@ - diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index 07aa7b10a8320..2e1fa6d80d5ef 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../hadoop-project hadoop-dist - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Distribution Apache Hadoop Distribution jar @@ 
-169,7 +169,6 @@ ${shell-executable} ${project.build.directory} - false ${basedir}/../dev-support/bin/dist-layout-stitching @@ -188,7 +187,6 @@ ${shell-executable} ${basedir} - false ${basedir}/../dev-support/bin/dist-tools-hooks-maker @@ -209,7 +207,6 @@ ${shell-executable} ${project.build.directory} - false ${basedir}/../dev-support/bin/dist-tar-stitching diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml index 278d01dc22d0f..1cefa55baa1ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml @@ -93,4 +93,17 @@ + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml index 20523cfbfa0cf..939da7643ade4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project-dist hadoop-hdfs-client - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop HDFS Client Apache Hadoop HDFS Client jar @@ -35,9 +35,17 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> - com.squareup.okhttp + com.squareup.okhttp3 okhttp + + org.jetbrains.kotlin + kotlin-stdlib + + + org.jetbrains.kotlin + kotlin-stdlib-common + org.apache.hadoop hadoop-common @@ -48,8 +56,8 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> commons-logging - log4j - log4j + ch.qos.reload4j + reload4j @@ -113,6 +121,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.fasterxml.jackson.core jackson-databind + + org.bouncycastle + bcprov-jdk15on + test + diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java 
index 290f2c0e6766f..b014222fea5fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java @@ -135,6 +135,14 @@ public FileChecksum getFileChecksum(Path f) return dfs.getFileChecksumWithCombineMode(getUriPath(f), Long.MAX_VALUE); } + /** + * {@inheritDoc} + * + * If the given path is a symlink, the path will be resolved to a target path + * and it will get the resolved path's FileStatus object. It will not be + * represented as a symlink and isDirectory API returns true if the resolved + * path is a directory, false otherwise. + */ @Override public FileStatus getFileStatus(Path f) throws IOException, UnresolvedLinkException { @@ -145,7 +153,19 @@ public FileStatus getFileStatus(Path f) throw new FileNotFoundException("File does not exist: " + f.toString()); } } - + + /** + * Synchronize client metadata state with Active NameNode. + *

      + * In HA the client synchronizes its state with the Active NameNode + * in order to guarantee subsequent read consistency from Observer Nodes. + * @throws IOException + */ + @Override + public void msync() throws IOException { + dfs.msync(); + } + @Override public FileStatus getFileLinkStatus(Path f) throws IOException, UnresolvedLinkException { @@ -269,6 +289,20 @@ public HdfsFileStatus getNext() throws IOException { } } + /** + * {@inheritDoc} + * + * If any of the the immediate children of the given path f is a symlink, the + * returned FileStatus object of that children would be represented as a + * symlink. It will not be resolved to the target path and will not get the + * target path FileStatus object. The target path will be available via + * getSymlink on that children's FileStatus object. Since it represents as + * symlink, isDirectory on that children's FileStatus will return false. + * + * If you want to get the FileStatus of target path for that children, you may + * want to use GetFileStatus API with that children's symlink path. 
Please see + * {@link Hdfs#getFileStatus(Path f)} + */ @Override public FileStatus[] listStatus(Path f) throws IOException, UnresolvedLinkException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientContext.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientContext.java index cbd941b6b9d90..f83346ecef7dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientContext.java @@ -40,10 +40,10 @@ import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.net.ScriptBasedMapping; -import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,6 +69,11 @@ public class ClientContext { */ private final String name; + /** + * The client conf used to initialize context. + */ + private final DfsClientConf dfsClientConf; + /** * String representation of the configuration. */ @@ -77,7 +82,7 @@ public class ClientContext { /** * Caches short-circuit file descriptors, mmap regions. */ - private final ShortCircuitCache shortCircuitCache; + private final ShortCircuitCache[] shortCircuitCache; /** * Caches TCP and UNIX domain sockets for reuse. @@ -119,8 +124,6 @@ public class ClientContext { private NodeBase clientNode; private boolean topologyResolutionEnabled; - private Daemon deadNodeDetectorThr = null; - /** * The switch to DeadNodeDetector. */ @@ -130,15 +133,44 @@ public class ClientContext { * Detect the dead datanodes in advance, and share this information among all * the DFSInputStreams in the same client. 
*/ - private DeadNodeDetector deadNodeDetector = null; + private volatile DeadNodeDetector deadNodeDetector = null; + + /** + * The switch for the {@link LocatedBlocksRefresher}. + */ + private final boolean locatedBlocksRefresherEnabled; + + /** + * Periodically refresh the {@link org.apache.hadoop.hdfs.protocol.LocatedBlocks} backing + * registered {@link DFSInputStream}s, to take advantage of changes in block placement. + */ + private volatile LocatedBlocksRefresher locatedBlocksRefresher = null; + + /** + * Count the reference of ClientContext. + */ + private int counter = 0; + + /** + * ShortCircuitCache array size. + */ + private final int clientShortCircuitNum; + private Configuration configuration; private ClientContext(String name, DfsClientConf conf, Configuration config) { final ShortCircuitConf scConf = conf.getShortCircuitConf(); this.name = name; + this.dfsClientConf = conf; this.confString = scConf.confAsString(); - this.shortCircuitCache = ShortCircuitCache.fromConf(scConf); + this.clientShortCircuitNum = conf.getClientShortCircuitNum(); + this.shortCircuitCache = new ShortCircuitCache[this.clientShortCircuitNum]; + for (int i = 0; i < this.clientShortCircuitNum; i++) { + this.shortCircuitCache[i] = ShortCircuitCache.fromConf(scConf); + } + + this.configuration = config; this.peerCache = new PeerCache(scConf.getSocketCacheCapacity(), scConf.getSocketCacheExpiry()); this.keyProviderCache = new KeyProviderCache( @@ -149,11 +181,7 @@ private ClientContext(String name, DfsClientConf conf, this.byteArrayManager = ByteArrayManager.newInstance( conf.getWriteByteArrayManagerConf()); this.deadNodeDetectionEnabled = conf.isDeadNodeDetectionEnabled(); - if (deadNodeDetectionEnabled && deadNodeDetector == null) { - deadNodeDetector = new DeadNodeDetector(name, config); - deadNodeDetectorThr = new Daemon(deadNodeDetector); - deadNodeDetectorThr.start(); - } + this.locatedBlocksRefresherEnabled = conf.isLocatedBlocksRefresherEnabled(); 
initTopologyResolution(config); } @@ -191,6 +219,7 @@ public static ClientContext get(String name, DfsClientConf conf, context.printConfWarningIfNeeded(conf); } } + context.reference(); return context; } @@ -228,7 +257,11 @@ public String getConfString() { } public ShortCircuitCache getShortCircuitCache() { - return shortCircuitCache; + return shortCircuitCache[0]; + } + + public ShortCircuitCache getShortCircuitCache(long idx) { + return shortCircuitCache[(int) (idx % clientShortCircuitNum)]; } public PeerCache getPeerCache() { @@ -287,17 +320,51 @@ public DeadNodeDetector getDeadNodeDetector() { } /** - * Close dead node detector thread. + * If true, LocatedBlocksRefresher will be periodically refreshing LocatedBlocks + * of registered DFSInputStreams. */ - public void stopDeadNodeDetectorThread() { - if (deadNodeDetectorThr != null) { - deadNodeDetectorThr.interrupt(); - try { - deadNodeDetectorThr.join(); - } catch (InterruptedException e) { - LOG.warn("Encountered exception while waiting to join on dead " + - "node detector thread.", e); - } + public boolean isLocatedBlocksRefresherEnabled() { + return locatedBlocksRefresherEnabled; + } + + /** + * Obtain LocatedBlocksRefresher of the current client. + */ + public LocatedBlocksRefresher getLocatedBlocksRefresher() { + return locatedBlocksRefresher; + } + + /** + * Increment the counter. Start the dead node detector thread if there is no + * reference. + */ + synchronized void reference() { + counter++; + if (deadNodeDetectionEnabled && deadNodeDetector == null) { + deadNodeDetector = new DeadNodeDetector(name, configuration); + deadNodeDetector.start(); + } + if (locatedBlocksRefresherEnabled && locatedBlocksRefresher == null) { + locatedBlocksRefresher = new LocatedBlocksRefresher(name, configuration, dfsClientConf); + locatedBlocksRefresher.start(); + } + } + + /** + * Decrement the counter. Close the dead node detector thread if there is no + * reference. 
+ */ + synchronized void unreference() { + Preconditions.checkState(counter > 0); + counter--; + if (counter == 0 && deadNodeDetectionEnabled && deadNodeDetector != null) { + deadNodeDetector.shutdown(); + deadNodeDetector = null; + } + + if (counter == 0 && locatedBlocksRefresherEnabled && locatedBlocksRefresher != null) { + locatedBlocksRefresher.shutdown(); + locatedBlocksRefresher = null; } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientGSIContext.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientGSIContext.java index 9b324bd1b07ab..4de969642d574 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientGSIContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ClientGSIContext.java @@ -29,7 +29,7 @@ /** * Global State Id context for the client. - *

      + *

      * This is the client side implementation responsible for receiving * state alignment info from server(s). */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 72b2113943756..ffd7256bb59db 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -187,16 +187,16 @@ import org.apache.hadoop.util.DataChecksum.Type; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.net.InetAddresses; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; /******************************************************** * DFSClient can connect to a Hadoop Filesystem and @@ -509,7 +509,15 @@ private void beginFileLease(final long inodeId, final DFSOutputStream out) throws IOException { synchronized (filesBeingWritten) { putFileBeingWritten(inodeId, out); - getLeaseRenewer().put(this); + LeaseRenewer renewer = getLeaseRenewer(); + boolean result = renewer.put(this); + if (!result) { + // Existing LeaseRenewer cannot add another 
Daemon, so remove existing + // and add new one. + LeaseRenewer.remove(renewer); + renewer = getLeaseRenewer(); + renewer.put(this); + } } } @@ -652,7 +660,7 @@ public synchronized void close() throws IOException { clientRunning = false; // close dead node detector thread if (!disabledStopDeadNodeDetectorThreadForTest) { - clientContext.stopDeadNodeDetectorThread(); + clientContext.unreference(); } // close connections to the namenode @@ -862,7 +870,7 @@ public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { } public long getRefreshReadBlkLocationsInterval() { - return dfsClientConf.getRefreshReadBlockLocationsMS(); + return dfsClientConf.getLocatedBlocksRefresherInterval(); } public LocatedBlocks getLocatedBlocks(String src, long start) @@ -1245,7 +1253,7 @@ public DFSOutputStream create(String src, FsPermission permission, /** * Same as {@link #create(String, FsPermission, EnumSet, boolean, short, long, - * addition of Progressable, int, ChecksumOpt, InetSocketAddress[], String)} + * Progressable, int, ChecksumOpt, InetSocketAddress[], String)} * with the storagePolicy that is used to specify a specific storage policy * instead of inheriting any policy from this new file's parent directory. * This policy will be persisted in HDFS. A value of null means inheriting @@ -2004,8 +2012,17 @@ private long getStateByIndex(int stateIndex) throws IOException { * @see ClientProtocol#getStats() */ public FsStatus getDiskStatus() throws IOException { - return new FsStatus(getStateByIndex(0), - getStateByIndex(1), getStateByIndex(2)); + try (TraceScope ignored = tracer.newScope("getStats")) { + long[] states = namenode.getStats(); + return new FsStatus(getStateAtIndex(states, 0), + getStateAtIndex(states, 1), getStateAtIndex(states, 2)); + } catch (RemoteException re) { + throw re.unwrapRemoteException(); + } + } + + private long getStateAtIndex(long[] states, int index) { + return states.length > index ? 
states[index] : -1; } /** @@ -3378,4 +3395,51 @@ public void removeNodeFromDeadNodeDetector(DFSInputStream dfsInputStream, private boolean isDeadNodeDetectionEnabled() { return clientContext.isDeadNodeDetectionEnabled(); } + + /** + * Obtain DeadNodeDetector of the current client. + */ + public DeadNodeDetector getDeadNodeDetector() { + return clientContext.getDeadNodeDetector(); + } + + /** + * Obtain LocatedBlocksRefresher of the current client. + */ + public LocatedBlocksRefresher getLocatedBlockRefresher() { + return clientContext.getLocatedBlocksRefresher(); + } + + /** + * Adds the {@link DFSInputStream} to the {@link LocatedBlocksRefresher}, so that + * the underlying {@link LocatedBlocks} is periodically refreshed. + */ + public void addLocatedBlocksRefresh(DFSInputStream dfsInputStream) { + if (isLocatedBlocksRefresherEnabled()) { + clientContext.getLocatedBlocksRefresher().addInputStream(dfsInputStream); + } + } + + /** + * Removes the {@link DFSInputStream} from the {@link LocatedBlocksRefresher}, so that + * the underlying {@link LocatedBlocks} is no longer periodically refreshed. 
+ * @param dfsInputStream + */ + public void removeLocatedBlocksRefresh(DFSInputStream dfsInputStream) { + if (isLocatedBlocksRefresherEnabled()) { + clientContext.getLocatedBlocksRefresher().removeInputStream(dfsInputStream); + } + } + + private boolean isLocatedBlocksRefresherEnabled() { + return clientContext.isLocatedBlocksRefresherEnabled(); + } + + public DatanodeInfo[] slowDatanodeReport() throws IOException { + checkOpen(); + try (TraceScope ignored = tracer.newScope("slowDatanodeReport")) { + return namenode.getSlowDatanodeReport(); + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java index d36c0581c2153..e433b9cf5e761 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java @@ -19,9 +19,10 @@ import java.util.concurrent.atomic.AtomicLong; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; /** * Used for injecting faults in DFSClient and DFSOutputStream tests. 
@@ -65,4 +66,7 @@ public boolean skipRollingRestartWait() { public void sleepBeforeHedgedGet() {} public void delayWhenRenewLeaseTimeout() {} + + public void onCreateBlockReader(LocatedBlock block, int chunkIndex, long offset, long length) {} + } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java index 2a228e8d01886..1cd9e82cebb08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java @@ -19,7 +19,7 @@ import org.apache.hadoop.classification.InterfaceAudience; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; /** * The client-side metrics for hedged read feature. @@ -28,20 +28,20 @@ */ @InterfaceAudience.Private public class DFSHedgedReadMetrics { - public final AtomicLong hedgedReadOps = new AtomicLong(); - public final AtomicLong hedgedReadOpsWin = new AtomicLong(); - public final AtomicLong hedgedReadOpsInCurThread = new AtomicLong(); + public final LongAdder hedgedReadOps = new LongAdder(); + public final LongAdder hedgedReadOpsWin = new LongAdder(); + public final LongAdder hedgedReadOpsInCurThread = new LongAdder(); public void incHedgedReadOps() { - hedgedReadOps.incrementAndGet(); + hedgedReadOps.increment(); } public void incHedgedReadOpsInCurThread() { - hedgedReadOpsInCurThread.incrementAndGet(); + hedgedReadOpsInCurThread.increment(); } public void incHedgedReadWins() { - hedgedReadOpsWin.incrementAndGet(); + hedgedReadOpsWin.increment(); } public long getHedgedReadOps() { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java index a921a190e4f94..c28216bd0fbbf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java @@ -26,8 +26,8 @@ import org.apache.hadoop.hdfs.inotify.MissingEventsException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index af9891a52fd80..8a0bc95d13f1e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -28,6 +28,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.EnumSet; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -66,6 +67,7 @@ import org.apache.hadoop.hdfs.protocol.BlockType; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -88,7 +90,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; 
+import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import javax.annotation.Nonnull; @@ -128,18 +130,18 @@ public class DFSInputStream extends FSInputStream private long lastBlockBeingWrittenLength = 0; private FileEncryptionInfo fileEncryptionInfo = null; protected CachingStrategy cachingStrategy; + // this is volatile because it will be polled outside the lock, + // but still only updated within the lock + private volatile long lastRefreshedBlocksAt = Time.monotonicNow(); //// + private AtomicBoolean refreshingBlockLocations = new AtomicBoolean(false); protected final ReadStatistics readStatistics = new ReadStatistics(); // lock for state shared between read and pread // Note: Never acquire a lock on with this lock held to avoid deadlocks // (it's OK to acquire this lock when the lock on is held) protected final Object infoLock = new Object(); - // refresh locatedBlocks periodically - private final long refreshReadBlockIntervals; - /** timeStamp of the last time a block location was refreshed. */ - private long locatedBlocksTimeStamp; /** * Track the ByteBuffers that we have handed out to readers. * @@ -156,10 +158,6 @@ public class DFSInputStream extends FSInputStream return extendedReadBuffers; } - private boolean isPeriodicRefreshEnabled() { - return (refreshReadBlockIntervals > 0L); - } - /** * This variable tracks the number of failures since the start of the * most recent user-facing operation. 
That is to say, it should be reset @@ -181,10 +179,13 @@ private boolean isPeriodicRefreshEnabled() { private byte[] oneByteBuf; // used for 'int read()' protected void addToLocalDeadNodes(DatanodeInfo dnInfo) { + DFSClient.LOG.debug("Add {} to local dead nodes, previously was {}.", + dnInfo, deadNodes); deadNodes.put(dnInfo, dnInfo); } protected void removeFromLocalDeadNodes(DatanodeInfo dnInfo) { + DFSClient.LOG.debug("Remove {} from local dead nodes.", dnInfo); deadNodes.remove(dnInfo); } @@ -203,9 +204,6 @@ protected DFSClient getDFSClient() { DFSInputStream(DFSClient dfsClient, String src, boolean verifyChecksum, LocatedBlocks locatedBlocks) throws IOException { this.dfsClient = dfsClient; - this.refreshReadBlockIntervals = - this.dfsClient.getRefreshReadBlkLocationsInterval(); - setLocatedBlocksTimeStamp(); this.verifyChecksum = verifyChecksum; this.src = src; synchronized (infoLock) { @@ -225,53 +223,57 @@ boolean deadNodesContain(DatanodeInfo nodeInfo) { return deadNodes.containsKey(nodeInfo); } - @VisibleForTesting - void setReadTimeStampsForTesting(long timeStamp) { - setLocatedBlocksTimeStamp(timeStamp); - } - - private void setLocatedBlocksTimeStamp() { - setLocatedBlocksTimeStamp(Time.monotonicNow()); - } - - private void setLocatedBlocksTimeStamp(long timeStamp) { - this.locatedBlocksTimeStamp = timeStamp; - } - /** - * Grab the open-file info from namenode + * Grab the open-file info from namenode. 
* @param refreshLocatedBlocks whether to re-fetch locatedblocks */ void openInfo(boolean refreshLocatedBlocks) throws IOException { final DfsClientConf conf = dfsClient.getConf(); synchronized(infoLock) { - lastBlockBeingWrittenLength = - fetchLocatedBlocksAndGetLastBlockLength(refreshLocatedBlocks); int retriesForLastBlockLength = conf.getRetryTimesForGetLastBlockLength(); - while (retriesForLastBlockLength > 0) { + + while (true) { + LocatedBlocks newLocatedBlocks; + if (locatedBlocks == null || refreshLocatedBlocks) { + newLocatedBlocks = fetchAndCheckLocatedBlocks(locatedBlocks); + } else { + newLocatedBlocks = locatedBlocks; + } + + long lastBlockLength = getLastBlockLength(newLocatedBlocks); + if (lastBlockLength != -1) { + setLocatedBlocksFields(newLocatedBlocks, lastBlockLength); + return; + } + // Getting last block length as -1 is a special case. When cluster // restarts, DNs may not report immediately. At this time partial block // locations will not be available with NN for getting the length. Lets // retry for 3 times to get the length. - if (lastBlockBeingWrittenLength == -1) { - DFSClient.LOG.warn("Last block locations not available. " - + "Datanodes might not have reported blocks completely." - + " Will retry for " + retriesForLastBlockLength + " times"); - waitFor(conf.getRetryIntervalForGetLastBlockLength()); - lastBlockBeingWrittenLength = - fetchLocatedBlocksAndGetLastBlockLength(true); - } else { - break; + + if (retriesForLastBlockLength-- <= 0) { + throw new IOException("Could not obtain the last block locations."); } - retriesForLastBlockLength--; - } - if (lastBlockBeingWrittenLength == -1 - && retriesForLastBlockLength == 0) { - throw new IOException("Could not obtain the last block locations."); + + DFSClient.LOG.warn("Last block locations not available. " + + "Datanodes might not have reported blocks completely." 
+ + " Will retry for " + retriesForLastBlockLength + " times"); + waitFor(conf.getRetryIntervalForGetLastBlockLength()); } } } + /** + * Set locatedBlocks and related fields, using the passed lastBlockLength. + * Should be called within infoLock. + */ + private void setLocatedBlocksFields(LocatedBlocks locatedBlocksToSet, long lastBlockLength) { + locatedBlocks = locatedBlocksToSet; + lastBlockBeingWrittenLength = lastBlockLength; + fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo(); + setLastRefreshedBlocksAt(); + } + private void waitFor(int waitTime) throws IOException { try { Thread.sleep(waitTime); @@ -282,62 +284,18 @@ private void waitFor(int waitTime) throws IOException { } } - /** - * Checks whether the block locations timestamps have expired. - * In the case of expired timestamp: - * - clear list of deadNodes - * - call openInfo(true) which will re-fetch locatedblocks - * - update locatedBlocksTimeStamp - * @return true when the expiration feature is enabled and locatedblocks - * timestamp has expired. - * @throws IOException - */ - private boolean isLocatedBlocksExpired() { - if (!isPeriodicRefreshEnabled()) { - return false; - } - long now = Time.monotonicNow(); - long elapsed = now - locatedBlocksTimeStamp; - if (elapsed < refreshReadBlockIntervals) { - return false; - } - return true; - } - - /** - * Update the block locations timestamps if they have expired. - * In the case of expired timestamp: - * - clear list of deadNodes - * - call openInfo(true) which will re-fetch locatedblocks - * - update locatedBlocksTimeStamp - * @return true when the locatedblocks list is re-fetched from the namenode. 
- * @throws IOException - */ - private boolean updateBlockLocationsStamp() throws IOException { - if (!isLocatedBlocksExpired()) { - return false; - } - // clear dead nodes - deadNodes.clear(); - openInfo(true); - setLocatedBlocksTimeStamp(); - return true; - } - - private long fetchLocatedBlocksAndGetLastBlockLength(boolean refresh) + private LocatedBlocks fetchAndCheckLocatedBlocks(LocatedBlocks existing) throws IOException { - LocatedBlocks newInfo = locatedBlocks; - if (locatedBlocks == null || refresh) { - newInfo = dfsClient.getLocatedBlocks(src, 0); - } + LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0); + DFSClient.LOG.debug("newInfo = {}", newInfo); if (newInfo == null) { throw new IOException("Cannot open filename " + src); } - if (locatedBlocks != null) { + if (existing != null) { Iterator oldIter = - locatedBlocks.getLocatedBlocks().iterator(); + existing.getLocatedBlocks().iterator(); Iterator newIter = newInfo.getLocatedBlocks().iterator(); while (oldIter.hasNext() && newIter.hasNext()) { if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) { @@ -345,17 +303,14 @@ private long fetchLocatedBlocksAndGetLastBlockLength(boolean refresh) } } } - locatedBlocks = newInfo; - long lastBlkBeingWrittenLength = getLastBlockLength(); - fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo(); - return lastBlkBeingWrittenLength; + return newInfo; } - private long getLastBlockLength() throws IOException{ + private long getLastBlockLength(LocatedBlocks blocks) throws IOException{ long lastBlockBeingWrittenLength = 0; - if (!locatedBlocks.isLastBlockComplete()) { - final LocatedBlock last = locatedBlocks.getLastLocatedBlock(); + if (!blocks.isLastBlockComplete()) { + final LocatedBlock last = blocks.getLastLocatedBlock(); if (last != null) { if (last.getLocations().length == 0) { if (last.getBlockSize() == 0) { @@ -498,6 +453,14 @@ public List getAllBlocks() throws IOException { return getBlockRange(0, getFileLength()); } + protected String 
getSrc() { + return src; + } + + protected LocatedBlocks getLocatedBlocks() { + return locatedBlocks; + } + /** * Get block at the specified position. * Fetch it from the namenode if not cached. @@ -540,8 +503,8 @@ protected LocatedBlock fetchBlockAt(long offset) throws IOException { /** Fetch a block from namenode and cache it */ private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache) throws IOException { + maybeRegisterBlockRefresh(); synchronized(infoLock) { - updateBlockLocationsStamp(); int targetBlockIdx = locatedBlocks.findBlock(offset); if (targetBlockIdx < 0) { // block is not cached targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx); @@ -556,8 +519,7 @@ private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache) } // Update the LastLocatedBlock, if offset is for last block. if (offset >= locatedBlocks.getFileLength()) { - locatedBlocks = newBlocks; - lastBlockBeingWrittenLength = getLastBlockLength(); + setLocatedBlocksFields(newBlocks, getLastBlockLength(newBlocks)); } else { locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks()); @@ -584,6 +546,7 @@ private List getBlockRange(long offset, throw new IOException("Offset: " + offset + " exceeds file length: " + getFileLength()); } + synchronized(infoLock) { final List blocks; final long lengthOfCompleteBlk = locatedBlocks.getFileLength(); @@ -641,6 +604,9 @@ private synchronized DatanodeInfo blockSeekTo(long target) if (target >= getFileLength()) { throw new IOException("Attempted to read past end of file"); } + + maybeRegisterBlockRefresh(); + // Will be getting a new BlockReader. closeCurrentBlockReaders(); @@ -654,9 +620,6 @@ private synchronized DatanodeInfo blockSeekTo(long target) boolean connectFailedOnce = false; while (true) { - // Re-fetch the locatedBlocks from NN if the timestamp has expired. 
- updateBlockLocationsStamp(); - // // Compute desired block // @@ -790,6 +753,7 @@ public void accept(ByteBuffer k, Object v) { * this dfsInputStream anymore. */ dfsClient.removeNodeFromDeadNodeDetector(this, locatedBlocks); + maybeDeRegisterBlockRefresh(); } } @@ -868,16 +832,16 @@ protected synchronized int readWithStrategy(ReaderStrategy strategy) int len = strategy.getTargetLength(); CorruptedBlocks corruptedBlocks = new CorruptedBlocks(); failures = 0; + + maybeRegisterBlockRefresh(); + if (pos < getFileLength()) { int retries = 2; while (retries > 0) { try { // currentNode can be left as null if previous read had a checksum // error on the same block. See HDFS-3067 - // currentNode needs to be updated if the blockLocations timestamp has - // expired. - if (pos > blockEnd || currentNode == null - || updateBlockLocationsStamp()) { + if (pos > blockEnd || currentNode == null) { currentNode = blockSeekTo(pos); } int realLen = (int) Math.min(len, (blockEnd - pos + 1L)); @@ -976,7 +940,8 @@ private DNAddrPair chooseDataNode(LocatedBlock block, * @return Returns chosen DNAddrPair; Can be null if refetchIfRequired is * false. */ - private DNAddrPair chooseDataNode(LocatedBlock block, + @VisibleForTesting + DNAddrPair chooseDataNode(LocatedBlock block, Collection ignoredNodes, boolean refetchIfRequired) throws IOException { while (true) { @@ -991,6 +956,14 @@ private DNAddrPair chooseDataNode(LocatedBlock block, } } + /** + * RefetchLocations should only be called when there are no active requests + * to datanodes. In the hedged read case this means futures should be empty. + * @param block The locatedBlock to get new datanode locations for. + * @param ignoredNodes A list of ignored nodes. This list can be null and can be cleared. + * @return the locatedBlock with updated datanode locations. 
+ * @throws IOException + */ private LocatedBlock refetchLocations(LocatedBlock block, Collection ignoredNodes) throws IOException { String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), @@ -1000,7 +973,7 @@ private LocatedBlock refetchLocations(LocatedBlock block, String description = "Could not obtain block: " + blockInfo; DFSClient.LOG.warn(description + errMsg + ". Throwing a BlockMissingException"); - throw new BlockMissingException(src, description, + throw new BlockMissingException(src, description + errMsg, block.getStartOffset()); } @@ -1035,13 +1008,24 @@ private LocatedBlock refetchLocations(LocatedBlock block, throw new InterruptedIOException( "Interrupted while choosing DataNode for read."); } - clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] + clearCachedNodeState(ignoredNodes); openInfo(true); block = refreshLocatedBlock(block); failures++; return block; } + /** + * Clear both the dead nodes and the ignored nodes + * @param ignoredNodes is cleared + */ + private void clearCachedNodeState(Collection ignoredNodes) { + clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] + if (ignoredNodes != null) { + ignoredNodes.clear(); + } + } + /** * Get the best node from which to stream the data. * @param block LocatedBlock, containing nodes in priority order. 
@@ -1054,10 +1038,21 @@ protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block, StorageType[] storageTypes = block.getStorageTypes(); DatanodeInfo chosenNode = null; StorageType storageType = null; - if (nodes != null) { + if (dfsClient.getConf().isReadUseCachePriority()) { + DatanodeInfo[] cachedLocs = block.getCachedLocations(); + if (cachedLocs != null) { + for (int i = 0; i < cachedLocs.length; i++) { + if (isValidNode(cachedLocs[i], ignoredNodes)) { + chosenNode = cachedLocs[i]; + break; + } + } + } + } + + if (chosenNode == null && nodes != null) { for (int i = 0; i < nodes.length; i++) { - if (!dfsClient.getDeadNodes(this).containsKey(nodes[i]) - && (ignoredNodes == null || !ignoredNodes.contains(nodes[i]))) { + if (isValidNode(nodes[i], ignoredNodes)) { chosenNode = nodes[i]; // Storage types are ordered to correspond with nodes, so use the same // index to get storage type. @@ -1075,7 +1070,9 @@ protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block, final String dnAddr = chosenNode.getXferAddr(dfsClient.getConf().isConnectToDnViaHostname()); DFSClient.LOG.debug("Connecting to datanode {}", dnAddr); - InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr); + boolean uriCacheEnabled = dfsClient.getConf().isUriCacheEnabled(); + InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr, + -1, null, uriCacheEnabled); return new DNAddrPair(chosenNode, targetAddr, storageType, block); } @@ -1090,6 +1087,15 @@ protected void reportLostBlock(LocatedBlock lostBlock, ", ignoredNodes = " + ignoredNodes); } + private boolean isValidNode(DatanodeInfo node, + Collection ignoredNodes) { + if (!dfsClient.getDeadNodes(this).containsKey(node) + && (ignoredNodes == null || !ignoredNodes.contains(node))) { + return true; + } + return false; + } + private static String getBestNodeDNAddrPairErrorString( DatanodeInfo nodes[], AbstractMap deadNodes, Collection ignoredNodes) { @@ -1351,8 +1357,12 @@ private void hedgedFetchBlockByteRange(LocatedBlock 
block, long start, } catch (InterruptedException ie) { // Ignore and retry } - if (refetch) { - refetchLocations(block, ignored); + // If refetch is true, then all nodes are in deadNodes or ignoredNodes. + // We should loop through all futures and remove them, so we do not + // have concurrent requests to the same node. + // Once all futures are cleared, we can clear the ignoredNodes and retry. + if (refetch && futures.isEmpty()) { + block = refetchLocations(block, ignored); } // We got here if exception. Ignore this node on next go around IFF // we found a chosenNode to hedge read against. @@ -1933,4 +1943,153 @@ public boolean hasCapability(String capability) { return false; } } + + /** + * Many DFSInputStreams can be opened and closed in quick succession, in which case + * they would be registered/deregistered but never need to be refreshed. + * Defers registering with the located block refresher, in order to avoid an additional + * source of unnecessary synchronization for short-lived DFSInputStreams. + */ + protected void maybeRegisterBlockRefresh() { + if (!dfsClient.getConf().isRefreshReadBlockLocationsAutomatically() + || !dfsClient.getConf().isLocatedBlocksRefresherEnabled()) { + return; + } + + if (refreshingBlockLocations.get()) { + return; + } + + // not enough time elapsed to refresh + long timeSinceLastRefresh = Time.monotonicNow() - lastRefreshedBlocksAt; + if (timeSinceLastRefresh < dfsClient.getConf().getLocatedBlocksRefresherInterval()) { + return; + } + + if (!refreshingBlockLocations.getAndSet(true)) { + dfsClient.addLocatedBlocksRefresh(this); + } + } + + /** + * De-register periodic refresh of this inputstream, if it was added to begin with. + */ + private void maybeDeRegisterBlockRefresh() { + if (refreshingBlockLocations.get()) { + dfsClient.removeLocatedBlocksRefresh(this); + } + } + + /** + * Refresh blocks for the input stream, if necessary. 
+ * + * @param addressCache optional map to use as a cache for resolving datanode InetSocketAddress + * @return whether a refresh was performed or not + */ + boolean refreshBlockLocations(Map addressCache) { + LocatedBlocks blocks; + synchronized (infoLock) { + blocks = getLocatedBlocks(); + } + + if (getLocalDeadNodes().isEmpty() && allBlocksLocal(blocks, addressCache)) { + return false; + } + + try { + DFSClient.LOG.debug("Refreshing {} for path {}", this, getSrc()); + LocatedBlocks newLocatedBlocks = fetchAndCheckLocatedBlocks(blocks); + long lastBlockLength = getLastBlockLength(newLocatedBlocks); + if (lastBlockLength == -1) { + DFSClient.LOG.debug( + "Discarding refreshed blocks for path {} because lastBlockLength was -1", + getSrc()); + return true; + } + + setRefreshedValues(newLocatedBlocks, lastBlockLength); + } catch (IOException e) { + DFSClient.LOG.debug("Failed to refresh DFSInputStream for path {}", getSrc(), e); + } + + return true; + } + + /** + * Once new LocatedBlocks have been fetched, sets them on the DFSInputStream and + * updates stateful read location within the necessary locks. 
+ */ + private synchronized void setRefreshedValues(LocatedBlocks blocks, long lastBlockLength) + throws IOException { + synchronized (infoLock) { + setLocatedBlocksFields(blocks, lastBlockLength); + } + + getLocalDeadNodes().clear(); + + // if a stateful read has been initialized, refresh it + if (currentNode != null) { + currentNode = blockSeekTo(pos); + } + } + + private boolean allBlocksLocal(LocatedBlocks blocks, + Map addressCache) { + if (addressCache == null) { + addressCache = new HashMap<>(); + } + + // we only need to check the first location of each block, because the blocks are already + // sorted by distance from the current host + for (LocatedBlock lb : blocks.getLocatedBlocks()) { + if (lb.getLocations().length == 0) { + return false; + } + + DatanodeInfoWithStorage location = lb.getLocations()[0]; + if (location == null) { + return false; + } + + InetSocketAddress targetAddr = addressCache.computeIfAbsent( + location.getDatanodeUuid(), + unused -> { + String dnAddr = location.getXferAddr(dfsClient.getConf().isConnectToDnViaHostname()); + return NetUtils.createSocketAddr( + dnAddr, + -1, + null, + dfsClient.getConf().isUriCacheEnabled()); + }); + + if (!isResolveableAndLocal(targetAddr)) { + return false; + } + } + + return true; + } + + private boolean isResolveableAndLocal(InetSocketAddress targetAddr) { + try { + return DFSUtilClient.isLocalAddress(targetAddr); + } catch (IOException e) { + DFSClient.LOG.debug("Got an error checking if {} is local", targetAddr, e); + return false; + } + } + + @VisibleForTesting + void setLastRefreshedBlocksAtForTesting(long timestamp) { + lastRefreshedBlocksAt = timestamp; + } + + @VisibleForTesting + long getLastRefreshedBlocksAtForTesting() { + return lastRefreshedBlocksAt; + } + + private void setLastRefreshedBlocksAt() { + lastRefreshedBlocksAt = Time.monotonicNow(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSMultipartUploaderFactory.java 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSMultipartUploaderFactory.java deleted file mode 100644 index e9959c192df83..0000000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSMultipartUploaderFactory.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileSystemMultipartUploader; -import org.apache.hadoop.fs.MultipartUploader; -import org.apache.hadoop.fs.MultipartUploaderFactory; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; - -/** - * Support for HDFS multipart uploads, built on - * {@link FileSystem#concat(Path, Path[])}. 
- */ -public class DFSMultipartUploaderFactory extends MultipartUploaderFactory { - protected MultipartUploader createMultipartUploader(FileSystem fs, - Configuration conf) { - if (fs.getScheme().equals(HdfsConstants.HDFS_URI_SCHEME)) { - return new FileSystemMultipartUploader(fs); - } - return null; - } -} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOpsCountStatistics.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOpsCountStatistics.java index 2113ae5c63544..d8f73bcec148d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOpsCountStatistics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOpsCountStatistics.java @@ -26,7 +26,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; /** * This storage statistics tracks how many times each DFS operation was issued. 
@@ -140,21 +140,21 @@ public static OpType fromSymbol(String symbol) { public static final String NAME = "DFSOpsCountStatistics"; - private final Map opsCount = new EnumMap<>(OpType.class); + private final Map opsCount = new EnumMap<>(OpType.class); public DFSOpsCountStatistics() { super(NAME); for (OpType opType : OpType.values()) { - opsCount.put(opType, new AtomicLong(0)); + opsCount.put(opType, new LongAdder()); } } public void incrementOpCounter(OpType op) { - opsCount.get(op).addAndGet(1); + opsCount.get(op).increment(); } private class LongIterator implements Iterator { - private Iterator> iterator = + private final Iterator> iterator = opsCount.entrySet().iterator(); @Override @@ -167,9 +167,9 @@ public LongStatistic next() { if (!iterator.hasNext()) { throw new NoSuchElementException(); } - final Entry entry = iterator.next(); + final Entry entry = iterator.next(); return new LongStatistic(entry.getKey().getSymbol(), - entry.getValue().get()); + entry.getValue().longValue()); } @Override @@ -191,7 +191,7 @@ public Iterator getLongStatistics() { @Override public Long getLong(String key) { final OpType type = OpType.fromSymbol(key); - return type == null ? null : opsCount.get(type).get(); + return type == null ? 
null : opsCount.get(type).longValue(); } @Override @@ -201,8 +201,8 @@ public boolean isTracked(String key) { @Override public void reset() { - for (AtomicLong count : opsCount.values()) { - count.set(0); + for (LongAdder count : opsCount.values()) { + count.reset(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index a9e44cd5c0def..dbd3c4c43e12c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; @@ -66,14 +67,13 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DataChecksum.Type; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.TraceScope; +import org.apache.hadoop.tracing.TraceScope; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /**************************************************************** @@ -480,9 +480,10 @@ private synchronized void writeChunkPrepare(int buflen, currentPacket = createPacket(packetSize, chunksPerPacket, getStreamer() 
.getBytesCurBlock(), getStreamer().getAndIncCurrentSeqno(), false); DFSClient.LOG.debug("WriteChunk allocating new packet seqno={}," - + " src={}, packetSize={}, chunksPerPacket={}, bytesCurBlock={}", + + " src={}, packetSize={}, chunksPerPacket={}, bytesCurBlock={}," + + " output stream={}", currentPacket.getSeqno(), src, packetSize, chunksPerPacket, - getStreamer().getBytesCurBlock() + ", " + this); + getStreamer().getBytesCurBlock(), this); } } @@ -560,13 +561,7 @@ void endBlock() throws IOException { @Override public boolean hasCapability(String capability) { - switch (StringUtils.toLowerCase(capability)) { - case StreamCapabilities.HSYNC: - case StreamCapabilities.HFLUSH: - return true; - default: - return false; - } + return StoreImplementationUtils.isProbeForSyncable(capability); } /** @@ -909,8 +904,8 @@ protected synchronized void closeImpl() throws IOException { private void completeFile() throws IOException { // get last block before destroying the streamer ExtendedBlock lastBlock = getStreamer().getBlock(); - try (TraceScope ignored = - dfsClient.getTracer().newScope("completeFile")) { + try (TraceScope ignored = dfsClient.getTracer() + .newScope("DFSOutputStream#completeFile")) { completeFile(lastBlock); } } @@ -966,7 +961,10 @@ protected void completeFile(ExtendedBlock last) throws IOException { DFSClient.LOG.info(msg); throw new IOException(msg); } - try { + try (TraceScope scope = dfsClient.getTracer() + .newScope("DFSOutputStream#completeFile: Retry")) { + scope.addKVAnnotation("retries left", retries); + scope.addKVAnnotation("sleeptime (sleeping for)", sleeptime); if (retries == 0) { throw new IOException("Unable to close file because the last block " + last + " does not have enough number of replicas."); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java index 272d8de5c5bd5..a9c87235dce4d 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java @@ -28,9 +28,8 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; import org.apache.hadoop.hdfs.util.ByteArrayManager; -import org.apache.htrace.core.Span; -import org.apache.htrace.core.SpanId; -import org.apache.htrace.core.TraceScope; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.SpanContext; /**************************************************************** * DFSPacket is used by DataStreamer and DFSOutputStream. @@ -41,7 +40,7 @@ @InterfaceAudience.Private public class DFSPacket { public static final long HEART_BEAT_SEQNO = -1L; - private static SpanId[] EMPTY = new SpanId[0]; + private static final SpanContext[] EMPTY = new SpanContext[0]; private final long seqno; // sequence number of buffer in block private final long offsetInBlock; // offset in block private boolean syncBlock; // this packet forces the current block to disk @@ -68,9 +67,9 @@ public class DFSPacket { private int checksumPos; private final int dataStart; private int dataPos; - private SpanId[] traceParents = EMPTY; + private SpanContext[] traceParents = EMPTY; private int traceParentsUsed; - private TraceScope scope; + private Span span; /** * Create a new packet. 
@@ -306,11 +305,11 @@ public void addTraceParent(Span span) { if (span == null) { return; } - addTraceParent(span.getSpanId()); + addTraceParent(span.getContext()); } - public void addTraceParent(SpanId id) { - if (!id.isValid()) { + public void addTraceParent(SpanContext ctx) { + if (ctx == null) { return; } if (traceParentsUsed == traceParents.length) { @@ -318,7 +317,7 @@ public void addTraceParent(SpanId id) { traceParents.length * 2; traceParents = Arrays.copyOf(traceParents, newLength); } - traceParents[traceParentsUsed] = id; + traceParents[traceParentsUsed] = ctx; traceParentsUsed++; } @@ -329,17 +328,17 @@ public void addTraceParent(SpanId id) { *

      * Protected by the DFSOutputStream dataQueue lock. */ - public SpanId[] getTraceParents() { + public SpanContext[] getTraceParents() { // Remove duplicates from the array. int len = traceParentsUsed; Arrays.sort(traceParents, 0, len); int i = 0, j = 0; - SpanId prevVal = SpanId.INVALID; + SpanContext prevVal = null; while (true) { if (i == len) { break; } - SpanId val = traceParents[i]; + SpanContext val = traceParents[i]; if (!val.equals(prevVal)) { traceParents[j] = val; j++; @@ -354,11 +353,11 @@ public SpanId[] getTraceParents() { return traceParents; } - public void setTraceScope(TraceScope scope) { - this.scope = scope; + public void setSpan(Span span) { + this.span = span; } - public TraceScope getTraceScope() { - return scope; + public Span getSpan() { + return span; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java index ba35d51561162..a1d630e67b349 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.hdfs.protocol.BlockType; @@ -143,14 +143,6 @@ protected ByteBuffer getCurStripeBuf() { return curStripeBuf; } - protected String getSrc() { - return src; - } - - protected LocatedBlocks getLocatedBlocks() { - return locatedBlocks; - } - protected ByteBufferPool getBufferPool() { return BUFFER_POOL; } @@ -168,6 +160,8 @@ synchronized void blockSeekTo(long target) throws IOException { throw new 
IOException("Attempted to read past end of file"); } + maybeRegisterBlockRefresh(); + // Will be getting a new BlockReader. closeCurrentBlockReaders(); @@ -240,7 +234,7 @@ private long getOffsetInBlockGroup(long pos) { boolean createBlockReader(LocatedBlock block, long offsetInBlock, LocatedBlock[] targetBlocks, BlockReaderInfo[] readerInfos, - int chunkIndex) throws IOException { + int chunkIndex, long readTo) throws IOException { BlockReader reader = null; final ReaderRetryPolicy retry = new ReaderRetryPolicy(); DFSInputStream.DNAddrPair dnInfo = @@ -258,9 +252,14 @@ boolean createBlockReader(LocatedBlock block, long offsetInBlock, if (dnInfo == null) { break; } + if (readTo < 0 || readTo > block.getBlockSize()) { + readTo = block.getBlockSize(); + } reader = getBlockReader(block, offsetInBlock, - block.getBlockSize() - offsetInBlock, + readTo - offsetInBlock, dnInfo.addr, dnInfo.storageType, dnInfo.info); + DFSClientFaultInjector.get().onCreateBlockReader(block, chunkIndex, offsetInBlock, + readTo - offsetInBlock); } catch (IOException e) { if (e instanceof InvalidEncryptionKeyException && retry.shouldRefetchEncryptionKey()) { @@ -495,11 +494,16 @@ protected void fetchBlockByteRange(LocatedBlock block, long start, final LocatedBlock[] blks = StripedBlockUtil.parseStripedBlockGroup( blockGroup, cellSize, dataBlkNum, parityBlkNum); final BlockReaderInfo[] preaderInfos = new BlockReaderInfo[groupSize]; + long readTo = -1; + for (AlignedStripe stripe : stripes) { + readTo = Math.max(readTo, stripe.getOffsetInBlock() + stripe.getSpanInBlock()); + } try { for (AlignedStripe stripe : stripes) { // Parse group to get chosen DN location StripeReader preader = new PositionStripeReader(stripe, ecPolicy, blks, preaderInfos, corruptedBlocks, decoder, this); + preader.setReadTo(readTo); try { preader.readStripe(); } finally { @@ -564,4 +568,5 @@ public synchronized void unbuffer() { parityBuf = null; } } + } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java index 8d651d855c0c2..64dd77a02d0eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.CreateFlag; @@ -45,7 +45,7 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.TraceScope; +import org.apache.hadoop.tracing.TraceScope; import java.io.IOException; import java.io.InterruptedIOException; @@ -283,6 +283,7 @@ private void flipDataBuffers() { private ExecutorService flushAllExecutor; private CompletionService flushAllExecutorCompletionService; private int blockGroupIndex; + private long datanodeRestartTimeout; /** Construct a new output stream for creating a file. */ DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat, @@ -322,6 +323,7 @@ private void flipDataBuffers() { streamers.add(streamer); } currentPackets = new DFSPacket[streamers.size()]; + datanodeRestartTimeout = dfsClient.getConf().getDatanodeRestartTimeout(); setCurrentStreamer(0); } @@ -501,8 +503,14 @@ private void allocateNewBlock() throws IOException { LOG.debug("Allocating new block group. 
The previous block group: " + prevBlockGroup); - final LocatedBlock lb = addBlock(excludedNodes, dfsClient, src, - prevBlockGroup, fileId, favoredNodes, getAddBlockFlags()); + final LocatedBlock lb; + try { + lb = addBlock(excludedNodes, dfsClient, src, + prevBlockGroup, fileId, favoredNodes, getAddBlockFlags()); + } catch (IOException ioe) { + closeAllStreamers(); + throw ioe; + } assert lb.isStriped(); // assign the new block to the current block group currentBlockGroup = lb.getBlock(); @@ -637,6 +645,11 @@ private Set markExternalErrorOnStreamers() { "streamer: " + streamer); streamer.setExternalError(); healthySet.add(streamer); + } else if (!streamer.streamerClosed() + && streamer.getErrorState().hasDatanodeError() + && streamer.getErrorState().doWaitForRestart()) { + healthySet.add(streamer); + failedStreamers.remove(streamer); } } return healthySet; @@ -701,6 +714,14 @@ private void checkStreamerFailures(boolean isNeedFlushAllPackets) for (int i = 0; i < numAllBlocks; i++) { coordinator.offerStreamerUpdateResult(i, newFailed.size() == 0); } + //wait for get notify to failed stream + if (newFailed.size() != 0) { + try { + Thread.sleep(datanodeRestartTimeout); + } catch (InterruptedException e) { + // Do nothing + } + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java index 95aad12d928b2..2b3c67683c730 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java @@ -17,10 +17,10 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import com.google.common.primitives.SignedBytes; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; 
+import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.primitives.SignedBytes; import java.net.URISyntaxException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java index 1344c3b3a6ef6..358a485900d5d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java @@ -42,7 +42,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.BlockWrite; @@ -75,16 +75,16 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.Span; -import org.apache.htrace.core.SpanId; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.SpanContext; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; +import 
org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalListener; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalNotification; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -483,6 +483,7 @@ boolean doWaitForRestart() { private volatile BlockConstructionStage stage; // block construction stage protected long bytesSent = 0; // number of bytes that've been sent private final boolean isLazyPersistFile; + private long lastPacket; /** Nodes have been used in the pipeline before and have failed. */ private final List failed = new ArrayList<>(); @@ -632,6 +633,7 @@ private void initDataStreaming() { response = new ResponseProcessor(nodes); response.start(); stage = BlockConstructionStage.DATA_STREAMING; + lastPacket = Time.monotonicNow(); } protected void endBlock() { @@ -653,7 +655,6 @@ private boolean shouldStop() { */ @Override public void run() { - long lastPacket = Time.monotonicNow(); TraceScope scope = null; while (!streamerClosed && dfsClient.clientRunning) { // if the Responder encountered an error, shutdown Responder @@ -666,47 +667,44 @@ public void run() { // process datanode IO errors if any boolean doSleep = processDatanodeOrExternalError(); - final int halfSocketTimeout = dfsClient.getConf().getSocketTimeout()/2; synchronized (dataQueue) { // wait for a packet to be sent. - long now = Time.monotonicNow(); - while ((!shouldStop() && dataQueue.size() == 0 && - (stage != BlockConstructionStage.DATA_STREAMING || - now - lastPacket < halfSocketTimeout)) || doSleep) { - long timeout = halfSocketTimeout - (now-lastPacket); - timeout = timeout <= 0 ? 1000 : timeout; - timeout = (stage == BlockConstructionStage.DATA_STREAMING)? 
- timeout : 1000; + while ((!shouldStop() && dataQueue.isEmpty()) || doSleep) { + long timeout = 1000; + if (stage == BlockConstructionStage.DATA_STREAMING) { + timeout = sendHeartbeat(); + } try { dataQueue.wait(timeout); } catch (InterruptedException e) { LOG.debug("Thread interrupted", e); } doSleep = false; - now = Time.monotonicNow(); } if (shouldStop()) { continue; } // get packet to be sent. - if (dataQueue.isEmpty()) { - one = createHeartbeatPacket(); - } else { - try { - backOffIfNecessary(); - } catch (InterruptedException e) { - LOG.debug("Thread interrupted", e); - } - one = dataQueue.getFirst(); // regular data packet - SpanId[] parents = one.getTraceParents(); - if (parents.length > 0) { - scope = dfsClient.getTracer(). - newScope("dataStreamer", parents[0]); - scope.getSpan().setParents(parents); - } + one = dataQueue.getFirst(); // regular data packet + SpanContext[] parents = one.getTraceParents(); + if (parents != null && parents.length > 0) { + // The original code stored multiple parents in the DFSPacket, and + // use them ALL here when creating a new Span. We only use the + // last one FOR NOW. Moreover, we don't activate the Span for now. + scope = dfsClient.getTracer(). + newScope("dataStreamer", parents[0], false); + //scope.getSpan().setParents(parents); } } + // The DataStreamer has to release the dataQueue before sleeping, + // otherwise it will cause the ResponseProcessor to accept the ACK delay. + try { + backOffIfNecessary(); + } catch (InterruptedException e) { + LOG.debug("Thread interrupted", e); + } + // get new block from namenode. 
LOG.debug("stage={}, {}", stage, this); @@ -731,31 +729,22 @@ public void run() { if (one.isLastPacketInBlock()) { // wait for all data packets have been successfully acked - synchronized (dataQueue) { - while (!shouldStop() && ackQueue.size() != 0) { - try { - // wait for acks to arrive from datanodes - dataQueue.wait(1000); - } catch (InterruptedException e) { - LOG.debug("Thread interrupted", e); - } - } - } - if (shouldStop()) { + waitForAllAcks(); + if(shouldStop()) { continue; } stage = BlockConstructionStage.PIPELINE_CLOSE; } // send the packet - SpanId spanId = SpanId.INVALID; + SpanContext spanContext = null; synchronized (dataQueue) { // move packet from dataQueue to ackQueue if (!one.isHeartbeatPacket()) { if (scope != null) { - spanId = scope.getSpanId(); - scope.detach(); - one.setTraceScope(scope); + one.setSpan(scope.span()); + spanContext = scope.span().getContext(); + scope.close(); } scope = null; dataQueue.removeFirst(); @@ -769,9 +758,8 @@ public void run() { // write out data to remote datanode try (TraceScope ignored = dfsClient.getTracer(). - newScope("DataStreamer#writeTo", spanId)) { - one.writeTo(blockStream); - blockStream.flush(); + newScope("DataStreamer#writeTo", spanContext)) { + sendPacket(one); } catch (IOException e) { // HDFS-3398 treat primary DN is down since client is unable to // write to primary DN. If a failed or restarting node has already @@ -782,7 +770,6 @@ public void run() { errorState.markFirstNodeIfNotMarked(); throw e; } - lastPacket = Time.monotonicNow(); // update bytesSent long tmpBytesSent = one.getLastByteOffsetBlock(); @@ -797,9 +784,17 @@ public void run() { // Is this block full? 
if (one.isLastPacketInBlock()) { // wait for the close packet has been acked - synchronized (dataQueue) { - while (!shouldStop() && ackQueue.size() != 0) { - dataQueue.wait(1000);// wait for acks to arrive from datanodes + try { + waitForAllAcks(); + } catch (IOException ioe) { + // No need to do a close recovery if the last packet was acked. + // i.e. ackQueue is empty. waitForAllAcks() can get an exception + // (e.g. connection reset) while sending a heartbeat packet, + // if the DN sends the final ack and closes the connection. + synchronized (dataQueue) { + if (!ackQueue.isEmpty()) { + throw ioe; + } } } if (shouldStop()) { @@ -842,6 +837,48 @@ public void run() { closeInternal(); } + private void waitForAllAcks() throws IOException { + // wait until all data packets have been successfully acked + synchronized (dataQueue) { + while (!shouldStop() && !ackQueue.isEmpty()) { + try { + // wait for acks to arrive from datanodes + dataQueue.wait(sendHeartbeat()); + } catch (InterruptedException e) { + LOG.debug("Thread interrupted ", e); + } + } + } + } + + private void sendPacket(DFSPacket packet) throws IOException { + // write out data to remote datanode + try { + packet.writeTo(blockStream); + blockStream.flush(); + } catch (IOException e) { + // HDFS-3398 treat primary DN is down since client is unable to + // write to primary DN. If a failed or restarting node has already + // been recorded by the responder, the following call will have no + // effect. Pipeline recovery can handle only one node error at a + // time. If the primary node fails again during the recovery, it + // will be taken out then. 
+ errorState.markFirstNodeIfNotMarked(); + throw e; + } + lastPacket = Time.monotonicNow(); + } + + private long sendHeartbeat() throws IOException { + final long heartbeatInterval = dfsClient.getConf().getSocketTimeout()/2; + long timeout = heartbeatInterval - (Time.monotonicNow() - lastPacket); + if (timeout <= 0) { + sendPacket(createHeartbeatPacket()); + timeout = heartbeatInterval; + } + return timeout; + } + private void closeInternal() { closeResponder(); // close and join closeStream(); @@ -873,6 +910,8 @@ void waitForAckedSeqno(long seqno) throws IOException { try (TraceScope ignored = dfsClient.getTracer(). newScope("waitForAckedSeqno")) { LOG.debug("{} waiting for ack for: {}", this, seqno); + int dnodes = nodes != null ? nodes.length : 3; + int writeTimeout = dfsClient.getDatanodeWriteTimeout(dnodes); long begin = Time.monotonicNow(); try { synchronized (dataQueue) { @@ -883,6 +922,16 @@ void waitForAckedSeqno(long seqno) throws IOException { } try { dataQueue.wait(1000); // when we receive an ack, we notify on + long duration = Time.monotonicNow() - begin; + if (duration > writeTimeout) { + LOG.error("No ack received, took {}ms (threshold={}ms). " + + "File being written: {}, block: {}, " + + "Write pipeline datanodes: {}.", + duration, writeTimeout, src, block, nodes); + throw new InterruptedIOException("No ack received after " + + duration / 1000 + "s and a timeout of " + + writeTimeout / 1000 + "s"); + } // dataQueue } catch (InterruptedException ie) { throw new InterruptedIOException( @@ -1171,10 +1220,10 @@ public void run() { block.setNumBytes(one.getLastByteOffsetBlock()); synchronized (dataQueue) { - scope = one.getTraceScope(); - if (scope != null) { - scope.reattach(); - one.setTraceScope(null); + if (one.getSpan() != null) { + scope = new TraceScope(new Span()); + // TODO: Use scope = Tracer.curThreadTracer().activateSpan ? 
+ one.setSpan(null); } lastAckedSeqno = seqno; pipelineRecoveryCount = 0; @@ -1269,11 +1318,10 @@ private boolean processDatanodeOrExternalError() throws IOException { synchronized (dataQueue) { DFSPacket endOfBlockPacket = dataQueue.remove(); // remove the end of block packet // Close any trace span associated with this Packet - TraceScope scope = endOfBlockPacket.getTraceScope(); - if (scope != null) { - scope.reattach(); - scope.close(); - endOfBlockPacket.setTraceScope(null); + Span span = endOfBlockPacket.getSpan(); + if (span != null) { + span.finish(); + endOfBlockPacket.setSpan(null); } assert endOfBlockPacket.isLastPacketInBlock(); assert lastAckedSeqno == endOfBlockPacket.getSeqno() - 1; @@ -1644,7 +1692,7 @@ public void updatePipeline(long newGS) throws IOException { DatanodeInfo[] getExcludedNodes() { return excludedNodes.getAllPresent(excludedNodes.asMap().keySet()) - .keySet().toArray(new DatanodeInfo[0]); + .keySet().toArray(DatanodeInfo.EMPTY_ARRAY); } /** @@ -1949,7 +1997,7 @@ ErrorState getErrorState() { void queuePacket(DFSPacket packet) { synchronized (dataQueue) { if (packet == null) return; - packet.addTraceParent(Tracer.getCurrentSpanId()); + packet.addTraceParent(Tracer.getCurrentSpan()); dataQueue.addLast(packet); lastQueuedSeqno = packet.getSeqno(); LOG.debug("Queued {}, {}", packet, this); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DeadNodeDetector.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DeadNodeDetector.java index a573e8a22aad3..cd46551f0225b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DeadNodeDetector.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DeadNodeDetector.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; 
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -29,9 +29,9 @@ import java.util.HashSet; import java.util.Map; -import java.util.Queue; import java.util.Set; -import java.util.concurrent.ArrayBlockingQueue; +import java.util.Deque; +import java.util.LinkedList; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; @@ -40,8 +40,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_DEAD_NODE_QUEUE_MAX_DEFAULT; -import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_DEAD_NODE_QUEUE_MAX_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_CONNECTION_TIMEOUT_MS_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_CONNECTION_TIMEOUT_MS_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_DEAD_NODE_INTERVAL_MS_DEFAULT; @@ -54,15 +52,15 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_SUSPECT_NODE_THREADS_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_RPC_THREADS_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_RPC_THREADS_KEY; -import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_SUSPECT_NODE_QUEUE_MAX_DEFAULT; -import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_SUSPECT_NODE_QUEUE_MAX_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY; +import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_IDLE_SLEEP_MS_KEY; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_DEAD_NODE_DETECTION_IDLE_SLEEP_MS_DEFAULT; /** * Detect the dead nodes in advance, and share this information among all the * DFSInputStreams in the same client. */ -public class DeadNodeDetector implements Runnable { +public class DeadNodeDetector extends Daemon { public static final Logger LOG = LoggerFactory.getLogger(DeadNodeDetector.class); @@ -74,7 +72,7 @@ public class DeadNodeDetector implements Runnable { /** * Waiting time when DeadNodeDetector's state is idle. */ - private static final long IDLE_SLEEP_MS = 10000; + private final long idleSleepMs; /** * Client context name. @@ -113,16 +111,6 @@ public class DeadNodeDetector implements Runnable { */ private long suspectNodeDetectInterval = 0; - /** - * The max queue size of probing dead node. - */ - private int maxDeadNodesProbeQueueLen = 0; - - /** - * The max queue size of probing suspect node. - */ - private int maxSuspectNodesProbeQueueLen; - /** * Connection timeout for probing dead node in milliseconds. */ @@ -131,12 +119,12 @@ public class DeadNodeDetector implements Runnable { /** * The dead node probe queue. */ - private Queue deadNodesProbeQueue; + private UniqueQueue deadNodesProbeQueue; /** * The suspect node probe queue. */ - private Queue suspectNodesProbeQueue; + private UniqueQueue suspectNodesProbeQueue; /** * The thread pool of probing dead node. @@ -181,6 +169,32 @@ private enum State { INIT, CHECK_DEAD, IDLE, ERROR } + /** + * The thread safe unique queue. 
+ */ + static class UniqueQueue { + private Deque queue = new LinkedList<>(); + private Set set = new HashSet<>(); + + synchronized boolean offer(T dn) { + if (set.add(dn)) { + queue.addLast(dn); + return true; + } + return false; + } + + synchronized T poll() { + T dn = queue.pollFirst(); + set.remove(dn); + return dn; + } + + synchronized int size() { + return set.size(); + } + } + /** * Disabled start probe suspect/dead thread for the testing. */ @@ -203,20 +217,14 @@ public DeadNodeDetector(String name, Configuration conf) { DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_SUSPECT_NODE_INTERVAL_MS_DEFAULT); socketTimeout = conf.getInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsConstants.READ_TIMEOUT); - maxDeadNodesProbeQueueLen = - conf.getInt(DFS_CLIENT_DEAD_NODE_DETECTION_DEAD_NODE_QUEUE_MAX_KEY, - DFS_CLIENT_DEAD_NODE_DETECTION_DEAD_NODE_QUEUE_MAX_DEFAULT); - maxSuspectNodesProbeQueueLen = - conf.getInt(DFS_CLIENT_DEAD_NODE_DETECTION_SUSPECT_NODE_QUEUE_MAX_KEY, - DFS_CLIENT_DEAD_NODE_DETECTION_SUSPECT_NODE_QUEUE_MAX_DEFAULT); probeConnectionTimeoutMs = conf.getLong( DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_CONNECTION_TIMEOUT_MS_KEY, DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_CONNECTION_TIMEOUT_MS_DEFAULT); + this.deadNodesProbeQueue = new UniqueQueue<>(); + this.suspectNodesProbeQueue = new UniqueQueue<>(); - this.deadNodesProbeQueue = - new ArrayBlockingQueue(maxDeadNodesProbeQueueLen); - this.suspectNodesProbeQueue = - new ArrayBlockingQueue(maxSuspectNodesProbeQueueLen); + idleSleepMs = conf.getLong(DFS_CLIENT_DEAD_NODE_DETECTION_IDLE_SLEEP_MS_KEY, + DFS_CLIENT_DEAD_NODE_DETECTION_IDLE_SLEEP_MS_DEFAULT); int deadNodeDetectDeadThreads = conf.getInt(DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_DEAD_NODE_THREADS_KEY, @@ -271,6 +279,37 @@ public void run() { } } + /** + * Shutdown all the threads. 
+ */ + public void shutdown() { + threadShutDown(this); + threadShutDown(probeDeadNodesSchedulerThr); + threadShutDown(probeSuspectNodesSchedulerThr); + probeDeadNodesThreadPool.shutdown(); + probeSuspectNodesThreadPool.shutdown(); + rpcThreadPool.shutdown(); + } + + private static void threadShutDown(Thread thread) { + if (thread != null && thread.isAlive()) { + thread.interrupt(); + try { + thread.join(); + } catch (InterruptedException e) { + } + } + } + + @VisibleForTesting + boolean isThreadsShutdown() { + return !this.isAlive() && !probeDeadNodesSchedulerThr.isAlive() + && !probeSuspectNodesSchedulerThr.isAlive() + && probeDeadNodesThreadPool.isShutdown() + && probeSuspectNodesThreadPool.isShutdown() + && rpcThreadPool.isShutdown(); + } + @VisibleForTesting static void setDisabledProbeThreadForTest( boolean disabledProbeThreadForTest) { @@ -294,7 +333,7 @@ void startProbeScheduler() { } /** - * Prode datanode by probe byte. + * Prode datanode by probe type. */ private void scheduleProbe(ProbeType type) { LOG.debug("Schedule probe datanode for probe type: {}.", type); @@ -376,9 +415,8 @@ public DatanodeLocalInfo call() throws Exception { } catch (Exception e) { LOG.error("Probe failed, datanode: {}, type: {}.", datanodeInfo, type, e); + deadNodeDetector.probeCallBack(this, false); } - - deadNodeDetector.probeCallBack(this, false); } } @@ -402,7 +440,7 @@ private void probeCallBack(Probe probe, boolean success) { } } else { if (probe.getType() == ProbeType.CHECK_SUSPECT) { - LOG.info("Add the node to dead node list: {}.", + LOG.warn("Probe failed, add suspect node to dead node list: {}.", probe.getDatanodeInfo()); addToDead(probe.getDatanodeInfo()); } @@ -415,11 +453,11 @@ private void probeCallBack(Probe probe, boolean success) { private void checkDeadNodes() { Set datanodeInfos = clearAndGetDetectedDeadNodes(); for (DatanodeInfo datanodeInfo : datanodeInfos) { - LOG.debug("Add dead node to check: {}.", datanodeInfo); if 
(!deadNodesProbeQueue.offer(datanodeInfo)) { LOG.debug("Skip to add dead node {} to check " + - "since the probe queue is full.", datanodeInfo); - break; + "since the node is already in the probe queue.", datanodeInfo); + } else { + LOG.debug("Add dead node to check: {}.", datanodeInfo); } } state = State.IDLE; @@ -427,7 +465,7 @@ private void checkDeadNodes() { private void idle() { try { - Thread.sleep(IDLE_SLEEP_MS); + Thread.sleep(idleSleepMs); } catch (InterruptedException e) { LOG.debug("Got interrupted while DeadNodeDetector is idle.", e); Thread.currentThread().interrupt(); @@ -452,14 +490,24 @@ private void removeFromDead(DatanodeInfo datanodeInfo) { deadNodes.remove(datanodeInfo.getDatanodeUuid()); } - public Queue getDeadNodesProbeQueue() { + public UniqueQueue getDeadNodesProbeQueue() { return deadNodesProbeQueue; } - public Queue getSuspectNodesProbeQueue() { + public UniqueQueue getSuspectNodesProbeQueue() { return suspectNodesProbeQueue; } + @VisibleForTesting + void setSuspectQueue(UniqueQueue queue) { + this.suspectNodesProbeQueue = queue; + } + + @VisibleForTesting + void setDeadQueue(UniqueQueue queue) { + this.deadNodesProbeQueue = queue; + } + /** * Add datanode to suspectNodes and suspectAndDeadNodes. 
*/ @@ -475,6 +523,7 @@ public synchronized void addNodeToDetect(DFSInputStream dfsInputStream, datanodeInfos.add(datanodeInfo); } + LOG.debug("Add datanode {} to suspectAndDeadNodes.", datanodeInfo); addSuspectNodeToDetect(datanodeInfo); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index d1babe3280c8d..4b25925a4b1d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -18,11 +18,14 @@ package org.apache.hadoop.hdfs; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.ipc.RpcNoSuchMethodException; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.collections.list.TreeList; +import org.apache.hadoop.fs.LeaseRecoverable; +import org.apache.hadoop.fs.SafeMode; + +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -52,6 +55,7 @@ import org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider; import org.apache.hadoop.fs.InvalidPathHandleException; import org.apache.hadoop.fs.PartialListing; +import org.apache.hadoop.fs.MultipartUploaderBuilder; import org.apache.hadoop.fs.PathHandle; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Options; @@ -61,11 +65,13 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.QuotaUsage; import 
org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.SafeModeAction; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.impl.FileSystemMultipartUploaderBuilder; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; @@ -94,7 +100,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.ReencryptAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction; -import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.HdfsPathHandle; import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; @@ -142,7 +147,7 @@ @InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase" }) @InterfaceStability.Unstable public class DistributedFileSystem extends FileSystem - implements KeyProviderTokenIssuer, BatchListingOperations { + implements KeyProviderTokenIssuer, BatchListingOperations, LeaseRecoverable, SafeMode { private Path workingDir; private URI uri; @@ -181,7 +186,7 @@ public void initialize(URI uri, Configuration conf) throws IOException { throw new IOException("Incomplete HDFS URI, no host: "+ uri); } - this.dfs = new DFSClient(uri, conf, statistics); + initDFSClient(uri, conf); this.uri = URI.create(uri.getScheme()+"://"+uri.getAuthority()); this.workingDir = getHomeDirectory(); @@ -195,6 +200,10 @@ public StorageStatistics provide() { }); } + void initDFSClient(URI theUri, Configuration conf) throws IOException { + this.dfs = new DFSClient(theUri, conf, statistics); + } + @Override public Path getWorkingDirectory() { return workingDir; @@ -300,6 +309,7 
@@ public void setVerifyChecksum(boolean verifyChecksum) { * @return true if the file is already closed * @throws IOException if an error occurs */ + @Override public boolean recoverLease(final Path f) throws IOException { Path absF = fixRelativePart(f); return new FileSystemLinkResolver() { @@ -416,6 +426,16 @@ public FSDataOutputStream append(Path f, final int bufferSize, return append(f, EnumSet.of(CreateFlag.APPEND), bufferSize, progress); } + @Override + public FSDataOutputStream append(Path f, final int bufferSize, + final Progressable progress, boolean appendToNewBlock) throws IOException { + EnumSet flag = EnumSet.of(CreateFlag.APPEND); + if (appendToNewBlock) { + flag.add(CreateFlag.NEW_BLOCK); + } + return append(f, flag, bufferSize, progress); + } + /** * Append to an existing file (optional operation). * @@ -813,6 +833,8 @@ public BlockStoragePolicySpi next(final FileSystem fs, final Path p) @Override public Collection getAllStoragePolicies() throws IOException { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_STORAGE_POLICIES); return Arrays.asList(dfs.getStoragePolicies()); } @@ -834,9 +856,7 @@ public long getBytesWithFutureGenerationStamps() throws IOException { */ @Deprecated public BlockStoragePolicy[] getStoragePolicies() throws IOException { - statistics.incrementReadOps(1); - storageStatistics.incrementOpCounter(OpType.GET_STORAGE_POLICIES); - return dfs.getStoragePolicies(); + return getAllStoragePolicies().toArray(new BlockStoragePolicy[0]); } /** @@ -1143,10 +1163,21 @@ private FileStatus[] listStatusInternal(Path p) throws IOException { /** * List all the entries of a directory * - * Note that this operation is not atomic for a large directory. - * The entries of a directory may be fetched from NameNode multiple times. - * It only guarantees that each name occurs once if a directory - * undergoes changes between the calls. + * Note that this operation is not atomic for a large directory. 
The entries + * of a directory may be fetched from NameNode multiple times. It only + * guarantees that each name occurs once if a directory undergoes changes + * between the calls. + * + * If any of the immediate children of the given path f is a symlink, the + * returned FileStatus object of that child would be represented as a + * symlink. It will not be resolved to the target path and will not get the + * target path FileStatus object. The target path will be available via + * getSymlink on that child's FileStatus object. Since it is represented as a + * symlink, isDirectory on that child's FileStatus will return false. + * + * If you want to get the FileStatus of the target path for that child, you may + * want to use GetFileStatus API with that child's symlink path. Please see + * {@link DistributedFileSystem#getFileStatus(Path f)} */ @Override public FileStatus[] listStatus(Path p) throws IOException { @@ -1494,10 +1525,14 @@ protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) @Override public void close() throws IOException { try { - dfs.closeOutputStreams(false); + if (dfs != null) { + dfs.closeOutputStreams(false); + } super.close(); } finally { - dfs.close(); + if (dfs != null) { + dfs.close(); + } } } @@ -1602,6 +1637,63 @@ public DatanodeInfo[] getDataNodeStats(final DatanodeReportType type) * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setSafeMode( * HdfsConstants.SafeModeAction,boolean) */ + @Override + public boolean setSafeMode(SafeModeAction action) + throws IOException { + return setSafeMode(action, false); + } + + /** + * Enter, leave or get safe mode. + * + * @param action + * One of SafeModeAction.ENTER, SafeModeAction.LEAVE and + * SafeModeAction.GET. + * @param isChecked + * If true check only for Active NNs status, else check first NN's + * status.
+ */ + @Override + @SuppressWarnings("deprecation") + public boolean setSafeMode(SafeModeAction action, boolean isChecked) + throws IOException { + return this.setSafeMode(convertToClientProtocolSafeModeAction(action), isChecked); + } + + /** + * Translating the {@link SafeModeAction} into {@link HdfsConstants.SafeModeAction} + * that is used by {@link DFSClient#setSafeMode(HdfsConstants.SafeModeAction, boolean)}. + * + * @param action any supported action listed in {@link SafeModeAction}. + * @return the converted {@link HdfsConstants.SafeModeAction}. + * @throws UnsupportedOperationException if the provided {@link SafeModeAction} cannot be + * translated. + */ + private static HdfsConstants.SafeModeAction convertToClientProtocolSafeModeAction( + SafeModeAction action) { + switch (action) { + case ENTER: + return HdfsConstants.SafeModeAction.SAFEMODE_ENTER; + case LEAVE: + return HdfsConstants.SafeModeAction.SAFEMODE_LEAVE; + case FORCE_EXIT: + return HdfsConstants.SafeModeAction.SAFEMODE_FORCE_EXIT; + case GET: + return HdfsConstants.SafeModeAction.SAFEMODE_GET; + default: + throw new UnsupportedOperationException("Unsupported safe mode action " + action); + } + } + + /** + * Enter, leave or get safe mode. + * + * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, + * boolean) + * + * @deprecated please instead use {@link #setSafeMode(SafeModeAction)}. + */ + @Deprecated public boolean setSafeMode(HdfsConstants.SafeModeAction action) throws IOException { return setSafeMode(action, false); @@ -1612,12 +1704,18 @@ public boolean setSafeMode(HdfsConstants.SafeModeAction action) * * @param action * One of SafeModeAction.ENTER, SafeModeAction.LEAVE and - * SafeModeAction.GET + * SafeModeAction.GET. * @param isChecked * If true check only for Active NNs status, else check first NN's - * status - * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setSafeMode(SafeModeAction, boolean) + * status. 
+ * + * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, + * boolean) + * + * @deprecated please instead use + * {@link DistributedFileSystem#setSafeMode(SafeModeAction, boolean)}. */ + @Deprecated public boolean setSafeMode(HdfsConstants.SafeModeAction action, boolean isChecked) throws IOException { return dfs.setSafeMode(action, isChecked); @@ -1654,7 +1752,7 @@ public long rollEdits() throws IOException { } /** - * enable/disable/check restoreFaileStorage + * enable/disable/check restoreFailedStorage. * * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#restoreFailedStorage(String arg) */ @@ -1712,6 +1810,12 @@ public FsServerDefaults getServerDefaults() throws IOException { /** * Returns the stat information about the file. + * + * If the given path is a symlink, the path will be resolved to a target path + * and it will get the resolved path's FileStatus object. It will not be + * represented as a symlink and isDirectory API returns true if the resolved + * path is a directory, false otherwise. + * * @throws FileNotFoundException if the file does not exist. */ @Override @@ -1737,6 +1841,18 @@ public FileStatus next(final FileSystem fs, final Path p) }.resolve(this, absF); } + /** + * Synchronize client metadata state with Active NameNode. + * <p>
      + * In HA the client synchronizes its state with the Active NameNode + * in order to guarantee subsequent read consistency from Observer Nodes. + * @throws IOException + */ + @Override + public void msync() throws IOException { + dfs.msync(); + } + @SuppressWarnings("deprecation") @Override public void createSymlink(final Path target, final Path link, @@ -1999,7 +2115,7 @@ protected URI canonicalizeUri(URI uri) { * when there is an issue communicating with the NameNode */ public boolean isInSafeMode() throws IOException { - return setSafeMode(SafeModeAction.SAFEMODE_GET, true); + return setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, true); } /** @see org.apache.hadoop.hdfs.client.HdfsAdmin#allowSnapshot(Path) */ @@ -2123,6 +2239,9 @@ public Void next(final FileSystem fs, final Path p) */ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() throws IOException { + statistics.incrementReadOps(1); + storageStatistics + .incrementOpCounter(OpType.GET_SNAPSHOTTABLE_DIRECTORY_LIST); return dfs.getSnapshottableDirListing(); } @@ -2272,8 +2391,15 @@ private SnapshotDiffReport getSnapshotDiffReportInternal( List deletedList = new ChunkedArrayList<>(); SnapshotDiffReportListing report; do { - report = dfs.getSnapshotDiffReportListing(snapshotDir, fromSnapshot, - toSnapshot, startPath, index); + try { + report = dfs.getSnapshotDiffReportListing(snapshotDir, fromSnapshot, + toSnapshot, startPath, index); + } catch (RpcNoSuchMethodException e) { + // In case the server doesn't support getSnapshotDiffReportListing, + // fallback to getSnapshotDiffReport. 
+ DFSClient.LOG.warn("Falling back to getSnapshotDiffReport {}", e.getMessage()); + return dfs.getSnapshotDiffReport(snapshotDir, fromSnapshot, toSnapshot); + } startPath = report.getLastPath(); index = report.getLastIndex(); modifiedList.addAll(report.getModifyList()); @@ -2295,6 +2421,8 @@ private SnapshotDiffReport getSnapshotDiffReportInternal( */ public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir, final String fromSnapshot, final String toSnapshot) throws IOException { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_SNAPSHOT_DIFF); Path absF = fixRelativePart(snapshotDir); return new FileSystemLinkResolver() { @Override @@ -2328,6 +2456,7 @@ public SnapshotDiffReport next(final FileSystem fs, final Path p) * @throws FileNotFoundException if the file does not exist. * @throws IOException If an I/O error occurred */ + @Override public boolean isFileClosed(final Path src) throws IOException { Path absF = fixRelativePart(src); return new FileSystemLinkResolver() { @@ -3243,6 +3372,8 @@ public ECTopologyVerifierResult getECTopologyResultForPolicies( */ @Override public Path getTrashRoot(Path path) { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_TRASH_ROOT); try { if ((path == null) || !dfs.isHDFSEncryptionEnabled()) { return super.getTrashRoot(path); @@ -3549,7 +3680,8 @@ public RemoteIterator listOpenFiles( public RemoteIterator listOpenFiles( EnumSet openFilesTypes, String path) throws IOException { - return dfs.listOpenFiles(openFilesTypes, path); + Path absF = fixRelativePart(new Path(path)); + return dfs.listOpenFiles(openFilesTypes, getPathName(absF)); } @@ -3583,6 +3715,7 @@ public boolean hasPathCapability(final Path path, final String capability) // (yet/ever) in the WebHDFS API. 
switch (validatePathCapabilityArgs(path, capability)) { case CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING: + case CommonPathCapabilities.LEASE_RECOVERABLE: return true; default: // fall through @@ -3590,4 +3723,21 @@ public boolean hasPathCapability(final Path path, final String capability) return super.hasPathCapability(p, capability); } + + @Override + public MultipartUploaderBuilder createMultipartUploader(final Path basePath) + throws IOException { + return new FileSystemMultipartUploaderBuilder(this, basePath); + } + + /** + * Retrieve stats for slow running datanodes. + * + * @return An array of slow datanode info. + * @throws IOException If an I/O error occurs. + */ + public DatanodeInfo[] getSlowDatanodeStats() throws IOException { + return dfs.slowDatanodeReport(); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/FileChecksumHelper.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/FileChecksumHelper.java index 8f807d5f406a7..c1804a2c22f44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/FileChecksumHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/FileChecksumHelper.java @@ -623,7 +623,8 @@ static class StripedFileNonStripedChecksumComputer @Override void checksumBlocks() throws IOException { - int tmpTimeout = 3000 * 1 + getClient().getConf().getSocketTimeout(); + int tmpTimeout = getClient().getConf().getChecksumEcSocketTimeout() * 1 + + getClient().getConf().getSocketTimeout(); setTimeout(tmpTimeout); for (bgIdx = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/KeyProviderCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/KeyProviderCache.java index 17d20fe014d1b..48afed1816124 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/KeyProviderCache.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/KeyProviderCache.java @@ -26,14 +26,16 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.util.KMSUtil; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalListener; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalNotification; +import org.apache.hadoop.util.ShutdownHookManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,6 +67,12 @@ public void onRemoval( } }) .build(); + + // Register the shutdown hook when not in shutdown + if (!ShutdownHookManager.get().isShutdownInProgress()) { + ShutdownHookManager.get().addShutdownHook( + new KeyProviderCacheFinalizer(), SHUTDOWN_HOOK_PRIORITY); + } } public KeyProvider get(final Configuration conf, @@ -85,6 +93,26 @@ public KeyProvider call() throws Exception { } } + public static final int SHUTDOWN_HOOK_PRIORITY = FileSystem.SHUTDOWN_HOOK_PRIORITY - 1; + + private class KeyProviderCacheFinalizer implements Runnable { + @Override + public synchronized void run() { + invalidateCache(); + } + } + + /** + * Invalidate cache. KeyProviders in the cache will be closed by cache hook. 
+ */ + @VisibleForTesting + synchronized void invalidateCache() { + LOG.debug("Invalidating all cached KeyProviders."); + if (cache != null) { + cache.invalidateAll(); + } + } + private URI createKeyProviderURI(Configuration conf) { final String providerUriStr = conf.getTrimmed( CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_PROVIDER_PATH); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/LocatedBlocksRefresher.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/LocatedBlocksRefresher.java new file mode 100644 index 0000000000000..454d1f9cd93e4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/LocatedBlocksRefresher.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_CONTEXT_DEFAULT; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_THREADS_DEFAULT; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_THREADS_KEY; + +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Phaser; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.client.impl.DfsClientConf; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Periodically refresh the underlying cached {@link LocatedBlocks} for eligible registered + * {@link DFSInputStream}s. DFSInputStreams are eligible for refreshing if they have any + * deadNodes or any blocks are lacking local replicas. + * Disabled by default, unless an interval is configured. + */ +public class LocatedBlocksRefresher extends Daemon { + private static final Logger LOG = + LoggerFactory.getLogger(LocatedBlocksRefresher.class); + + private static final String THREAD_PREFIX = "located-block-refresher-"; + + private final String name; + private final long interval; + private final long jitter; + private final ExecutorService refreshThreadPool; + + // Use WeakHashMap so that we don't hold onto references that might have not been explicitly + // closed because they were created and thrown away. 
+ private final Set registeredInputStreams = + Collections.newSetFromMap(new WeakHashMap<>()); + + private int runCount; + private int refreshCount; + + LocatedBlocksRefresher(String name, Configuration conf, DfsClientConf dfsClientConf) { + this.name = name; + this.interval = dfsClientConf.getLocatedBlocksRefresherInterval(); + this.jitter = Math.round(this.interval * 0.1); + int rpcThreads = conf.getInt(DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_THREADS_KEY, + DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_THREADS_DEFAULT); + + String threadPrefix; + if (name.equals(DFS_CLIENT_CONTEXT_DEFAULT)) { + threadPrefix = THREAD_PREFIX; + } else { + threadPrefix = THREAD_PREFIX + name + "-"; + } + + this.refreshThreadPool = Executors.newFixedThreadPool(rpcThreads, new Daemon.DaemonFactory() { + private final AtomicInteger threadIndex = new AtomicInteger(0); + + @Override + public Thread newThread(Runnable r) { + Thread t = super.newThread(r); + t.setName(threadPrefix + threadIndex.getAndIncrement()); + return t; + } + }); + + setName(threadPrefix + "main"); + + LOG.info("Start located block refresher for DFSClient {}.", this.name); + } + + @Override + public void run() { + while (!Thread.currentThread().isInterrupted()) { + + if (!waitForInterval()) { + return; + } + + LOG.debug("Running refresh for {} streams", registeredInputStreams.size()); + long start = Time.monotonicNow(); + AtomicInteger neededRefresh = new AtomicInteger(0); + + Phaser phaser = new Phaser(1); + + Map addressCache = new ConcurrentHashMap<>(); + + for (DFSInputStream inputStream : getInputStreams()) { + phaser.register(); + refreshThreadPool.submit(() -> { + try { + if (isInputStreamTracked(inputStream) && + inputStream.refreshBlockLocations(addressCache)) { + neededRefresh.incrementAndGet(); + } + } finally { + phaser.arriveAndDeregister(); + } + }); + } + + phaser.arriveAndAwaitAdvance(); + + synchronized (this) { + runCount++; + refreshCount += neededRefresh.get(); + } + + LOG.debug( + "Finished refreshing 
{} of {} streams in {}ms", + neededRefresh, + registeredInputStreams.size(), + Time.monotonicNow() - start + ); + } + } + + public synchronized int getRunCount() { + return runCount; + } + + public synchronized int getRefreshCount() { + return refreshCount; + } + + private boolean waitForInterval() { + try { + Thread.sleep(interval + ThreadLocalRandom.current().nextLong(-jitter, jitter)); + return true; + } catch (InterruptedException e) { + LOG.debug("Interrupted during wait interval", e); + Thread.currentThread().interrupt(); + return false; + } + } + + /** + * Shutdown all the threads. + */ + public void shutdown() { + if (isAlive()) { + interrupt(); + try { + join(); + } catch (InterruptedException e) { + } + } + refreshThreadPool.shutdown(); + } + + /** + * Collects the DFSInputStreams to a list within synchronization, so that we can iterate them + * without potentially blocking callers to {@link #addInputStream(DFSInputStream)} or + * {@link #removeInputStream(DFSInputStream)}. We don't care so much about missing additions, + * and we'll guard against removals by doing an additional + * {@link #isInputStreamTracked(DFSInputStream)} check during iteration.
+ */ + private synchronized Collection getInputStreams() { + return new ArrayList<>(registeredInputStreams); + } + + public synchronized void addInputStream(DFSInputStream dfsInputStream) { + LOG.trace("Registering {} for {}", dfsInputStream, dfsInputStream.getSrc()); + registeredInputStreams.add(dfsInputStream); + } + + public synchronized void removeInputStream(DFSInputStream dfsInputStream) { + if (isInputStreamTracked(dfsInputStream)) { + LOG.trace("De-registering {} for {}", dfsInputStream, dfsInputStream.getSrc()); + registeredInputStreams.remove(dfsInputStream); + } + } + + public synchronized boolean isInputStreamTracked(DFSInputStream dfsInputStream) { + return registeredInputStreams.contains(dfsInputStream); + } + + public long getInterval() { + return interval; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java index c640b39b6f488..3725fc21590c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java @@ -35,8 +35,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; @@ -56,7 +56,7 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import 
org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityUtil; @@ -78,11 +78,6 @@ public class NameNodeProxiesClient { private static final Logger LOG = LoggerFactory.getLogger( NameNodeProxiesClient.class); - /** Maximum # of retries for HAProxy with HAServiceProtocol. */ - private static final int MAX_RETRIES = 3; - /** Initial retry delay for HAProxy with HAServiceProtocol. */ - private static final int DELAY_MILLISECONDS = 200; - /** * Wrapper for a client proxy as well as its associated service ID. * This is simply used as a tuple-like return type for created NN proxy. @@ -355,7 +350,7 @@ public static ClientProtocol createProxyWithAlignmentContext( AlignmentContext alignmentContext) throws IOException { RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); final RetryPolicy defaultPolicy = RetryUtils.getDefaultRetryPolicy( diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java index 0580ed536da15..79f313398be0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java @@ -23,9 +23,9 @@ import java.util.List; import java.util.Map.Entry; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.LinkedListMultimap; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.LinkedListMultimap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PositionStripeReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PositionStripeReader.java index 65d2c906a952c..efadedb8f082f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PositionStripeReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PositionStripeReader.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.util.StripedBlockUtil; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StatefulStripeReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StatefulStripeReader.java index b37501d2e2be7..bff13bfdc8957 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StatefulStripeReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StatefulStripeReader.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.util.StripedBlockUtil; @@ -52,7 +52,9 @@ void prepareDecodeInputs() { cur = dfsStripedInputStream.getCurStripeBuf().duplicate(); } - this.decodeInputs = new ECChunk[dataBlkNum + parityBlkNum]; + if (this.decodeInputs == null) { + this.decodeInputs = new ECChunk[dataBlkNum + parityBlkNum]; + } int bufLen = (int) alignedStripe.getSpanInBlock(); int bufOff = (int) alignedStripe.getOffsetInBlock(); 
for (int i = 0; i < dataBlkNum; i++) { @@ -72,11 +74,6 @@ void prepareDecodeInputs() { boolean prepareParityChunk(int index) { Preconditions.checkState(index >= dataBlkNum && alignedStripe.chunks[index] == null); - if (readerInfos[index] != null && readerInfos[index].shouldSkip) { - alignedStripe.chunks[index] = new StripingChunk(StripingChunk.MISSING); - // we have failed the block reader before - return false; - } final int parityIndex = index - dataBlkNum; ByteBuffer buf = dfsStripedInputStream.getParityBuffer().duplicate(); buf.position(cellSize * parityIndex); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java index 8fd38bdb3b795..78e8b7ecde54a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; @@ -119,6 +119,7 @@ void skip() { protected final int cellSize; protected final RawErasureDecoder decoder; protected final DFSStripedInputStream dfsStripedInputStream; + private long readTo = -1; protected ECChunk[] decodeInputs; @@ -302,7 +303,7 @@ boolean readChunk(final LocatedBlock block, int chunkIndex) if (readerInfos[chunkIndex] == null) { if (!dfsStripedInputStream.createBlockReader(block, alignedStripe.getOffsetInBlock(), targetBlocks, - readerInfos, chunkIndex)) { + readerInfos, chunkIndex, readTo)) { chunk.state = StripingChunk.MISSING; return false; } @@ -480,4 +481,9 @@ void clearFutures() { boolean useDirectBuffer() { return 
decoder.preferDirectBuffer(); } + + public void setReadTo(long readTo) { + this.readTo = readTo; + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripedDataStreamer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripedDataStreamer.java index d920f18e24748..e90e66ace4988 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripedDataStreamer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripedDataStreamer.java @@ -33,7 +33,7 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class extends {@link DataStreamer} to support writing striped blocks @@ -143,7 +143,8 @@ protected void setupPipelineInternal(DatanodeInfo[] nodes, // set up the pipeline again with the remaining nodes. when a striped // data streamer comes here, it must be in external error state. - assert getErrorState().hasExternalError(); + assert getErrorState().hasExternalError() + || getErrorState().doWaitForRestart(); success = createBlockOutputStream(nodes, nodeStorageTypes, nodeStorageIDs, newGS, true); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ViewDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ViewDistributedFileSystem.java new file mode 100644 index 0000000000000..0502ea5b8c7b1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ViewDistributedFileSystem.java @@ -0,0 +1,2333 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p>
      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.BlockStoragePolicySpi; +import org.apache.hadoop.fs.CacheFlag; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileChecksum; +import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.FsServerDefaults; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.PartialListing; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.PathHandle; +import org.apache.hadoop.fs.QuotaUsage; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.viewfs.ViewFileSystem; +import 
org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; +import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse; +import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; +import org.apache.hadoop.hdfs.protocol.CachePoolEntry; +import org.apache.hadoop.hdfs.protocol.CachePoolInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ECTopologyVerifierResult; +import org.apache.hadoop.hdfs.protocol.EncryptionZone; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsPathHandle; +import org.apache.hadoop.hdfs.protocol.OpenFileEntry; +import org.apache.hadoop.hdfs.protocol.OpenFilesIterator; +import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; +import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; +import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing; +import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; +import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.io.MultipleIOException; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.token.DelegationTokenIssuer; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.EnumSet; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; + +/** + * The ViewDistributedFileSystem is an extended class to DistributedFileSystem + * with additional mounting functionality. The goal is to have better API + * compatibility for HDFS users when using the mounting + * filesystem (ViewFileSystemOverloadScheme). + * The ViewFileSystemOverloadScheme ({@link ViewFileSystemOverloadScheme}) is a + * new filesystem with mounting functionality inherited from ViewFileSystem. + * Users who currently use ViewFileSystemOverloadScheme by setting + * fs.hdfs.impl=org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme can now + * set fs.hdfs.impl=org.apache.hadoop.hdfs.ViewDistributedFileSystem instead, + * so that HDFS users get a closely compatible API together with mount + * functionality. All other schemes can continue to use the + * ViewFileSystemOverloadScheme class directly for mount functionality. Please + * note that ViewFileSystemOverloadScheme provides only the + * ViewFileSystem ({@link ViewFileSystem}) APIs. + * If the user configured this class but no mount points, it will + * simply work as the existing DistributedFileSystem class. If the user + * configured both fs.hdfs.impl to this class and mount configurations, then + * users will be able to call the APIs available in this class; they are + * nothing but DFS APIs, but they will be delegated to viewfs functionality. + * Please note, APIs without any path in their arguments (ex: isInSafeMode) + * will be delegated to the default filesystem only, that is, the configured + * fallback link. If you want to make these API calls on a specific child + * filesystem, you may want to initialize it separately and call it directly. + * In ViewDistributedFileSystem, we strongly recommend configuring + * linkFallBack when you add mount links, and it's recommended to + * point it to your base cluster, usually your current fs.defaultFS if that's + * pointing to hdfs. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class ViewDistributedFileSystem extends DistributedFileSystem { + private static final Logger LOGGER = + LoggerFactory.getLogger(ViewDistributedFileSystem.class); + + // A mounting file system. + private ViewFileSystemOverloadScheme vfs; + // A default DFS, which should have set via linkFallback + private DistributedFileSystem defaultDFS; + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + try { + this.vfs = tryInitializeMountingViewFs(uri, conf); + } catch (IOException ioe) { + LOGGER.debug(new StringBuilder("Mount tree initialization failed with ") + .append("the reason => {}. Falling back to regular DFS") + .append(" initialization. Please re-initialize the fs after updating") + .append(" mount point.").toString(), ioe.getMessage()); + // Previous super.initialize would have skipped the dfsclient init and + // setWorkingDirectory as we planned to initialize vfs. Since vfs init + // failed, let's init dfsClient now. + super.initDFSClient(uri, conf); + super.setWorkingDirectory(super.getHomeDirectory()); + return; + } + + setConf(conf); + // A child DFS with the current initialized URI. This must be same as + // fallback fs. The fallback must point to root of your filesystems. + // Some APIs(without path in argument, for example isInSafeMode) will + // support only for base cluster filesystem. Only that APIs will use this + // fs. + defaultDFS = (DistributedFileSystem) this.vfs.getFallbackFileSystem(); + // Please don't access internal dfs client directly except in tests. + dfs = (defaultDFS != null) ? defaultDFS.dfs : null; + super.setWorkingDirectory(this.vfs.getHomeDirectory()); + } + + @Override + void initDFSClient(URI uri, Configuration conf) throws IOException { + // Since we plan to initialize vfs in this class, we will not need to + // initialize DFS client. 
+ } + + public ViewDistributedFileSystem() { + } + + private ViewFileSystemOverloadScheme tryInitializeMountingViewFs(URI theUri, + Configuration conf) throws IOException { + ViewFileSystemOverloadScheme viewFs = new ViewFileSystemOverloadScheme(); + viewFs.setSupportAutoAddingFallbackOnNoMounts(false); + viewFs.initialize(theUri, conf); + return viewFs; + } + + @Override + public URI getUri() { + if (this.vfs == null) { + return super.getUri(); + } + return this.vfs.getUri(); + } + + @Override + public String getScheme() { + if (this.vfs == null) { + return super.getScheme(); + } + return this.vfs.getScheme(); + } + + @Override + public Path getWorkingDirectory() { + if (this.vfs == null) { + return super.getWorkingDirectory(); + } + return this.vfs.getWorkingDirectory(); + } + + @Override + public void setWorkingDirectory(Path dir) { + if (this.vfs == null) { + super.setWorkingDirectory(dir); + return; + } + this.vfs.setWorkingDirectory(dir); + } + + @Override + public Path getHomeDirectory() { + if (super.dfs == null) { + return null; + } + if (this.vfs == null) { + return super.getHomeDirectory(); + } + return this.vfs.getHomeDirectory(); + } + + /** + * Returns only default cluster getHedgedReadMetrics. 
+ */ + @Override + public DFSHedgedReadMetrics getHedgedReadMetrics() { + if (this.vfs == null) { + return super.getHedgedReadMetrics(); + } + checkDefaultDFS(defaultDFS, "getHedgedReadMetrics"); + return defaultDFS.getHedgedReadMetrics(); + } + + @Override + public BlockLocation[] getFileBlockLocations(FileStatus fs, long start, + long len) throws IOException { + if (this.vfs == null) { + return super.getFileBlockLocations(fs, start, len); + } + return this.vfs.getFileBlockLocations(fs, start, len); + } + + @Override + public BlockLocation[] getFileBlockLocations(Path p, final long start, + final long len) throws IOException { + if (this.vfs == null) { + return super.getFileBlockLocations(p, start, len); + } + return this.vfs.getFileBlockLocations(p, start, len); + } + + @Override + public void setVerifyChecksum(final boolean verifyChecksum) { + if (this.vfs == null) { + super.setVerifyChecksum(verifyChecksum); + return; + } + this.vfs.setVerifyChecksum(verifyChecksum); + } + + @Override + public boolean recoverLease(final Path f) throws IOException { + if (this.vfs == null) { + return super.recoverLease(f); + } + + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "recoverLease"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .recoverLease(mountPathInfo.getPathOnTarget()); + } + + @Override + public FSDataInputStream open(final Path f, final int bufferSize) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.open(f, bufferSize); + } + + return this.vfs.open(f, bufferSize); + } + + @Override + public FSDataInputStream open(PathHandle fd, int bufferSize) + throws IOException { + if (this.vfs == null) { + return super.open(fd, bufferSize); + } + return this.vfs.open(fd, bufferSize); + } + + @Override + protected HdfsPathHandle createPathHandle(FileStatus st, + Options.HandleOpt... 
opts) { + if (this.vfs == null) { + return super.createPathHandle(st, opts); + } + throw new UnsupportedOperationException(); + } + + @Override + public FSDataOutputStream append(final Path f, final int bufferSize, + final Progressable progress) throws IOException { + if (this.vfs == null) { + return super.append(f, bufferSize, progress); + } + return this.vfs.append(f, bufferSize, progress); + } + + @Override + public FSDataOutputStream append(Path f, final EnumSet flag, + final int bufferSize, final Progressable progress) throws IOException { + if (this.vfs == null) { + return super.append(f, flag, bufferSize, progress); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "append"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .append(mountPathInfo.getPathOnTarget(), flag, bufferSize, progress); + } + + @Override + public FSDataOutputStream append(Path f, final EnumSet flag, + final int bufferSize, final Progressable progress, + final InetSocketAddress[] favoredNodes) throws IOException { + if (this.vfs == null) { + return super.append(f, flag, bufferSize, progress, favoredNodes); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "append"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .append(mountPathInfo.getPathOnTarget(), flag, bufferSize, progress, + favoredNodes); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + if (this.vfs == null) { + return super + .create(f, permission, overwrite, bufferSize, replication, blockSize, + progress); + } + return this.vfs + .create(f, permission, overwrite, bufferSize, replication, blockSize, + progress); + } + + @Override + public 
HdfsDataOutputStream create(final Path f, + final FsPermission permission, final boolean overwrite, + final int bufferSize, final short replication, final long blockSize, + final Progressable progress, final InetSocketAddress[] favoredNodes) + throws IOException { + if (this.vfs == null) { + return super + .create(f, permission, overwrite, bufferSize, replication, blockSize, + progress, favoredNodes); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "create"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .create(mountPathInfo.getPathOnTarget(), permission, overwrite, + bufferSize, replication, blockSize, progress, favoredNodes); + } + + @Override + //DFS specific API + public FSDataOutputStream create(final Path f, final FsPermission permission, + final EnumSet cflags, final int bufferSize, + final short replication, final long blockSize, + final Progressable progress, final Options.ChecksumOpt checksumOpt) + throws IOException { + if (this.vfs == null) { + return super + .create(f, permission, cflags, bufferSize, replication, blockSize, + progress, checksumOpt); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "create"); + return mountPathInfo.getTargetFs() + .create(mountPathInfo.getPathOnTarget(), permission, cflags, bufferSize, + replication, blockSize, progress, checksumOpt); + } + + void checkDFS(FileSystem fs, String methodName) { + if (!(fs instanceof DistributedFileSystem)) { + String msg = new StringBuilder("This API:").append(methodName) + .append(" is specific to DFS. 
Can't run on other fs:") + .append(fs.getUri()).toString(); + throw new UnsupportedOperationException(msg); + } + } + + void checkDefaultDFS(FileSystem fs, String methodName) { + if (fs == null) { + String msg = new StringBuilder("This API:").append(methodName).append( + " cannot be supported without default cluster(that is linkFallBack).") + .toString(); + throw new UnsupportedOperationException(msg); + } + } + + @Override + // DFS specific API + protected HdfsDataOutputStream primitiveCreate(Path f, + FsPermission absolutePermission, EnumSet flag, int bufferSize, + short replication, long blockSize, Progressable progress, + Options.ChecksumOpt checksumOpt) throws IOException { + if (this.vfs == null) { + return super + .primitiveCreate(f, absolutePermission, flag, bufferSize, replication, + blockSize, progress, checksumOpt); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "primitiveCreate"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .primitiveCreate(f, absolutePermission, flag, bufferSize, replication, + blockSize, progress, checksumOpt); + } + + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, + long blockSize, Progressable progress) throws IOException { + if (this.vfs == null) { + return super + .createNonRecursive(f, permission, flags, bufferSize, replication, + bufferSize, progress); + } + return this.vfs + .createNonRecursive(f, permission, flags, bufferSize, replication, + bufferSize, progress); + } + + @Override + public boolean setReplication(final Path f, final short replication) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.setReplication(f, replication); + } + return this.vfs.setReplication(f, replication); + } + + @Override + public void setStoragePolicy(Path src, String 
policyName) throws IOException { + if (this.vfs == null) { + super.setStoragePolicy(src, policyName); + return; + } + this.vfs.setStoragePolicy(src, policyName); + } + + @Override + public void unsetStoragePolicy(Path src) throws IOException { + if (this.vfs == null) { + super.unsetStoragePolicy(src); + return; + } + this.vfs.unsetStoragePolicy(src); + } + + @Override + public BlockStoragePolicySpi getStoragePolicy(Path src) throws IOException { + if (this.vfs == null) { + return super.getStoragePolicy(src); + } + return this.vfs.getStoragePolicy(src); + } + + @Override + public Collection getAllStoragePolicies() + throws IOException { + if (this.vfs == null) { + return super.getAllStoragePolicies(); + } + Collection allStoragePolicies = + this.vfs.getAllStoragePolicies(); + return (Collection) allStoragePolicies; + } + + @Override + public long getBytesWithFutureGenerationStamps() throws IOException { + if (this.vfs == null) { + return super.getBytesWithFutureGenerationStamps(); + } + checkDefaultDFS(defaultDFS, "getBytesWithFutureGenerationStamps"); + return defaultDFS.getBytesWithFutureGenerationStamps(); + } + + @Deprecated + @Override + public BlockStoragePolicy[] getStoragePolicies() throws IOException { + if (this.vfs == null) { + return super.getStoragePolicies(); + } + checkDefaultDFS(defaultDFS, "getStoragePolicies"); + return defaultDFS.getStoragePolicies(); + } + + @Override + //Make sure your target fs supports this API, otherwise you will get + // Unsupported operation exception. 
+ public void concat(Path trg, Path[] psrcs) throws IOException { + if (this.vfs == null) { + super.concat(trg, psrcs); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(trg, getConf()); + mountPathInfo.getTargetFs().concat(mountPathInfo.getPathOnTarget(), psrcs); + } + + @SuppressWarnings("deprecation") + @Override + public boolean rename(final Path src, final Path dst) throws IOException { + if (this.vfs == null) { + return super.rename(src, dst); + } + return this.vfs.rename(src, dst); + } + + @SuppressWarnings("deprecation") + @Override + public void rename(Path src, Path dst, final Options.Rename... options) + throws IOException { + if (this.vfs == null) { + super.rename(src, dst, options); + return; + } + + ViewFileSystemOverloadScheme.MountPathInfo mountSrcPathInfo = + this.vfs.getMountPathInfo(src, getConf()); + + ViewFileSystemOverloadScheme.MountPathInfo mountDstPathInfo = + this.vfs.getMountPathInfo(dst, getConf()); + + //Check both in same cluster. 
+ if (!mountSrcPathInfo.getTargetFs().getUri() + .equals(mountDstPathInfo.getTargetFs().getUri())) { + throw new HadoopIllegalArgumentException( + "Can't rename across file systems."); + } + + FileUtil.rename(mountSrcPathInfo.getTargetFs(), + mountSrcPathInfo.getPathOnTarget(), mountDstPathInfo.getPathOnTarget(), + options); + } + + @Override + public boolean truncate(final Path f, final long newLength) + throws IOException { + if (this.vfs == null) { + return super.truncate(f, newLength); + } + return this.vfs.truncate(f, newLength); + } + + public boolean delete(final Path f, final boolean recursive) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.delete(f, recursive); + } + return this.vfs.delete(f, recursive); + } + + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + if (this.vfs == null) { + return super.getContentSummary(f); + } + return this.vfs.getContentSummary(f); + } + + @Override + public QuotaUsage getQuotaUsage(Path f) throws IOException { + if (this.vfs == null) { + return super.getQuotaUsage(f); + } + return this.vfs.getQuotaUsage(f); + } + + @Override + public void setQuota(Path src, final long namespaceQuota, + final long storagespaceQuota) throws IOException { + if (this.vfs == null) { + super.setQuota(src, namespaceQuota, storagespaceQuota); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(src, getConf()); + mountPathInfo.getTargetFs() + .setQuota(mountPathInfo.getPathOnTarget(), namespaceQuota, + storagespaceQuota); + } + + @Override + public void setQuotaByStorageType(Path src, final StorageType type, + final long quota) throws IOException { + if (this.vfs == null) { + super.setQuotaByStorageType(src, type, quota); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(src, getConf()); + mountPathInfo.getTargetFs() + 
.setQuotaByStorageType(mountPathInfo.getPathOnTarget(), type, quota); + } + + @Override + public FileStatus[] listStatus(Path p) throws IOException { + if (this.vfs == null) { + return super.listStatus(p); + } + return this.vfs.listStatus(p); + } + + @Override + public RemoteIterator listLocatedStatus(final Path f, + final PathFilter filter) throws FileNotFoundException, IOException { + if (this.vfs == null) { + return super.listLocatedStatus(f, filter); + } + return this.vfs.listLocatedStatus(f, filter); + } + + @Override + public RemoteIterator listStatusIterator(final Path p) + throws IOException { + if (this.vfs == null) { + return super.listStatusIterator(p); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(p, getConf()); + return mountPathInfo.getTargetFs() + .listStatusIterator(mountPathInfo.getPathOnTarget()); + } + + @Override + public RemoteIterator> batchedListStatusIterator( + final List paths) throws IOException { + if (this.vfs == null) { + return super.batchedListStatusIterator(paths); + } + // TODO: revisit for correct implementation. + return this.defaultDFS.batchedListStatusIterator(paths); + } + + @Override + public RemoteIterator> batchedListLocatedStatusIterator( + final List paths) throws IOException { + if (this.vfs == null) { + return super.batchedListLocatedStatusIterator(paths); + } + // TODO: revisit for correct implementation. 
+ return this.defaultDFS.batchedListLocatedStatusIterator(paths); + } + + public boolean mkdir(Path f, FsPermission permission) throws IOException { + if (this.vfs == null) { + return super.mkdir(f, permission); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "mkdir"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .mkdir(mountPathInfo.getPathOnTarget(), permission); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + if (this.vfs == null) { + return super.mkdirs(f, permission); + } + return this.vfs.mkdirs(f, permission); + } + + @SuppressWarnings("deprecation") + @Override + protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) + throws IOException { + if (this.vfs == null) { + return super.primitiveMkdir(f, absolutePermission); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "primitiveMkdir"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .primitiveMkdir(mountPathInfo.getPathOnTarget(), absolutePermission); + } + + @Override + public void close() throws IOException { + if (this.vfs != null) { + this.vfs.close(); + } + super.close(); + } + + @InterfaceAudience.Private + @Override + public DFSClient getClient() { + if (this.vfs == null) { + return super.getClient(); + } + checkDefaultDFS(defaultDFS, "getClient"); + return defaultDFS.getClient(); + } + + @Override + public FsStatus getStatus(Path p) throws IOException { + if (this.vfs == null) { + return super.getStatus(p); + } + return this.vfs.getStatus(p); + } + + @Override + public long getMissingBlocksCount() throws IOException { + if (this.vfs == null) { + return super.getMissingBlocksCount(); + } + checkDefaultDFS(defaultDFS, "getMissingBlocksCount"); + return defaultDFS.getMissingBlocksCount(); + } + + 
@Override + public long getPendingDeletionBlocksCount() throws IOException { + if (this.vfs == null) { + return super.getPendingDeletionBlocksCount(); + } + checkDefaultDFS(defaultDFS, "getPendingDeletionBlocksCount"); + return defaultDFS.getPendingDeletionBlocksCount(); + } + + @Override + public long getMissingReplOneBlocksCount() throws IOException { + if (this.vfs == null) { + return super.getMissingReplOneBlocksCount(); + } + checkDefaultDFS(defaultDFS, "getMissingReplOneBlocksCount"); + return defaultDFS.getMissingReplOneBlocksCount(); + } + + @Override + public long getLowRedundancyBlocksCount() throws IOException { + if (this.vfs == null) { + return super.getLowRedundancyBlocksCount(); + } + checkDefaultDFS(defaultDFS, "getLowRedundancyBlocksCount"); + return defaultDFS.getLowRedundancyBlocksCount(); + } + + @Override + public long getCorruptBlocksCount() throws IOException { + if (this.vfs == null) { + return super.getCorruptBlocksCount(); + } + checkDefaultDFS(defaultDFS, "getCorruptBlocksCount"); + return defaultDFS.getLowRedundancyBlocksCount(); + } + + @Override + public RemoteIterator listCorruptFileBlocks(final Path path) + throws IOException { + if (this.vfs == null) { + return super.listCorruptFileBlocks(path); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + return mountPathInfo.getTargetFs() + .listCorruptFileBlocks(mountPathInfo.getPathOnTarget()); + } + + @Override + public DatanodeInfo[] getDataNodeStats() throws IOException { + if (this.vfs == null) { + return super.getDataNodeStats(); + } + checkDefaultDFS(defaultDFS, "getDataNodeStats"); + return defaultDFS.getDataNodeStats(); + } + + @Override + public DatanodeInfo[] getDataNodeStats( + final HdfsConstants.DatanodeReportType type) throws IOException { + if (this.vfs == null) { + return super.getDataNodeStats(type); + } + checkDefaultDFS(defaultDFS, "getDataNodeStats"); + return defaultDFS.getDataNodeStats(type); + } + + 
@Override + @SuppressWarnings("deprecation") + public boolean setSafeMode(HdfsConstants.SafeModeAction action) + throws IOException { + if (this.vfs == null) { + return super.setSafeMode(action); + } + checkDefaultDFS(defaultDFS, "setSafeMode"); + return defaultDFS.setSafeMode(action); + } + + @Override + @SuppressWarnings("deprecation") + public boolean setSafeMode(HdfsConstants.SafeModeAction action, + boolean isChecked) throws IOException { + if (this.vfs == null) { + return super.setSafeMode(action, isChecked); + } + checkDefaultDFS(defaultDFS, "setSafeMode"); + return defaultDFS.setSafeMode(action, isChecked); + } + + @Override + public boolean saveNamespace(long timeWindow, long txGap) throws IOException { + if (this.vfs == null) { + return super.saveNamespace(timeWindow, txGap); + } + checkDefaultDFS(defaultDFS, "saveNamespace"); + return defaultDFS.saveNamespace(timeWindow, txGap); + } + + @Override + public void saveNamespace() throws IOException { + if (this.vfs == null) { + super.saveNamespace(); + return; + } + checkDefaultDFS(defaultDFS, "saveNamespace"); + defaultDFS.saveNamespace(); + } + + @Override + public long rollEdits() throws IOException { + if (this.vfs == null) { + return super.rollEdits(); + } + checkDefaultDFS(defaultDFS, "rollEdits"); + return defaultDFS.rollEdits(); + } + + @Override + public boolean restoreFailedStorage(String arg) throws IOException { + if (this.vfs == null) { + return super.restoreFailedStorage(arg); + } + checkDefaultDFS(defaultDFS, "restoreFailedStorage"); + return defaultDFS.restoreFailedStorage(arg); + } + + @Override + public void refreshNodes() throws IOException { + if (this.vfs == null) { + super.refreshNodes(); + return; + } + checkDefaultDFS(defaultDFS, "refreshNodes"); + defaultDFS.refreshNodes(); + } + + @Override + public void finalizeUpgrade() throws IOException { + if (this.vfs == null) { + super.finalizeUpgrade(); + return; + } + checkDefaultDFS(defaultDFS, "finalizeUpgrade"); + 
defaultDFS.finalizeUpgrade(); + } + + @Override + public boolean upgradeStatus() throws IOException { + if (this.vfs == null) { + return super.upgradeStatus(); + } + checkDefaultDFS(defaultDFS, "upgradeStatus"); + return defaultDFS.upgradeStatus(); + } + + @Override + public RollingUpgradeInfo rollingUpgrade( + HdfsConstants.RollingUpgradeAction action) throws IOException { + if (this.vfs == null) { + return super.rollingUpgrade(action); + } + checkDefaultDFS(defaultDFS, "rollingUpgrade"); + return defaultDFS.rollingUpgrade(action); + } + + @Override + public void metaSave(String pathname) throws IOException { + if (this.vfs == null) { + super.metaSave(pathname); + return; + } + checkDefaultDFS(defaultDFS, "metaSave"); + defaultDFS.metaSave(pathname); + } + + @Override + public FsServerDefaults getServerDefaults() throws IOException { + if (this.vfs == null) { + return super.getServerDefaults(); + } + checkDefaultDFS(defaultDFS, "getServerDefaults"); + //TODO: Need to revisit. + return defaultDFS.getServerDefaults(); + } + + @Override + public FileStatus getFileStatus(final Path f) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.getFileStatus(f); + } + return this.vfs.getFileStatus(f); + } + + @SuppressWarnings("deprecation") + @Override + public void createSymlink(final Path target, final Path link, + final boolean createParent) throws IOException { + // Regular DFS behavior + if (this.vfs == null) { + super.createSymlink(target, link, createParent); + return; + } + + throw new UnsupportedOperationException( + "createSymlink is not supported in ViewHDFS"); + } + + @Override + public boolean supportsSymlinks() { + if (this.vfs == null) { + return super.supportsSymlinks(); + } + // we can enabled later if we want to support symlinks. 
+ return false; + } + + @Override + public FileStatus getFileLinkStatus(final Path f) throws IOException { + if (this.vfs == null) { + return super.getFileLinkStatus(f); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + return mountPathInfo.getTargetFs() + .getFileLinkStatus(mountPathInfo.getPathOnTarget()); + } + + @Override + public Path getLinkTarget(Path path) throws IOException { + if(this.vfs==null){ + return super.getLinkTarget(path); + } + return this.vfs.getLinkTarget(path); + } + + @Override + protected Path resolveLink(Path f) throws IOException { + if(this.vfs==null){ + return super.resolveLink(f); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(f, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "resolveLink"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .resolveLink(mountPathInfo.getPathOnTarget()); + } + + @Override + public FileChecksum getFileChecksum(final Path f) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.getFileChecksum(f); + } + return this.vfs.getFileChecksum(f); + } + + @Override + public void setPermission(final Path f, final FsPermission permission) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + super.setPermission(f, permission); + return; + } + this.vfs.setPermission(f, permission); + } + + @Override + public void setOwner(final Path f, final String username, + final String groupname) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + super.setOwner(f, username, groupname); + return; + } + this.vfs.setOwner(f, username, groupname); + } + + @Override + public void setTimes(final Path f, final long mtime, final long atime) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + super.setTimes(f, mtime, atime); 
+ return; + } + this.vfs.setTimes(f, mtime, atime); + } + + @Override + // DFS specific API + protected int getDefaultPort() { + return super.getDefaultPort(); + } + + /** + * If no mount points configured, it works same as + * {@link DistributedFileSystem#getDelegationToken(String)}. If + * there are mount points configured and if default fs(linkFallback) + * configured, then it will return default fs delegation token. Otherwise + * it will return null. + */ + @Override + public Token getDelegationToken(String renewer) + throws IOException { + if (this.vfs == null) { + return super.getDelegationToken(renewer); + } + + if (defaultDFS != null) { + return defaultDFS.getDelegationToken(renewer); + } + return null; + } + + @Override + public void setBalancerBandwidth(long bandwidth) throws IOException { + if (this.vfs == null) { + super.setBalancerBandwidth(bandwidth); + return; + } + checkDefaultDFS(defaultDFS, "setBalancerBandwidth"); + defaultDFS.setBalancerBandwidth(bandwidth); + } + + @Override + public String getCanonicalServiceName() { + if (this.vfs == null) { + return super.getCanonicalServiceName(); + } + checkDefaultDFS(defaultDFS, "getCanonicalServiceName"); + return defaultDFS.getCanonicalServiceName(); + } + + @Override + protected URI canonicalizeUri(URI uri) { + if (this.vfs == null) { + return super.canonicalizeUri(uri); + } + + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = null; + try { + mountPathInfo = this.vfs.getMountPathInfo(new Path(uri), getConf()); + } catch (IOException e) { + LOGGER.warn("Failed to resolve the uri as mount path", e); + return null; + } + checkDFS(mountPathInfo.getTargetFs(), "canonicalizeUri"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .canonicalizeUri(uri); + } + + @Override + public boolean isInSafeMode() throws IOException { + if (this.vfs == null) { + return super.isInSafeMode(); + } + checkDefaultDFS(defaultDFS, "isInSafeMode"); + return defaultDFS.isInSafeMode(); + } + + @Override + 
// DFS specific API + public void allowSnapshot(Path path) throws IOException { + if (this.vfs == null) { + super.allowSnapshot(path); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "allowSnapshot"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .allowSnapshot(mountPathInfo.getPathOnTarget()); + } + + @Override + public void disallowSnapshot(final Path path) throws IOException { + if (this.vfs == null) { + super.disallowSnapshot(path); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "disallowSnapshot"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .disallowSnapshot(mountPathInfo.getPathOnTarget()); + } + + @Override + public Path createSnapshot(Path path, String snapshotName) + throws IOException { + if (this.vfs == null) { + return super.createSnapshot(path, snapshotName); + } + return this.vfs.createSnapshot(path, snapshotName); + } + + @Override + public void renameSnapshot(Path path, String snapshotOldName, + String snapshotNewName) throws IOException { + if (this.vfs == null) { + super.renameSnapshot(path, snapshotOldName, snapshotNewName); + return; + } + this.vfs.renameSnapshot(path, snapshotOldName, snapshotNewName); + } + + @Override + // Only for HDFS users + public SnapshottableDirectoryStatus[] getSnapshottableDirListing() + throws IOException { + if (this.vfs == null) { + return super.getSnapshottableDirListing(); + } + checkDefaultDFS(defaultDFS, "getSnapshottableDirListing"); + return defaultDFS.getSnapshottableDirListing(); + } + + @Override + public void deleteSnapshot(Path path, String snapshotName) + throws IOException { + if (this.vfs == null) { + super.deleteSnapshot(path, snapshotName); + return; + } + this.vfs.deleteSnapshot(path, snapshotName); + } + + @Override + public RemoteIterator 
snapshotDiffReportListingRemoteIterator( + final Path snapshotDir, final String fromSnapshot, + final String toSnapshot) throws IOException { + if (this.vfs == null) { + return super + .snapshotDiffReportListingRemoteIterator(snapshotDir, fromSnapshot, + toSnapshot); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(snapshotDir, getConf()); + checkDFS(mountPathInfo.getTargetFs(), + "snapshotDiffReportListingRemoteIterator"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .snapshotDiffReportListingRemoteIterator( + mountPathInfo.getPathOnTarget(), fromSnapshot, toSnapshot); + } + + @Override + public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir, + final String fromSnapshot, final String toSnapshot) throws IOException { + if (this.vfs == null) { + return super.getSnapshotDiffReport(snapshotDir, fromSnapshot, toSnapshot); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(snapshotDir, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "getSnapshotDiffReport"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .getSnapshotDiffReport(mountPathInfo.getPathOnTarget(), fromSnapshot, + toSnapshot); + } + + @Override + public boolean isFileClosed(final Path src) throws IOException { + if (this.vfs == null) { + return super.isFileClosed(src); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(src, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "isFileClosed"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .isFileClosed(mountPathInfo.getPathOnTarget()); + } + + @Override + public long addCacheDirective(CacheDirectiveInfo info) throws IOException { + if (this.vfs == null) { + return super.addCacheDirective(info); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(info.getPath(), getConf()); + checkDFS(mountPathInfo.getTargetFs(), 
"addCacheDirective"); + + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .addCacheDirective(new CacheDirectiveInfo.Builder(info) + .setPath(mountPathInfo.getPathOnTarget()).build()); + } + + @Override + public long addCacheDirective(CacheDirectiveInfo info, + EnumSet flags) throws IOException { + if (this.vfs == null) { + return super.addCacheDirective(info, flags); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(info.getPath(), getConf()); + checkDFS(mountPathInfo.getTargetFs(), "addCacheDirective"); + + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .addCacheDirective(new CacheDirectiveInfo.Builder(info) + .setPath(mountPathInfo.getPathOnTarget()).build(), flags); + } + + @Override + public void modifyCacheDirective(CacheDirectiveInfo info) throws IOException { + if (this.vfs == null) { + super.modifyCacheDirective(info); + return; + } + if (info.getPath() != null) { + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(info.getPath(), getConf()); + checkDFS(mountPathInfo.getTargetFs(), "modifyCacheDirective"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .modifyCacheDirective(new CacheDirectiveInfo.Builder(info) + .setPath(mountPathInfo.getPathOnTarget()).build()); + return; + } + + // No path available in CacheDirectiveInfo, Let's shoot to all child fs. 
+ List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.modifyCacheDirective(info); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void modifyCacheDirective(CacheDirectiveInfo info, + EnumSet flags) throws IOException { + if (this.vfs == null) { + super.modifyCacheDirective(info, flags); + return; + } + if (info.getPath() != null) { + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(info.getPath(), getConf()); + checkDFS(mountPathInfo.getTargetFs(), "modifyCacheDirective"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .modifyCacheDirective(new CacheDirectiveInfo.Builder(info) + .setPath(mountPathInfo.getPathOnTarget()).build(), flags); + return; + } + // No path available in CacheDirectiveInfo, Let's shoot to all child fs. 
+ List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.modifyCacheDirective(info, flags); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void removeCacheDirective(long id) throws IOException { + if (this.vfs == null) { + super.removeCacheDirective(id); + return; + } + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.removeCacheDirective(id); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public RemoteIterator listCacheDirectives( + CacheDirectiveInfo filter) throws IOException { + if (this.vfs == null) { + return super.listCacheDirectives(filter); + } + + if (filter != null && filter.getPath() != null) { + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(filter.getPath(), getConf()); + checkDFS(mountPathInfo.getTargetFs(), "listCacheDirectives"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .listCacheDirectives(new CacheDirectiveInfo.Builder(filter) + 
.setPath(mountPathInfo.getPathOnTarget()).build()); + } + + // No path available in filter. Let's try to shoot to all child fs. + final List> iters = new ArrayList<>(); + for (FileSystem fs : getChildFileSystems()) { + if (fs instanceof DistributedFileSystem) { + iters.add(((DistributedFileSystem) fs).listCacheDirectives(filter)); + } + } + if (iters.size() == 0) { + throw new UnsupportedOperationException( + "No DFS found in child fs. This API can't be supported in non DFS"); + } + + return new RemoteIterator() { + int currIdx = 0; + RemoteIterator currIter = iters.get(currIdx++); + + @Override + public boolean hasNext() throws IOException { + if (currIter.hasNext()) { + return true; + } + while (currIdx < iters.size()) { + currIter = iters.get(currIdx++); + if (currIter.hasNext()) { + return true; + } + } + return false; + } + + @Override + public CacheDirectiveEntry next() throws IOException { + if (hasNext()) { + return currIter.next(); + } + throw new NoSuchElementException("No more elements"); + } + }; + } + + //Currently Cache pool APIs supported only in default cluster. 
+ @Override + public void addCachePool(CachePoolInfo info) throws IOException { + if (this.vfs == null) { + super.addCachePool(info); + return; + } + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.addCachePool(info); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void modifyCachePool(CachePoolInfo info) throws IOException { + if (this.vfs == null) { + super.modifyCachePool(info); + return; + } + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.modifyCachePool(info); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void removeCachePool(String poolName) throws IOException { + if (this.vfs == null) { + super.removeCachePool(poolName); + return; + } + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + 
dfs.removeCachePool(poolName); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public RemoteIterator listCachePools() throws IOException { + if (this.vfs == null) { + return super.listCachePools(); + } + + List childDFSs = new ArrayList<>(); + for (FileSystem fs : getChildFileSystems()) { + if (fs instanceof DistributedFileSystem) { + childDFSs.add((DistributedFileSystem) fs); + } + } + if (childDFSs.size() == 0) { + throw new UnsupportedOperationException( + "No DFS found in child fs. This API can't be supported in non DFS"); + } + return new RemoteIterator() { + int curDfsIdx = 0; + RemoteIterator currIter = + childDFSs.get(curDfsIdx++).listCachePools(); + + @Override + public boolean hasNext() throws IOException { + if (currIter.hasNext()) { + return true; + } + while (curDfsIdx < childDFSs.size()) { + currIter = childDFSs.get(curDfsIdx++).listCachePools(); + if (currIter.hasNext()) { + return true; + } + } + return false; + } + + @Override + public CachePoolEntry next() throws IOException { + if (hasNext()) { + return currIter.next(); + } + throw new java.util.NoSuchElementException("No more entries"); + } + }; + } + + @Override + public void modifyAclEntries(Path path, List aclSpec) + throws IOException { + if (this.vfs == null) { + super.modifyAclEntries(path, aclSpec); + return; + } + this.vfs.modifyAclEntries(path, aclSpec); + } + + @Override + public void removeAclEntries(Path path, List aclSpec) + throws IOException { + if (this.vfs == null) { + super.removeAclEntries(path, aclSpec); + return; + } + this.vfs.removeAclEntries(path, aclSpec); + } + + @Override + public void removeDefaultAcl(Path path) throws IOException { + if (this.vfs == null) { + super.removeDefaultAcl(path); + return; + } + 
this.vfs.removeDefaultAcl(path); + } + + @Override + public void removeAcl(Path path) throws IOException { + if (this.vfs == null) { + super.removeAcl(path); + return; + } + this.vfs.removeAcl(path); + } + + @Override + public void setAcl(Path path, List aclSpec) throws IOException { + if (this.vfs == null) { + super.setAcl(path, aclSpec); + return; + } + this.vfs.setAcl(path, aclSpec); + } + + @Override + public AclStatus getAclStatus(Path path) throws IOException { + if (this.vfs == null) { + return super.getAclStatus(path); + } + return this.vfs.getAclStatus(path); + } + + @Override + public void createEncryptionZone(final Path path, final String keyName) + throws IOException { + if (this.vfs == null) { + super.createEncryptionZone(path, keyName); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "createEncryptionZone"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .createEncryptionZone(mountPathInfo.getPathOnTarget(), keyName); + } + + @Override + public EncryptionZone getEZForPath(final Path path) throws IOException { + if (this.vfs == null) { + return super.getEZForPath(path); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "getEZForPath"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .getEZForPath(mountPathInfo.getPathOnTarget()); + } + + /** + * Returns the results from default DFS (fallback). If you want the results + * from specific clusters, please invoke them on child fs instance directly. 
+ */ + @Override + public RemoteIterator listEncryptionZones() + throws IOException { + if (this.vfs == null) { + return super.listEncryptionZones(); + } + checkDefaultDFS(defaultDFS, "listEncryptionZones"); + return defaultDFS.listEncryptionZones(); + } + + @Override + public void reencryptEncryptionZone(final Path zone, + final HdfsConstants.ReencryptAction action) throws IOException { + if (this.vfs == null) { + super.reencryptEncryptionZone(zone, action); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(zone, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "reencryptEncryptionZone"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .reencryptEncryptionZone(mountPathInfo.getPathOnTarget(), action); + } + + /** + * Returns the results from default DFS (fallback). If you want the results + * from specific clusters, please invoke them on child fs instance directly. + */ + @Override + public RemoteIterator listReencryptionStatus() + throws IOException { + if (this.vfs == null) { + return super.listReencryptionStatus(); + } + checkDefaultDFS(defaultDFS, "listReencryptionStatus"); + return defaultDFS.listReencryptionStatus(); + } + + @Override + public FileEncryptionInfo getFileEncryptionInfo(final Path path) + throws IOException { + if (this.vfs == null) { + return super.getFileEncryptionInfo(path); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "getFileEncryptionInfo"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .getFileEncryptionInfo(mountPathInfo.getPathOnTarget()); + } + + @Override + public void provisionEZTrash(final Path path, + final FsPermission trashPermission) throws IOException { + if (this.vfs == null) { + super.provisionEZTrash(path, trashPermission); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, 
getConf()); + checkDFS(mountPathInfo.getTargetFs(), "provisionEZTrash"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .provisionEZTrash(mountPathInfo.getPathOnTarget(), trashPermission); + } + + @Override + public void setXAttr(Path path, String name, byte[] value, + EnumSet flag) throws IOException { + if (this.vfs == null) { + super.setXAttr(path, name, value, flag); + return; + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + mountPathInfo.getTargetFs() + .setXAttr(mountPathInfo.getPathOnTarget(), name, value, flag); + } + + @Override + public byte[] getXAttr(Path path, String name) throws IOException { + if (this.vfs == null) { + return super.getXAttr(path, name); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + return mountPathInfo.getTargetFs() + .getXAttr(mountPathInfo.getPathOnTarget(), name); + } + + @Override + public Map getXAttrs(Path path) throws IOException { + if (this.vfs == null) { + return super.getXAttrs(path); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + return mountPathInfo.getTargetFs() + .getXAttrs(mountPathInfo.getPathOnTarget()); + } + + @Override + public Map getXAttrs(Path path, List names) + throws IOException { + if (this.vfs == null) { + return super.getXAttrs(path, names); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + return mountPathInfo.getTargetFs() + .getXAttrs(mountPathInfo.getPathOnTarget(), names); + } + + @Override + public List listXAttrs(Path path) throws IOException { + if (this.vfs == null) { + return super.listXAttrs(path); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + return mountPathInfo.getTargetFs() + .listXAttrs(mountPathInfo.getPathOnTarget()); + } + + @Override + public void 
removeXAttr(Path path, String name) throws IOException {
+    if (this.vfs == null) {
+      super.removeXAttr(path, name);
+      return;
+    }
+    // Resolve the view path to its mount target and remove the xattr there.
+    ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo =
+        this.vfs.getMountPathInfo(path, getConf());
+    mountPathInfo.getTargetFs()
+        .removeXAttr(mountPathInfo.getPathOnTarget(), name);
+  }
+
+  /**
+   * Checks access on the given path: delegates to the parent class when
+   * {@code vfs} is null, otherwise to the wrapped view file system.
+   */
+  @Override
+  public void access(Path path, FsAction mode)
+      throws AccessControlException, FileNotFoundException, IOException {
+    if (this.vfs == null) {
+      super.access(path, mode);
+      return;
+    }
+    this.vfs.access(path, mode);
+  }
+
+  /**
+   * Returns the key provider URI of the parent class when {@code vfs} is
+   * null, otherwise that of the default DFS. checkDefaultDFS is defined
+   * outside this chunk; presumably it rejects a missing default DFS.
+   */
+  @Override
+  public URI getKeyProviderUri() throws IOException {
+    if (this.vfs == null) {
+      return super.getKeyProviderUri();
+    }
+    checkDefaultDFS(defaultDFS, "getKeyProviderUri");
+    return defaultDFS.getKeyProviderUri();
+  }
+
+  /**
+   * Returns the key provider of the parent class when {@code vfs} is null,
+   * otherwise that of the default DFS.
+   */
+  @Override
+  public KeyProvider getKeyProvider() throws IOException {
+    if (this.vfs == null) {
+      return super.getKeyProvider();
+    }
+    checkDefaultDFS(defaultDFS, "getKeyProvider");
+    return defaultDFS.getKeyProvider();
+  }
+
+  // NOTE(review): the vfs == null branch returns super.getChildFileSystems()
+  // rather than super.getAdditionalTokenIssuers(), unlike the other overrides
+  // here, which fall back to the same-named super method. Confirm that this
+  // is intentional; behaviour is left unchanged.
+  @Override
+  public DelegationTokenIssuer[] getAdditionalTokenIssuers()
+      throws IOException {
+    if (this.vfs == null) {
+      return super.getChildFileSystems();
+    }
+
+    return this.vfs.getChildFileSystems();
+  }
+
+  /**
+   * Returns the inotify event stream of the parent class when {@code vfs} is
+   * null, otherwise the one of the default DFS.
+   */
+  @Override
+  public DFSInotifyEventInputStream getInotifyEventStream() throws IOException {
+    if (this.vfs == null) {
+      return super.getInotifyEventStream();
+    }
+    checkDefaultDFS(defaultDFS, "getInotifyEventStream");
+    return defaultDFS.getInotifyEventStream();
+  }
+
+  /**
+   * Same as {@link #getInotifyEventStream()}, but forwards
+   * {@code lastReadTxid} to the overload that accepts it, both on the parent
+   * class and on the default DFS, so the caller-supplied transaction id is
+   * honoured instead of being silently dropped.
+   *
+   * @param lastReadTxid transaction id passed through to the delegate.
+   * @return the event stream produced by the delegate.
+   * @throws IOException if the delegate fails.
+   */
+  @Override
+  public DFSInotifyEventInputStream getInotifyEventStream(long lastReadTxid)
+      throws IOException {
+    if (this.vfs == null) {
+      return super.getInotifyEventStream(lastReadTxid);
+    }
+    checkDefaultDFS(defaultDFS, "getInotifyEventStream");
+    return defaultDFS.getInotifyEventStream(lastReadTxid);
+  }
+
+  @Override
+  // DFS only API.
+ public void setErasureCodingPolicy(final Path path, final String ecPolicyName) + throws IOException { + if (this.vfs == null) { + super.setErasureCodingPolicy(path, ecPolicyName); + return; + } + + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "setErasureCodingPolicy"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .setErasureCodingPolicy(mountPathInfo.getPathOnTarget(), ecPolicyName); + } + + @Override + public void satisfyStoragePolicy(Path src) throws IOException { + if (this.vfs == null) { + super.satisfyStoragePolicy(src); + return; + } + this.vfs.satisfyStoragePolicy(src); + } + + @Override + public ErasureCodingPolicy getErasureCodingPolicy(final Path path) + throws IOException { + if (this.vfs == null) { + return super.getErasureCodingPolicy(path); + } + + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "getErasureCodingPolicy"); + return ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .getErasureCodingPolicy(mountPathInfo.getPathOnTarget()); + } + + /** + * Gets all erasure coding policies from all available child file systems. 
+ */ + @Override + public Collection getAllErasureCodingPolicies() + throws IOException { + if (this.vfs == null) { + return super.getAllErasureCodingPolicies(); + } + FileSystem[] childFss = getChildFileSystems(); + List results = new ArrayList<>(); + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + for (FileSystem fs : childFss) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + results.addAll(dfs.getAllErasureCodingPolicies()); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + return results; + } + + @Override + public Map getAllErasureCodingCodecs() throws IOException { + if (this.vfs == null) { + return super.getAllErasureCodingCodecs(); + } + FileSystem[] childFss = getChildFileSystems(); + Map results = new HashMap<>(); + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + for (FileSystem fs : childFss) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + results.putAll(dfs.getAllErasureCodingCodecs()); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + return results; + } + + @Override + public AddErasureCodingPolicyResponse[] addErasureCodingPolicies( + ErasureCodingPolicy[] policies) throws IOException { + if (this.vfs == null) { + return super.addErasureCodingPolicies(policies); + 
} + List failedExceptions = new ArrayList<>(); + List results = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + results.addAll(Arrays.asList(dfs.addErasureCodingPolicies(policies))); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + return results.toArray(new AddErasureCodingPolicyResponse[results.size()]); + } + + @Override + public void removeErasureCodingPolicy(String ecPolicyName) + throws IOException { + if (this.vfs == null) { + super.removeErasureCodingPolicy(ecPolicyName); + return; + } + + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.removeErasureCodingPolicy(ecPolicyName); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void enableErasureCodingPolicy(String ecPolicyName) + throws IOException { + if (this.vfs == null) { + super.enableErasureCodingPolicy(ecPolicyName); + return; + } + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + 
isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.enableErasureCodingPolicy(ecPolicyName); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void disableErasureCodingPolicy(String ecPolicyName) + throws IOException { + if (this.vfs == null) { + super.disableErasureCodingPolicy(ecPolicyName); + return; + } + List failedExceptions = new ArrayList<>(); + boolean isDFSExistsInChilds = false; + + for (FileSystem fs : getChildFileSystems()) { + if (!(fs instanceof DistributedFileSystem)) { + continue; + } + isDFSExistsInChilds = true; + DistributedFileSystem dfs = (DistributedFileSystem) fs; + try { + dfs.disableErasureCodingPolicy(ecPolicyName); + } catch (IOException ioe) { + failedExceptions.add(ioe); + } + } + if (!isDFSExistsInChilds) { + throw new UnsupportedOperationException( + "No DFS available in child file systems."); + } + if (failedExceptions.size() > 0) { + throw MultipleIOException.createIOException(failedExceptions); + } + } + + @Override + public void unsetErasureCodingPolicy(final Path path) throws IOException { + if (this.vfs == null) { + super.unsetErasureCodingPolicy(path); + return; + } + + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = + this.vfs.getMountPathInfo(path, getConf()); + checkDFS(mountPathInfo.getTargetFs(), "unsetErasureCodingPolicy"); + ((DistributedFileSystem) mountPathInfo.getTargetFs()) + .unsetErasureCodingPolicy(mountPathInfo.getPathOnTarget()); + } + + @Override + public ECTopologyVerifierResult getECTopologyResultForPolicies( + final String... 
policyNames) throws IOException {
+    if (this.vfs == null) {
+      return super.getECTopologyResultForPolicies(policyNames);
+    }
+
+    List<IOException> failedExceptions = new ArrayList<>();
+    ECTopologyVerifierResult result = null;
+    // Tracked separately from 'result': a DFS child that throws leaves
+    // 'result' null, which must not be mistaken for "no DFS child at all".
+    boolean isDFSExistsInChilds = false;
+    for (FileSystem fs : getChildFileSystems()) {
+      if (!(fs instanceof DistributedFileSystem)) {
+        continue;
+      }
+      isDFSExistsInChilds = true;
+      DistributedFileSystem dfs = (DistributedFileSystem) fs;
+      try {
+        result = dfs.getECTopologyResultForPolicies(policyNames);
+        if (!result.isSupported()) {
+          // Return as soon as any child reports a negative result.
+          return result;
+        }
+      } catch (IOException ioe) {
+        failedExceptions.add(ioe);
+      }
+    }
+    if (!isDFSExistsInChilds) {
+      throw new UnsupportedOperationException(
+          "No DFS available in child filesystems");
+    }
+    // Surface the collected failures, including the case where every DFS
+    // child failed and no result was obtained.
+    if (failedExceptions.size() > 0) {
+      throw MultipleIOException.createIOException(failedExceptions);
+    }
+    // Let's just return the last one.
+    return result;
+  }
+
+  /**
+   * Returns the trash root for the path from the parent class when
+   * {@code vfs} is null, otherwise from the wrapped view file system.
+   */
+  @Override
+  public Path getTrashRoot(Path path) {
+    if (this.vfs == null) {
+      return super.getTrashRoot(path);
+    }
+    return this.vfs.getTrashRoot(path);
+  }
+
+  /**
+   * Returns the trash roots of the parent class when {@code vfs} is null,
+   * otherwise the trash roots of all child file systems combined.
+   */
+  @Override
+  public Collection getTrashRoots(boolean allUsers) {
+    if (this.vfs == null) {
+      return super.getTrashRoots(allUsers);
+    }
+    List trashRoots = new ArrayList<>();
+    for (FileSystem fs : getChildFileSystems()) {
+      trashRoots.addAll(fs.getTrashRoots(allUsers));
+    }
+    return trashRoots;
+  }
+
+  // Just provided the same implementation as default in dfs as that's just
+  // delegated to FileSystem parent class.
+  @Override
+  protected Path fixRelativePart(Path p) {
+    return super.fixRelativePart(p);
+  }
+
+  /**
+   * Returns the parent class statistics when {@code vfs} is null, otherwise
+   * the {@code statistics} field of this instance.
+   */
+  Statistics getFsStatistics() {
+    if (this.vfs == null) {
+      return super.getFsStatistics();
+    }
+    return statistics;
+  }
+
+  /**
+   * Returns the op-count statistics of the parent class when {@code vfs} is
+   * null, otherwise those of the default DFS. The default DFS is passed
+   * through checkDefaultDFS first, as the other default-DFS backed methods
+   * do, instead of being dereferenced unchecked.
+   */
+  DFSOpsCountStatistics getDFSOpsCountStatistics() {
+    if (this.vfs == null) {
+      return super.getDFSOpsCountStatistics();
+    }
+    checkDefaultDFS(defaultDFS, "getDFSOpsCountStatistics");
+    return defaultDFS.getDFSOpsCountStatistics();
+  }
+
+  /**
+   * Creates a file builder on the file system the path is mounted on; the
+   * target is passed through checkDFS before being used. Returns null when
+   * the path cannot be resolved against the mount table.
+   */
+  @Override
+  // Works only for HDFS
+  public HdfsDataOutputStreamBuilder createFile(Path path) {
+    if (this.vfs == null) {
+      return super.createFile(path);
+    }
+    ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = null;
+    try {
+      mountPathInfo = this.vfs.getMountPathInfo(path, getConf());
+    } catch (IOException e) {
+      // Log the failure, as appendFile does, rather than dropping it.
+      // TODO: can we return null here?
+      LOGGER.warn("Failed to resolve the path as mount path", e);
+      return null;
+    }
+    checkDFS(mountPathInfo.getTargetFs(), "createFile");
+    return (HdfsDataOutputStreamBuilder) mountPathInfo.getTargetFs()
+        .createFile(mountPathInfo.getPathOnTarget());
+  }
+
+  /**
+   * Lists open files from the parent class when {@code vfs} is null,
+   * otherwise from the default DFS.
+   */
+  @Deprecated
+  @Override
+  public RemoteIterator listOpenFiles() throws IOException {
+    if (this.vfs == null) {
+      return super.listOpenFiles();
+    }
+    checkDefaultDFS(defaultDFS, "listOpenFiles");
+    return defaultDFS.listOpenFiles();
+  }
+
+  /**
+   * Lists open files of the given types from the parent class when
+   * {@code vfs} is null, otherwise from the default DFS.
+   */
+  @Deprecated
+  @Override
+  public RemoteIterator listOpenFiles(
+      EnumSet openFilesTypes)
+      throws IOException {
+    if (this.vfs == null) {
+      return super.listOpenFiles(openFilesTypes);
+    }
+    checkDefaultDFS(defaultDFS, "listOpenFiles");
+    return defaultDFS.listOpenFiles(openFilesTypes);
+  }
+
+  /**
+   * Lists open files under the given path on the file system that path is
+   * mounted on; the path is made absolute before it is resolved.
+   */
+  @Override
+  public RemoteIterator listOpenFiles(
+      EnumSet openFilesTypes, String path)
+      throws IOException {
+    if (this.vfs == null) {
+      return super.listOpenFiles(openFilesTypes, path);
+    }
+    Path absF = fixRelativePart(new Path(path));
+    ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo =
+        this.vfs.getMountPathInfo(absF, getConf());
+    checkDFS(mountPathInfo.getTargetFs(), "listOpenFiles");
+    return ((DistributedFileSystem) mountPathInfo.getTargetFs())
+        .listOpenFiles(openFilesTypes,
mountPathInfo.getPathOnTarget().toString()); + } + + @Override + public HdfsDataOutputStreamBuilder appendFile(Path path) { + if (this.vfs == null) { + return super.appendFile(path); + } + ViewFileSystemOverloadScheme.MountPathInfo mountPathInfo = null; + try { + mountPathInfo = this.vfs.getMountPathInfo(path, getConf()); + } catch (IOException e) { + LOGGER.warn("Failed to resolve the path as mount path", e); + return null; + } + checkDFS(mountPathInfo.getTargetFs(), "appendFile"); + return (HdfsDataOutputStreamBuilder) mountPathInfo.getTargetFs() + .appendFile(mountPathInfo.getPathOnTarget()); + } + + @Override + public boolean hasPathCapability(Path path, String capability) + throws IOException { + if (this.vfs == null) { + return super.hasPathCapability(path, capability); + } + return this.vfs.hasPathCapability(path, capability); + } + + //Below API provided implementations are in ViewFS but not there in DFS. + @Override + public Path resolvePath(final Path f) throws IOException { + if (this.vfs == null) { + return super.resolvePath(f); + } + return this.vfs.resolvePath(f); + } + + @Override + @SuppressWarnings("deprecation") + public boolean delete(final Path f) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.delete(f); + } + return this.vfs.delete(f); + } + + @Override + public FileChecksum getFileChecksum(final Path f, final long length) + throws AccessControlException, FileNotFoundException, IOException { + if (this.vfs == null) { + return super.getFileChecksum(f, length); + } + return this.vfs.getFileChecksum(f, length); + } + + @Override + public boolean mkdirs(Path dir) throws IOException { + if (this.vfs == null) { + return super.mkdirs(dir); + } + return this.vfs.mkdirs(dir); + } + + @Override + public long getDefaultBlockSize(Path f) { + if (this.vfs == null) { + return super.getDefaultBlockSize(f); + } + return this.vfs.getDefaultBlockSize(f); + } + + @Override + public short 
getDefaultReplication(Path f) { + if (this.vfs == null) { + return super.getDefaultReplication(f); + } + return this.vfs.getDefaultReplication(f); + } + + @Override + public FsServerDefaults getServerDefaults(Path f) throws IOException { + if (this.vfs == null) { + return super.getServerDefaults(f); + } + return this.vfs.getServerDefaults(f); + } + + @Override + public void setWriteChecksum(final boolean writeChecksum) { + if (this.vfs == null) { + super.setWriteChecksum(writeChecksum); + return; + } + this.vfs.setWriteChecksum(writeChecksum); + } + + @Override + public FileSystem[] getChildFileSystems() { + if (this.vfs == null) { + return super.getChildFileSystems(); + } + return this.vfs.getChildFileSystems(); + } + + public ViewFileSystem.MountPoint[] getMountPoints() { + if (this.vfs == null) { + return null; + } + return this.vfs.getMountPoints(); + } + + @Override + public FsStatus getStatus() throws IOException { + if (this.vfs == null) { + return super.getStatus(); + } + return this.vfs.getStatus(); + } + + @Override + public long getUsed() throws IOException { + if (this.vfs == null) { + return super.getUsed(); + } + return this.vfs.getUsed(); + } + + @Override + public DatanodeInfo[] getSlowDatanodeStats() throws IOException { + if (this.vfs == null) { + return super.getSlowDatanodeStats(); + } + checkDefaultDFS(defaultDFS, "getSlowDatanodeStats"); + return defaultDFS.getSlowDatanodeStats(); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java index 0bb995553bc7f..f6e32c40e80fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java @@ -26,9 +26,9 @@ import org.apache.hadoop.fs.XAttr.NameSpace; import org.apache.hadoop.util.StringUtils; -import 
com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; @InterfaceAudience.Private public class XAttrHelper { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java index 6cad69a46c4e8..30e7e00653bcc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java @@ -47,6 +47,7 @@ public static Optional hasPathCapability(final Path path, case CommonPathCapabilities.FS_CHECKSUMS: case CommonPathCapabilities.FS_CONCAT: case CommonPathCapabilities.FS_LIST_CORRUPT_FILE_BLOCKS: + case CommonPathCapabilities.FS_MULTIPART_UPLOADER: case CommonPathCapabilities.FS_PATHHANDLES: case CommonPathCapabilities.FS_PERMISSIONS: case CommonPathCapabilities.FS_SNAPSHOTS: diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index 407462c6e757d..e6af2dbecc555 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -108,6 +108,9 @@ public interface HdfsClientConfigKeys { String DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL = "dfs.client.use.legacy.blockreader.local"; boolean DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT = 
false; + String DFS_CLIENT_READ_USE_CACHE_PRIORITY = + "dfs.client.read.use.cache.priority"; + boolean DFS_CLIENT_READ_USE_CACHE_PRIORITY_DEFAULT = false; String DFS_CLIENT_DATANODE_RESTART_TIMEOUT_KEY = "dfs.client.datanode-restart.timeout"; long DFS_CLIENT_DATANODE_RESTART_TIMEOUT_DEFAULT = 30; @@ -127,6 +130,8 @@ public interface HdfsClientConfigKeys { int DFS_BYTES_PER_CHECKSUM_DEFAULT = 512; String DFS_CHECKSUM_COMBINE_MODE_KEY = "dfs.checksum.combine.mode"; String DFS_CHECKSUM_COMBINE_MODE_DEFAULT = "MD5MD5CRC"; + String DFS_CHECKSUM_EC_SOCKET_TIMEOUT_KEY = "dfs.checksum.ec.socket-timeout"; + int DFS_CHECKSUM_EC_SOCKET_TIMEOUT_DEFAULT = 3000; String DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY = "dfs.datanode.socket.write.timeout"; String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = @@ -141,6 +146,8 @@ public interface HdfsClientConfigKeys { "dfs.short.circuit.shared.memory.watcher.interrupt.check.ms"; int DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS_DEFAULT = 60000; + String DFS_CLIENT_SHORT_CIRCUIT_NUM = "dfs.client.short.circuit.num"; + int DFS_CLIENT_SHORT_CIRCUIT_NUM_DEFAULT = 1; String DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY = "dfs.client.slow.io.warning.threshold.ms"; long DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT = 30000; @@ -156,13 +163,9 @@ public interface HdfsClientConfigKeys { "dfs.client.deadnode.detection.enabled"; boolean DFS_CLIENT_DEAD_NODE_DETECTION_ENABLED_DEFAULT = false; - String DFS_CLIENT_DEAD_NODE_DETECTION_DEAD_NODE_QUEUE_MAX_KEY = - "dfs.client.deadnode.detection.deadnode.queue.max"; - int DFS_CLIENT_DEAD_NODE_DETECTION_DEAD_NODE_QUEUE_MAX_DEFAULT = 100; - - String DFS_CLIENT_DEAD_NODE_DETECTION_SUSPECT_NODE_QUEUE_MAX_KEY = - "dfs.client.deadnode.detection.suspectnode.queue.max"; - int DFS_CLIENT_DEAD_NODE_DETECTION_SUSPECT_NODE_QUEUE_MAX_DEFAULT = 1000; + String DFS_CLIENT_DEAD_NODE_DETECTION_IDLE_SLEEP_MS_KEY = + "dfs.client.deadnode.detection.idle.sleep.ms"; + long DFS_CLIENT_DEAD_NODE_DETECTION_IDLE_SLEEP_MS_DEFAULT = 
10000; String DFS_CLIENT_DEAD_NODE_DETECTION_PROBE_CONNECTION_TIMEOUT_MS_KEY = "dfs.client.deadnode.detection.probe.connection.timeout.ms"; @@ -196,6 +199,19 @@ public interface HdfsClientConfigKeys { "dfs.client.refresh.read-block-locations.ms"; long DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_MS_DEFAULT = 0L; + // Number of threads to use for refreshing LocatedBlocks of registered + // DFSInputStreams. If a DFSClient opens many DFSInputStreams, increasing + // this may help refresh them all in a timely manner. + String DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_THREADS_KEY = + "dfs.client.refresh.read-block-locations.threads"; + int DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_THREADS_DEFAULT = 5; + + // Whether to auto-register all DFSInputStreams for background refreshes. + // If false, user must manually register using DFSClient#addLocatedBlocksRefresh(DFSInputStream) + String DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_AUTOMATICALLY_KEY = + "dfs.client.refresh.read-block-locations.register-automatically"; + boolean DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_AUTOMATICALLY_DEFAULT = true; + String DFS_DATANODE_KERBEROS_PRINCIPAL_KEY = "dfs.datanode.kerberos.principal"; String DFS_DATANODE_READAHEAD_BYTES_KEY = "dfs.datanode.readahead.bytes"; @@ -245,6 +261,14 @@ public interface HdfsClientConfigKeys { String DFS_LEASE_HARDLIMIT_KEY = "dfs.namenode.lease-hard-limit-sec"; long DFS_LEASE_HARDLIMIT_DEFAULT = 20 * 60; + String DFS_CLIENT_FSCK_CONNECT_TIMEOUT = + "dfs.client.fsck.connect.timeout"; + int DFS_CLIENT_FSCK_CONNECT_TIMEOUT_DEFAULT = 60 * 1000; + + String DFS_CLIENT_FSCK_READ_TIMEOUT = + "dfs.client.fsck.read.timeout"; + int DFS_CLIENT_FSCK_READ_TIMEOUT_DEFAULT = 60 * 1000; + /** * These are deprecated config keys to client code. 
*/ @@ -409,6 +433,9 @@ interface Read { String PREFETCH_SIZE_KEY = PREFIX + "prefetch.size"; + String URI_CACHE_KEY = PREFIX + "uri.cache.enabled"; + boolean URI_CACHE_DEFAULT = false; + interface ShortCircuit { String PREFIX = Read.PREFIX + "shortcircuit."; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java index 02e5deb499914..5a615bbd62de4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java @@ -31,7 +31,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The Hdfs implementation of {@link FSDataInputStream}. diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java index d7331299ef3e6..8af3417ca9fdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java @@ -28,7 +28,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSOutputStream; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The Hdfs implementation of {@link FSDataOutputStream}. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsUtils.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsUtils.java index 3b77a3f8ee2a3..59e45e5451705 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsUtils.java @@ -47,6 +47,7 @@ public class HdfsUtils { * @param uri the HDFS URI. Note that the URI path is ignored. * @return true if HDFS is healthy; false, otherwise. */ + @SuppressWarnings("deprecation") public static boolean isHealthy(URI uri) { //check scheme final String scheme = uri.getScheme(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java index 2109e6e1b7ecf..67c59cb936d10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java @@ -27,10 +27,11 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.net.InetSocketAddress; +import java.nio.channels.UnresolvedAddressException; import java.util.List; -import com.google.common.io.ByteArrayDataOutput; -import com.google.common.io.ByteStreams; +import org.apache.hadoop.thirdparty.com.google.common.io.ByteArrayDataOutput; +import org.apache.hadoop.thirdparty.com.google.common.io.ByteStreams; import org.apache.commons.lang3.mutable.MutableBoolean; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -73,8 +74,8 @@ import org.apache.hadoop.util.PerformanceAdvisory; import org.apache.hadoop.util.Time; -import 
com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -475,7 +476,8 @@ private BlockReader getBlockReaderLocal() throws IOException { "giving up on BlockReaderLocal.", this, pathInfo); return null; } - ShortCircuitCache cache = clientContext.getShortCircuitCache(); + ShortCircuitCache cache = + clientContext.getShortCircuitCache(block.getBlockId()); ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()); ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this); @@ -526,7 +528,8 @@ public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() { if (curPeer.fromCache) remainingCacheTries--; DomainPeer peer = (DomainPeer)curPeer.peer; Slot slot = null; - ShortCircuitCache cache = clientContext.getShortCircuitCache(); + ShortCircuitCache cache = + clientContext.getShortCircuitCache(block.getBlockId()); try { MutableBoolean usedPeer = new MutableBoolean(false); slot = cache.allocShmSlot(datanode, peer, usedPeer, @@ -581,7 +584,8 @@ public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() { */ private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer, Slot slot) throws IOException { - ShortCircuitCache cache = clientContext.getShortCircuitCache(); + ShortCircuitCache cache = + clientContext.getShortCircuitCache(block.getBlockId()); final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(peer.getOutputStream(), SMALL_BUFFER_SIZE)); SlotId slotId = slot == null ? 
null : slot.getSlotId(); @@ -641,6 +645,9 @@ private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer, "attempting to set up short-circuit access to " + fileName + resp.getMessage(); LOG.debug("{}:{}", this, msg); + if (slot != null) { + cache.freeSlot(slot); + } return new ShortCircuitReplicaInfo(new InvalidToken(msg)); default: final long expiration = @@ -823,7 +830,7 @@ private BlockReaderPeer nextTcpPeer() throws IOException { datanode); LOG.trace("nextTcpPeer: created newConnectedPeer {}", peer); return new BlockReaderPeer(peer, false); - } catch (IOException e) { + } catch (IOException | UnresolvedAddressException e) { LOG.trace("nextTcpPeer: failed to create newConnectedPeer connected to" + "{}", datanode); throw e; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocal.java index 9c1ef461b0433..e7ddb98e700e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocal.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.client.impl; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.fs.StorageType; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java index 0f2f54c28e331..f25f50cf05411 
100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java @@ -51,7 +51,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java index 07f0eee8dd5ad..57013896ac4f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.client.impl; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -46,6 +46,8 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_COMBINE_MODE_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_COMBINE_MODE_KEY; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_EC_SOCKET_TIMEOUT_DEFAULT; +import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_EC_SOCKET_TIMEOUT_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_TYPE_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_CACHED_CONN_RETRY_DEFAULT; @@ -60,6 +62,8 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_MS; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_READ_USE_CACHE_PRIORITY; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_READ_USE_CACHE_PRIORITY_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT; @@ -112,6 +116,7 @@ public class DfsClientConf { private final int ioBufferSize; private final ChecksumOpt defaultChecksumOpt; private final ChecksumCombineMode checksumCombineMode; + private final int checksumEcSocketTimeout; private final int writePacketSize; private final int writeMaxPackets; private final ByteArrayManager.Conf writeByteArrayManagerConf; @@ -127,6 +132,7 @@ public class DfsClientConf { private final int blockWriteLocateFollowingMaxDelayMs; private final long defaultBlockSize; private final long prefetchSize; + private final boolean uriCacheEnabled; private final short defaultReplication; private final String taskId; private final FsPermission uMask; @@ -138,8 +144,10 @@ public class DfsClientConf { /** wait time window 
before refreshing blocklocation for inputstream. */ private final long refreshReadBlockLocationsMS; + private final boolean refreshReadBlockLocationsAutomatically; private final ShortCircuitConf shortCircuitConf; + private final int clientShortCircuitNum; private final long hedgedReadThresholdMillis; private final int hedgedReadThreadpoolSize; @@ -150,6 +158,8 @@ public class DfsClientConf { private final boolean dataTransferTcpNoDelay; + private final boolean readUseCachePriority; + private final boolean deadNodeDetectionEnabled; private final long leaseHardLimitPeriod; @@ -191,6 +201,8 @@ public DfsClientConf(Configuration conf) { CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT); defaultChecksumOpt = getChecksumOptFromConf(conf); checksumCombineMode = getChecksumCombineModeFromConf(conf); + checksumEcSocketTimeout = conf.getInt(DFS_CHECKSUM_EC_SOCKET_TIMEOUT_KEY, + DFS_CHECKSUM_EC_SOCKET_TIMEOUT_DEFAULT); dataTransferTcpNoDelay = conf.getBoolean( DFS_DATA_TRANSFER_CLIENT_TCPNODELAY_KEY, DFS_DATA_TRANSFER_CLIENT_TCPNODELAY_DEFAULT); @@ -206,24 +218,7 @@ public DfsClientConf(Configuration conf) { Write.MAX_PACKETS_IN_FLIGHT_KEY, Write.MAX_PACKETS_IN_FLIGHT_DEFAULT); - final boolean byteArrayManagerEnabled = conf.getBoolean( - Write.ByteArrayManager.ENABLED_KEY, - Write.ByteArrayManager.ENABLED_DEFAULT); - if (!byteArrayManagerEnabled) { - writeByteArrayManagerConf = null; - } else { - final int countThreshold = conf.getInt( - Write.ByteArrayManager.COUNT_THRESHOLD_KEY, - Write.ByteArrayManager.COUNT_THRESHOLD_DEFAULT); - final int countLimit = conf.getInt( - Write.ByteArrayManager.COUNT_LIMIT_KEY, - Write.ByteArrayManager.COUNT_LIMIT_DEFAULT); - final long countResetTimePeriodMs = conf.getLong( - Write.ByteArrayManager.COUNT_RESET_TIME_PERIOD_MS_KEY, - Write.ByteArrayManager.COUNT_RESET_TIME_PERIOD_MS_DEFAULT); - writeByteArrayManagerConf = new ByteArrayManager.Conf( - countThreshold, countLimit, countResetTimePeriodMs); - } + writeByteArrayManagerConf = 
loadWriteByteArrayManagerConf(conf); defaultBlockSize = conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT); @@ -235,6 +230,10 @@ public DfsClientConf(Configuration conf) { Write.EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT); prefetchSize = conf.getLong(Read.PREFETCH_SIZE_KEY, 10 * defaultBlockSize); + + uriCacheEnabled = conf.getBoolean(Read.URI_CACHE_KEY, + Read.URI_CACHE_DEFAULT); + numCachedConnRetry = conf.getInt(DFS_CLIENT_CACHED_CONN_RETRY_KEY, DFS_CLIENT_CACHED_CONN_RETRY_DEFAULT); numBlockWriteRetry = conf.getInt( @@ -260,13 +259,17 @@ public DfsClientConf(Configuration conf) { slowIoWarningThresholdMs = conf.getLong( DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY, DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT); + readUseCachePriority = conf.getBoolean(DFS_CLIENT_READ_USE_CACHE_PRIORITY, + DFS_CLIENT_READ_USE_CACHE_PRIORITY_DEFAULT); refreshReadBlockLocationsMS = conf.getLong( HdfsClientConfigKeys.DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_MS_KEY, HdfsClientConfigKeys. DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_MS_DEFAULT); - shortCircuitConf = new ShortCircuitConf(conf); + refreshReadBlockLocationsAutomatically = conf.getBoolean( + HdfsClientConfigKeys.DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_AUTOMATICALLY_KEY, + HdfsClientConfigKeys.DFS_CLIENT_REFRESH_READ_BLOCK_LOCATIONS_AUTOMATICALLY_DEFAULT); hedgedReadThresholdMillis = conf.getLong( HedgedRead.THRESHOLD_MILLIS_KEY, @@ -290,6 +293,38 @@ public DfsClientConf(Configuration conf) { leaseHardLimitPeriod = conf.getLong(HdfsClientConfigKeys.DFS_LEASE_HARDLIMIT_KEY, HdfsClientConfigKeys.DFS_LEASE_HARDLIMIT_DEFAULT) * 1000; + + shortCircuitConf = new ShortCircuitConf(conf); + clientShortCircuitNum = conf.getInt( + HdfsClientConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_NUM, + HdfsClientConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_NUM_DEFAULT); + Preconditions.checkArgument(clientShortCircuitNum >= 1, + HdfsClientConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_NUM + + "can't be less then 1."); + Preconditions.checkArgument(clientShortCircuitNum 
<= 5, + HdfsClientConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_NUM + + "can't be more then 5."); + } + + private ByteArrayManager.Conf loadWriteByteArrayManagerConf( + Configuration conf) { + final boolean byteArrayManagerEnabled = conf.getBoolean( + Write.ByteArrayManager.ENABLED_KEY, + Write.ByteArrayManager.ENABLED_DEFAULT); + if (!byteArrayManagerEnabled) { + return null; + } + final int countThreshold = conf.getInt( + Write.ByteArrayManager.COUNT_THRESHOLD_KEY, + Write.ByteArrayManager.COUNT_THRESHOLD_DEFAULT); + final int countLimit = conf.getInt( + Write.ByteArrayManager.COUNT_LIMIT_KEY, + Write.ByteArrayManager.COUNT_LIMIT_DEFAULT); + final long countResetTimePeriodMs = conf.getLong( + Write.ByteArrayManager.COUNT_RESET_TIME_PERIOD_MS_KEY, + Write.ByteArrayManager.COUNT_RESET_TIME_PERIOD_MS_DEFAULT); + return new ByteArrayManager.Conf( + countThreshold, countLimit, countResetTimePeriodMs); } @SuppressWarnings("unchecked") @@ -448,6 +483,13 @@ public ChecksumCombineMode getChecksumCombineMode() { return checksumCombineMode; } + /** + * @return the checksumEcSocketTimeout + */ + public int getChecksumEcSocketTimeout() { + return checksumEcSocketTimeout; + } + /** * @return the writePacketSize */ @@ -539,6 +581,13 @@ public long getPrefetchSize() { return prefetchSize; } + /** + * @return the uriCacheEnable + */ + public boolean isUriCacheEnabled() { + return uriCacheEnabled; + } + /** * @return the defaultReplication */ @@ -595,6 +644,13 @@ public long getSlowIoWarningThresholdMs() { return slowIoWarningThresholdMs; } + /* + * @return the clientShortCircuitNum + */ + public int getClientShortCircuitNum() { + return clientShortCircuitNum; + } + /** * @return the hedgedReadThresholdMillis */ @@ -630,6 +686,13 @@ public long getleaseHardLimitPeriod() { return leaseHardLimitPeriod; } + /** + * @return the readUseCachePriority + */ + public boolean isReadUseCachePriority() { + return readUseCachePriority; + } + /** * @return the replicaAccessorBuilderClasses */ @@ -638,13 
+701,18 @@ public long getleaseHardLimitPeriod() { return replicaAccessorBuilderClasses; } - /** - * @return the replicaAccessorBuilderClasses - */ - public long getRefreshReadBlockLocationsMS() { + public boolean isLocatedBlocksRefresherEnabled() { + return refreshReadBlockLocationsMS > 0; + } + + public long getLocatedBlocksRefresherInterval() { return refreshReadBlockLocationsMS; } + public boolean isRefreshReadBlockLocationsAutomatically() { + return refreshReadBlockLocationsAutomatically; + } + /** * @return the shortCircuitConf */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java index 8a040f21b8c03..fcacdaa6a734d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java @@ -26,6 +26,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; @@ -36,7 +37,7 @@ import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -79,6 +80,8 @@ public class LeaseRenewer { private static long leaseRenewerGraceDefault = 60*1000L; static final long LEASE_RENEWER_SLEEP_DEFAULT = 1000L; + private AtomicBoolean isLSRunning = new AtomicBoolean(false); + /** Get a {@link LeaseRenewer} instance */ public static LeaseRenewer getInstance(final String authority, final UserGroupInformation ugi, final 
DFSClient dfsc) { @@ -87,6 +90,17 @@ public static LeaseRenewer getInstance(final String authority, return r; } + /** + * Remove the given renewer from the Factory. + * Subsequent call will receive new {@link LeaseRenewer} instance. + * @param renewer Instance to be cleared from Factory + */ + public static void remove(LeaseRenewer renewer) { + synchronized (renewer) { + Factory.INSTANCE.remove(renewer); + } + } + /** * A factory for sharing {@link LeaseRenewer} objects * among {@link DFSClient} instances @@ -156,6 +170,9 @@ private synchronized void remove(final LeaseRenewer r) { final LeaseRenewer stored = renewers.get(r.factorykey); //Since a renewer may expire, the stored renewer can be different. if (r == stored) { + // Expire LeaseRenewer daemon thread as soon as possible. + r.clearClients(); + r.setEmptyTime(0); renewers.remove(r.factorykey); } } @@ -241,6 +258,10 @@ private synchronized void addClient(final DFSClient dfsc) { } } + private synchronized void clearClients() { + dfsclients.clear(); + } + private synchronized boolean clientsRunning() { for(Iterator i = dfsclients.iterator(); i.hasNext(); ) { if (!i.next().isClientRunning()) { @@ -292,11 +313,18 @@ private synchronized boolean isRenewerExpired() { && Time.monotonicNow() - emptyTime > gracePeriod; } - public synchronized void put(final DFSClient dfsc) { + public synchronized boolean put(final DFSClient dfsc) { if (dfsc.isClientRunning()) { if (!isRunning() || isRenewerExpired()) { - //start a new deamon with a new id. + // Start a new daemon with a new id. final int id = ++currentId; + if (isLSRunning.get()) { + // Not allowed to add multiple daemons into LeaseRenewer, let client + // create new LR and continue to acquire lease. 
+ return false; + } + isLSRunning.getAndSet(true); + daemon = new Daemon(new Runnable() { @Override public void run() { @@ -328,6 +356,7 @@ public String toString() { } emptyTime = Long.MAX_VALUE; } + return true; } @VisibleForTesting @@ -426,9 +455,6 @@ private void run(final int id) throws InterruptedException { synchronized (this) { DFSClientFaultInjector.get().delayWhenRenewLeaseTimeout(); dfsclientsCopy = new ArrayList<>(dfsclients); - dfsclients.clear(); - //Expire the current LeaseRenewer thread. - emptyTime = 0; Factory.INSTANCE.remove(LeaseRenewer.this); } for (DFSClient dfsClient : dfsclientsCopy) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java index 4dbe98858f5d5..89e2e41352a17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java @@ -19,7 +19,7 @@ import java.util.*; -import com.google.common.primitives.SignedBytes; +import org.apache.hadoop.thirdparty.com.google.common.primitives.SignedBytes; import org.apache.hadoop.util.ChunkedArrayList; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing.DiffReportListingEntry; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/metrics/BlockReaderLocalMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/metrics/BlockReaderLocalMetrics.java index 61b497e29ca44..83bfb8b9ce5c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/metrics/BlockReaderLocalMetrics.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/metrics/BlockReaderLocalMetrics.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.client.impl.metrics; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlockStoragePolicy.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlockStoragePolicy.java index edb81e9adb7ce..7bd3f969972c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlockStoragePolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlockStoragePolicy.java @@ -23,7 +23,7 @@ import java.util.LinkedList; import java.util.List; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.BlockStoragePolicySpi; import org.apache.hadoop.fs.StorageType; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveInfo.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveInfo.java index e80f12aa0ee1c..3e5a4431372ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveInfo.java @@ -25,7 +25,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Path; -import 
com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.DFSUtilClient; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java index 917457f974c38..b3354344d7237 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java @@ -26,9 +26,9 @@ import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.ipc.RemoteException; -import com.google.common.base.Preconditions; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; /** * CacheDirectiveIterator is a remote iterator that iterates cache directives. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java index 431b3a65bec94..7faee9328b2f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java @@ -23,8 +23,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; /** * CachePoolIterator is a remote iterator that iterates cache pools. diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 2f4dfb9b46cc1..086cfacb4e545 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -1856,4 +1856,16 @@ BatchedEntries listOpenFiles(long prevId, */ @AtMostOnce void satisfyStoragePolicy(String path) throws IOException; + + /** + * Get report on all of the slow Datanodes. Slow running datanodes are identified based on + * the Outlier detection algorithm, if slow peer tracking is enabled for the DFS cluster. + * + * @return Datanode report for slow running datanodes. + * @throws IOException If an I/O error occurs. 
+ */ + @Idempotent + @ReadOnly + DatanodeInfo[] getSlowDatanodeReport() throws IOException; + } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java index 0278fc92d2e2f..2cb16879c3f6f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java @@ -22,7 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.net.InetSocketAddress; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java index bba90a05794ba..fbe6bcc4629d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java @@ -698,9 +698,10 @@ public static class DatanodeInfoBuilder { private long nonDfsUsed = 0L; private long lastBlockReportTime = 0L; private long lastBlockReportMonotonic = 0L; - private int numBlocks; - + private int numBlocks = 0; + // Please use setNumBlocks explicitly to set numBlocks as this method doesn't have + // sufficient info about numBlocks public DatanodeInfoBuilder setFrom(DatanodeInfo from) { this.capacity = from.getCapacity(); this.dfsUsed = from.getDfsUsed(); @@ -717,7 +718,6 @@ public DatanodeInfoBuilder setFrom(DatanodeInfo from) { this.upgradeDomain = from.getUpgradeDomain(); this.lastBlockReportTime = 
from.getLastBlockReportTime(); this.lastBlockReportMonotonic = from.getLastBlockReportMonotonic(); - this.numBlocks = from.getNumBlocks(); setNodeID(from); return this; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java index eb6a0c0c311bb..7b49cb1471493 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java @@ -23,8 +23,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; /** * EncryptionZoneIterator is a remote iterator that iterates over encryption diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicy.java index 3559ab97d8fe1..02c4f9a1f21b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicy.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.protocol; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.classification.InterfaceAudience; diff 
--git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicyInfo.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicyInfo.java index c8a2722621020..48b581dfe6acb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicyInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingPolicyInfo.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.protocol; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java index eb3dc4e9e8c5e..615acad5a6523 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.util.StringUtils; @InterfaceAudience.Private @@ -110,7 +111,7 @@ public final class HdfsConstants { * period, no other client can write to the file. The writing client can * periodically renew the lease. When the file is closed, the lease is * revoked. The lease duration is bound by this soft limit and a - * {@link HdfsConstants#LEASE_HARDLIMIT_PERIOD hard limit}. Until the + * {@link HdfsClientConfigKeys#DFS_LEASE_HARDLIMIT_KEY }. 
Until the * soft limit expires, the writer has sole write access to the file. If the * soft limit expires and the client fails to close the file or renew the * lease, another client can preempt the lease. diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsPartialListing.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsPartialListing.java index c3e0ac585ebfd..d96c7892b76ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsPartialListing.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsPartialListing.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.protocol; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ipc.RemoteException; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java index 29f1b6da6b270..f2d8135ab4dee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java @@ -22,14 +22,14 @@ import java.util.Comparator; import java.util.List; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import 
org.apache.hadoop.security.token.Token; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Associates a block with the Datanodes that contain its replicas @@ -158,7 +158,7 @@ public ExtendedBlock getBlock() { * {@link org.apache.hadoop.hdfs.protocol.LocatedBlock#updateCachedStorageInfo} * to update the cached Storage ID/Type arrays. */ - public DatanodeInfo[] getLocations() { + public DatanodeInfoWithStorage[] getLocations() { return locs; } @@ -268,6 +268,7 @@ public String toString() { + "; corrupt=" + corrupt + "; offset=" + offset + "; locs=" + Arrays.asList(locs) + + "; cachedLocs=" + Arrays.asList(cachedLocs) + "}"; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java index baf59ce61367b..1f5b85e315f8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java @@ -131,7 +131,7 @@ public ErasureCodingPolicy getErasureCodingPolicy() { public int findBlock(long offset) { // create fake block of size 0 as a key LocatedBlock key = new LocatedBlock( - new ExtendedBlock(), new DatanodeInfo[0]); + new ExtendedBlock(), DatanodeInfo.EMPTY_ARRAY); key.setStartOffset(offset); key.getBlock().setNumBytes(1); Comparator comp = diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/OpenFilesIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/OpenFilesIterator.java index c2b378160104a..9eca4e83b0a10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/OpenFilesIterator.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/OpenFilesIterator.java @@ -24,8 +24,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; /** * OpenFilesIterator is a remote iterator that iterates over the open files list diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatus.java index dabeceacc3e3c..5d7b91343f239 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatus.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.protocol; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ReencryptionInfoProto; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatusIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatusIterator.java index c8a8857572d99..81fb1f90e58f5 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatusIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReencryptionStatusIterator.java @@ -20,8 +20,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.BatchedRemoteIterator; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; import java.io.IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java index 7bc95c9350521..e6f20c9ce1b82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReport.java @@ -23,7 +23,7 @@ import org.apache.hadoop.fs.Path; -import com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; import org.apache.hadoop.hdfs.DFSUtilClient; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java index 79fd543c3be90..74329bc1e8a12 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java @@ -20,7 +20,7 @@ import java.util.Collections; import java.util.List; -import com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.DFSUtilClient; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/XAttrNotFoundException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/XAttrNotFoundException.java new file mode 100644 index 0000000000000..d9584910bf987 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/XAttrNotFoundException.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocol; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The exception that happens when you ask to get a non existing XAttr. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class XAttrNotFoundException extends IOException { + private static final long serialVersionUID = -6506239904158794057L; + public static final String DEFAULT_EXCEPTION_MSG = + "At least one of the attributes provided was not found."; + public XAttrNotFoundException() { + this(DEFAULT_EXCEPTION_MSG); + } + public XAttrNotFoundException(String msg) { + super(msg); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ZoneReencryptionStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ZoneReencryptionStatus.java index c7628d8dd91df..10884f27f90a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ZoneReencryptionStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ZoneReencryptionStatus.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.protocol; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ReencryptionInfoProto; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java index 287928c893b1a..85ee3ce4f4987 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java @@ -35,8 +35,9 @@ import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import 
org.apache.htrace.core.SpanId; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceUtils; /** * Static utilities for dealing with the protocol buffers used by the @@ -87,23 +88,16 @@ static BaseHeaderProto buildBaseHeader(ExtendedBlock blk, BaseHeaderProto.Builder builder = BaseHeaderProto.newBuilder() .setBlock(PBHelperClient.convert(blk)) .setToken(PBHelperClient.convert(blockToken)); - SpanId spanId = Tracer.getCurrentSpanId(); - if (spanId.isValid()) { - builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder() - .setTraceId(spanId.getHigh()) - .setParentId(spanId.getLow())); + Span span = Tracer.getCurrentSpan(); + if (span != null) { + DataTransferTraceInfoProto.Builder traceInfoProtoBuilder = + DataTransferTraceInfoProto.newBuilder().setSpanContext( + TraceUtils.spanContextToByteString(span.getContext())); + builder.setTraceInfo(traceInfoProtoBuilder); } return builder.build(); } - public static SpanId fromProto(DataTransferTraceInfoProto proto) { - if ((proto != null) && proto.hasTraceId() && - proto.hasParentId()) { - return new SpanId(proto.getTraceId(), proto.getParentId()); - } - return null; - } - public static void checkBlockOpStatus( BlockOpResponseProto response, String logInfo) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketHeader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketHeader.java index 486909466474c..cc958e35df116 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketHeader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketHeader.java @@ -27,9 +27,9 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.PacketHeaderProto; import 
org.apache.hadoop.hdfs.util.ByteBufferOutputStream; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Shorts; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Shorts; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketReceiver.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketReceiver.java index 6b717ecdfbff2..dc6d590ce630d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PacketReceiver.java @@ -28,8 +28,8 @@ import org.apache.hadoop.util.DirectBufferPool; import org.apache.hadoop.io.IOUtils; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PipelineAck.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PipelineAck.java index 6992477496754..b58fbb8992a47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PipelineAck.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/PipelineAck.java @@ -24,7 +24,7 @@ import 
java.io.OutputStream; import java.util.ArrayList; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java index 659285723af38..3d81a62993efc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java @@ -52,8 +52,9 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.core.SpanId; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.thirdparty.protobuf.Message; @@ -212,11 +213,12 @@ public void releaseShortCircuitFds(SlotId slotId) throws IOException { ReleaseShortCircuitAccessRequestProto.Builder builder = ReleaseShortCircuitAccessRequestProto.newBuilder(). setSlotId(PBHelperClient.convert(slotId)); - SpanId spanId = Tracer.getCurrentSpanId(); - if (spanId.isValid()) { - builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder(). - setTraceId(spanId.getHigh()). 
- setParentId(spanId.getLow())); + Span span = Tracer.getCurrentSpan(); + if (span != null) { + DataTransferTraceInfoProto.Builder traceInfoProtoBuilder = + DataTransferTraceInfoProto.newBuilder().setSpanContext( + TraceUtils.spanContextToByteString(span.getContext())); + builder.setTraceInfo(traceInfoProtoBuilder); } ReleaseShortCircuitAccessRequestProto proto = builder.build(); send(out, Op.RELEASE_SHORT_CIRCUIT_FDS, proto); @@ -227,11 +229,12 @@ public void requestShortCircuitShm(String clientName) throws IOException { ShortCircuitShmRequestProto.Builder builder = ShortCircuitShmRequestProto.newBuilder(). setClientName(clientName); - SpanId spanId = Tracer.getCurrentSpanId(); - if (spanId.isValid()) { - builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder(). - setTraceId(spanId.getHigh()). - setParentId(spanId.getLow())); + Span span = Tracer.getCurrentSpan(); + if (span != null) { + DataTransferTraceInfoProto.Builder traceInfoProtoBuilder = + DataTransferTraceInfoProto.newBuilder().setSpanContext( + TraceUtils.spanContextToByteString(span.getContext())); + builder.setTraceInfo(traceInfoProtoBuilder); } ShortCircuitShmRequestProto proto = builder.build(); send(out, Op.REQUEST_SHORT_CIRCUIT_SHM, proto); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java index 8d6e318168b3d..a5171885de151 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Function; import javax.security.sasl.Sasl; import 
org.apache.commons.codec.binary.Base64; @@ -52,15 +53,16 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.HandshakeSecretProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.CipherOptionProto; import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; +import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.security.SaslPropertiesResolver; import org.apache.hadoop.security.SaslRpcServer.QualityOfProtection; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Charsets; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Maps; -import com.google.common.net.InetAddresses; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; import org.apache.hadoop.thirdparty.protobuf.ByteString; /** @@ -204,6 +206,26 @@ public static SaslPropertiesResolver getSaslPropertiesResolver( return resolver; } + private static T readSaslMessage(InputStream in, + Function handler) throws IOException { + DataTransferEncryptorMessageProto proto = + DataTransferEncryptorMessageProto.parseFrom(vintPrefixed(in)); + switch (proto.getStatus()) { + case ERROR_UNKNOWN_KEY: + throw new InvalidEncryptionKeyException(proto.getMessage()); + case ERROR: + if (proto.hasAccessTokenError() && proto.getAccessTokenError()) { + throw new InvalidBlockTokenException(proto.getMessage()); + } + throw new IOException(proto.getMessage()); + case SUCCESS: + return handler.apply(proto); + default: + throw new IOException( + "Unknown status: " + proto.getStatus() + ", message: " + proto.getMessage()); + } + } + /** * Reads a SASL negotiation message. 
* @@ -212,15 +234,7 @@ public static SaslPropertiesResolver getSaslPropertiesResolver( * @throws IOException for any error */ public static byte[] readSaslMessage(InputStream in) throws IOException { - DataTransferEncryptorMessageProto proto = - DataTransferEncryptorMessageProto.parseFrom(vintPrefixed(in)); - if (proto.getStatus() == DataTransferEncryptorStatus.ERROR_UNKNOWN_KEY) { - throw new InvalidEncryptionKeyException(proto.getMessage()); - } else if (proto.getStatus() == DataTransferEncryptorStatus.ERROR) { - throw new IOException(proto.getMessage()); - } else { - return proto.getPayload().toByteArray(); - } + return readSaslMessage(in, proto -> proto.getPayload().toByteArray()); } /** @@ -233,13 +247,7 @@ public static byte[] readSaslMessage(InputStream in) throws IOException { */ public static byte[] readSaslMessageAndNegotiationCipherOptions( InputStream in, List cipherOptions) throws IOException { - DataTransferEncryptorMessageProto proto = - DataTransferEncryptorMessageProto.parseFrom(vintPrefixed(in)); - if (proto.getStatus() == DataTransferEncryptorStatus.ERROR_UNKNOWN_KEY) { - throw new InvalidEncryptionKeyException(proto.getMessage()); - } else if (proto.getStatus() == DataTransferEncryptorStatus.ERROR) { - throw new IOException(proto.getMessage()); - } else { + return readSaslMessage(in, proto -> { List optionProtos = proto.getCipherOptionList(); if (optionProtos != null) { for (CipherOptionProto optionProto : optionProtos) { @@ -247,7 +255,7 @@ public static byte[] readSaslMessageAndNegotiationCipherOptions( } } return proto.getPayload().toByteArray(); - } + }); } static class SaslMessageWithHandshake { @@ -276,13 +284,7 @@ String getBpid() { public static SaslMessageWithHandshake readSaslMessageWithHandshakeSecret( InputStream in) throws IOException { - DataTransferEncryptorMessageProto proto = - DataTransferEncryptorMessageProto.parseFrom(vintPrefixed(in)); - if (proto.getStatus() == DataTransferEncryptorStatus.ERROR_UNKNOWN_KEY) { - throw new 
InvalidEncryptionKeyException(proto.getMessage()); - } else if (proto.getStatus() == DataTransferEncryptorStatus.ERROR) { - throw new IOException(proto.getMessage()); - } else { + return readSaslMessage(in, proto -> { byte[] payload = proto.getPayload().toByteArray(); byte[] secret = null; String bpid = null; @@ -292,7 +294,7 @@ public static SaslMessageWithHandshake readSaslMessageWithHandshakeSecret( bpid = handshakeSecret.getBpid(); } return new SaslMessageWithHandshake(payload, secret, bpid); - } + }); } /** @@ -465,13 +467,7 @@ public static void sendSaslMessageAndNegotiationCipherOptions( public static SaslResponseWithNegotiatedCipherOption readSaslMessageAndNegotiatedCipherOption(InputStream in) throws IOException { - DataTransferEncryptorMessageProto proto = - DataTransferEncryptorMessageProto.parseFrom(vintPrefixed(in)); - if (proto.getStatus() == DataTransferEncryptorStatus.ERROR_UNKNOWN_KEY) { - throw new InvalidEncryptionKeyException(proto.getMessage()); - } else if (proto.getStatus() == DataTransferEncryptorStatus.ERROR) { - throw new IOException(proto.getMessage()); - } else { + return readSaslMessage(in, proto -> { byte[] response = proto.getPayload().toByteArray(); List options = PBHelperClient.convertCipherOptionProtos( proto.getCipherOptionList()); @@ -480,7 +476,7 @@ public static void sendSaslMessageAndNegotiationCipherOptions( option = options.get(0); } return new SaslResponseWithNegotiatedCipherOption(response, option); - } + }); } /** @@ -556,6 +552,13 @@ public static void sendSaslMessage(OutputStream out, DataTransferEncryptorStatus status, byte[] payload, String message, HandshakeSecretProto handshakeSecret) throws IOException { + sendSaslMessage(out, status, payload, message, handshakeSecret, false); + } + + public static void sendSaslMessage(OutputStream out, + DataTransferEncryptorStatus status, byte[] payload, String message, + HandshakeSecretProto handshakeSecret, boolean accessTokenError) + throws IOException { 
DataTransferEncryptorMessageProto.Builder builder = DataTransferEncryptorMessageProto.newBuilder(); @@ -569,6 +572,9 @@ public static void sendSaslMessage(OutputStream out, if (handshakeSecret != null) { builder.setHandshakeSecret(handshakeSecret); } + if (accessTokenError) { + builder.setAccessTokenError(true); + } DataTransferEncryptorMessageProto proto = builder.build(); proto.writeDelimitedTo(out); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java index acd1e505cbbc0..7f6e47473ed39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java @@ -21,7 +21,7 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_ENCRYPT_DATA_OVERWRITE_DOWNSTREAM_NEW_QOP_KEY; import static org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil.*; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -64,8 +64,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Charsets; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Negotiates SASL for DataTransferProtocol on behalf of a client. There are @@ -583,11 +583,11 @@ private IOStreamPair doSaslHandshake(InetAddress addr, // the client accepts some cipher suites, but the server does not. 
LOG.debug("Client accepts cipher suites {}, " + "but server {} does not accept any of them", - cipherSuites, addr.toString()); + cipherSuites, addr); } } else { LOG.debug("Client using cipher suite {} with server {}", - cipherOption.getCipherSuite().getName(), addr.toString()); + cipherOption.getCipherSuite().getName(), addr); } } } @@ -598,7 +598,20 @@ private IOStreamPair doSaslHandshake(InetAddress addr, conf, cipherOption, underlyingOut, underlyingIn, false) : sasl.createStreamPair(out, in); } catch (IOException ioe) { - sendGenericSaslErrorMessage(out, ioe.getMessage()); + String message = ioe.getMessage(); + try { + sendGenericSaslErrorMessage(out, message); + } catch (Exception e) { + // If ioe is caused by error response from server, server will close peer connection. + // So sendGenericSaslErrorMessage might cause IOException due to "Broken pipe". + // We suppress IOException from sendGenericSaslErrorMessage + // and always throw `ioe` as top level. + // `ioe` can be InvalidEncryptionKeyException or InvalidBlockTokenException + // that indicates refresh key or token and are important for caller. 
+ LOG.debug("Failed to send generic sasl error to server {} (message: {}), " + + "suppress exception", addr, message, e); + ioe.addSuppressed(e); + } throw ioe; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java index 4028b0e8fb245..47234e8b65d78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java @@ -68,7 +68,7 @@ import org.apache.hadoop.hdfs.server.datanode.DiskBalancerWorkStatus; import org.apache.hadoop.hdfs.server.datanode.DiskBalancerWorkStatus.Result; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; @@ -181,7 +181,7 @@ static ClientDatanodeProtocolPB createClientDatanodeProtocolProxy( InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int socketTimeout) throws IOException { RPC.setProtocolEngine(conf, ClientDatanodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); return RPC.getProxy(ClientDatanodeProtocolPB.class, RPC.getProtocolVersion(ClientDatanodeProtocolPB.class), addr, ticket, conf, factory, socketTimeout); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 572c65b67b21d..ca5d978713fe3 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -24,7 +24,7 @@ import java.util.HashMap; import java.util.List; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -144,6 +144,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetPreferredBlockSizeRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetQuotaUsageRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSlowDatanodeReportRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportListingRequestProto; @@ -237,7 +238,7 @@ import org.apache.hadoop.io.retry.AsyncCallHandler; import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; @@ -456,7 +457,7 @@ public void setPermission(String src, FsPermission permission) private void setAsyncReturnValue() { final AsyncGet asyncReturnMessage - = ProtobufRpcEngine.getAsyncReturnMessage(); + = ProtobufRpcEngine2.getAsyncReturnMessage(); final AsyncGet asyncGet = new AsyncGet() { @Override @@ -609,7 +610,8 @@ public 
void rename2(String src, String dst, Rename... options) for (Rename option : options) { if (option == Rename.OVERWRITE) { overwrite = true; - } else if (option == Rename.TO_TRASH) { + } + if (option == Rename.TO_TRASH) { toTrash = true; } } @@ -993,9 +995,7 @@ public HdfsFileStatus getFileLinkInfo(String src) throws IOException { .setSrc(src).build(); try { GetFileLinkInfoResponseProto result = rpcProxy.getFileLinkInfo(null, req); - return result.hasFs() ? - PBHelperClient.convert(rpcProxy.getFileLinkInfo(null, req).getFs()) : - null; + return result.hasFs() ? PBHelperClient.convert(result.getFs()) : null; } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -1569,7 +1569,7 @@ public AclStatus getAclStatus(String src) throws IOException { if (Client.isAsynchronousMode()) { rpcProxy.getAclStatus(null, req); final AsyncGet asyncReturnMessage - = ProtobufRpcEngine.getAsyncReturnMessage(); + = ProtobufRpcEngine2.getAsyncReturnMessage(); final AsyncGet asyncGet = new AsyncGet() { @Override @@ -2045,6 +2045,18 @@ public void satisfyStoragePolicy(String src) throws IOException { } } + @Override + public DatanodeInfo[] getSlowDatanodeReport() throws IOException { + GetSlowDatanodeReportRequestProto req = + GetSlowDatanodeReportRequestProto.newBuilder().build(); + try { + return PBHelperClient.convert( + rpcProxy.getSlowDatanodeReport(null, req).getDatanodeInfoProtoList()); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + @Override public HAServiceProtocol.HAServiceState getHAServiceState() throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java index 9fc302464271d..b3932f908fffc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java @@ -28,12 +28,12 @@ import java.util.Map; import java.util.Set; -import com.google.common.base.Preconditions; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.collect.Lists; -import com.google.common.primitives.Shorts; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Shorts; import org.apache.hadoop.thirdparty.protobuf.ByteString; import org.apache.hadoop.thirdparty.protobuf.CodedInputStream; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolTranslatorPB.java index 5165887ece5f3..ce8a89b84acce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolTranslatorPB.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.ListReconfigurablePropertiesResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.StartReconfigurationRequestProto; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import 
org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; @@ -84,7 +84,7 @@ static ReconfigurationProtocolPB createReconfigurationProtocolProxy( InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int socketTimeout) throws IOException { RPC.setProtocolEngine(conf, ReconfigurationProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); return RPC.getProxy(ReconfigurationProtocolPB.class, RPC.getProtocolVersion(ReconfigurationProtocolPB.class), addr, ticket, conf, factory, socketTimeout); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolUtils.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolUtils.java index a48c401927406..e387ca7d843d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolUtils.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.GetReconfigurationStatusConfigChangeProto; import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.GetReconfigurationStatusResponseProto; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * This is a client side utility class that handles diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenIdentifier.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenIdentifier.java index 1d395ec24ac72..4b2e6cd989707 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenIdentifier.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenIdentifier.java @@ -27,7 +27,7 @@ import java.util.EnumSet; import java.util.Optional; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.AccessModeProto; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java index d753174e71bcc..dcbf6c8f0b92c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java @@ -33,7 +33,7 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A delegation token identifier that is specific to HDFS. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java index 2d1cfc135521f..c343d00e61371 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java @@ -33,7 +33,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.DataChecksum; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.util.InvalidChecksumSizeException; import org.slf4j.Logger; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java index 505273ec9790f..130e8c1c9c728 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java @@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java index af34c49dd12bb..7e3954c562a58 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java @@ -23,7 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.SerializationFeature; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ObserverReadProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ObserverReadProxyProvider.java index 6b99a7f7ce23b..9cabeb9037fc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ObserverReadProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ObserverReadProxyProvider.java @@ -50,7 +50,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A {@link org.apache.hadoop.io.retry.FailoverProxyProvider} implementation diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/OutlierMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/OutlierMetrics.java new file mode 100644 index 
0000000000000..452885d0c0538 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/OutlierMetrics.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.protocol; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Outlier detection metrics - median, median absolute deviation, upper latency limit, + * actual latency etc. 
+ */ +@InterfaceAudience.Private +public class OutlierMetrics { + + private final Double median; + private final Double mad; + private final Double upperLimitLatency; + private final Double actualLatency; + + public OutlierMetrics(Double median, Double mad, Double upperLimitLatency, + Double actualLatency) { + this.median = median; + this.mad = mad; + this.upperLimitLatency = upperLimitLatency; + this.actualLatency = actualLatency; + } + + public Double getMedian() { + return median; + } + + public Double getMad() { + return mad; + } + + public Double getUpperLimitLatency() { + return upperLimitLatency; + } + + public Double getActualLatency() { + return actualLatency; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + OutlierMetrics that = (OutlierMetrics) o; + + return new EqualsBuilder() + .append(median, that.median) + .append(mad, that.mad) + .append(upperLimitLatency, that.upperLimitLatency) + .append(actualLatency, that.actualLatency) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(17, 37) + .append(median) + .append(mad) + .append(upperLimitLatency) + .append(actualLatency) + .toHashCode(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowDiskReports.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowDiskReports.java index 496389a1ddc87..95078a7412d74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowDiskReports.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowDiskReports.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; 
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowPeerReports.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowPeerReports.java index 218e30df4d2dc..586e2f4931e28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowPeerReports.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/protocol/SlowPeerReports.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.protocol; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -51,7 +51,7 @@ public final class SlowPeerReports { * meaningful and must be avoided. */ @Nonnull - private final Map slowPeers; + private final Map slowPeers; /** * An object representing a SlowPeerReports with no entries. 
Should @@ -61,19 +61,19 @@ public final class SlowPeerReports { public static final SlowPeerReports EMPTY_REPORT = new SlowPeerReports(ImmutableMap.of()); - private SlowPeerReports(Map slowPeers) { + private SlowPeerReports(Map slowPeers) { this.slowPeers = slowPeers; } public static SlowPeerReports create( - @Nullable Map slowPeers) { + @Nullable Map slowPeers) { if (slowPeers == null || slowPeers.isEmpty()) { return EMPTY_REPORT; } return new SlowPeerReports(slowPeers); } - public Map getSlowPeers() { + public Map getSlowPeers() { return slowPeers; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShm.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShm.java index 81cc68da07295..7bf768935d9d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShm.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShm.java @@ -26,7 +26,7 @@ import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.DomainSocketWatcher; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * DfsClientShm is a subclass of ShortCircuitShm which is used by the diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShmManager.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShmManager.java index 6b3d8e07ce082..7cb0c3853ce65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShmManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DfsClientShmManager.java @@ -43,8 +43,8 @@ import org.apache.hadoop.net.unix.DomainSocket; import 
org.apache.hadoop.net.unix.DomainSocketWatcher; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -497,4 +497,14 @@ public String toString() { public DomainSocketWatcher getDomainSocketWatcher() { return domainSocketWatcher; } + + @VisibleForTesting + public int getShmNum() { + int segments = 0; + for (EndpointShmManager endpointShmManager : datanodes.values()) { + segments += + endpointShmManager.notFull.size() + endpointShmManager.full.size(); + } + return segments; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DomainSocketFactory.java index 760e920c232b5..40436395887a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/DomainSocketFactory.java @@ -21,7 +21,7 @@ import java.net.InetSocketAddress; import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.hdfs.DFSUtilClient; @@ -30,9 +30,9 @@ import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.util.PerformanceAdvisory; -import com.google.common.base.Preconditions; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java index d4d898c892662..66882002e8f77 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java @@ -22,6 +22,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.net.SocketException; import java.nio.MappedByteBuffer; import java.util.HashMap; import java.util.Map; @@ -53,9 +54,9 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Waitable; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -181,25 +182,57 @@ private class SlotReleaser implements Runnable { @Override public void run() { + if (slot == null) { + return; + } LOG.trace("{}: about to release {}", ShortCircuitCache.this, slot); final DfsClientShm shm = (DfsClientShm)slot.getShm(); final DomainSocket shmSock = shm.getPeer().getDomainSocket(); final String path = shmSock.getPath(); + DomainSocket domainSocket = pathToDomainSocket.get(path); + DataOutputStream out = null; boolean success = false; - try 
(DomainSocket sock = DomainSocket.connect(path); - DataOutputStream out = new DataOutputStream( - new BufferedOutputStream(sock.getOutputStream()))) { - new Sender(out).releaseShortCircuitFds(slot.getSlotId()); - DataInputStream in = new DataInputStream(sock.getInputStream()); - ReleaseShortCircuitAccessResponseProto resp = - ReleaseShortCircuitAccessResponseProto.parseFrom( - PBHelperClient.vintPrefixed(in)); - if (resp.getStatus() != Status.SUCCESS) { - String error = resp.hasError() ? resp.getError() : "(unknown)"; - throw new IOException(resp.getStatus().toString() + ": " + error); - } - LOG.trace("{}: released {}", this, slot); - success = true; + int retries = 2; + try { + while (retries > 0) { + try { + if (domainSocket == null || !domainSocket.isOpen()) { + domainSocket = DomainSocket.connect(path); + // we are running in single thread mode, no protection needed for + // pathToDomainSocket + pathToDomainSocket.put(path, domainSocket); + } + + out = new DataOutputStream( + new BufferedOutputStream(domainSocket.getOutputStream())); + new Sender(out).releaseShortCircuitFds(slot.getSlotId()); + DataInputStream in = + new DataInputStream(domainSocket.getInputStream()); + ReleaseShortCircuitAccessResponseProto resp = + ReleaseShortCircuitAccessResponseProto + .parseFrom(PBHelperClient.vintPrefixed(in)); + if (resp.getStatus() != Status.SUCCESS) { + String error = resp.hasError() ? 
resp.getError() : "(unknown)"; + throw new IOException(resp.getStatus().toString() + ": " + error); + } + + LOG.trace("{}: released {}", this, slot); + success = true; + break; + + } catch (SocketException se) { + // the domain socket on datanode may be timed out, we retry once + retries--; + if (domainSocket != null) { + domainSocket.close(); + domainSocket = null; + pathToDomainSocket.remove(path); + } + if (retries == 0) { + throw new SocketException("Create domain socket failed"); + } + } + } // end of while block } catch (IOException e) { LOG.warn(ShortCircuitCache.this + ": failed to release " + "short-circuit shared memory slot " + slot + " by sending " @@ -211,9 +244,11 @@ public void run() { shmManager.freeSlot(slot); } else { shm.getEndpointShmManager().shutdown(shm); + IOUtilsClient.cleanupWithLogger(LOG, domainSocket, out); + pathToDomainSocket.remove(path); } } - } + } // end of run() } public interface ShortCircuitReplicaCreator { @@ -324,6 +359,12 @@ public interface ShortCircuitReplicaCreator { */ private final DfsClientShmManager shmManager; + /** + * A map contains all DomainSockets used in SlotReleaser. Keys are the domain socket + * paths of short-circuit shared memory segments. + */ + private Map pathToDomainSocket = new HashMap<>(); + public static ShortCircuitCache fromConf(ShortCircuitConf conf) { return new ShortCircuitCache( conf.getShortCircuitStreamsCacheSize(), @@ -997,6 +1038,9 @@ public void freeSlot(Slot slot) { * @param slot The slot to release. 
*/ public void scheduleSlotReleaser(Slot slot) { + if (slot == null) { + return; + } Preconditions.checkState(shmManager != null); releaserExecutor.execute(new SlotReleaser(slot)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java index 14116e2fdc1c6..86218aa0fcd8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java @@ -31,8 +31,8 @@ import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitShm.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitShm.java index b9fcadae529b3..1cb123bb58f3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitShm.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitShm.java @@ -39,9 +39,9 @@ import sun.misc.Unsafe; -import com.google.common.base.Preconditions; -import com.google.common.collect.ComparisonChain; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; +import 
org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import javax.annotation.Nonnull; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ByteArrayManager.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ByteArrayManager.java index 164b03bf203a3..059280e494678 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ByteArrayManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ByteArrayManager.java @@ -26,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.util.Time; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ECPolicyLoader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ECPolicyLoader.java index fb15926dce176..0d1be4b8e6790 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ECPolicyLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/ECPolicyLoader.java @@ -20,6 +20,8 @@ import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.util.XMLUtils; + import org.w3c.dom.Node; import org.w3c.dom.Text; import org.w3c.dom.Element; @@ -87,7 +89,7 @@ private List loadECPolicies(File policyFile) LOG.info("Loading EC policy file " + policyFile); // Read and parse the EC policy file. 
- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory(); dbf.setIgnoringComments(true); DocumentBuilder builder = dbf.newDocumentBuilder(); Document doc = builder.parse(policyFile); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/StripedBlockUtil.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/StripedBlockUtil.java index 012d70895021d..28334fe193199 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/StripedBlockUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/StripedBlockUtil.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.util; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; @@ -28,7 +28,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.DFSStripedOutputStream; @@ -245,8 +245,7 @@ public static long getSafeLength(ErasureCodingPolicy ecPolicy, Arrays.sort(cpy); // full stripe is a stripe has at least dataBlkNum full cells. // lastFullStripeIdx is the index of the last full stripe. 
- int lastFullStripeIdx = - (int) (cpy[cpy.length - dataBlkNum] / cellSize); + long lastFullStripeIdx = cpy[cpy.length - dataBlkNum] / cellSize; return lastFullStripeIdx * stripeSize; // return the safeLength // TODO: Include lastFullStripeIdx+1 stripe in safeLength, if there exists // such a stripe (and it must be partial). @@ -271,9 +270,9 @@ private static int lastCellSize(int size, int cellSize, int numDataBlocks, */ public static long offsetInBlkToOffsetInBG(int cellSize, int dataBlkNum, long offsetInBlk, int idxInBlockGroup) { - int cellIdxInBlk = (int) (offsetInBlk / cellSize); + long cellIdxInBlk = offsetInBlk / cellSize; return cellIdxInBlk * cellSize * dataBlkNum // n full stripes before offset - + idxInBlockGroup * cellSize // m full cells before offset + + (long)idxInBlockGroup * cellSize // m full cells before offset + offsetInBlk % cellSize; // partial cell } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/ByteRangeInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/ByteRangeInputStream.java index 31de804d74ee9..c83ccf9d738ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/ByteRangeInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/ByteRangeInputStream.java @@ -31,8 +31,8 @@ import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FSInputStream; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.net.HttpHeaders; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.net.HttpHeaders; import javax.annotation.Nonnull; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java 
index e846b56200993..87932cc38952d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java @@ -19,9 +19,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileStatus; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java index 870103eddf021..dfc754b3a819e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java @@ -38,7 +38,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class implements the aspects that relate to delegation tokens for all diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java index 8b6c7f7cfd02a..589afb4604d3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java @@ -34,7 +34,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Utilities for handling URLs diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index d0b10cbbcf813..99d99a8a52cac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -76,10 +76,12 @@ import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.GlobalStorageStatistics; import org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider; +import org.apache.hadoop.fs.MultipartUploaderBuilder; import org.apache.hadoop.fs.QuotaUsage; import org.apache.hadoop.fs.PathCapabilities; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.impl.FileSystemMultipartUploaderBuilder; import org.apache.hadoop.fs.permission.FsCreateModes; import org.apache.hadoop.hdfs.DFSOpsCountStatistics; import org.apache.hadoop.hdfs.DFSOpsCountStatistics.OpType; @@ -132,12 +134,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** A FileSystem for HDFS over the web. */ public class WebHdfsFileSystem extends FileSystem @@ -152,6 +152,7 @@ public class WebHdfsFileSystem extends FileSystem + "/v" + VERSION; public static final String EZ_HEADER = "X-Hadoop-Accept-EZ"; public static final String FEFINFO_HEADER = "X-Hadoop-feInfo"; + public static final String DFS_HTTP_POLICY_KEY = "dfs.http.policy"; /** * Default connection factory may be overridden in tests to use smaller @@ -181,6 +182,7 @@ public class WebHdfsFileSystem extends FileSystem private DFSOpsCountStatistics storageStatistics; private KeyProvider testProvider; + private boolean isTLSKrb; /** * Return the protocol scheme for the FileSystem. @@ -242,6 +244,7 @@ public synchronized void initialize(URI uri, Configuration conf .newDefaultURLConnectionFactory(connectTimeout, readTimeout, conf); } + this.isTLSKrb = "HTTPS_ONLY".equals(conf.get(DFS_HTTP_POLICY_KEY)); ugi = UserGroupInformation.getCurrentUser(); this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); @@ -699,6 +702,11 @@ protected HttpURLConnection connect(URL url) throws IOException { //redirect hostname and port redirectHost = null; + if (url.getProtocol().equals("http") && + UserGroupInformation.isSecurityEnabled() && + isTLSKrb) { + throw new IOException("Access denied: dfs.http.policy is HTTPS_ONLY."); + } // resolve redirects for a DN operation unless already resolved if (op.getRedirect() && !redirected) { @@ -1331,6 +1339,8 @@ public void allowSnapshot(final Path p) throws IOException { @Override public void satisfyStoragePolicy(final Path p) throws IOException { + statistics.incrementWriteOps(1); + storageStatistics.incrementOpCounter(OpType.SATISFY_STORAGE_POLICY); final HttpOpParam.Op op = PutOpParam.Op.SATISFYSTORAGEPOLICY; new FsPathRunner(op, 
p).run(); } @@ -1420,6 +1430,7 @@ public void renameSnapshot(final Path path, final String snapshotOldName, public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir, final String fromSnapshot, final String toSnapshot) throws IOException { + statistics.incrementReadOps(1); storageStatistics.incrementOpCounter(OpType.GET_SNAPSHOT_DIFF); final HttpOpParam.Op op = GetOpParam.Op.GETSNAPSHOTDIFF; return new FsPathResponseRunner(op, snapshotDir, @@ -1434,6 +1445,7 @@ SnapshotDiffReport decodeResponse(Map json) { public SnapshottableDirectoryStatus[] getSnapshottableDirectoryList() throws IOException { + statistics.incrementReadOps(1); storageStatistics .incrementOpCounter(OpType.GET_SNAPSHOTTABLE_DIRECTORY_LIST); final HttpOpParam.Op op = GetOpParam.Op.GETSNAPSHOTTABLEDIRECTORYLIST; @@ -1995,6 +2007,8 @@ public void setStoragePolicy(Path p, String policyName) throws IOException { @Override public Collection getAllStoragePolicies() throws IOException { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_STORAGE_POLICIES); final HttpOpParam.Op op = GetOpParam.Op.GETALLSTORAGEPOLICY; return new FsPathResponseRunner>(op, null) { @Override @@ -2007,6 +2021,8 @@ Collection decodeResponse(Map json) @Override public BlockStoragePolicy getStoragePolicy(Path src) throws IOException { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_STORAGE_POLICY); final HttpOpParam.Op op = GetOpParam.Op.GETSTORAGEPOLICY; return new FsPathResponseRunner(op, src) { @Override @@ -2109,6 +2125,12 @@ public boolean hasPathCapability(final Path path, final String capability) return super.hasPathCapability(p, capability); } + @Override + public MultipartUploaderBuilder createMultipartUploader(final Path basePath) + throws IOException { + return new FileSystemMultipartUploaderBuilder(this, basePath); + } + /** * This class is used for opening, reading, and seeking files while using the * WebHdfsFileSystem. 
This class will invoke the retry policy when performing @@ -2450,10 +2472,12 @@ InputStream initializeInputStream(HttpURLConnection conn) @VisibleForTesting void closeInputStream(RunnerState rs) throws IOException { if (in != null) { - IOUtils.close(cachedConnection); in = null; } - cachedConnection = null; + if (cachedConnection != null) { + IOUtils.close(cachedConnection); + cachedConnection = null; + } runnerState = rs; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/ConfRefreshTokenBasedAccessTokenProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/ConfRefreshTokenBasedAccessTokenProvider.java index 3e3fbfbd91380..e944e8c1c8d77 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/ConfRefreshTokenBasedAccessTokenProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/ConfRefreshTokenBasedAccessTokenProvider.java @@ -18,10 +18,15 @@ */ package org.apache.hadoop.hdfs.web.oauth2; -import com.squareup.okhttp.OkHttpClient; -import com.squareup.okhttp.Request; -import com.squareup.okhttp.RequestBody; -import com.squareup.okhttp.Response; +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -30,10 +35,6 @@ import org.apache.hadoop.util.Timer; import org.apache.http.HttpStatus; -import java.io.IOException; -import java.util.Map; -import java.util.concurrent.TimeUnit; - import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY; import static 
org.apache.hadoop.hdfs.web.oauth2.OAuth2Constants.ACCESS_TOKEN; @@ -102,37 +103,34 @@ public synchronized String getAccessToken() throws IOException { } void refresh() throws IOException { - try { - OkHttpClient client = new OkHttpClient(); - client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, - TimeUnit.MILLISECONDS); - client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, - TimeUnit.MILLISECONDS); - - String bodyString = Utils.postBody(GRANT_TYPE, REFRESH_TOKEN, - REFRESH_TOKEN, refreshToken, - CLIENT_ID, clientId); - - RequestBody body = RequestBody.create(URLENCODED, bodyString); - - Request request = new Request.Builder() - .url(refreshURL) - .post(body) - .build(); - Response responseBody = client.newCall(request).execute(); - - if (responseBody.code() != HttpStatus.SC_OK) { - throw new IllegalArgumentException("Received invalid http response: " - + responseBody.code() + ", text = " + responseBody.toString()); + OkHttpClient client = + new OkHttpClient.Builder().connectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, + TimeUnit.MILLISECONDS) + .readTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS) + .build(); + + String bodyString = + Utils.postBody(GRANT_TYPE, REFRESH_TOKEN, REFRESH_TOKEN, refreshToken, CLIENT_ID, clientId); + + RequestBody body = RequestBody.create(bodyString, URLENCODED); + + Request request = new Request.Builder().url(refreshURL).post(body).build(); + try (Response response = client.newCall(request).execute()) { + if (!response.isSuccessful()) { + throw new IOException("Unexpected code " + response); + } + if (response.code() != HttpStatus.SC_OK) { + throw new IllegalArgumentException( + "Received invalid http response: " + response.code() + ", text = " + + response.toString()); } - Map response = JsonSerialization.mapReader().readValue( - responseBody.body().string()); + Map responseBody = JsonSerialization.mapReader().readValue(response.body().string()); - String newExpiresIn 
= response.get(EXPIRES_IN).toString(); + String newExpiresIn = responseBody.get(EXPIRES_IN).toString(); accessTokenTimer.setExpiresIn(newExpiresIn); - accessToken = response.get(ACCESS_TOKEN).toString(); + accessToken = responseBody.get(ACCESS_TOKEN).toString(); } catch (Exception e) { throw new IOException("Exception while refreshing access token", e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/CredentialBasedAccessTokenProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/CredentialBasedAccessTokenProvider.java index bfd7055990e81..25ceb8846092b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/CredentialBasedAccessTokenProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/CredentialBasedAccessTokenProvider.java @@ -18,10 +18,15 @@ */ package org.apache.hadoop.hdfs.web.oauth2; -import com.squareup.okhttp.OkHttpClient; -import com.squareup.okhttp.Request; -import com.squareup.okhttp.RequestBody; -import com.squareup.okhttp.Response; +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -30,10 +35,6 @@ import org.apache.hadoop.util.Timer; import org.apache.http.HttpStatus; -import java.io.IOException; -import java.util.Map; -import java.util.concurrent.TimeUnit; - import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY; import static org.apache.hadoop.hdfs.web.oauth2.OAuth2Constants.ACCESS_TOKEN; @@ -96,38 +97,38 @@ public 
synchronized String getAccessToken() throws IOException { } void refresh() throws IOException { - try { - OkHttpClient client = new OkHttpClient(); - client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, - TimeUnit.MILLISECONDS); - client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, - TimeUnit.MILLISECONDS); - - String bodyString = Utils.postBody(CLIENT_SECRET, getCredential(), - GRANT_TYPE, CLIENT_CREDENTIALS, - CLIENT_ID, clientId); - - RequestBody body = RequestBody.create(URLENCODED, bodyString); - - Request request = new Request.Builder() - .url(refreshURL) - .post(body) - .build(); - Response responseBody = client.newCall(request).execute(); - - if (responseBody.code() != HttpStatus.SC_OK) { + OkHttpClient client = new OkHttpClient.Builder() + .connectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS) + .readTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS) + .build(); + + String bodyString = Utils.postBody(CLIENT_SECRET, getCredential(), + GRANT_TYPE, CLIENT_CREDENTIALS, + CLIENT_ID, clientId); + + RequestBody body = RequestBody.create(bodyString, URLENCODED); + + Request request = new Request.Builder() + .url(refreshURL) + .post(body) + .build(); + try (Response response = client.newCall(request).execute()) { + if (!response.isSuccessful()) { + throw new IOException("Unexpected code " + response); + } + + if (response.code() != HttpStatus.SC_OK) { throw new IllegalArgumentException("Received invalid http response: " - + responseBody.code() + ", text = " + responseBody.toString()); + + response.code() + ", text = " + response.toString()); } - Map response = JsonSerialization.mapReader().readValue( - responseBody.body().string()); + Map responseBody = JsonSerialization.mapReader().readValue( + response.body().string()); - String newExpiresIn = response.get(EXPIRES_IN).toString(); + String newExpiresIn = responseBody.get(EXPIRES_IN).toString(); timer.setExpiresIn(newExpiresIn); - 
accessToken = response.get(ACCESS_TOKEN).toString(); - + accessToken = responseBody.get(ACCESS_TOKEN).toString(); } catch (Exception e) { throw new IOException("Unable to obtain access token from credential", e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/OAuth2Constants.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/OAuth2Constants.java index 3f091c2945bcc..2f28b65e40e92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/OAuth2Constants.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/oauth2/OAuth2Constants.java @@ -18,7 +18,7 @@ */ package org.apache.hadoop.hdfs.web.oauth2; -import com.squareup.okhttp.MediaType; +import okhttp3.MediaType; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/AclPermissionParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/AclPermissionParam.java index 9ab3ad57fa8e2..5419219b6effc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/AclPermissionParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/AclPermissionParam.java @@ -24,7 +24,7 @@ import java.util.List; import java.util.regex.Pattern; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.permission.AclEntry; /** AclPermission parameter. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java index 19efb9d1818c5..0ebafe747fc28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java @@ -19,7 +19,7 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.text.MessageFormat; import java.util.regex.Pattern; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientDatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientDatanodeProtocol.proto index 84cd771da4912..6c8d1c5fafb80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientDatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientDatanodeProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax="proto2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto index 3fb57bc02d0ac..563322f744dec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax="proto2"; @@ -416,6 +416,13 @@ message GetPreferredBlockSizeResponseProto { required uint64 bsize = 1; } +message GetSlowDatanodeReportRequestProto { +} + +message GetSlowDatanodeReportResponseProto { + repeated DatanodeInfoProto datanodeInfoProto = 1; +} + enum SafeModeActionProto { SAFEMODE_LEAVE = 1; SAFEMODE_ENTER = 2; @@ -1060,4 +1067,6 @@ service ClientNamenodeProtocol { returns(SatisfyStoragePolicyResponseProto); rpc getHAServiceState(HAServiceStateRequestProto) returns(HAServiceStateResponseProto); + rpc getSlowDatanodeReport(GetSlowDatanodeReportRequestProto) + returns(GetSlowDatanodeReportResponseProto); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/datatransfer.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/datatransfer.proto index 66a69a9fcde6f..5356cd6961699 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/datatransfer.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/datatransfer.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. 
- * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax="proto2"; @@ -44,6 +44,7 @@ message DataTransferEncryptorMessageProto { optional string message = 3; repeated CipherOptionProto cipherOption = 4; optional HandshakeSecretProto handshakeSecret = 5; + optional bool accessTokenError = 6; } message HandshakeSecretProto { @@ -58,8 +59,9 @@ message BaseHeaderProto { } message DataTransferTraceInfoProto { - required uint64 traceId = 1; - required uint64 parentId = 2; + optional uint64 traceId = 1; + optional uint64 parentId = 2; + optional bytes spanContext = 3; } message ClientOperationHeaderProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/encryption.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/encryption.proto index bcd82d63e0577..d280947cf5a74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/encryption.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/encryption.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax="proto2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto index d92dd4cb84c97..fd3618fe73175 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto @@ -108,6 +108,7 @@ message BlockECReconstructionInfoProto { required StorageTypesProto targetStorageTypes = 5; required bytes liveBlockIndices = 6; required ErasureCodingPolicyProto ecPolicy = 7; + optional bytes excludeReconstructedIndices = 8; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto index 82fe329c9ce5e..0d975994b1daa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax="proto2"; @@ -197,7 +197,7 @@ message StorageTypeQuotaInfosProto { } message StorageTypeQuotaInfoProto { - required StorageTypeProto type = 1; + optional StorageTypeProto type = 1 [default = DISK]; required uint64 quota = 2; required uint64 consumed = 3; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/inotify.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/inotify.proto index afcccaa13bd5d..eb7a0c3549b08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/inotify.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/inotify.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. 
- * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax="proto2"; @@ -32,7 +32,6 @@ package hadoop.hdfs; import "acl.proto"; import "xattr.proto"; -import "hdfs.proto"; enum EventType { EVENT_CREATE = 0x0; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory deleted file mode 100644 index b153fd9924381..0000000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -org.apache.hadoop.hdfs.DFSMultipartUploaderFactory diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java index 77957bc2af6ee..8bf60971b3d7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java @@ -20,7 +20,6 @@ import java.util.Random; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.htrace.core.SpanId; import org.junit.Assert; import org.junit.Test; @@ -66,29 +65,4 @@ public static void assertArrayRegionsEqual(byte []buf1, int off1, byte []buf2, } } } - - @Test - public void testAddParentsGetParents() throws Exception { - DFSPacket p = new DFSPacket(null, maxChunksPerPacket, - 0, 0, checksumSize, false); - SpanId parents[] = p.getTraceParents(); - Assert.assertEquals(0, parents.length); - p.addTraceParent(new SpanId(0, 123)); - p.addTraceParent(new SpanId(0, 123)); - parents = p.getTraceParents(); - Assert.assertEquals(1, parents.length); - Assert.assertEquals(new SpanId(0, 123), parents[0]); - parents = p.getTraceParents(); // test calling 'get' again. 
- Assert.assertEquals(1, parents.length); - Assert.assertEquals(new SpanId(0, 123), parents[0]); - p.addTraceParent(new SpanId(0, 1)); - p.addTraceParent(new SpanId(0, 456)); - p.addTraceParent(new SpanId(0, 789)); - parents = p.getTraceParents(); - Assert.assertEquals(4, parents.length); - Assert.assertEquals(new SpanId(0, 1), parents[0]); - Assert.assertEquals(new SpanId(0, 123), parents[1]); - Assert.assertEquals(new SpanId(0, 456), parents[2]); - Assert.assertEquals(new SpanId(0, 789), parents[3]); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java index b24df2bfce929..3f1ff8826957e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs; -import com.google.common.collect.HashMultiset; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultiset; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.net.unix.DomainSocket; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/client/impl/TestLeaseRenewer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/client/impl/TestLeaseRenewer.java index f73ea6d24712f..f1a11edeefcd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/client/impl/TestLeaseRenewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/client/impl/TestLeaseRenewer.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.client.impl; -import com.google.common.base.Supplier; +import java.util.function.Supplier; import org.apache.hadoop.hdfs.DFSClient; import 
org.apache.hadoop.hdfs.DFSOutputStream; import org.apache.hadoop.security.UserGroupInformation; @@ -31,7 +31,11 @@ import org.mockito.stubbing.Answer; import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Pattern; import static org.junit.Assert.assertSame; @@ -168,6 +172,11 @@ public Boolean get() { renewer.closeClient(mockClient1); renewer.closeClient(mockClient2); + renewer.closeClient(MOCK_DFSCLIENT); + + // Make sure renewer is not running due to expiration. + Thread.sleep(FAST_GRACE_PERIOD * 2); + Assert.assertTrue(!renewer.isRunning()); } @Test @@ -197,4 +206,82 @@ public void testThreadName() throws Exception { Assert.assertFalse(renewer.isRunning()); } + /** + * Test for HDFS-14575. In this fix, the LeaseRenewer clears all clients + * and expires immediately via setting empty time to 0 before it's removed + * from factory. Previously, LeaseRenewer#daemon thread might leak. 
+ */ + @Test + public void testDaemonThreadLeak() throws Exception { + Assert.assertFalse("Renewer not initially running", renewer.isRunning()); + + // Pretend to create a file#1, daemon#1 starts + renewer.put(MOCK_DFSCLIENT); + Assert.assertTrue("Renewer should have started running", + renewer.isRunning()); + Pattern daemonThreadNamePattern = Pattern.compile("LeaseRenewer:\\S+"); + Assert.assertEquals(1, countThreadMatching(daemonThreadNamePattern)); + + // Pretend to create file#2, daemon#2 starts due to expiration + LeaseRenewer lastRenewer = renewer; + renewer = + LeaseRenewer.getInstance(FAKE_AUTHORITY, FAKE_UGI_A, MOCK_DFSCLIENT); + Assert.assertEquals(lastRenewer, renewer); + + // Pretend to close file#1 + renewer.closeClient(MOCK_DFSCLIENT); + Assert.assertEquals(1, countThreadMatching(daemonThreadNamePattern)); + + // Pretend to be expired + renewer.setEmptyTime(0); + + renewer = + LeaseRenewer.getInstance(FAKE_AUTHORITY, FAKE_UGI_A, MOCK_DFSCLIENT); + renewer.setGraceSleepPeriod(FAST_GRACE_PERIOD); + boolean success = renewer.put(MOCK_DFSCLIENT); + if (!success) { + LeaseRenewer.remove(renewer); + renewer = + LeaseRenewer.getInstance(FAKE_AUTHORITY, FAKE_UGI_A, MOCK_DFSCLIENT); + renewer.setGraceSleepPeriod(FAST_GRACE_PERIOD); + renewer.put(MOCK_DFSCLIENT); + } + + int threadCount = countThreadMatching(daemonThreadNamePattern); + //Sometimes old LR#Daemon gets closed and lead to count 1 (rare scenario) + Assert.assertTrue(1 == threadCount || 2 == threadCount); + + // After grace period, both daemon#1 and renewer#1 will be removed due to + // expiration, then daemon#2 will leak before HDFS-14575. 
+ Thread.sleep(FAST_GRACE_PERIOD * 2); + + // Pretend to close file#2, renewer#2 will be created + lastRenewer = renewer; + renewer = + LeaseRenewer.getInstance(FAKE_AUTHORITY, FAKE_UGI_A, MOCK_DFSCLIENT); + Assert.assertEquals(lastRenewer, renewer); + renewer.setGraceSleepPeriod(FAST_GRACE_PERIOD); + renewer.closeClient(MOCK_DFSCLIENT); + renewer.setEmptyTime(0); + // Make sure LeaseRenewer#daemon threads will terminate after grace period + Thread.sleep(FAST_GRACE_PERIOD * 2); + Assert.assertEquals("LeaseRenewer#daemon thread leaks", 0, + countThreadMatching(daemonThreadNamePattern)); + } + + private static int countThreadMatching(Pattern pattern) { + ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); + ThreadInfo[] infos = + threadBean.getThreadInfo(threadBean.getAllThreadIds(), 1); + int count = 0; + for (ThreadInfo info : infos) { + if (info == null) { + continue; + } + if (pattern.matcher(info.getThreadName()).matches()) { + count++; + } + } + return count; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/protocol/TestReadOnly.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/protocol/TestReadOnly.java index 41069b439784f..7e74b3354a99c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/protocol/TestReadOnly.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/protocol/TestReadOnly.java @@ -75,7 +75,8 @@ public class TestReadOnly { "getQuotaUsage", "msync", "getHAServiceState", - "getECTopologyResultForPolicies" + "getECTopologyResultForPolicies", + "getSlowDatanodeReport" ) ); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java index e3f34e3c66954..c198536d01a2b 100644 
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java @@ -23,6 +23,7 @@ import org.apache.hadoop.net.MockDomainNameResolver; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Time; import org.junit.Before; import org.junit.BeforeClass; @@ -34,7 +35,6 @@ import org.slf4j.event.Level; import java.io.IOException; -import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; @@ -292,12 +292,22 @@ private void testResolveDomainNameUsingDNS(boolean useFQDN) throws Exception { MockDomainNameResolver.FQDN_2 : "/" + MockDomainNameResolver.ADDR_2; // Check we got the proper addresses assertEquals(2, proxyResults.size()); - assertTrue( - "nn1 wasn't returned: " + proxyResults, - proxyResults.containsKey(resolvedHost1 + ":8020")); - assertTrue( - "nn2 wasn't returned: " + proxyResults, - proxyResults.containsKey(resolvedHost2 + ":8020")); + if (Shell.isJavaVersionAtLeast(14) && useFQDN) { + // JDK-8225499. The string format of unresolved address has been changed. 
+ assertTrue( + "nn1 wasn't returned: " + proxyResults, + proxyResults.containsKey(resolvedHost1 + "/:8020")); + assertTrue( + "nn2 wasn't returned: " + proxyResults, + proxyResults.containsKey(resolvedHost2 + "/:8020")); + } else { + assertTrue( + "nn1 wasn't returned: " + proxyResults, + proxyResults.containsKey(resolvedHost1 + ":8020")); + assertTrue( + "nn2 wasn't returned: " + proxyResults, + proxyResults.containsKey(resolvedHost2 + ":8020")); + } // Check that the Namenodes were invoked assertEquals(NUM_ITERATIONS, nn1Count.get() + nn2Count.get()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRequestHedgingProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRequestHedgingProxyProvider.java index 5704ef0bd4c03..23e7b74469c8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRequestHedgingProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRequestHedgingProxyProvider.java @@ -56,7 +56,7 @@ import static org.mockito.Mockito.when; import static org.mockito.Mockito.mock; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; public class TestRequestHedgingProxyProvider { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestByteRangeInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestByteRangeInputStream.java index 6c426f575b91c..010d7c5870c54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestByteRangeInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestByteRangeInputStream.java @@ -34,7 +34,7 @@ import java.net.HttpURLConnection; import java.net.URL; 
-import com.google.common.net.HttpHeaders; +import org.apache.hadoop.thirdparty.com.google.common.net.HttpHeaders; import org.apache.hadoop.hdfs.web.ByteRangeInputStream.InputStreamAndFileLength; import org.apache.hadoop.test.Whitebox; import org.junit.Test; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestURLConnectionFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestURLConnectionFactory.java index 2be8bf4336279..eebe0baaf2356 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestURLConnectionFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestURLConnectionFactory.java @@ -27,13 +27,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.security.authentication.client.ConnectionConfigurator; +import static org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory.SSL_MONITORING_THREAD_NAME; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.hadoop.security.ssl.SSLFactory; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Assert; import org.junit.Test; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.LoggerFactory; public final class TestURLConnectionFactory { @@ -99,7 +100,7 @@ public void testSSLFactoryCleanup() throws Exception { Thread reloaderThread = null; for (Thread thread : threads) { if ((thread.getName() != null) - && (thread.getName().contains("Truststore reloader thread"))) { + && (thread.getName().contains(SSL_MONITORING_THREAD_NAME))) { reloaderThread = thread; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFSOAuth2.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFSOAuth2.java index 
bd12204118694..af17d0b1e5ee3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFSOAuth2.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFSOAuth2.java @@ -31,7 +31,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; -import org.mockserver.client.server.MockServerClient; +import org.mockserver.client.MockServerClient; import org.mockserver.integration.ClientAndServer; import org.mockserver.model.Header; import org.mockserver.model.HttpRequest; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestClientCredentialTimeBasedTokenRefresher.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestClientCredentialTimeBasedTokenRefresher.java index 935cd9eeb665b..9ae7ff88fb6b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestClientCredentialTimeBasedTokenRefresher.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestClientCredentialTimeBasedTokenRefresher.java @@ -24,7 +24,7 @@ import org.apache.hadoop.util.Timer; import org.apache.http.HttpStatus; import org.junit.Test; -import org.mockserver.client.server.MockServerClient; +import org.mockserver.client.MockServerClient; import org.mockserver.integration.ClientAndServer; import org.mockserver.model.Header; import org.mockserver.model.HttpRequest; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestRefreshTokenTimeBasedTokenRefresher.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestRefreshTokenTimeBasedTokenRefresher.java index d9996bc4f2f0e..3ef105ca246c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestRefreshTokenTimeBasedTokenRefresher.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/oauth2/TestRefreshTokenTimeBasedTokenRefresher.java @@ -23,7 +23,7 @@ import org.apache.hadoop.util.Timer; import org.apache.http.HttpStatus; import org.junit.Test; -import org.mockserver.client.server.MockServerClient; +import org.mockserver.client.MockServerClient; import org.mockserver.integration.ClientAndServer; import org.mockserver.model.Header; import org.mockserver.model.HttpRequest; diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml index eac05832fcd7c..0a17e9a7dbbec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml @@ -22,11 +22,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-hdfs-httpfs - 3.3.0-SNAPSHOT + 3.3.6 jar Apache Hadoop HttpFS @@ -72,8 +72,8 @@ javax.servlet-api - com.google.guava - guava + org.apache.hadoop.thirdparty + hadoop-shaded-guava compile @@ -118,10 +118,6 @@ org.eclipse.jetty servlet-api-2.5 - - com.amazonaws - aws-java-sdk-s3 - org.eclipse.jdt core @@ -153,10 +149,6 @@ org.eclipse.jetty servlet-api-2.5 - - com.amazonaws - aws-java-sdk-s3 - org.eclipse.jdt core @@ -187,8 +179,8 @@ test-jar - log4j - log4j + ch.qos.reload4j + reload4j compile @@ -198,7 +190,7 @@ org.slf4j - slf4j-log4j12 + slf4j-reload4j runtime @@ -255,7 +247,6 @@ org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} 1 600 @@ -281,7 +272,7 @@ - javadoc + javadoc-no-fork site @@ -345,8 +336,8 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin ${basedir}/dev-support/findbugsExcludeFile.xml @@ -369,7 +360,6 @@ org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} 1 true 600 diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java 
b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java index 17227598844a9..b427d8c35daa4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java @@ -23,7 +23,7 @@ import java.util.EnumSet; import java.util.List; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonPathCapabilities; @@ -47,6 +47,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.FsPermissionExtension; @@ -70,9 +71,9 @@ import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -90,7 +91,6 @@ import java.security.PrivilegedExceptionAction; import java.text.MessageFormat; import java.util.HashMap; -import java.util.Locale; import java.util.Map; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; @@ -197,6 +197,7 @@ public static FILE_TYPE getType(FileStatus fileStatus) { public static final String 
XATTR_VALUE_JSON = "value"; public static final String XATTRNAMES_JSON = "XAttrNames"; public static final String ECPOLICY_JSON = "ecPolicyObj"; + public static final String SYMLINK_JSON = "symlink"; public static final String FILE_CHECKSUM_JSON = "FileChecksum"; public static final String CHECKSUM_ALGORITHM_JSON = "algorithm"; @@ -801,6 +802,11 @@ public DirectoryEntries listStatusBatch(Path f, byte[] token) throws */ @Override public void setWorkingDirectory(Path newDir) { + String result = newDir.toUri().getPath(); + if (!DFSUtilClient.isValidName(result)) { + throw new IllegalArgumentException( + "Invalid DFS directory name " + result); + } workingDir = newDir; } @@ -1093,6 +1099,9 @@ private FileStatus createFileStatus(Path parent, JSONObject json) { String pathSuffix = (String) json.get(PATH_SUFFIX_JSON); Path path = (pathSuffix.equals("")) ? parent : new Path(parent, pathSuffix); FILE_TYPE type = FILE_TYPE.valueOf((String) json.get(TYPE_JSON)); + String symLinkValue = + type == FILE_TYPE.SYMLINK ? (String) json.get(SYMLINK_JSON) : null; + Path symLink = symLinkValue == null ? 
null : new Path(symLinkValue); long len = (Long) json.get(LENGTH_JSON); String owner = (String) json.get(OWNER_JSON); String group = (String) json.get(GROUP_JSON); @@ -1117,11 +1126,12 @@ private FileStatus createFileStatus(Path parent, JSONObject json) { new FsPermissionExtension(permission, aBit, eBit, ecBit); FileStatus fileStatus = new FileStatus(len, FILE_TYPE.DIRECTORY == type, replication, blockSize, mTime, aTime, deprecatedPerm, owner, group, - null, path, FileStatus.attributes(aBit, eBit, ecBit, seBit)); + symLink, path, FileStatus.attributes(aBit, eBit, ecBit, seBit)); return fileStatus; } else { return new FileStatus(len, FILE_TYPE.DIRECTORY == type, - replication, blockSize, mTime, aTime, permission, owner, group, path); + replication, blockSize, mTime, aTime, permission, owner, group, + symLink, path); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSUtils.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSUtils.java index fcc7bab15e461..bd9baaa93f410 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSUtils.java @@ -33,6 +33,8 @@ import java.util.List; import java.util.Map; +import javax.ws.rs.core.MediaType; + /** * Utility methods used by HttpFS classes. */ @@ -127,8 +129,17 @@ static URL createURL(Path path, Map params, MapInputStream could not be * JSON parsed. 
*/ - static Object jsonParse(HttpURLConnection conn) throws IOException { + public static Object jsonParse(HttpURLConnection conn) throws IOException { try { + String contentType = conn.getContentType(); + if (contentType != null) { + final MediaType parsed = MediaType.valueOf(contentType); + if (!MediaType.APPLICATION_JSON_TYPE.isCompatible(parsed)) { + throw new IOException("Content-Type \"" + contentType + + "\" is incompatible with \"" + MediaType.APPLICATION_JSON + + "\" (parsed=\"" + parsed + "\")"); + } + } JSONParser parser = new JSONParser(); return parser.parse( new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8)); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java index 0f8ea071e2d3f..b2e9a8470d2c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.http.server; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockStoragePolicySpi; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileChecksum; @@ -33,6 +34,7 @@ import org.apache.hadoop.fs.XAttrCodec; import org.apache.hadoop.fs.XAttrSetFlag; import org.apache.hadoop.fs.http.client.HttpFSFileSystem; +import org.apache.hadoop.fs.http.client.HttpFSFileSystem.FILE_TYPE; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; @@ -45,7 +47,6 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; import 
org.apache.hadoop.hdfs.web.JsonUtil; -import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.lib.service.FileSystemAccess; import org.apache.hadoop.util.StringUtils; import org.json.simple.JSONArray; @@ -71,7 +72,22 @@ * FileSystem operation executors used by {@link HttpFSServer}. */ @InterfaceAudience.Private -public class FSOperations { +public final class FSOperations { + + private static int bufferSize = 4096; + + private FSOperations() { + // not called + } + /** + * Set the buffer size. The size is set during the initialization of + * HttpFSServerWebApp. + * @param conf the configuration to get the bufferSize + */ + public static void setBufferSize(Configuration conf) { + bufferSize = conf.getInt(HTTPFS_BUFFER_SIZE_KEY, + HTTP_BUFFER_SIZE_DEFAULT); + } /** * @param fileStatus a FileStatus object @@ -110,8 +126,17 @@ private static Map toJsonInner(FileStatus fileStatus, Map json = new LinkedHashMap(); json.put(HttpFSFileSystem.PATH_SUFFIX_JSON, (emptyPathSuffix) ? "" : fileStatus.getPath().getName()); - json.put(HttpFSFileSystem.TYPE_JSON, - HttpFSFileSystem.FILE_TYPE.getType(fileStatus).toString()); + FILE_TYPE fileType = HttpFSFileSystem.FILE_TYPE.getType(fileStatus); + json.put(HttpFSFileSystem.TYPE_JSON, fileType.toString()); + if (fileType.equals(FILE_TYPE.SYMLINK)) { + // put the symlink into Json + try { + json.put(HttpFSFileSystem.SYMLINK_JSON, + fileStatus.getSymlink().getName()); + } catch (IOException e) { + // Can't happen. 
+ } + } json.put(HttpFSFileSystem.LENGTH_JSON, fileStatus.getLen()); json.put(HttpFSFileSystem.OWNER_JSON, fileStatus.getOwner()); json.put(HttpFSFileSystem.GROUP_JSON, fileStatus.getGroup()); @@ -425,10 +450,9 @@ public FSAppend(InputStream is, String path) { */ @Override public Void execute(FileSystem fs) throws IOException { - int bufferSize = fs.getConf().getInt("httpfs.buffer.size", 4096); OutputStream os = fs.append(path, bufferSize); - IOUtils.copyBytes(is, os, bufferSize, true); - os.close(); + long bytes = copyBytes(is, os); + HttpFSServerWebApp.get().getMetrics().incrBytesWritten(bytes); return null; } @@ -511,6 +535,7 @@ public FSTruncate(String path, long newLength) { @Override public JSONObject execute(FileSystem fs) throws IOException { boolean result = fs.truncate(path, newLength); + HttpFSServerWebApp.get().getMetrics().incrOpsTruncate(); return toJSON( StringUtils.toLowerCase(HttpFSFileSystem.TRUNCATE_JSON), result); } @@ -627,16 +652,65 @@ public Void execute(FileSystem fs) throws IOException { fsPermission = FsCreateModes.create(fsPermission, new FsPermission(unmaskedPermission)); } - int bufferSize = fs.getConf().getInt(HTTPFS_BUFFER_SIZE_KEY, - HTTP_BUFFER_SIZE_DEFAULT); OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null); - IOUtils.copyBytes(is, os, bufferSize, true); - os.close(); + long bytes = copyBytes(is, os); + HttpFSServerWebApp.get().getMetrics().incrBytesWritten(bytes); return null; } } + /** + * These copyBytes methods combines the two different flavors used originally. + * One with length and another one with buffer size. + * In this impl, buffer size is determined internally, which is a singleton + * normally set during initialization. + * @param in the inputStream + * @param out the outputStream + * @return the totalBytes + * @throws IOException the exception to be thrown. 
+ */ + public static long copyBytes(InputStream in, OutputStream out) + throws IOException { + return copyBytes(in, out, Long.MAX_VALUE); + } + + public static long copyBytes(InputStream in, OutputStream out, long count) + throws IOException { + long totalBytes = 0; + + // If bufferSize is not initialized use 4k. This will not happen + // if all callers check and set it. + byte[] buf = new byte[bufferSize]; + long bytesRemaining = count; + int bytesRead; + + try { + while (bytesRemaining > 0) { + int bytesToRead = (int) + (bytesRemaining < buf.length ? bytesRemaining : buf.length); + + bytesRead = in.read(buf, 0, bytesToRead); + if (bytesRead == -1) { + break; + } + + out.write(buf, 0, bytesRead); + bytesRemaining -= bytesRead; + totalBytes += bytesRead; + } + return totalBytes; + } finally { + // Originally IOUtils.copyBytes() were called with close=true. So we are + // implementing the same behavior here. + try { + in.close(); + } finally { + out.close(); + } + } + } + /** * Executor that performs a delete FileSystemAccess files system operation. 
*/ @@ -669,6 +743,7 @@ public FSDelete(String path, boolean recursive) { @Override public JSONObject execute(FileSystem fs) throws IOException { boolean deleted = fs.delete(path, recursive); + HttpFSServerWebApp.get().getMetrics().incrOpsDelete(); return toJSON( StringUtils.toLowerCase(HttpFSFileSystem.DELETE_JSON), deleted); } @@ -737,6 +812,7 @@ public FSFileStatus(String path) { @Override public Map execute(FileSystem fs) throws IOException { FileStatus status = fs.getFileStatus(path); + HttpFSServerWebApp.get().getMetrics().incrOpsStat(); return toJson(status); } @@ -765,7 +841,6 @@ public JSONObject execute(FileSystem fs) throws IOException { json.put(HttpFSFileSystem.HOME_DIR_JSON, homeDir.toUri().getPath()); return json; } - } /** @@ -803,6 +878,7 @@ public FSListStatus(String path, String filter) throws IOException { @Override public Map execute(FileSystem fs) throws IOException { FileStatus[] fileStatuses = fs.listStatus(path, filter); + HttpFSServerWebApp.get().getMetrics().incrOpsListing(); return toJson(fileStatuses, fs.getFileStatus(path).isFile()); } @@ -894,6 +970,7 @@ public JSONObject execute(FileSystem fs) throws IOException { new FsPermission(unmaskedPermission)); } boolean mkdirs = fs.mkdirs(path, fsPermission); + HttpFSServerWebApp.get().getMetrics().incrOpsMkdir(); return toJSON(HttpFSFileSystem.MKDIRS_JSON, mkdirs); } @@ -926,8 +1003,8 @@ public FSOpen(String path) { */ @Override public InputStream execute(FileSystem fs) throws IOException { - int bufferSize = HttpFSServerWebApp.get().getConfig().getInt( - HTTPFS_BUFFER_SIZE_KEY, HTTP_BUFFER_SIZE_DEFAULT); + // Only updating ops count. 
bytesRead is updated in InputStreamEntity + HttpFSServerWebApp.get().getMetrics().incrOpsOpen(); return fs.open(path, bufferSize); } @@ -965,6 +1042,7 @@ public FSRename(String path, String toPath) { @Override public JSONObject execute(FileSystem fs) throws IOException { boolean renamed = fs.rename(path, toPath); + HttpFSServerWebApp.get().getMetrics().incrOpsRename(); return toJSON(HttpFSFileSystem.RENAME_JSON, renamed); } @@ -1896,6 +1974,7 @@ public Void execute(FileSystem fs) throws IOException { if (fs instanceof DistributedFileSystem) { DistributedFileSystem dfs = (DistributedFileSystem) fs; dfs.access(path, mode); + HttpFSServerWebApp.get().getMetrics().incrOpsCheckAccess(); } else { throw new UnsupportedOperationException("checkaccess is " + "not supported for HttpFs on " + fs.getClass() diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.java index 8d301827364cf..4739e42137ccb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.java @@ -70,12 +70,16 @@ public Response toResponse(Throwable throwable) { status = Response.Status.NOT_FOUND; } else if (throwable instanceof IOException) { status = Response.Status.INTERNAL_SERVER_ERROR; + logErrorFully(status, throwable); } else if (throwable instanceof UnsupportedOperationException) { status = Response.Status.BAD_REQUEST; + logErrorFully(status, throwable); } else if (throwable instanceof IllegalArgumentException) { status = Response.Status.BAD_REQUEST; + logErrorFully(status, throwable); } else { status = Response.Status.INTERNAL_SERVER_ERROR; + logErrorFully(status, throwable); } return createResponse(status, throwable); } @@ -95,4 +99,7 
@@ protected void log(Response.Status status, Throwable throwable) { LOG.warn("[{}:{}] response [{}] {}", method, path, status, message, throwable); } + private void logErrorFully(Response.Status status, Throwable throwable) { + LOG.debug("Failed with {}", status, throwable); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java index 3cca83eac53a0..16a95c31d2b1e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.http.server; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -106,8 +106,38 @@ @Path(HttpFSFileSystem.SERVICE_VERSION) @InterfaceAudience.Private public class HttpFSServer { + + enum AccessMode { + READWRITE, WRITEONLY, READONLY; + } private static Logger AUDIT_LOG = LoggerFactory.getLogger("httpfsaudit"); private static final Logger LOG = LoggerFactory.getLogger(HttpFSServer.class); + AccessMode accessMode = AccessMode.READWRITE; + + public HttpFSServer() { + Configuration conf = HttpFSServerWebApp.get().getConfig(); + final String accessModeString = conf.get("httpfs.access.mode", "read-write").toLowerCase(); + if(accessModeString.compareTo("write-only") == 0) + accessMode = AccessMode.WRITEONLY; + else if(accessModeString.compareTo("read-only") == 0) + accessMode = AccessMode.READONLY; + else + accessMode = AccessMode.READWRITE; + } + + + // First try getting a user through HttpUserGroupInformation. 
This will return + // if the built-in hadoop auth filter is not used. Fall back to getting the + // authenticated user from the request. + private UserGroupInformation getHttpUGI(HttpServletRequest request) { + UserGroupInformation user = HttpUserGroupInformation.get(); + if (user != null) { + return user; + } + + return UserGroupInformation.createRemoteUser(request.getUserPrincipal().getName()); + } + /** * Executes a {@link FileSystemAccess.FileSystemExecutor} using a filesystem for the effective @@ -119,7 +149,7 @@ public class HttpFSServer { * @return FileSystemExecutor response * * @throws IOException thrown if an IO error occurs. - * @throws FileSystemAccessException thrown if a FileSystemAccess releated error occurred. Thrown + * @throws FileSystemAccessException thrown if a FileSystemAccess related error occurred. Thrown * exceptions are handled by {@link HttpFSExceptionProvider}. */ private T fsExecute(UserGroupInformation ugi, FileSystemAccess.FileSystemExecutor executor) @@ -130,8 +160,8 @@ private T fsExecute(UserGroupInformation ugi, FileSystemAccess.FileSystemExe } /** - * Returns a filesystem instance. The fileystem instance is wired for release at the completion of - * the current Servlet request via the {@link FileSystemReleaseFilter}. + * Returns a filesystem instance. The filesystem instance is wired for release at the completion + * of the current Servlet request via the {@link FileSystemReleaseFilter}. *

      * If a do-as user is specified, the current user must be a valid proxyuser, otherwise an * AccessControlException will be thrown. @@ -142,7 +172,7 @@ private T fsExecute(UserGroupInformation ugi, FileSystemAccess.FileSystemExe * * @throws IOException thrown if an IO error occurred. Thrown exceptions are * handled by {@link HttpFSExceptionProvider}. - * @throws FileSystemAccessException thrown if a FileSystemAccess releated error occurred. Thrown + * @throws FileSystemAccessException thrown if a FileSystemAccess related error occurred. Thrown * exceptions are handled by {@link HttpFSExceptionProvider}. */ private FileSystem createFileSystem(UserGroupInformation ugi) @@ -218,6 +248,12 @@ public Response get(@PathParam("path") String path, @Context Parameters params, @Context HttpServletRequest request) throws IOException, FileSystemAccessException { + // Restrict access to only GETFILESTATUS and LISTSTATUS in write-only mode + if((op.value() != HttpFSFileSystem.Operation.GETFILESTATUS) && + (op.value() != HttpFSFileSystem.Operation.LISTSTATUS) && + accessMode == AccessMode.WRITEONLY) { + return Response.status(Response.Status.FORBIDDEN).build(); + } UserGroupInformation user = HttpUserGroupInformation.get(); Response response; path = makeAbsolute(path); @@ -248,7 +284,7 @@ public InputStream run() throws Exception { } }); } catch (InterruptedException ie) { - LOG.info("Open interrupted.", ie); + LOG.warn("Open interrupted.", ie); Thread.currentThread().interrupt(); } Long offset = params.get(OffsetParam.NAME, OffsetParam.class); @@ -281,7 +317,7 @@ public InputStream run() throws Exception { enforceRootPath(op.value(), path); FSOperations.FSHomeDir command = new FSOperations.FSHomeDir(); JSONObject json = fsExecute(user, command); - AUDIT_LOG.info(""); + AUDIT_LOG.info("Home Directory for [{}]", user); response = Response.ok(json).type(MediaType.APPLICATION_JSON).build(); break; } @@ -303,7 +339,7 @@ public InputStream run() throws Exception { 
FSOperations.FSContentSummary command = new FSOperations.FSContentSummary(path); Map json = fsExecute(user, command); - AUDIT_LOG.info("[{}]", path); + AUDIT_LOG.info("Content summary for [{}]", path); response = Response.ok(json).type(MediaType.APPLICATION_JSON).build(); break; } @@ -311,7 +347,7 @@ public InputStream run() throws Exception { FSOperations.FSQuotaUsage command = new FSOperations.FSQuotaUsage(path); Map json = fsExecute(user, command); - AUDIT_LOG.info("[{}]", path); + AUDIT_LOG.info("Quota Usage for [{}]", path); response = Response.ok(json).type(MediaType.APPLICATION_JSON).build(); break; } @@ -490,6 +526,10 @@ public Response delete(@PathParam("path") String path, @Context Parameters params, @Context HttpServletRequest request) throws IOException, FileSystemAccessException { + // Do not allow DELETE commands in read-only mode + if(accessMode == AccessMode.READONLY) { + return Response.status(Response.Status.FORBIDDEN).build(); + } UserGroupInformation user = HttpUserGroupInformation.get(); Response response; path = makeAbsolute(path); @@ -577,6 +617,10 @@ public Response post(InputStream is, @Context Parameters params, @Context HttpServletRequest request) throws IOException, FileSystemAccessException { + // Do not allow POST commands in read-only mode + if(accessMode == AccessMode.READONLY) { + return Response.status(Response.Status.FORBIDDEN).build(); + } UserGroupInformation user = HttpUserGroupInformation.get(); Response response; path = makeAbsolute(path); @@ -585,35 +629,30 @@ public Response post(InputStream is, switch (op.value()) { case APPEND: { Boolean hasData = params.get(DataParam.NAME, DataParam.class); - if (!hasData) { - URI redirectURL = createUploadRedirectionURL( - uriInfo, HttpFSFileSystem.Operation.APPEND); - Boolean noRedirect = params.get( - NoRedirectParam.NAME, NoRedirectParam.class); - if (noRedirect) { + URI redirectURL = createUploadRedirectionURL(uriInfo, + HttpFSFileSystem.Operation.APPEND); + Boolean noRedirect = + 
params.get(NoRedirectParam.NAME, NoRedirectParam.class); + if (noRedirect) { final String js = JsonUtil.toJsonString("Location", redirectURL); response = Response.ok(js).type(MediaType.APPLICATION_JSON).build(); - } else { - response = Response.temporaryRedirect(redirectURL).build(); - } - } else { + } else if (hasData) { FSOperations.FSAppend command = new FSOperations.FSAppend(is, path); fsExecute(user, command); AUDIT_LOG.info("[{}]", path); response = Response.ok().type(MediaType.APPLICATION_JSON).build(); + } else { + response = Response.temporaryRedirect(redirectURL).build(); } break; } case CONCAT: { - System.out.println("HTTPFS SERVER CONCAT"); String sources = params.get(SourcesParam.NAME, SourcesParam.class); - FSOperations.FSConcat command = new FSOperations.FSConcat(path, sources.split(",")); fsExecute(user, command); AUDIT_LOG.info("[{}]", path); - System.out.println("SENT RESPONSE"); response = Response.ok().build(); break; } @@ -662,7 +701,8 @@ public Response post(InputStream is, protected URI createUploadRedirectionURL(UriInfo uriInfo, Enum uploadOperation) { UriBuilder uriBuilder = uriInfo.getRequestUriBuilder(); uriBuilder = uriBuilder.replaceQueryParam(OperationParam.NAME, uploadOperation). 
- queryParam(DataParam.NAME, Boolean.TRUE); + queryParam(DataParam.NAME, Boolean.TRUE) + .replaceQueryParam(NoRedirectParam.NAME, (Object[]) null); return uriBuilder.build(null); } @@ -718,6 +758,10 @@ public Response put(InputStream is, @Context Parameters params, @Context HttpServletRequest request) throws IOException, FileSystemAccessException { + // Do not allow PUT commands in read-only mode + if(accessMode == AccessMode.READONLY) { + return Response.status(Response.Status.FORBIDDEN).build(); + } UserGroupInformation user = HttpUserGroupInformation.get(); Response response; path = makeAbsolute(path); @@ -726,18 +770,14 @@ public Response put(InputStream is, switch (op.value()) { case CREATE: { Boolean hasData = params.get(DataParam.NAME, DataParam.class); - if (!hasData) { - URI redirectURL = createUploadRedirectionURL( - uriInfo, HttpFSFileSystem.Operation.CREATE); - Boolean noRedirect = params.get( - NoRedirectParam.NAME, NoRedirectParam.class); - if (noRedirect) { + URI redirectURL = createUploadRedirectionURL(uriInfo, + HttpFSFileSystem.Operation.CREATE); + Boolean noRedirect = + params.get(NoRedirectParam.NAME, NoRedirectParam.class); + if (noRedirect) { final String js = JsonUtil.toJsonString("Location", redirectURL); response = Response.ok(js).type(MediaType.APPLICATION_JSON).build(); - } else { - response = Response.temporaryRedirect(redirectURL).build(); - } - } else { + } else if (hasData) { Short permission = params.get(PermissionParam.NAME, PermissionParam.class); Short unmaskedPermission = params.get(UnmaskedPermissionParam.NAME, @@ -761,6 +801,8 @@ public Response put(InputStream is, "Location", uriInfo.getAbsolutePath()); response = Response.created(uriInfo.getAbsolutePath()) .type(MediaType.APPLICATION_JSON).entity(js).build(); + } else { + response = Response.temporaryRedirect(redirectURL).build(); } break; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.java 
b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.java index 66438b5f4ab41..fd60186950763 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.java @@ -21,9 +21,13 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.http.server.metrics.HttpFSServerMetrics; import org.apache.hadoop.lib.server.ServerException; import org.apache.hadoop.lib.service.FileSystemAccess; import org.apache.hadoop.lib.servlet.ServerWebApp; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.util.JvmPauseMonitor; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,6 +60,7 @@ public class HttpFSServerWebApp extends ServerWebApp { public static final String CONF_ADMIN_GROUP = "admin.group"; private static HttpFSServerWebApp SERVER; + private static HttpFSServerMetrics metrics; private String adminGroup; @@ -102,6 +107,7 @@ public void init() throws ServerException { LOG.info("Connects to Namenode [{}]", get().get(FileSystemAccess.class).getFileSystemConfiguration(). 
get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY)); + setMetrics(getConfig()); } /** @@ -110,9 +116,22 @@ public void init() throws ServerException { @Override public void destroy() { SERVER = null; + if (metrics != null) { + metrics.shutdown(); + } super.destroy(); } + private static void setMetrics(Configuration config) { + LOG.info("Initializing HttpFSServerMetrics"); + metrics = HttpFSServerMetrics.create(config, "HttpFSServer"); + JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(config); + pauseMonitor.start(); + metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); + FSOperations.setBufferSize(config); + DefaultMetricsSystem.initialize("HttpFSServer"); + } /** * Returns HttpFSServer server singleton, configuration and services are * accessible through it. @@ -123,6 +142,14 @@ public static HttpFSServerWebApp get() { return SERVER; } + /** + * gets the HttpFSServerMetrics instance. + * @return the HttpFSServerMetrics singleton. + */ + public static HttpFSServerMetrics getMetrics() { + return metrics; + } + /** * Returns HttpFSServer admin group. * diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/metrics/HttpFSServerMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/metrics/HttpFSServerMetrics.java new file mode 100644 index 0000000000000..524ec09290a9e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/metrics/HttpFSServerMetrics.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.http.server.metrics; + +import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.source.JvmMetrics; + +import java.util.concurrent.ThreadLocalRandom; + +/** + * + * This class is for maintaining the various HttpFSServer statistics + * and publishing them through the metrics interfaces. + * This also registers the JMX MBean for RPC. + *

      + * This class has a number of metrics variables, held as private counters and + * exposed through the incr* methods; these variables (objects) have methods + * to update their values; for example: + *

      {@link #bytesRead}.inc() + * + */ +@InterfaceAudience.Private +@Metrics(about="HttpFSServer metrics", context="httpfs") +public class HttpFSServerMetrics { + + private @Metric MutableCounterLong bytesWritten; + private @Metric MutableCounterLong bytesRead; + + // Write ops + private @Metric MutableCounterLong opsCreate; + private @Metric MutableCounterLong opsAppend; + private @Metric MutableCounterLong opsTruncate; + private @Metric MutableCounterLong opsDelete; + private @Metric MutableCounterLong opsRename; + private @Metric MutableCounterLong opsMkdir; + + // Read ops + private @Metric MutableCounterLong opsOpen; + private @Metric MutableCounterLong opsListing; + private @Metric MutableCounterLong opsStat; + private @Metric MutableCounterLong opsCheckAccess; + + private final MetricsRegistry registry = new MetricsRegistry("httpfsserver"); + private final String name; + private JvmMetrics jvmMetrics = null; + + public HttpFSServerMetrics(String name, String sessionId, + final JvmMetrics jvmMetrics) { + this.name = name; + this.jvmMetrics = jvmMetrics; + registry.tag(SessionId, sessionId); + } + + public static HttpFSServerMetrics create(Configuration conf, + String serverName) { + String sessionId = conf.get(DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY); + MetricsSystem ms = DefaultMetricsSystem.instance(); + JvmMetrics jm = JvmMetrics.create("HttpFSServer", sessionId, ms); + String name = "ServerActivity-"+ (serverName.isEmpty() + ? 
"UndefinedServer"+ ThreadLocalRandom.current().nextInt() + : serverName.replace(':', '-')); + + return ms.register(name, null, new HttpFSServerMetrics(name, + sessionId, jm)); + } + + public String name() { + return name; + } + + public JvmMetrics getJvmMetrics() { + return jvmMetrics; + } + + public void incrBytesWritten(long bytes) { + bytesWritten.incr(bytes); + } + + public void incrBytesRead(long bytes) { + bytesRead.incr(bytes); + } + + public void incrOpsCreate() { + opsCreate.incr(); + } + + public void incrOpsAppend() { + opsAppend.incr(); + } + + public void incrOpsTruncate() { + opsTruncate.incr(); + } + + public void incrOpsDelete() { + opsDelete.incr(); + } + + public void incrOpsRename() { + opsRename.incr(); + } + + public void incrOpsMkdir() { + opsMkdir.incr(); + } + + public void incrOpsOpen() { + opsOpen.incr(); + } + + public void incrOpsListing() { + opsListing.incr(); + } + + public void incrOpsStat() { + opsStat.incr(); + } + + public void incrOpsCheckAccess() { + opsCheckAccess.incr(); + } + + public void shutdown() { + DefaultMetricsSystem.shutdown(); + } + + public long getOpsMkdir() { + return opsMkdir.value(); + } + + public long getOpsListing() { + return opsListing.value(); + } + + public long getOpsStat() { + return opsStat.value(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/metrics/package-info.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/metrics/package-info.java new file mode 100644 index 0000000000000..47e8d4a4c2fb2 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/metrics/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A package to implement metrics for the HttpFS Server. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +package org.apache.hadoop.fs.http.server.metrics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java index b2bba088911b3..81208bc6e43ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java @@ -194,9 +194,11 @@ protected void init() throws ServiceException { throw new ServiceException(FileSystemAccessException.ERROR.H11, ex.toString(), ex); } - LOG.debug("FileSystemAccess FileSystem configuration:"); - for (Map.Entry entry : serviceHadoopConf) { - LOG.debug(" {} = {}", entry.getKey(), entry.getValue()); + if (LOG.isDebugEnabled()) { + LOG.debug("FileSystemAccess FileSystem configuration:"); + for (Map.Entry entry : serviceHadoopConf) { + LOG.debug(" {} = {}", entry.getKey(), entry.getValue()); + } } 
setRequiredServiceHadoopConf(serviceHadoopConf); @@ -262,7 +264,7 @@ public void run() { LOG.warn("Error while purging filesystem, " + ex.toString(), ex); } } - LOG.debug("Purged [{}} filesystem instances", count); + LOG.debug("Purged [{}] filesystem instances", count); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.java index ec559f9125d01..73a0dbe7392bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.java @@ -94,14 +94,14 @@ public void destroy() { * Static method that sets the FileSystem to release back to * the {@link FileSystemAccess} service on servlet request completion. * - * @param fs fileystem instance. + * @param fs a filesystem instance. */ public static void setFileSystem(FileSystem fs) { FILE_SYSTEM_TL.set(fs); } /** - * Abstract method to be implemetned by concrete implementations of the + * Abstract method to be implemented by concrete implementations of the * filter that return the {@link FileSystemAccess} service to which the filesystem * will be returned to. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/ServerWebApp.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/ServerWebApp.java index 5e855de7eec85..985feed09981c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/ServerWebApp.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/ServerWebApp.java @@ -18,7 +18,7 @@ package org.apache.hadoop.lib.servlet; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.lib.server.Server; diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/util/Check.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/util/Check.java index 31666e83e2711..62fbe28c54b15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/util/Check.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/util/Check.java @@ -130,7 +130,7 @@ public static String validIdentifier(String value, int maxLen, String name) { } if (!IDENTIFIER_PATTERN.matcher(value).find()) { throw new IllegalArgumentException( - MessageFormat.format("[{0}] = [{1}] must be '{2}'", name, value, IDENTIFIER_PATTERN_STR)); + MessageFormat.format("[{0}] = [{1}] must be \"{2}\"", name, value, IDENTIFIER_PATTERN_STR)); } return value; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/InputStreamEntity.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/InputStreamEntity.java index 9edb24a7bcbc0..5f387c908506e 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/InputStreamEntity.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/InputStreamEntity.java @@ -19,6 +19,9 @@ package org.apache.hadoop.lib.wsrs; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.http.server.FSOperations; +import org.apache.hadoop.fs.http.server.HttpFSServerWebApp; +import org.apache.hadoop.fs.http.server.metrics.HttpFSServerMetrics; import org.apache.hadoop.io.IOUtils; import javax.ws.rs.core.StreamingOutput; @@ -45,10 +48,17 @@ public InputStreamEntity(InputStream is) { @Override public void write(OutputStream os) throws IOException { IOUtils.skipFully(is, offset); + long bytes = 0L; if (len == -1) { - IOUtils.copyBytes(is, os, 4096, true); + // Use the configured buffer size instead of hardcoding to 4k + bytes = FSOperations.copyBytes(is, os); } else { - IOUtils.copyBytes(is, os, len, true); + bytes = FSOperations.copyBytes(is, os, len); + } + // Update metrics. 
+ HttpFSServerMetrics metrics = HttpFSServerWebApp.get().getMetrics(); + if (metrics != null) { + metrics.incrBytesRead(bytes); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/Parameters.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/Parameters.java index e0f62002c70d4..c171e929ca6d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/Parameters.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/Parameters.java @@ -19,7 +19,7 @@ import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.util.List; import java.util.Map; diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/ParametersProvider.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/ParametersProvider.java index c93f8f2b17db1..56a999f519cdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/ParametersProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/ParametersProvider.java @@ -18,7 +18,7 @@ package org.apache.hadoop.lib.wsrs; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import com.sun.jersey.api.core.HttpContext; import com.sun.jersey.core.spi.component.ComponentContext; import com.sun.jersey.core.spi.component.ComponentScope; diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml index 5b8e469e96bc7..869e4e53e05ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml +++ 
b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml @@ -56,7 +56,7 @@ hadoop.http.idle_timeout.ms - 1000 + 60000 Httpfs Server connection timeout in milliseconds. @@ -323,4 +323,15 @@ + + httpfs.access.mode + read-write + + Sets the access mode for HTTPFS. If access is not allowed the FORBIDDED (403) is returned. + Valid access modes are: + read-write Full Access allowed + write-only PUT POST and DELETE full Access. GET only allows GETFILESTATUS and LISTSTATUS + read-only GET Full Access PUT POST and DELETE are FORBIDDEN + + diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java index 902861d3bd10f..7182c987fa623 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java @@ -59,6 +59,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.HFSTestCase; import org.apache.hadoop.test.HadoopUsersConfTestHelper; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.test.TestDir; import org.apache.hadoop.test.TestDirHelper; import org.apache.hadoop.test.TestHdfs; @@ -73,7 +74,7 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.webapp.WebAppContext; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.io.File; import java.io.FileOutputStream; @@ -364,6 +365,42 @@ private void testDelete() throws Exception { fs.close(); } + private void testListSymLinkStatus() throws Exception { + if (isLocalFS()) { + // do not test the the symlink for local FS. 
+ return; + } + FileSystem fs = FileSystem.get(getProxiedFSConf()); + boolean isWebhdfs = fs instanceof WebHdfsFileSystem; + Path path = + new Path(getProxiedFSTestDir() + "-symlink", "targetFoo.txt"); + OutputStream os = fs.create(path); + os.write(1); + os.close(); + Path linkPath = + new Path(getProxiedFSTestDir()+ "-symlink", "symlinkFoo.txt"); + fs.createSymlink(path, linkPath, false); + fs = getHttpFSFileSystem(); + FileStatus linkStatus = fs.getFileStatus(linkPath); + FileStatus status1 = fs.getFileStatus(path); + + FileStatus[] stati = fs.listStatus(path.getParent()); + assertEquals(2, stati.length); + + int countSymlink = 0; + for (int i = 0; i < stati.length; i++) { + FileStatus fStatus = stati[i]; + countSymlink += fStatus.isSymlink() ? 1 : 0; + } + assertEquals(1, countSymlink); + + assertFalse(status1.isSymlink()); + if (isWebhdfs) { + assertTrue(linkStatus.isSymlink()); + } + fs.close(); + } + private void testListStatus() throws Exception { FileSystem fs = FileSystem.get(getProxiedFSConf()); boolean isDFS = fs instanceof DistributedFileSystem; @@ -521,9 +558,18 @@ private void testWorkingdirectory() throws Exception { fs = getHttpFSFileSystem(); fs.setWorkingDirectory(new Path("/tmp")); workingDir = fs.getWorkingDirectory(); - fs.close(); assertEquals(workingDir.toUri().getPath(), new Path("/tmp").toUri().getPath()); + final FileSystem httpFs = getHttpFSFileSystem(); + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Invalid DFS directory name /foo:bar", + () -> httpFs.setWorkingDirectory(new Path("/foo:bar"))); + fs.setWorkingDirectory(new Path("/bar")); + workingDir = fs.getWorkingDirectory(); + httpFs.close(); + fs.close(); + assertEquals(workingDir.toUri().getPath(), + new Path("/bar").toUri().getPath()); } private void testTrashRoot() throws Exception { @@ -1179,6 +1225,7 @@ private void operation(Operation op) throws Exception { break; case LIST_STATUS: testListStatus(); + testListSymLinkStatus(); break; case WORKING_DIRECTORY: 
testWorkingdirectory(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/TestHttpFSFileSystemLocalFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/TestHttpFSFileSystemLocalFileSystem.java index 83bcb2efacc3c..955529ef9816d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/TestHttpFSFileSystemLocalFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/TestHttpFSFileSystemLocalFileSystem.java @@ -32,8 +32,6 @@ import org.junit.runners.Parameterized; import java.io.File; -import java.net.URI; -import java.net.URISyntaxException; @RunWith(value = Parameterized.class) public class TestHttpFSFileSystemLocalFileSystem extends BaseTestHttpFSWith { diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSAccessControlled.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSAccessControlled.java new file mode 100644 index 0000000000000..d3a4f59750f2b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSAccessControlled.java @@ -0,0 +1,354 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.http.server; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.test.HTestCase; +import org.apache.hadoop.test.HadoopUsersConfTestHelper; +import org.apache.hadoop.test.TestDir; +import org.apache.hadoop.test.TestDirHelper; +import org.apache.hadoop.test.TestJetty; +import org.apache.hadoop.test.TestJettyHelper; +import org.junit.Assert; +import org.junit.Test; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.webapp.WebAppContext; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.MessageFormat; + +/** + * This test class ensures that everything works as expected when + * support with the access controlled HTTPFS file system. + */ +public class TestHttpFSAccessControlled extends HTestCase { + + private MiniDFSCluster miniDfs; + private Configuration nnConf; + + /** + * Fire up our own hand-rolled MiniDFSCluster. We do this here instead + * of relying on TestHdfsHelper because we don't want to turn on ACL + * support. 
+ * + * @throws Exception + */ + private void startMiniDFS() throws Exception { + + File testDirRoot = TestDirHelper.getTestDir(); + + if (System.getProperty("hadoop.log.dir") == null) { + System.setProperty("hadoop." + + "log.dir", + new File(testDirRoot, "hadoop-log").getAbsolutePath()); + } + if (System.getProperty("test.build.data") == null) { + System.setProperty("test.build.data", + new File(testDirRoot, "hadoop-data").getAbsolutePath()); + } + + Configuration conf = HadoopUsersConfTestHelper.getBaseConf(); + HadoopUsersConfTestHelper.addUserConf(conf); + conf.set("fs.hdfs.impl.disable.cache", "true"); + conf.set("dfs.block.access.token.enable", "false"); + conf.set("dfs.permissions", "true"); + conf.set("hadoop.security.authentication", "simple"); + + MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); + builder.numDataNodes(2); + miniDfs = builder.build(); + nnConf = miniDfs.getConfiguration(0); + } + + /** + * Create an HttpFS Server to talk to the MiniDFSCluster we created. 
+ * @throws Exception + */ + private void createHttpFSServer() throws Exception { + File homeDir = TestDirHelper.getTestDir(); + Assert.assertTrue(new File(homeDir, "conf").mkdir()); + Assert.assertTrue(new File(homeDir, "log").mkdir()); + Assert.assertTrue(new File(homeDir, "temp").mkdir()); + HttpFSServerWebApp.setHomeDirForCurrentThread(homeDir.getAbsolutePath()); + + File secretFile = new File(new File(homeDir, "conf"), "secret"); + Writer w = new FileWriter(secretFile); + w.write("secret"); + w.close(); + + // HDFS configuration + File hadoopConfDir = new File(new File(homeDir, "conf"), "hadoop-conf"); + if ( !hadoopConfDir.mkdirs() ) { + throw new IOException(); + } + + String fsDefaultName = + nnConf.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY); + Configuration conf = new Configuration(false); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, fsDefaultName); + + File hdfsSite = new File(hadoopConfDir, "hdfs-site.xml"); + OutputStream os = new FileOutputStream(hdfsSite); + conf.writeXml(os); + os.close(); + + // HTTPFS configuration + conf = new Configuration(false); + conf.set("httpfs.hadoop.config.dir", hadoopConfDir.toString()); + conf.set("httpfs.proxyuser." + + HadoopUsersConfTestHelper.getHadoopProxyUser() + ".groups", + HadoopUsersConfTestHelper.getHadoopProxyUserGroups()); + conf.set("httpfs.proxyuser." 
+ + HadoopUsersConfTestHelper.getHadoopProxyUser() + ".hosts", + HadoopUsersConfTestHelper.getHadoopProxyUserHosts()); + conf.set("httpfs.authentication.signature.secret.file", + secretFile.getAbsolutePath()); + + File httpfsSite = new File(new File(homeDir, "conf"), "httpfs-site.xml"); + os = new FileOutputStream(httpfsSite); + conf.writeXml(os); + os.close(); + + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + URL url = cl.getResource("webapp"); + if ( url == null ) { + throw new IOException(); + } + WebAppContext context = new WebAppContext(url.getPath(), "/webhdfs"); + Server server = TestJettyHelper.getJettyServer(); + server.setHandler(context); + server.start(); + } + + /** + * Talks to the http interface to get the json output of a *STATUS command + * on the given file. + * + * @param filename The file to query. + * @param message Failure message + * @param command Command to test + * @param expectOK Is this operation expected to succeed? + * @throws Exception + */ + private void getCmd(String filename, String message, String command, boolean expectOK) + throws Exception { + String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; + String outMsg = message + " (" + command + ")"; + // Remove leading / from filename + if ( filename.charAt(0) == '/' ) { + filename = filename.substring(1); + } + String pathOps = MessageFormat.format( + "/webhdfs/v1/{0}?user.name={1}&op={2}", + filename, user, command); + URL url = new URL(TestJettyHelper.getJettyURL(), pathOps); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.connect(); + int resp = conn.getResponseCode(); + if ( expectOK ) { + Assert.assertEquals( outMsg, HttpURLConnection.HTTP_OK, resp); + } else { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_FORBIDDEN, resp); + } + } + + /** + * General-purpose http PUT command to the httpfs server. 
+ * @param filename The file to operate upon + * @param message Failure message + * @param command The command to perform (SETPERMISSION, etc) + * @param params Parameters to command + * @param expectOK Is this operation expected to succeed? + */ + private void putCmd(String filename, String message, String command, + String params, boolean expectOK) throws Exception { + String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; + String outMsg = message + " (" + command + ")"; + // Remove leading / from filename + if ( filename.charAt(0) == '/' ) { + filename = filename.substring(1); + } + String pathOps = MessageFormat.format( + "/webhdfs/v1/{0}?user.name={1}{2}{3}&op={4}", + filename, user, (params == null) ? "" : "&", + (params == null) ? "" : params, command); + URL url = new URL(TestJettyHelper.getJettyURL(), pathOps); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("PUT"); + conn.connect(); + int resp = conn.getResponseCode(); + if ( expectOK ) { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_OK, resp); + } else { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_FORBIDDEN, resp); + } + } + + /** + * General-purpose http PUT command to the httpfs server. + * @param filename The file to operate upon + * @param message Failure message + * @param command The command to perform (SETPERMISSION, etc) + * @param params Parameters to command + * @param expectOK Is this operation expected to succeed? + */ + private void deleteCmd(String filename, String message, String command, + String params, boolean expectOK) throws Exception { + String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; + String outMsg = message + " (" + command + ")"; + // Remove leading / from filename + if ( filename.charAt(0) == '/' ) { + filename = filename.substring(1); + } + String pathOps = MessageFormat.format( + "/webhdfs/v1/{0}?user.name={1}{2}{3}&op={4}", + filename, user, (params == null) ? "" : "&", + (params == null) ? 
"" : params, command); + URL url = new URL(TestJettyHelper.getJettyURL(), pathOps); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("DELETE"); + conn.connect(); + int resp = conn.getResponseCode(); + if ( expectOK ) { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_OK, resp); + } else { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_FORBIDDEN, resp); + } + } + + /** + * General-purpose http POST command to the httpfs server. + * @param filename The file to operate upon + * @param message Failure message + * @param command The command to perform (UNSETSTORAGEPOLICY, etc) + * @param params Parameters to command" + * @param expectOK Is this operation expected to succeed? + */ + private void postCmd(String filename, String message, String command, + String params, boolean expectOK) throws Exception { + String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; + String outMsg = message + " (" + command + ")"; + // Remove leading / from filename + if ( filename.charAt(0) == '/' ) { + filename = filename.substring(1); + } + String pathOps = MessageFormat.format( + "/webhdfs/v1/{0}?user.name={1}{2}{3}&op={4}", + filename, user, (params == null) ? "" : "&", + (params == null) ? "" : params, command); + URL url = new URL(TestJettyHelper.getJettyURL(), pathOps); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("POST"); + conn.connect(); + int resp = conn.getResponseCode(); + if ( expectOK ) { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_OK, resp); + } else { + Assert.assertEquals(outMsg, HttpURLConnection.HTTP_FORBIDDEN, resp); + } + } + + /** + * Ensure that + *

        + *
      1. GETFILESTATUS (GET) and LISTSTATUS (GET) work in all modes
      2. + *
      3. GETXATTRS (GET) works in read-write and read-only but write-only throws an exception
      4. + *
      5. SETPERMISSION (PUT) works in read-write and write-only but read-only throws an exception
      6. + *
      7. UNSETSTORAGEPOLICY (POST) works in read-write and write-only but read-only throws an exception
      8. + *
      9. DELETE (DELETE) works in read-write and write-only but read-only throws an exception
      10. + *
      + * + * @throws Exception + */ + @Test + @TestDir + @TestJetty + public void testAcessControlledFS() throws Exception { + final String testRwMsg = "Test read-write "; + final String testRoMsg = "Test read-only "; + final String testWoMsg = "Test write-only "; + final String defUser1 = "default:user:glarch:r-x"; + final String dir = "/testAccess"; + final String pathRW = dir + "/foo-rw"; + final String pathWO = dir + "/foo-wo"; + final String pathRO = dir + "/foo-ro"; + final String setPermSpec = "744"; + final String snapshopSpec = "snapshotname=test-snap"; + startMiniDFS(); + createHttpFSServer(); + + FileSystem fs = FileSystem.get(nnConf); + fs.mkdirs(new Path(dir)); + OutputStream os = fs.create(new Path(pathRW)); + os.write(1); + os.close(); + + os = fs.create(new Path(pathWO)); + os.write(1); + os.close(); + + os = fs.create(new Path(pathRO)); + os.write(1); + os.close(); + + Configuration conf = HttpFSServerWebApp.get().getConfig(); + + /* test Read-Write Mode */ + conf.setStrings("httpfs.access.mode", "read-write"); + getCmd(pathRW, testRwMsg + "GET", "GETFILESTATUS", true); + getCmd(pathRW, testRwMsg + "GET", "LISTSTATUS", true); + getCmd(pathRW, testRwMsg + "GET", "GETXATTRS", true); + putCmd(pathRW, testRwMsg + "PUT", "SETPERMISSION", setPermSpec, true); + postCmd(pathRW, testRwMsg + "POST", "UNSETSTORAGEPOLICY", null, true); + deleteCmd(pathRW, testRwMsg + "DELETE", "DELETE", null, true); + + /* test Write-Only Mode */ + conf.setStrings("httpfs.access.mode", "write-only"); + getCmd(pathWO, testWoMsg + "GET", "GETFILESTATUS", true); + getCmd(pathWO, testWoMsg + "GET", "LISTSTATUS", true); + getCmd(pathWO, testWoMsg + "GET", "GETXATTRS", false); + putCmd(pathWO, testWoMsg + "PUT", "SETPERMISSION", setPermSpec, true); + postCmd(pathWO, testWoMsg + "POST", "UNSETSTORAGEPOLICY", null, true); + deleteCmd(pathWO, testWoMsg + "DELETE", "DELETE", null, true); + + /* test Read-Only Mode */ + conf.setStrings("httpfs.access.mode", "read-only"); + 
getCmd(pathRO, testRoMsg + "GET", "GETFILESTATUS", true); + getCmd(pathRO, testRoMsg + "GET", "LISTSTATUS", true); + getCmd(pathRO, testRoMsg + "GET", "GETXATTRS", true); + putCmd(pathRO, testRoMsg + "PUT", "SETPERMISSION", setPermSpec, false); + postCmd(pathRO, testRoMsg + "POST", "UNSETSTORAGEPOLICY", null, false); + deleteCmd(pathRO, testRoMsg + "DELETE", "DELETE", null, false); + + conf.setStrings("httpfs.access.mode", "read-write"); + + miniDfs.shutdown(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java index da7080599860e..6ecc33a587c34 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java @@ -56,10 +56,11 @@ import java.net.HttpURLConnection; import java.net.URI; import java.net.URL; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -70,6 +71,8 @@ import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.XAttrCodec; +import org.apache.hadoop.fs.http.client.HttpFSUtils; +import org.apache.hadoop.fs.http.client.HttpFSFileSystem.Operation; import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.DataParam; import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.NoRedirectParam; import org.apache.hadoop.fs.permission.AclEntry; @@ -88,6 +91,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.HFSTestCase; import org.apache.hadoop.test.HadoopUsersConfTestHelper; +import 
org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.test.TestDir; import org.apache.hadoop.test.TestDirHelper; import org.apache.hadoop.test.TestHdfs; @@ -100,8 +104,9 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.webapp.WebAppContext; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import java.util.Properties; +import java.util.concurrent.Callable; import java.util.regex.Pattern; import javax.ws.rs.HttpMethod; @@ -114,6 +119,23 @@ */ public class TestHttpFSServer extends HFSTestCase { + /** + * define metric getters for unit tests. + */ + private static Callable defaultEntryMetricGetter = () -> 0L; + private static Callable defaultExitMetricGetter = () -> 1L; + private static HashMap> metricsGetter = + new HashMap>() { + { + put("LISTSTATUS", + () -> HttpFSServerWebApp.get().getMetrics().getOpsListing()); + put("MKDIRS", + () -> HttpFSServerWebApp.get().getMetrics().getOpsMkdir()); + put("GETFILESTATUS", + () -> HttpFSServerWebApp.get().getMetrics().getOpsStat()); + } + }; + @Test @TestDir @TestJetty @@ -397,7 +419,8 @@ public void instrumentation() throws Exception { @TestHdfs public void testHdfsAccess() throws Exception { createHttpFSServer(false, false); - + long oldOpsListStatus = + metricsGetter.get("LISTSTATUS").call(); String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; URL url = new URL(TestJettyHelper.getJettyURL(), MessageFormat.format("/webhdfs/v1/?user.name={0}&op=liststatus", @@ -408,6 +431,8 @@ public void testHdfsAccess() throws Exception { new InputStreamReader(conn.getInputStream())); reader.readLine(); reader.close(); + Assert.assertEquals(1 + oldOpsListStatus, + (long) metricsGetter.get("LISTSTATUS").call()); } @Test @@ -416,7 +441,8 @@ public void testHdfsAccess() throws Exception { @TestHdfs public void testMkdirs() throws Exception { createHttpFSServer(false, false); - + long oldMkdirOpsStat = + metricsGetter.get("MKDIRS").call(); String 
user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; URL url = new URL(TestJettyHelper.getJettyURL(), MessageFormat.format( "/webhdfs/v1/tmp/sub-tmp?user.name={0}&op=MKDIRS", user)); @@ -424,8 +450,10 @@ public void testMkdirs() throws Exception { conn.setRequestMethod("PUT"); conn.connect(); Assert.assertEquals(conn.getResponseCode(), HttpURLConnection.HTTP_OK); - getStatus("/tmp/sub-tmp", "LISTSTATUS"); + long opsStat = + metricsGetter.get("MKDIRS").call(); + Assert.assertEquals(1 + oldMkdirOpsStat, opsStat); } @Test @@ -434,7 +462,8 @@ public void testMkdirs() throws Exception { @TestHdfs public void testGlobFilter() throws Exception { createHttpFSServer(false, false); - + long oldOpsListStatus = + metricsGetter.get("LISTSTATUS").call(); FileSystem fs = FileSystem.get(TestHdfsHelper.getHdfsConf()); fs.mkdirs(new Path("/tmp")); fs.create(new Path("/tmp/foo.txt")).close(); @@ -449,6 +478,8 @@ public void testGlobFilter() throws Exception { new InputStreamReader(conn.getInputStream())); reader.readLine(); reader.close(); + Assert.assertEquals(1 + oldOpsListStatus, + (long) metricsGetter.get("LISTSTATUS").call()); } /** @@ -508,6 +539,9 @@ private void createWithHttp(String filename, String perms, */ private void createDirWithHttp(String dirname, String perms, String unmaskedPerms) throws Exception { + // get the createDirMetrics + long oldOpsMkdir = + metricsGetter.get("MKDIRS").call(); String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; // Remove leading / from filename if (dirname.charAt(0) == '/') { @@ -531,6 +565,8 @@ private void createDirWithHttp(String dirname, String perms, conn.setRequestMethod("PUT"); conn.connect(); Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); + Assert.assertEquals(1 + oldOpsMkdir, + (long) metricsGetter.get("MKDIRS").call()); } /** @@ -544,6 +580,8 @@ private void createDirWithHttp(String dirname, String perms, */ private String getStatus(String filename, String command) throws Exception { + long 
oldOpsStat = + metricsGetter.getOrDefault(command, defaultEntryMetricGetter).call(); String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; // Remove leading / from filename if (filename.charAt(0) == '/') { @@ -559,7 +597,9 @@ private String getStatus(String filename, String command) BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); - + long opsStat = + metricsGetter.getOrDefault(command, defaultExitMetricGetter).call(); + Assert.assertEquals(oldOpsStat + 1L, opsStat); return reader.readLine(); } @@ -1565,7 +1605,7 @@ public void testNoRedirect() throws Exception { new InputStreamReader(conn.getInputStream())); String location = (String)json.get("Location"); Assert.assertTrue(location.contains(DataParam.NAME)); - Assert.assertTrue(location.contains(NoRedirectParam.NAME)); + Assert.assertFalse(location.contains(NoRedirectParam.NAME)); Assert.assertTrue(location.contains("CREATE")); Assert.assertTrue("Wrong location: " + location, location.startsWith(TestJettyHelper.getJettyURL().toString())); @@ -1615,8 +1655,7 @@ public void testNoRedirect() throws Exception { conn.connect(); // Verify that we read what we wrote Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); - String content = IOUtils.toString( - conn.getInputStream(), Charset.defaultCharset()); + String content = IOUtils.toString(conn.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(testContent, content); @@ -1834,4 +1873,78 @@ public void testStoragePolicySatisfier() throws Exception { assertTrue( xAttrs.containsKey(HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY)); } + + @Test + @TestDir + @TestJetty + @TestHdfs + public void testNoRedirectWithData() throws Exception { + createHttpFSServer(false, false); + + final String path = "/file"; + final String username = HadoopUsersConfTestHelper.getHadoopUsers()[0]; + // file creation which should not redirect + URL url = new URL(TestJettyHelper.getJettyURL(), + MessageFormat.format( + 
"/webhdfs/v1{0}?user.name={1}&op=CREATE&data=true&noredirect=true", + path, username)); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod(HttpMethod.PUT); + conn.setRequestProperty("Content-Type", MediaType.APPLICATION_OCTET_STREAM); + conn.setDoOutput(true); + conn.connect(); + Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); + JSONObject json = (JSONObject) new JSONParser() + .parse(new InputStreamReader(conn.getInputStream())); + + // get the location to write + String location = (String) json.get("Location"); + Assert.assertTrue(location.contains(DataParam.NAME)); + Assert.assertTrue(location.contains("CREATE")); + url = new URL(location); + conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod(HttpMethod.PUT); + conn.setRequestProperty("Content-Type", MediaType.APPLICATION_OCTET_STREAM); + conn.setDoOutput(true); + conn.connect(); + final String writeStr = "write some content"; + OutputStream os = conn.getOutputStream(); + os.write(writeStr.getBytes()); + os.close(); + // Verify that file got created + Assert.assertEquals(HttpURLConnection.HTTP_CREATED, conn.getResponseCode()); + json = (JSONObject) new JSONParser() + .parse(new InputStreamReader(conn.getInputStream())); + location = (String) json.get("Location"); + Assert.assertEquals(TestJettyHelper.getJettyURL() + "/webhdfs/v1" + path, + location); + } + + @Test + @TestDir + @TestJetty + @TestHdfs + public void testContentType() throws Exception { + createHttpFSServer(false, false); + FileSystem fs = FileSystem.get(TestHdfsHelper.getHdfsConf()); + Path dir = new Path("/tmp"); + Path file = new Path(dir, "foo"); + fs.mkdirs(dir); + fs.create(file); + + String user = HadoopUsersConfTestHelper.getHadoopUsers()[0]; + URL url = new URL(TestJettyHelper.getJettyURL(), MessageFormat.format( + "/webhdfs/v1/tmp/foo?user.name={0}&op=open&offset=1&length=2", user)); + + // test jsonParse with non-json type. 
+ final HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod(Operation.OPEN.getMethod()); + conn.connect(); + + LambdaTestUtils.intercept(IOException.class, + "Content-Type \"text/html;charset=iso-8859-1\" " + + "is incompatible with \"application/json\"", + () -> HttpFSUtils.jsonParse(conn)); + conn.disconnect(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/server/TestServer.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/server/TestServer.java index a6a139f23e16b..ff1d1ca0ad55c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/server/TestServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/server/TestServer.java @@ -42,7 +42,6 @@ import org.apache.hadoop.test.TestDir; import org.apache.hadoop.test.TestDirHelper; import org.apache.hadoop.test.TestException; -import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; import org.junit.Test; diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml index bb95442605a79..04cf060df4cb3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project-dist hadoop-hdfs-native-client - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop HDFS Native Client Apache Hadoop HDFS Native Client jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt index 626c49bf192c6..24ec297aa27b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt @@ -65,6 +65,8 @@ if(WIN32) 
set(OUT_DIR bin) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") + # using old default behavior on GCC >= 10.0 + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcommon") set(OS_DIR ${CMAKE_SOURCE_DIR}/main/native/libhdfs/os/posix) # IMPORTANT: OUT_DIR MUST be relative to maven's diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README index e8cc0e509f7ee..0dc17214348d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README @@ -88,6 +88,7 @@ rw -odebug (do not daemonize - aka -d in fuse speak) -obig_writes (use fuse big_writes option so as to allow better performance of writes on kernels >= 2.6.26) -initchecks - have fuse-dfs try to connect to hdfs to ensure all is ok upon startup. recommended to have this on +-omax_background=%d (maximum number of pending "background" requests - see fuse docs) The defaults are: entry,attribute_timeouts = 60 seconds diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs.c index f693032d5c5ed..b9b01009beb1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs.c @@ -81,6 +81,7 @@ int main(int argc, char *argv[]) options.rdbuffer_size = 10*1024*1024; options.attribute_timeout = 60; options.entry_timeout = 60; + options.max_background = 0; if (-1 == fuse_opt_parse(&args, &options, dfs_opts, dfs_options)) { return -1; @@ -114,6 +115,11 @@ int main(int argc, char *argv[]) snprintf(buf, sizeof buf, "-oentry_timeout=%d",options.entry_timeout); fuse_opt_add_arg(&args, buf); + + if (options.max_background > 0) { + snprintf(buf, sizeof buf, 
"-omax_background=%d",options.max_background); + fuse_opt_add_arg(&args, buf); + } } if (options.nn_uri == NULL) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_init.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_init.c index 4da6da0fa91d9..9799c24db3007 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_init.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_init.c @@ -91,11 +91,11 @@ static void dfsPrintOptions(FILE *fp, const struct options *o) INFO("Mounting with options: [ protected=%s, nn_uri=%s, nn_port=%d, " "debug=%d, read_only=%d, initchecks=%d, " "no_permissions=%d, usetrash=%d, entry_timeout=%d, " - "attribute_timeout=%d, rdbuffer_size=%zd, direct_io=%d ]", + "attribute_timeout=%d, rdbuffer_size=%zd, direct_io=%d, max_background=%d ]", (o->protected ? o->protected : "(NULL)"), o->nn_uri, o->nn_port, o->debug, o->read_only, o->initchecks, o->no_permissions, o->usetrash, o->entry_timeout, - o->attribute_timeout, o->rdbuffer_size, o->direct_io); + o->attribute_timeout, o->rdbuffer_size, o->direct_io, o->max_background); } void *dfs_init(struct fuse_conn_info *conn) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.c index 8461ce40f9186..b4082c63d783e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.c @@ -37,11 +37,13 @@ void print_options() { "\tentry_timeout=%d\n" "\tattribute_timeout=%d\n" "\tprivate=%d\n" - "\trdbuffer_size=%d (KBs)\n", - options.protected, options.nn_uri, options.nn_port, options.debug, + "\trdbuffer_size=%d (KBs)\n" + "\tmax_background=%d\n", + options.protected, options.nn_uri, options.nn_port, options.debug, 
options.read_only, options.usetrash, options.entry_timeout, options.attribute_timeout, options.private, - (int)options.rdbuffer_size / 1024); + (int)options.rdbuffer_size / 1024, + options.max_background); } const char *program; @@ -56,7 +58,7 @@ void print_usage(const char *pname) "[-ousetrash] [-obig_writes] [-oprivate (single user)] [ro] " "[-oserver=] [-oport=] " "[-oentry_timeout=] [-oattribute_timeout=] " - "[-odirect_io] [-onopoermissions] [-o] " + "[-odirect_io] [-onopoermissions] [-omax_background=] [-o] " " [fuse options]\n", pname); printf("NOTE: debugging option for fuse is -debug\n"); } @@ -87,6 +89,7 @@ struct fuse_opt dfs_opts[] = DFSFS_OPT_KEY("protected=%s", protected, 0), DFSFS_OPT_KEY("port=%d", nn_port, 0), DFSFS_OPT_KEY("rdbuffer=%d", rdbuffer_size,0), + DFSFS_OPT_KEY("max_background=%d", max_background, 0), FUSE_OPT_KEY("private", KEY_PRIVATE), FUSE_OPT_KEY("ro", KEY_RO), diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.h index 4bfc2355259b3..2d00f1b30a1dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_options.h @@ -34,6 +34,7 @@ struct options { int private; size_t rdbuffer_size; int direct_io; + int max_background; } options; extern struct fuse_opt dfs_opts[]; diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java index dabbe00b01668..ec5c4e37473d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java @@ -187,6 +187,7 @@ private static Process establishMount(URI uri) throws IOException 
{ "-ononempty", // Don't complain about junk in mount point "-f", // Don't background the process "-ordbuffer=32768", // Read buffer size in kb + "-omax_background=100", // Set fuse max_background=100 (12 by default) "rw" }; diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c index 9054287405632..846852bfd0e88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c @@ -279,7 +279,7 @@ static int testHdfsMiniStressImpl(struct tlhThreadInfo *ti) EXPECT_NONNULL(ti->hdfs); // Error injection on, some failures are expected in the read path. // The expectation is that any memory stomps will cascade and cause - // the following test to fail. Ideally RPC errors would be seperated + // the following test to fail. Ideally RPC errors would be separated // from BlockReader errors (RPC is expected to recover from disconnects). 
doTestHdfsMiniStress(ti, 1); // No error injection diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_ops.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_ops.c index 23fa2e51128ba..b1e64c642edaf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_ops.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_ops.c @@ -454,6 +454,68 @@ int main(int argc, char **argv) { hdfsCloseFile(lfs, localFile); } + + { + // HDFS Open File Builder tests + + exists = hdfsExists(fs, readPath); + + if (exists) { + fprintf(stderr, "Failed to validate existence of %s\n", readPath); + shutdown_and_exit(cl, -1); + } + + hdfsOpenFileBuilder *builder; + builder = hdfsOpenFileBuilderAlloc(fs, readPath); + hdfsOpenFileBuilderOpt(builder, "hello", "world"); + + hdfsOpenFileFuture *future; + future = hdfsOpenFileBuilderBuild(builder); + + readFile = hdfsOpenFileFutureGet(future); + if (!hdfsOpenFileFutureCancel(future, 0)) { + fprintf(stderr, "Cancel on a completed Future should return false"); + shutdown_and_exit(cl, -1); + } + hdfsOpenFileFutureFree(future); + + memset(buffer, 0, sizeof(buffer)); + num_read_bytes = hdfsRead(fs, readFile, (void *) buffer, + sizeof(buffer)); + if (strncmp(fileContents, buffer, strlen(fileContents)) != 0) { + fprintf(stderr, + "Failed to read. 
Expected %s but got %s (%d bytes)\n", + fileContents, buffer, num_read_bytes); + shutdown_and_exit(cl, -1); + } + hdfsCloseFile(fs, readFile); + + builder = hdfsOpenFileBuilderAlloc(fs, readPath); + hdfsOpenFileBuilderOpt(builder, "hello", "world"); + + future = hdfsOpenFileBuilderBuild(builder); + + readFile = hdfsOpenFileFutureGetWithTimeout(future, 1, jDays); + if (!hdfsOpenFileFutureCancel(future, 0)) { + fprintf(stderr, "Cancel on a completed Future should return " + "false"); + shutdown_and_exit(cl, -1); + } + hdfsOpenFileFutureFree(future); + + memset(buffer, 0, sizeof(buffer)); + num_read_bytes = hdfsRead(fs, readFile, (void*)buffer, + sizeof(buffer)); + if (strncmp(fileContents, buffer, strlen(fileContents)) != 0) { + fprintf(stderr, "Failed to read. Expected %s but got " + "%s (%d bytes)\n", fileContents, buffer, + num_read_bytes); + shutdown_and_exit(cl, -1); + } + memset(buffer, 0, strlen(fileContents + 1)); + hdfsCloseFile(fs, readFile); + } + totalResult = 0; result = 0; { diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c index 220208676e311..ed150925cdb81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/hdfs.c @@ -38,6 +38,10 @@ #define KERBEROS_TICKET_CACHE_PATH "hadoop.security.kerberos.ticket.cache.path" +// StreamCapability flags taken from o.a.h.fs.StreamCapabilities +#define IS_READ_BYTE_BUFFER_CAPABILITY "in:readbytebuffer" +#define IS_PREAD_BYTE_BUFFER_CAPABILITY "in:preadbytebuffer" + // Bit fields for hdfsFile_internal flags #define HDFS_FILE_SUPPORTS_DIRECT_READ (1<<0) #define HDFS_FILE_SUPPORTS_DIRECT_PREAD (1<<1) @@ -956,9 +960,14 @@ struct hdfsStreamBuilder { struct hdfsStreamBuilder *hdfsStreamBuilderAlloc(hdfsFS fs, const char *path, int flags) { - int path_len = strlen(path); + size_t path_len = 
strlen(path); struct hdfsStreamBuilder *bld; + // Check for overflow in path_len + if (path_len > SIZE_MAX - sizeof(struct hdfsStreamBuilder)) { + errno = EOVERFLOW; + return NULL; + } // sizeof(hdfsStreamBuilder->path) includes one byte for the string // terminator bld = malloc(sizeof(struct hdfsStreamBuilder) + path_len); @@ -1070,6 +1079,27 @@ static int hdfsHasStreamCapability(jobject jFile, return 0; } +/** + * Sets the flags of the given hdfsFile based on the capabilities of the + * underlying stream. + * + * @param file file->flags will be updated based on the capabilities of jFile + * @param jFile the underlying stream to check for capabilities + */ +static void setFileFlagCapabilities(hdfsFile file, jobject jFile) { + // Check the StreamCapabilities of jFile to see if we can do direct + // reads + if (hdfsHasStreamCapability(jFile, IS_READ_BYTE_BUFFER_CAPABILITY)) { + file->flags |= HDFS_FILE_SUPPORTS_DIRECT_READ; + } + + // Check the StreamCapabilities of jFile to see if we can do direct + // preads + if (hdfsHasStreamCapability(jFile, IS_PREAD_BYTE_BUFFER_CAPABILITY)) { + file->flags |= HDFS_FILE_SUPPORTS_DIRECT_PREAD; + } +} + static hdfsFile hdfsOpenFileImpl(hdfsFS fs, const char *path, int flags, int32_t bufferSize, int16_t replication, int64_t blockSize) { @@ -1240,17 +1270,7 @@ static hdfsFile hdfsOpenFileImpl(hdfsFS fs, const char *path, int flags, file->flags = 0; if ((flags & O_WRONLY) == 0) { - // Check the StreamCapabilities of jFile to see if we can do direct - // reads - if (hdfsHasStreamCapability(jFile, "in:readbytebuffer")) { - file->flags |= HDFS_FILE_SUPPORTS_DIRECT_READ; - } - - // Check the StreamCapabilities of jFile to see if we can do direct - // preads - if (hdfsHasStreamCapability(jFile, "in:preadbytebuffer")) { - file->flags |= HDFS_FILE_SUPPORTS_DIRECT_PREAD; - } + setFileFlagCapabilities(file, jFile); } ret = 0; @@ -1283,6 +1303,469 @@ hdfsFile hdfsStreamBuilderBuild(struct hdfsStreamBuilder *bld) return file; } +/** + * A 
wrapper around o.a.h.fs.FutureDataInputStreamBuilder and the file name + * associated with the builder. + */ +struct hdfsOpenFileBuilder { + jobject jBuilder; + const char *path; +}; + +/** + * A wrapper around a java.util.concurrent.Future (created by calling + * FutureDataInputStreamBuilder#build) and the file name associated with the + * builder. + */ +struct hdfsOpenFileFuture { + jobject jFuture; + const char *path; +}; + +hdfsOpenFileBuilder *hdfsOpenFileBuilderAlloc(hdfsFS fs, + const char *path) { + int ret = 0; + jthrowable jthr; + jvalue jVal; + jobject jFS = (jobject) fs; + + jobject jPath = NULL; + jobject jBuilder = NULL; + + JNIEnv *env = getJNIEnv(); + if (!env) { + errno = EINTERNAL; + return NULL; + } + + hdfsOpenFileBuilder *builder; + builder = calloc(1, sizeof(hdfsOpenFileBuilder)); + if (!builder) { + fprintf(stderr, "hdfsOpenFileBuilderAlloc(%s): OOM when creating " + "hdfsOpenFileBuilder\n", path); + ret = ENOMEM; // reported through errno by the done block + goto done; + } + builder->path = path; // borrowed pointer, the string is not copied + + jthr = constructNewObjectOfPath(env, path, &jPath); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileBuilderAlloc(%s): constructNewObjectOfPath", + path); + goto done; + } + + jthr = invokeMethod(env, &jVal, INSTANCE, jFS, JC_FILE_SYSTEM, + "openFile", JMETHOD1(JPARAM(HADOOP_PATH), JPARAM(HADOOP_FDISB)), + jPath); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileBuilderAlloc(%s): %s#openFile(Path) failed", + path, HADOOP_FS); + goto done; + } + jBuilder = jVal.l; + + builder->jBuilder = (*env)->NewGlobalRef(env, jBuilder); + if (!builder->jBuilder) { + printPendingExceptionAndFree(env, PRINT_EXC_ALL, + "hdfsOpenFileBuilderAlloc(%s): NewGlobalRef(%s) failed", path, + HADOOP_FDISB); + ret = EINVAL; + goto done; + } + +done: + destroyLocalReference(env, jPath); + destroyLocalReference(env, jBuilder); + if (ret) { + if (builder) { + if (builder->jBuilder) { + (*env)->DeleteGlobalRef(env, builder->jBuilder); + } + 
free(builder); + } + errno = ret; + return NULL; + } + return builder; +} + +/** + * Used internally by hdfsOpenFileBuilderWithOption to switch between + * FSBuilder#must and #opt. + */ +typedef enum { must, opt } openFileBuilderOptionType; + +/** + * Shared implementation of hdfsOpenFileBuilderMust and hdfsOpenFileBuilderOpt + * that switches between each method depending on the value of + * openFileBuilderOptionType. + */ +static hdfsOpenFileBuilder *hdfsOpenFileBuilderWithOption( + hdfsOpenFileBuilder *builder, const char *key, + const char *value, openFileBuilderOptionType optionType) { + int ret = 0; + jthrowable jthr; + jvalue jVal; + jobject localJBuilder = NULL; + jobject globalJBuilder; + jstring jKeyString = NULL; + jstring jValueString = NULL; + + // If the builder was not previously created by a prior call to + // hdfsOpenFileBuilderAlloc then exit + if (builder == NULL || builder->jBuilder == NULL) { + errno = EINVAL; + return NULL; + } + + JNIEnv *env = getJNIEnv(); + if (!env) { + errno = EINTERNAL; + return NULL; + } + jthr = newJavaStr(env, key, &jKeyString); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileBuilderWithOption(%s): newJavaStr(%s)", + builder->path, key); + goto done; + } + jthr = newJavaStr(env, value, &jValueString); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileBuilderWithOption(%s): newJavaStr(%s)", + builder->path, value); + goto done; + } + + const char *optionTypeMethodName; + switch (optionType) { + case must: + optionTypeMethodName = "must"; + break; + case opt: + optionTypeMethodName = "opt"; + break; + default: + ret = EINTERNAL; + goto done; + } + + jthr = invokeMethod(env, &jVal, INSTANCE, builder->jBuilder, + JC_FUTURE_DATA_IS_BUILDER, optionTypeMethodName, + JMETHOD2(JPARAM(JAVA_STRING), JPARAM(JAVA_STRING), + JPARAM(HADOOP_FS_BLDR)), jKeyString, + jValueString); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + 
"hdfsOpenFileBuilderWithOption(%s): %s#%s(%s, %s) failed", + builder->path, HADOOP_FS_BLDR, optionTypeMethodName, key, + value); + goto done; + } + + localJBuilder = jVal.l; + globalJBuilder = (*env)->NewGlobalRef(env, localJBuilder); + if (!globalJBuilder) { + printPendingExceptionAndFree(env, PRINT_EXC_ALL, + "hdfsOpenFileBuilderWithOption(%s): NewGlobalRef(%s) failed", + builder->path, HADOOP_FDISB); + ret = EINVAL; + goto done; + } + (*env)->DeleteGlobalRef(env, builder->jBuilder); + builder->jBuilder = globalJBuilder; + +done: + destroyLocalReference(env, jKeyString); + destroyLocalReference(env, jValueString); + destroyLocalReference(env, localJBuilder); + if (ret) { + errno = ret; + return NULL; + } + return builder; +} + +hdfsOpenFileBuilder *hdfsOpenFileBuilderMust(hdfsOpenFileBuilder *builder, + const char *key, const char *value) { + openFileBuilderOptionType optionType; + optionType = must; + return hdfsOpenFileBuilderWithOption(builder, key, value, optionType); +} + +hdfsOpenFileBuilder *hdfsOpenFileBuilderOpt(hdfsOpenFileBuilder *builder, + const char *key, const char *value) { + openFileBuilderOptionType optionType; + optionType = opt; + return hdfsOpenFileBuilderWithOption(builder, key, value, optionType); +} + +hdfsOpenFileFuture *hdfsOpenFileBuilderBuild(hdfsOpenFileBuilder *builder) { + int ret = 0; + jthrowable jthr; + jvalue jVal; + + jobject jFuture = NULL; + + // If the builder was not previously created by a prior call to + // hdfsOpenFileBuilderAlloc then exit + if (builder == NULL || builder->jBuilder == NULL) { + errno = EINVAL; + return NULL; + } + + JNIEnv *env = getJNIEnv(); + if (!env) { + errno = EINTERNAL; + return NULL; + } + + hdfsOpenFileFuture *future; + future = calloc(1, sizeof(hdfsOpenFileFuture)); + if (!future) { + fprintf(stderr, "hdfsOpenFileBuilderBuild: OOM when creating " + "hdfsOpenFileFuture\n"); + ret = ENOMEM; // errno is set at done, after the builder is freed + goto done; + } + future->path = builder->path; + + jthr = invokeMethod(env, &jVal, INSTANCE, 
builder->jBuilder, + JC_FUTURE_DATA_IS_BUILDER, "build", + JMETHOD1("", JPARAM(JAVA_CFUTURE))); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileBuilderBuild(%s): %s#build() failed", + builder->path, HADOOP_FDISB); + goto done; + } + jFuture = jVal.l; + + future->jFuture = (*env)->NewGlobalRef(env, jFuture); + if (!future->jFuture) { + printPendingExceptionAndFree(env, PRINT_EXC_ALL, + "hdfsOpenFileBuilderBuild(%s): NewGlobalRef(%s) failed", + builder->path, JAVA_CFUTURE); + ret = EINVAL; + goto done; + } + +done: + destroyLocalReference(env, jFuture); + if (ret) { + if (future) { + if (future->jFuture) { + (*env)->DeleteGlobalRef(env, future->jFuture); + } + free(future); + } + hdfsOpenFileBuilderFree(builder); + errno = ret; + return NULL; + } + hdfsOpenFileBuilderFree(builder); + return future; +} + +void hdfsOpenFileBuilderFree(hdfsOpenFileBuilder *builder) { + JNIEnv *env; + env = getJNIEnv(); + if (!env || !builder) { + return; + } + if (builder->jBuilder) { + (*env)->DeleteGlobalRef(env, builder->jBuilder); + builder->jBuilder = NULL; + } + free(builder); +} + +/** + * Shared implementation of hdfsOpenFileFutureGet and + * hdfsOpenFileFutureGetWithTimeout. If jTimeUnit is NULL, calls + * Future#get(), otherwise it calls Future#get(long, TimeUnit). 
+ */ +static hdfsFile fileFutureGetWithTimeout(hdfsOpenFileFuture *future, + int64_t timeout, jobject jTimeUnit) { + int ret = 0; + jthrowable jthr; + jvalue jVal; + + hdfsFile file = NULL; + jobject jFile = NULL; + + JNIEnv *env = getJNIEnv(); + if (!env) { + errno = EINTERNAL; + return NULL; + } + + if (!jTimeUnit) { + jthr = invokeMethod(env, &jVal, INSTANCE, future->jFuture, + JC_CFUTURE, "get", JMETHOD1("", JPARAM(JAVA_OBJECT))); + } else { + jthr = invokeMethod(env, &jVal, INSTANCE, future->jFuture, + JC_CFUTURE, "get", JMETHOD2("J", + JPARAM(JAVA_TIMEUNIT), JPARAM(JAVA_OBJECT)), timeout, + jTimeUnit); + } + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileFutureGet(%s): %s#get failed", future->path, + JAVA_CFUTURE); + goto done; + } + jFile = jVal.l; // taken before calloc so done releases it on OOM + + file = calloc(1, sizeof(struct hdfsFile_internal)); + if (!file) { + fprintf(stderr, "hdfsOpenFileFutureGet(%s): OOM when creating " + "hdfsFile\n", future->path); + ret = ENOMEM; + goto done; + } + file->file = (*env)->NewGlobalRef(env, jFile); + if (!file->file) { + ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL, + "hdfsOpenFileFutureGet(%s): NewGlobalRef(jFile) failed", + future->path); + goto done; + } + + file->type = HDFS_STREAM_INPUT; + file->flags = 0; + + setFileFlagCapabilities(file, jFile); + +done: + destroyLocalReference(env, jTimeUnit); + destroyLocalReference(env, jFile); + if (ret) { + if (file) { + if (file->file) { + (*env)->DeleteGlobalRef(env, file->file); + } + free(file); + } + errno = ret; + return NULL; + } + return file; +} + +hdfsFile hdfsOpenFileFutureGet(hdfsOpenFileFuture *future) { + return fileFutureGetWithTimeout(future, -1, NULL); +} + +hdfsFile hdfsOpenFileFutureGetWithTimeout(hdfsOpenFileFuture *future, + int64_t timeout, javaConcurrentTimeUnit timeUnit) { + int ret = 0; + jthrowable jthr; + jobject jTimeUnit = NULL; + + JNIEnv *env = getJNIEnv(); + if (!env) { + errno = EINTERNAL; + return NULL; + } + + const char *timeUnitEnumName; + 
switch (timeUnit) { + case jNanoseconds: + timeUnitEnumName = "NANOSECONDS"; + break; + case jMicroseconds: + timeUnitEnumName = "MICROSECONDS"; + break; + case jMilliseconds: + timeUnitEnumName = "MILLISECONDS"; + break; + case jSeconds: + timeUnitEnumName = "SECONDS"; + break; + case jMinutes: + timeUnitEnumName = "MINUTES"; + break; + case jHours: + timeUnitEnumName = "HOURS"; + break; + case jDays: + timeUnitEnumName = "DAYS"; + break; + default: + ret = EINTERNAL; + goto done; + } + + jthr = fetchEnumInstance(env, JAVA_TIMEUNIT, timeUnitEnumName, &jTimeUnit); + + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileFutureGetWithTimeout(%s): fetchEnumInstance(%s)", + future->path, JAVA_TIMEUNIT); + goto done; + } + return fileFutureGetWithTimeout(future, timeout, jTimeUnit); + +done: + if (ret) { + errno = ret; + } + return NULL; +} + +int hdfsOpenFileFutureCancel(hdfsOpenFileFuture *future, + int mayInterruptIfRunning) { + int ret = 0; + jthrowable jthr; + jvalue jVal; + + jboolean jMayInterruptIfRunning; + + JNIEnv *env = getJNIEnv(); + if (!env) { + errno = EINTERNAL; + return -1; + } + + jMayInterruptIfRunning = mayInterruptIfRunning ? 
JNI_TRUE : JNI_FALSE; + jthr = invokeMethod(env, &jVal, INSTANCE, future->jFuture, JC_CFUTURE, + "cancel", JMETHOD1("Z", "Z"), jMayInterruptIfRunning); + if (jthr) { + ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, + "hdfsOpenFileFutureCancel(%s): %s#cancel failed", future->path, + JAVA_CFUTURE); + goto done; + } + +done: + if (ret) { + errno = ret; + return -1; + } + if (!jVal.z) { + return -1; + } + return 0; +} + +void hdfsOpenFileFutureFree(hdfsOpenFileFuture *future) { + JNIEnv *env; + env = getJNIEnv(); + if (!env || !future) { + return; + } + if (future->jFuture) { + (*env)->DeleteGlobalRef(env, future->jFuture); + future->jFuture = NULL; + } + free(future); +} + int hdfsTruncateFile(hdfsFS fs, const char* path, tOffset newlength) { jobject jFS = (jobject)fs; @@ -3409,7 +3892,7 @@ tOffset hdfsGetUsed(hdfsFS fs) } fss = (jobject)jVal.l; jthr = invokeMethod(env, &jVal, INSTANCE, fss, JC_FS_STATUS, - HADOOP_FSSTATUS,"getUsed", "()J"); + "getUsed", "()J"); destroyLocalReference(env, fss); if (jthr) { errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL, diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h index e58a6232d205a..eba50ff6eb277 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h @@ -82,6 +82,29 @@ extern "C" { } tObjectKind; struct hdfsStreamBuilder; + /** + * The C reflection of the enum values from java.util.concurrent.TimeUnit . + */ + typedef enum javaConcurrentTimeUnit { + jNanoseconds, + jMicroseconds, + jMilliseconds, + jSeconds, + jMinutes, + jHours, + jDays, + } javaConcurrentTimeUnit; + + /** + * The C reflection of java.util.concurrent.Future specifically used for + * opening HDFS files asynchronously. 
+ */ + typedef struct hdfsOpenFileFuture hdfsOpenFileFuture; + + /** + * The C reflection of o.a.h.fs.FutureDataInputStreamBuilder . + */ + typedef struct hdfsOpenFileBuilder hdfsOpenFileBuilder; /** * The C reflection of org.apache.org.hadoop.FileSystem . @@ -429,6 +452,118 @@ extern "C" { hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags, int bufferSize, short replication, tSize blocksize); + /** + * hdfsOpenFileBuilderAlloc - Allocate a HDFS open file builder. + * + * @param fs The configured filesystem handle. + * @param path The full path to the file. + * @return Returns the hdfsOpenFileBuilder, or NULL on error. + */ + LIBHDFS_EXTERNAL + hdfsOpenFileBuilder *hdfsOpenFileBuilderAlloc(hdfsFS fs, + const char *path); + + /** + * hdfsOpenFileBuilderMust - Specifies a mandatory parameter for the open + * file builder. While the underlying FSBuilder supports various + * types for the value (boolean, int, float, double), currently only + * strings are supported. + * + * @param builder The open file builder to set the config for. + * @param key The config key + * @param value The config value + * @return Returns the hdfsOpenFileBuilder, or NULL on error. + */ + LIBHDFS_EXTERNAL + hdfsOpenFileBuilder *hdfsOpenFileBuilderMust(hdfsOpenFileBuilder *builder, + const char *key, const char *value); + + /** + * hdfsOpenFileBuilderOpt - Specifies an optional parameter for the open + * file builder. While the underlying FSBuilder supports various + * types for the value (boolean, int, float, double), currently only + * strings are supported. + * + * @param builder The open file builder to set the config for. + * @param key The config key + * @param value The config value + * @return Returns the hdfsOpenFileBuilder, or NULL on error. 
+ */ + LIBHDFS_EXTERNAL + hdfsOpenFileBuilder *hdfsOpenFileBuilderOpt(hdfsOpenFileBuilder *builder, + const char *key, const char *value); + + /** + * hdfsOpenFileBuilderBuild - Builds the open file builder and returns a + * hdfsOpenFileFuture which tracks the asynchronous call to open the + * specified file. + * + * @param builder The open file builder to build. + * @return Returns the hdfsOpenFileFuture, or NULL on error. + */ + LIBHDFS_EXTERNAL + hdfsOpenFileFuture *hdfsOpenFileBuilderBuild(hdfsOpenFileBuilder *builder); + + /** + * hdfsOpenFileBuilderFree - Free a HDFS open file builder. + * + * It is normally not necessary to call this function since + * hdfsOpenFileBuilderBuild frees the builder. + * + * @param builder The hdfsOpenFileBuilder to free. + */ + LIBHDFS_EXTERNAL + void hdfsOpenFileBuilderFree(hdfsOpenFileBuilder *builder); + + /** + * hdfsOpenFileFutureGet - Call Future#get() on the underlying Java Future + * object. A call to #get() will block until the asynchronous operation has + * completed. In this case, until the open file call has completed. This + * method blocks indefinitely until blocking call completes. + * + * @param future The hdfsOpenFileFuture to call #get on + * @return Returns the opened hdfsFile, or NULL on error. + */ + LIBHDFS_EXTERNAL + hdfsFile hdfsOpenFileFutureGet(hdfsOpenFileFuture *future); + + /** + * hdfsOpenFileFutureGetWithTimeout - Call Future#get(long, TimeUnit) on + * the underlying Java Future object. A call to #get(long, TimeUnit) will + * block until the asynchronous operation has completed (in this case, + * until the open file call has completed) or the specified timeout has + * been reached. + * + * @param future The hdfsOpenFileFuture to call #get on + * @return Returns the opened hdfsFile, or NULL on error or if the timeout + * has been reached. 
+ */ + LIBHDFS_EXTERNAL + hdfsFile hdfsOpenFileFutureGetWithTimeout(hdfsOpenFileFuture *future, + int64_t timeout, javaConcurrentTimeUnit timeUnit); + + /** + * hdfsOpenFileFutureCancel - Call Future#cancel(boolean) on the + * underlying Java Future object. The value of mayInterruptIfRunning + * controls whether the Java thread running the Future should be + * interrupted or not. + * + * @param future The hdfsOpenFileFuture to call #cancel on + * @param mayInterruptIfRunning if true, interrupts the running thread + * @return Returns 0 if the future was successfully cancelled, else -1 + */ + LIBHDFS_EXTERNAL + int hdfsOpenFileFutureCancel(hdfsOpenFileFuture *future, + int mayInterruptIfRunning); + + /** + * hdfsOpenFileFutureFree - Free a HDFS open file future. + * + * @param future The hdfsOpenFileFuture to free. + */ + LIBHDFS_EXTERNAL + void hdfsOpenFileFutureFree(hdfsOpenFileFuture *future); + /** * hdfsStreamBuilderAlloc - Allocate an HDFS stream builder. * diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.c index cf880e91b7596..9f589ac257aa1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.c @@ -98,6 +98,8 @@ jthrowable initCachedClasses(JNIEnv* env) { "org/apache/hadoop/hdfs/ReadStatistics"; cachedJavaClasses[JC_HDFS_DATA_INPUT_STREAM].className = "org/apache/hadoop/hdfs/client/HdfsDataInputStream"; + cachedJavaClasses[JC_FUTURE_DATA_IS_BUILDER].className = + "org/apache/hadoop/fs/FutureDataInputStreamBuilder"; cachedJavaClasses[JC_DOMAIN_SOCKET].className = "org/apache/hadoop/net/unix/DomainSocket"; cachedJavaClasses[JC_URI].className = @@ -108,6 +110,8 @@ jthrowable initCachedClasses(JNIEnv* env) { "java/util/EnumSet"; cachedJavaClasses[JC_EXCEPTION_UTILS].className = 
"org/apache/commons/lang3/exception/ExceptionUtils"; + cachedJavaClasses[JC_CFUTURE].className = + "java/util/concurrent/CompletableFuture"; // Create and set the jclass objects based on the class names set above jthrowable jthr; diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.h index 92cdd542e2371..0b174e1fecc56 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jclasses.h @@ -54,11 +54,13 @@ typedef enum { JC_FS_PERMISSION, JC_READ_STATISTICS, JC_HDFS_DATA_INPUT_STREAM, + JC_FUTURE_DATA_IS_BUILDER, JC_DOMAIN_SOCKET, JC_URI, JC_BYTE_BUFFER, JC_ENUM_SET, JC_EXCEPTION_UTILS, + JC_CFUTURE, // A special marker enum that counts the number of cached jclasses NUM_CACHED_CLASSES } CachedJavaClass; @@ -95,6 +97,8 @@ const char *getClassName(CachedJavaClass cachedJavaClass); #define HADOOP_FSPERM "org/apache/hadoop/fs/permission/FsPermission" #define HADOOP_RSTAT "org/apache/hadoop/hdfs/ReadStatistics" #define HADOOP_HDISTRM "org/apache/hadoop/hdfs/client/HdfsDataInputStream" +#define HADOOP_FDISB "org/apache/hadoop/fs/FutureDataInputStreamBuilder" +#define HADOOP_FS_BLDR "org/apache/hadoop/fs/FSBuilder" #define HADOOP_RO "org/apache/hadoop/fs/ReadOption" #define HADOOP_DS "org/apache/hadoop/net/unix/DomainSocket" @@ -104,6 +108,9 @@ const char *getClassName(CachedJavaClass cachedJavaClass); #define JAVA_BYTEBUFFER "java/nio/ByteBuffer" #define JAVA_STRING "java/lang/String" #define JAVA_ENUMSET "java/util/EnumSet" +#define JAVA_CFUTURE "java/util/concurrent/CompletableFuture" +#define JAVA_TIMEUNIT "java/util/concurrent/TimeUnit" +#define JAVA_OBJECT "java/lang/Object" /* Some frequently used third-party class names */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/posix/thread_local_storage.c 
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/posix/thread_local_storage.c index a0f26c6cb6e71..1b6dafaba82ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/posix/thread_local_storage.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/posix/thread_local_storage.c @@ -53,7 +53,7 @@ void hdfsThreadDestructor(void *v) char thr_name[MAXTHRID]; /* Detach the current thread from the JVM */ - if (env) { + if ((env != NULL) && (*env != NULL)) { ret = (*env)->GetJavaVM(env, &vm); if (ret != 0) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/windows/thread_local_storage.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/windows/thread_local_storage.c index a6f48fd4a830e..f7abc89908b0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/windows/thread_local_storage.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/os/windows/thread_local_storage.c @@ -46,10 +46,10 @@ static void detachCurrentThreadFromJvm() if (threadLocalStorageGet(&state) || !state) { return; } - if (!state->env) { + env = state->env; + if ((env == NULL) || (*env == NULL)) { return; } - env = state->env; ret = (*env)->GetJavaVM(env, &vm); if (ret) { fprintf(stderr, diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt index 411320ad771e7..f4dd4922340a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt @@ -48,6 +48,7 @@ find_package(GSasl) find_package(Threads) include(CheckCXXSourceCompiles) +include(CheckSymbolExists) # Check if thread_local is supported unset (THREAD_LOCAL_SUPPORTED CACHE) @@ -140,6 +141,11 @@ 
else (NOT NO_SASL) message(STATUS "Compiling with NO SASL SUPPORT") endif (NOT NO_SASL) +check_symbol_exists(explicit_bzero "string.h" HAVE_EXPLICIT_BZERO) +if(HAVE_EXPLICIT_BZERO) + add_definitions(-DHAVE_EXPLICIT_BZERO) +endif() + add_definitions(-DASIO_STANDALONE -DASIO_CPP11_DATE_TIME) # Disable optimizations if compiling debug @@ -263,6 +269,7 @@ if (HADOOP_BUILD) ${CMAKE_THREAD_LIBS_INIT} ) set_target_properties(hdfspp PROPERTIES SOVERSION ${LIBHDFSPP_VERSION}) + hadoop_dual_output_directory(hdfspp ${OUT_DIR}) else (HADOOP_BUILD) add_library(hdfspp_static STATIC ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS}) target_link_libraries(hdfspp_static diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h index d8574d15b3195..bc3d8b96d3f66 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h @@ -103,7 +103,7 @@ class URI { std::string str(bool encoded_output=true) const; - // Get a string with each URI field printed on a seperate line + // Get a string with each URI field printed on a separate line std::string GetDebugString() const; private: // These are stored in encoded form diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc index 6b2468fd5dbdc..549da93c2aa89 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc @@ -1402,7 +1402,11 @@ int hdfsGetBlockLocations(hdfsFS fs, const char *path, struct hdfsBlockLocations hdfsBlockLocations *locations = new struct 
hdfsBlockLocations(); (*locations_out) = locations; +#ifdef HAVE_EXPLICIT_BZERO + explicit_bzero(locations, sizeof(*locations)); +#else bzero(locations, sizeof(*locations)); +#endif locations->fileLength = ppLocations->getFileLength(); locations->isLastBlockComplete = ppLocations->isLastBlockComplete(); locations->isUnderConstruction = ppLocations->isUnderConstruction(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_ext_test.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_ext_test.cc index 79771f0d7c57c..fba82b817ecb4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_ext_test.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_ext_test.cc @@ -453,11 +453,11 @@ TEST_F(HdfsExtTest, TestHosts) { EXPECT_EQ(0, errno); //Test invalid arguments - EXPECT_EQ(nullptr, hdfsGetHosts(fs, filename.c_str(), 0, std::numeric_limits::max()+1)); + EXPECT_EQ(nullptr, hdfsGetHosts(fs, filename.c_str(), 0, std::numeric_limits::min())); EXPECT_EQ((int) std::errc::invalid_argument, errno); //Test invalid arguments - EXPECT_EQ(nullptr, hdfsGetHosts(fs, filename.c_str(), std::numeric_limits::max()+1, std::numeric_limits::max())); + EXPECT_EQ(nullptr, hdfsGetHosts(fs, filename.c_str(), std::numeric_limits::min(), std::numeric_limits::max())); EXPECT_EQ((int) std::errc::invalid_argument, errno); } @@ -475,7 +475,11 @@ TEST_F(HdfsExtTest, TestReadStats) { hdfsFile file = hdfsOpenFile(fs, path.c_str(), O_WRONLY, 0, 0, 0); EXPECT_NE(nullptr, file); void * buf = malloc(size); +#ifdef HAVE_EXPLICIT_BZERO + explicit_bzero(buf, size); +#else bzero(buf, size); +#endif EXPECT_EQ(size, hdfsWrite(fs, file, buf, size)); free(buf); EXPECT_EQ(0, hdfsCloseFile(fs, file)); diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c 
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c index bda27b9a43202..2d265b8f03c0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c @@ -250,6 +250,65 @@ hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags, return ret; } +hdfsOpenFileBuilder *hdfsOpenFileBuilderAlloc(hdfsFS fs, + const char *path) { + return libhdfs_hdfsOpenFileBuilderAlloc(fs->libhdfsRep, path); +} + +hdfsOpenFileBuilder *hdfsOpenFileBuilderMust( + hdfsOpenFileBuilder *builder, const char *key, + const char *value) { + return libhdfs_hdfsOpenFileBuilderMust(builder, key, value); +} + +hdfsOpenFileBuilder *hdfsOpenFileBuilderOpt( + hdfsOpenFileBuilder *builder, const char *key, + const char *value) { + return libhdfs_hdfsOpenFileBuilderOpt(builder, key, value); +} + +hdfsOpenFileFuture *hdfsOpenFileBuilderBuild( + hdfsOpenFileBuilder *builder) { + return libhdfs_hdfsOpenFileBuilderBuild(builder); +} + +void hdfsOpenFileBuilderFree(hdfsOpenFileBuilder *builder) { + libhdfs_hdfsOpenFileBuilderFree(builder); +} + +hdfsFile hdfsOpenFileFutureGet(hdfsOpenFileFuture *future) { + hdfsFile ret = calloc(1, sizeof(struct hdfsFile_internal)); + ret->libhdfsppRep = 0; + ret->libhdfsRep = libhdfs_hdfsOpenFileFutureGet(future); + if (!ret->libhdfsRep) { + free(ret); + ret = NULL; + } + return ret; +} + +hdfsFile hdfsOpenFileFutureGetWithTimeout(hdfsOpenFileFuture *future, + int64_t timeout, javaConcurrentTimeUnit timeUnit) { + hdfsFile ret = calloc(1, sizeof(struct hdfsFile_internal)); + ret->libhdfsppRep = 0; + ret->libhdfsRep = libhdfs_hdfsOpenFileFutureGetWithTimeout(future, timeout, + timeUnit); + if (!ret->libhdfsRep) { + free(ret); + ret = NULL; + } + return ret; +} + +int hdfsOpenFileFutureCancel(hdfsOpenFileFuture *future, + int mayInterruptIfRunning) { + return libhdfs_hdfsOpenFileFutureCancel(future, 
mayInterruptIfRunning); +} + +void hdfsOpenFileFutureFree(hdfsOpenFileFuture *future) { + libhdfs_hdfsOpenFileFutureFree(future); +} + int hdfsTruncateFile(hdfsFS fs, const char* path, tOffset newlength) { return libhdfs_hdfsTruncateFile(fs->libhdfsRep, path, newlength); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfspp_mini_dfs.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfspp_mini_dfs.h index aecced1a8b6e5..320a958b10c0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfspp_mini_dfs.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfspp_mini_dfs.h @@ -92,7 +92,11 @@ class HdfsHandle { hdfsFile file = hdfsOpenFile(*this, path.c_str(), O_WRONLY, 0, 0, 0); EXPECT_NE(nullptr, file); void * buf = malloc(size); +#ifdef HAVE_EXPLICIT_BZERO + explicit_bzero(buf, size); +#else bzero(buf, size); +#endif EXPECT_EQ(1024, hdfsWrite(*this, file, buf, size)); EXPECT_EQ(0, hdfsCloseFile(*this, file)); free(buf); diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_defines.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_defines.h index 0d014341b4c57..165744142558a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_defines.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_defines.h @@ -39,6 +39,23 @@ #define hdfsConfStrFree libhdfs_hdfsConfStrFree #define hdfsDisconnect libhdfs_hdfsDisconnect #define hdfsOpenFile libhdfs_hdfsOpenFile +#define hdfsOpenFileBuilderAlloc libhdfs_hdfsOpenFileBuilderAlloc +#define hdfsOpenFileBuilderMust libhdfs_hdfsOpenFileBuilderMust +#define hdfsOpenFileBuilderOpt libhdfs_hdfsOpenFileBuilderOpt +#define hdfsOpenFileBuilderBuild libhdfs_hdfsOpenFileBuilderBuild +#define 
hdfsOpenFileBuilderFree libhdfs_hdfsOpenFileBuilderFree +#define hdfsOpenFileFutureGet libhdfs_hdfsOpenFileFutureGet +#define javaConcurrentTimeUnit libhdfs_javaConcurrentTimeUnit +#define jNanoseconds libhdfs_jNanoseconds +#define jMicroseconds libhdfs_jMicroseconds +#define jMilliseconds libhdfs_jMilliseconds +#define jSeconds libhdfsj_jSeconds +#define jMinutes libhdfs_jMinutes +#define jHours libhdfs_jHours +#define jDays libhdfs_jDays +#define hdfsOpenFileFutureGetWithTimeout libhdfs_hdfsOpenFileFutureGetWithTimeout +#define hdfsOpenFileFutureCancel libhdfs_hdfsOpenFileFutureCancel +#define hdfsOpenFileFutureFree libhdfs_hdfsOpenFileFutureFree #define hdfsTruncateFile libhdfs_hdfsTruncateFile #define hdfsUnbufferFile libhdfs_hdfsUnbufferFile #define hdfsCloseFile libhdfs_hdfsCloseFile diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h index d46768c02ad39..d84b8ba287525 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h @@ -39,6 +39,23 @@ #undef hdfsConfStrFree #undef hdfsDisconnect #undef hdfsOpenFile +#undef hdfsOpenFileBuilderAlloc +#undef hdfsOpenFileBuilderMust +#undef hdfsOpenFileBuilderOpt +#undef hdfsOpenFileBuilderBuild +#undef hdfsOpenFileBuilderFree +#undef hdfsOpenFileFutureGet +#undef javaConcurrentTimeUnit +#undef jNanoseconds +#undef jMicroseconds +#undef jMilliseconds +#undef jSeconds +#undef jMinutes +#undef jHours +#undef jDays +#undef hdfsOpenFileFutureGetWithTimeout +#undef hdfsOpenFileFutureCancel +#undef hdfsOpenFileFutureFree #undef hdfsTruncateFile #undef hdfsUnbufferFile #undef hdfsCloseFile diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h index 4b08d0556c3aa..0a6d987409fec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h @@ -39,6 +39,23 @@ #define hdfsConfStrFree libhdfspp_hdfsConfStrFree #define hdfsDisconnect libhdfspp_hdfsDisconnect #define hdfsOpenFile libhdfspp_hdfsOpenFile +#define hdfsOpenFileBuilderAlloc libhdfspp_hdfsOpenFileBuilderAlloc +#define hdfsOpenFileBuilderMust libhdfspp_hdfsOpenFileBuilderMust +#define hdfsOpenFileBuilderOpt libhdfspp_hdfsOpenFileBuilderOpt +#define hdfsOpenFileBuilderBuild libhdfspp_hdfsOpenFileBuilderBuild +#define hdfsOpenFileBuilderFree libhdfspp_hdfsOpenFileBuilderFree +#define hdfsOpenFileFutureGet libhdfspp_hdfsOpenFileFutureGet +#define javaConcurrentTimeUnit libhdfspp_javaConcurrentTimeUnit +#define jNanoseconds libhdfspp_jNanoseconds +#define jMicroseconds libhdfspp_jMicroseconds +#define jMilliseconds libhdfspp_jMilliseconds +#define jSeconds libhdfspp_jSeconds +#define jMinutes libhdfspp_jMinutes +#define jHours libhdfspp_jHours +#define jDays libhdfspp_jDays +#define hdfsOpenFileFutureGetWithTimeout libhdfspp_hdfsOpenFileFutureGetWithTimeout +#define hdfsOpenFileFutureCancel libhdfspp_hdfsOpenFileFutureCancel +#define hdfsOpenFileFutureFree libhdfspp_hdfsOpenFileFutureFree #define hdfsTruncateFile libhdfspp_hdfsTruncateFile #define hdfsUnbufferFile libhdfspp_hdfsUnbufferFile #define hdfsCloseFile libhdfspp_hdfsCloseFile diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2/uriparser2/uriparser/UriFile.c 
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2/uriparser2/uriparser/UriFile.c index 5471e5af8ccb2..22f38bee25bd2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2/uriparser2/uriparser/UriFile.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2/uriparser2/uriparser/UriFile.c @@ -90,7 +90,7 @@ static URI_INLINE int URI_FUNC(FilenameToUriString)(const URI_CHAR * filename, if ((input[0] == _UT('\0')) || (fromUnix && input[0] == _UT('/')) || (!fromUnix && input[0] == _UT('\\'))) { - /* Copy text after last seperator */ + /* Copy text after last separator */ if (lastSep + 1 < input) { if (!fromUnix && absolute && (firstSegment == URI_TRUE)) { /* Quick hack to not convert "C:" to "C%3A" */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml index 2d30f67ef34a2..6faa17493d47e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-hdfs-nfs - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop HDFS-NFS Apache Hadoop HDFS-NFS jar @@ -47,7 +47,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.netty - netty + netty-all compile @@ -84,8 +84,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> test - com.google.guava - guava + io.dropwizard.metrics + metrics-core + provided + + + org.xerial.snappy + snappy-java + provided + + + org.apache.hadoop.thirdparty + hadoop-shaded-guava compile @@ -134,8 +144,8 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> compile - log4j - log4j + ch.qos.reload4j + reload4j compile @@ -160,7 +170,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.slf4j - slf4j-log4j12 + slf4j-reload4j provided diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java index 27213953802f7..2ba1bb060ce43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java @@ -26,6 +26,10 @@ import java.util.List; import java.util.HashMap; +import io.netty.channel.ChannelHandler; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelHandlerContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; @@ -51,15 +55,13 @@ import org.apache.hadoop.oncrpc.security.VerifierNone; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.ChannelHandlerContext; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * RPC program corresponding to mountd daemon. See {@link Mountd}. 
*/ +@ChannelHandler.Sharable public class RpcProgramMountd extends RpcProgram implements MountInterface { private static final Logger LOG = LoggerFactory.getLogger(RpcProgramMountd.class); @@ -262,8 +264,8 @@ RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()) RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()).write( out); } - ChannelBuffer buf = - ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap().buffer()); + ByteBuf buf = + Unpooled.wrappedBuffer(out.asReadOnlyWrap().buffer()); RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); RpcUtil.sendRpcResponse(ctx, rsp); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java index e0fb302992850..41add2212936a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java @@ -29,7 +29,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSClient; @@ -40,13 +40,13 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ShutdownHookManager; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Objects; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalListener; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalNotification; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java index 8494493beed70..ff64ad5804609 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java @@ -26,7 +26,7 @@ import org.apache.hadoop.nfs.nfs3.Nfs3Base; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Nfs server. Supports NFS v3 using {@link RpcProgramNfs3}. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3HttpServer.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3HttpServer.java index c37a21e7d83f5..ad410d4d388f3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3HttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3HttpServer.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration; -import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.net.NetUtils; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java index c6da1981f3716..c58dc5976b37d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java @@ -22,6 +22,8 @@ import java.net.URI; import java.nio.file.FileSystemException; +import io.netty.buffer.ByteBuf; +import io.netty.channel.Channel; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsConstants; @@ -39,8 +41,6 @@ import org.apache.hadoop.nfs.nfs3.response.WccData; import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.security.IdMappingServiceProvider; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.Channel; /** * Utility/helper methods related to NFS @@ -147,16 +147,16 @@ public static void writeChannel(Channel channel, XDR out, int xid) { if (RpcProgramNfs3.LOG.isDebugEnabled()) { 
RpcProgramNfs3.LOG.debug(WRITE_RPC_END + xid); } - ChannelBuffer outBuf = XDR.writeMessageTcp(out, true); - channel.write(outBuf); + ByteBuf outBuf = XDR.writeMessageTcp(out, true); + channel.writeAndFlush(outBuf); } public static void writeChannelCommit(Channel channel, XDR out, int xid) { if (RpcProgramNfs3.LOG.isDebugEnabled()) { RpcProgramNfs3.LOG.debug("Commit done:" + xid); } - ChannelBuffer outBuf = XDR.writeMessageTcp(out, true); - channel.write(outBuf); + ByteBuf outBuf = XDR.writeMessageTcp(out, true); + channel.writeAndFlush(outBuf); } private static boolean isSet(int access, int bits) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java index 764524a8ff677..3995fa5566bb0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java @@ -19,7 +19,7 @@ import java.util.Comparator; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * OffsetRange is the range of read/write request. 
A single point (e.g.,[5,5]) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java index 6067a5df34786..8358c056cac4e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java @@ -31,6 +31,7 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicLong; +import io.netty.channel.Channel; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; @@ -55,10 +56,9 @@ import org.apache.hadoop.security.IdMappingServiceProvider; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.Time; -import org.jboss.netty.channel.Channel; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java index 5c915d26bf1e0..b8db83c89a3a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java @@ -30,9 +30,9 @@ import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import 
com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * A cache saves OpenFileCtx objects for different users. Each cache entry is diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index cb46f449a1f00..f6cb4350e4050 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -28,6 +28,11 @@ import java.nio.charset.Charset; import java.util.EnumSet; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandler; +import io.netty.channel.ChannelHandlerContext; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; @@ -129,18 +134,15 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.util.JvmPauseMonitor; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelHandlerContext; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * RPC program corresponding to nfs daemon. See {@link Nfs3}. 
*/ +@ChannelHandler.Sharable public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { public static final int DEFAULT_UMASK = 0022; public static final FsPermission umask = new FsPermission( @@ -2180,7 +2182,7 @@ public void handleInternal(ChannelHandlerContext ctx, RpcInfo info) { RpcDeniedReply.RejectState.AUTH_ERROR, new VerifierNone()); rdr.write(reply); - ChannelBuffer buf = ChannelBuffers.wrappedBuffer(reply.asReadOnlyWrap() + ByteBuf buf = Unpooled.wrappedBuffer(reply.asReadOnlyWrap() .buffer()); RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); RpcUtil.sendRpcResponse(ctx, rsp); @@ -2291,7 +2293,7 @@ RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()).write( } // TODO: currently we just return VerifierNone out = response.serialize(out, xid, new VerifierNone()); - ChannelBuffer buf = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + ByteBuf buf = Unpooled.wrappedBuffer(out.asReadOnlyWrap() .buffer()); RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java index 98f3d6cfa2930..d5c9d4f55924a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java @@ -22,15 +22,15 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; +import io.netty.channel.Channel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.nfs.nfs3.FileHandle; import org.apache.hadoop.nfs.nfs3.Nfs3Constant.WriteStableHow; -import org.jboss.netty.channel.Channel; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * WriteCtx saves the context of one write request, such as request, channel, diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java index 35542391bdb4c..a1b6e12eebfcf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.EnumSet; +import io.netty.channel.Channel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -43,9 +44,8 @@ import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.VerifierNone; import org.apache.hadoop.security.IdMappingServiceProvider; -import org.jboss.netty.channel.Channel; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Manage the writes and responds asynchronously. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java index 4e53c72bec8a8..31528a2db87a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java @@ -21,6 +21,12 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import io.netty.buffer.ByteBuf; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.socket.SocketChannel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; @@ -42,13 +48,6 @@ import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.CredentialsNone; import org.apache.hadoop.oncrpc.security.VerifierNone; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.MessageEvent; public class TestOutOfOrderWrite { public final static Logger LOG = @@ -100,9 +99,9 @@ public WriteHandler(XDR request) { } @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) { + public void channelRead(ChannelHandlerContext ctx, Object msg) { // Get handle from create response - ChannelBuffer buf = (ChannelBuffer) e.getMessage(); + ByteBuf buf = (ByteBuf) msg; XDR rsp = new XDR(buf.array()); if (rsp.getBytes().length == 0) { LOG.info("rsp length is zero, why?"); @@ -125,7 +124,7 @@ public void messageReceived(ChannelHandlerContext ctx, 
MessageEvent e) { rsp.readBoolean(); // value follow handle = new FileHandle(); handle.deserialize(rsp); - channel = e.getChannel(); + channel = ctx.channel(); } } @@ -136,16 +135,17 @@ public WriteClient(String host, int port, XDR request, Boolean oneShot) { } @Override - protected ChannelPipelineFactory setPipelineFactory() { - this.pipelineFactory = new ChannelPipelineFactory() { + protected ChannelInitializer setChannelHandler() { + return new ChannelInitializer() { @Override - public ChannelPipeline getPipeline() { - return Channels.pipeline( + protected void initChannel(SocketChannel ch) throws Exception { + ChannelPipeline p = ch.pipeline(); + p.addLast( RpcUtil.constructRpcFrameDecoder(), - new WriteHandler(request)); + new WriteHandler(request) + ); } }; - return this.pipelineFactory; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java index 77646af2c5ed9..8380c3c9bb3ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hdfs.nfs.nfs3; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import org.junit.Test; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java index 30ecc0b824b9e..07954c00d64e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java @@ -28,6 +28,7 
@@ import java.nio.ByteBuffer; import java.util.EnumSet; +import io.netty.channel.Channel; import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -92,7 +93,6 @@ import org.apache.hadoop.security.IdMappingConstant; import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; import org.apache.hadoop.security.authorize.ProxyUsers; -import org.jboss.netty.channel.Channel; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestViewfsWithNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestViewfsWithNfs3.java index a5997b46a9154..4899d9bd4606c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestViewfsWithNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestViewfsWithNfs3.java @@ -154,8 +154,6 @@ public static void setup() throws Exception { DFSTestUtil.createFile(viewFs, new Path("/hdfs2/write2"), 0, (short) 1, 0); DFSTestUtil.createFile(viewFs, new Path("/hdfs1/renameMultiNN"), 0, (short) 1, 0); - DFSTestUtil.createFile(viewFs, new Path("/hdfs1/renameSingleNN"), - 0, (short) 1, 0); } @AfterClass @@ -307,6 +305,8 @@ public void testNfsRenameMultiNN() throws Exception { @Test (timeout = 60000) public void testNfsRenameSingleNN() throws Exception { + DFSTestUtil.createFile(viewFs, new Path("/hdfs1/renameSingleNN"), + 0, (short) 1, 0); HdfsFileStatus fromFileStatus = nn1.getRpcServer().getFileInfo("/user1"); int fromNNId = Nfs3Utils.getNamenodeId(config, hdfs1.getUri()); FileHandle fromHandle = @@ -316,6 +316,8 @@ public void testNfsRenameSingleNN() throws Exception { nn1.getRpcServer().getFileInfo("/user1/renameSingleNN"); Assert.assertEquals(statusBeforeRename.isDirectory(), false); 
+ Path successFilePath = new Path("/user1/renameSingleNNSucess"); + hdfs1.delete(successFilePath, false); testNfsRename(fromHandle, "renameSingleNN", fromHandle, "renameSingleNNSucess", Nfs3Status.NFS3_OK); diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java index f7a92fac53501..0f03c6da93bf3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.concurrent.ConcurrentNavigableMap; +import io.netty.channel.Channel; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -52,7 +53,6 @@ import org.apache.hadoop.security.ShellBasedIdMapping; import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; import org.apache.hadoop.security.authorize.ProxyUsers; -import org.jboss.netty.channel.Channel; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml index 777921746a66f..4fdd9dbb7558e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project-dist hadoop-hdfs-rbf - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop HDFS-RBF Apache Hadoop HDFS-RBF jar @@ -54,8 +54,8 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> commons-logging - log4j - log4j + ch.qos.reload4j + reload4j @@ -71,7 +71,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.slf4j - slf4j-log4j12 + slf4j-reload4j provided @@ -93,6 +93,10 
@@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.fasterxml.jackson.core jackson-databind + + com.zaxxer + HikariCP-java7 + junit junit @@ -109,11 +113,21 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> curator-test test + + org.apache.derby + derby + test + org.mockito mockito-core test + + org.assertj + assertj-core + test + @@ -121,6 +135,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.maven.plugins maven-surefire-plugin + + + ${project.build.directory}/derby.log + + org.apache.maven.plugins @@ -175,10 +194,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> run - + - + diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreDatabase.sql b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreDatabase.sql new file mode 100644 index 0000000000000..07fea4c24bc04 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreDatabase.sql @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- Script to create a new Database in MySQL for the TokenStore + +CREATE DATABASE IF NOT EXISTS TokenStore; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreTables.sql b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreTables.sql new file mode 100644 index 0000000000000..d377c4e15f28e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreTables.sql @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- Script to generate all the tables for the TokenStore in MySQL + +USE TokenStore + +CREATE TABLE IF NOT EXISTS Tokens( + sequenceNum int NOT NULL, + tokenIdentifier varbinary(255) NOT NULL, + tokenInfo varbinary(255) NOT NULL, + modifiedTime timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY(sequenceNum, tokenIdentifier) +); + +CREATE TABLE IF NOT EXISTS DelegationKeys( + keyId int NOT NULL, + delegationKey varbinary(255) NOT NULL, + modifiedTime timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY(keyId) +); + +CREATE TABLE IF NOT EXISTS LastSequenceNum( + sequenceNum int NOT NULL +); + +-- Initialize the LastSequenceNum table with a single entry +INSERT INTO LastSequenceNum (sequenceNum) +SELECT 0 WHERE NOT EXISTS (SELECT * FROM LastSequenceNum); + +CREATE TABLE IF NOT EXISTS LastDelegationKeyId( + keyId int NOT NULL +); + +-- Initialize the LastDelegationKeyId table with a single entry +INSERT INTO LastDelegationKeyId (keyId) +SELECT 0 WHERE NOT EXISTS (SELECT * FROM LastDelegationKeyId); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreUser.sql b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreUser.sql new file mode 100644 index 0000000000000..844d7a2f94413 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/MySQL/TokenStoreUser.sql @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Script to create a new User in MySQL for the TokenStore + +-- Update TokenStore user and password on this script +CREATE USER IF NOT EXISTS 'TokenStoreUser'@'%' IDENTIFIED BY 'TokenStorePassword'; + +GRANT ALL PRIVILEGES ON TokenStore.* TO 'TokenStoreUser'@'%'; + +FLUSH PRIVILEGES; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/README b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/README new file mode 100644 index 0000000000000..724253153198f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/scripts/TokenStore/README @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +These scripts must be executed to create the TokenStore database, tables and users needed to use the +SQLDelegationTokenSecretManagerImpl as the delegation token secret manager: +1. 
TokenStoreDatabase.sql +2. TokenStoreTables.sql +3. TokenStoreUser.sql + +Note: The TokenStoreUser.sql defines a default user/password. You are highly encouraged to set +this to a proper strong password. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java index 5fa4755868bd7..e78ae4c4fa07c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.federation.metrics; +import java.math.BigInteger; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -54,22 +56,46 @@ public interface FederationMBean { /** * Get the total capacity of the federated cluster. + * The number could overflow if too big. In that case use + * {@link #getTotalCapacityBigInt()} instead. * @return Total capacity of the federated cluster. */ long getTotalCapacity(); /** * Get the used capacity of the federated cluster. + * The number could overflow if too big. In that case use + * {@link #getUsedCapacityBigInt()} instead. * @return Used capacity of the federated cluster. */ long getUsedCapacity(); /** * Get the remaining capacity of the federated cluster. + * The number could overflow if too big. In that case use + * {@link #getRemainingCapacityBigInt()} instead. * @return Remaining capacity of the federated cluster. */ long getRemainingCapacity(); + /** + * Get the total capacity (big integer) of the federated cluster. + * @return Total capacity of the federated cluster. 
+ */ + BigInteger getTotalCapacityBigInt(); + + /** + * Get the used capacity (big integer) of the federated cluster. + * @return Used capacity of the federated cluster. + */ + BigInteger getUsedCapacityBigInt(); + + /** + * Get the remaining capacity (big integer) of the federated cluster. + * @return Remaining capacity of the federated cluster. + */ + BigInteger getRemainingCapacityBigInt(); + /** * Get the total remote storage capacity mounted in the federated cluster. * @return Remote capacity of the federated cluster. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java index 5f06f5918ea5f..64936e28a651e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java @@ -33,7 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import static org.apache.hadoop.util.Time.monotonicNow; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index 2e54765d44224..9fdccad46e6a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ 
-61,9 +61,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; /** * Expose the Namenode metrics as the Router was one. @@ -709,6 +709,11 @@ public int getNumEnteringMaintenanceDataNodes() { return 0; } + @Override + public int getNumInServiceLiveDataNodes() { + return 0; + } + @Override public int getVolumeFailuresTotal() { return 0; @@ -820,7 +825,7 @@ public long getNumberOfSnapshottableDirs() { @Override public String getEnteringMaintenanceNodes() { - return "N/A"; + return "{}"; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java index 05398e77e24f5..1eae105b82127 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.lang.reflect.Method; +import java.math.BigInteger; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; @@ -88,7 +89,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Implementation of the Router metrics collector. 
@@ -377,14 +378,29 @@ public long getRemainingCapacity() { return getNameserviceAggregatedLong(MembershipStats::getAvailableSpace); } + @Override + public long getUsedCapacity() { + return getTotalCapacity() - getRemainingCapacity(); + } + + @Override + public BigInteger getTotalCapacityBigInt() { + return getNameserviceAggregatedBigInt(MembershipStats::getTotalSpace); + } + + @Override + public BigInteger getRemainingCapacityBigInt() { + return getNameserviceAggregatedBigInt(MembershipStats::getAvailableSpace); + } + @Override public long getProvidedSpace() { return getNameserviceAggregatedLong(MembershipStats::getProvidedSpace); } @Override - public long getUsedCapacity() { - return getTotalCapacity() - getRemainingCapacity(); + public BigInteger getUsedCapacityBigInt() { + return getTotalCapacityBigInt().subtract(getRemainingCapacityBigInt()); } @Override @@ -730,6 +746,22 @@ private long getNameserviceAggregatedLong(ToLongFunction f) { } } + private BigInteger getNameserviceAggregatedBigInt( + ToLongFunction f) { + try { + List states = getActiveNamenodeRegistrations(); + BigInteger sum = BigInteger.valueOf(0); + for (MembershipState state : states) { + long lvalue = f.applyAsLong(state.getStats()); + sum = sum.add(BigInteger.valueOf(lvalue)); + } + return sum; + } catch (IOException e) { + LOG.error("Unable to extract metrics: {}", e.getMessage()); + return new BigInteger("0"); + } + } + /** * Fetches the most active namenode memberships for all known nameservices. * The fetched membership may not or may not be active. 
Excludes expired diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java index 64bb10822f9f8..4d1e07562541a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java @@ -30,9 +30,10 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableRate; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Implementations of the JMX interface for the State Store metrics. @@ -136,6 +137,19 @@ public void setCacheSize(String name, int size) { counter.set(size); } + /** + * set the count of the location cache access information. + * @param name Name of the record. + * @param count count of the record. 
+ */ + public void setLocationCache(String name, long count) { + MutableGaugeLong counter = (MutableGaugeLong) registry.get(name); + if (counter == null) { + counter = registry.newGauge(name, name, count); + } + counter.set(count); + } + @VisibleForTesting public void reset() { reads.resetMinMax(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamespaceInfo.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamespaceInfo.java index 33edd30ec2e5f..1ef159cf8f2e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamespaceInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/FederationNamespaceInfo.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hdfs.server.federation.resolver; +import org.apache.commons.lang3.builder.CompareToBuilder; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.hdfs.server.federation.router.RemoteLocationContext; /** @@ -75,4 +78,45 @@ public String getBlockPoolId() { public String toString() { return this.nameserviceId + "->" + this.blockPoolId + ":" + this.clusterId; } -} \ No newline at end of file + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj == this) { + return true; + } + if (obj.getClass() != getClass()) { + return false; + } + FederationNamespaceInfo other = (FederationNamespaceInfo) obj; + return new EqualsBuilder() + .append(nameserviceId, other.nameserviceId) + .append(clusterId, other.clusterId) + .append(blockPoolId, other.blockPoolId) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(17, 31) + .append(nameserviceId) + .append(clusterId) + .append(blockPoolId) + 
.toHashCode(); + } + + @Override + public int compareTo(RemoteLocationContext info) { + if (info instanceof FederationNamespaceInfo) { + FederationNamespaceInfo other = (FederationNamespaceInfo) info; + return new CompareToBuilder() + .append(nameserviceId, other.nameserviceId) + .append(clusterId, other.clusterId) + .append(blockPoolId, other.blockPoolId) + .toComparison(); + } + return super.compareTo(info); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java index 6117e9a072d91..9d2dd1651a02d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateNamenodeRegistrationRequest; import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; import org.apache.hadoop.hdfs.server.federation.store.records.MembershipStats; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,9 +123,13 @@ public boolean loadCache(boolean force) { // Our cache depends on the store, update it first try { MembershipStore membership = getMembershipStore(); - membership.loadCache(force); + if (!membership.loadCache(force)) { + return false; + } DisabledNameserviceStore disabled = getDisabledNameserviceStore(); - disabled.loadCache(force); + if (!disabled.loadCache(force)) { + return false; + } } catch (IOException e) { LOG.error("Cannot update membership from the State Store", e); } @@ -263,7 +268,8 @@ public boolean 
registerNamenode(NamenodeStatusReport report) MembershipState record = MembershipState.newInstance( routerId, report.getNameserviceId(), report.getNamenodeId(), - report.getClusterId(), report.getBlockPoolId(), report.getRpcAddress(), + report.getClusterId(), report.getBlockPoolId(), + NetUtils.normalizeIP2HostName(report.getRpcAddress()), report.getServiceAddress(), report.getLifelineAddress(), report.getWebScheme(), report.getWebAddress(), report.getState(), report.getSafemode()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java index 96b560c649a97..756f31078dcf1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java @@ -42,14 +42,20 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.regex.Pattern; +import java.util.ArrayList; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics; import org.apache.hadoop.hdfs.server.federation.resolver.order.DestinationOrder; import org.apache.hadoop.hdfs.server.federation.router.Router; +import org.apache.hadoop.hdfs.server.federation.router.RouterRpcServer; import org.apache.hadoop.hdfs.server.federation.store.MountTableStore; import 
org.apache.hadoop.hdfs.server.federation.store.StateStoreCache; import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; @@ -61,9 +67,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; /** * Mount table to map between global paths and remote locations. This allows the @@ -93,6 +99,8 @@ public class MountTableResolver private final TreeMap tree = new TreeMap<>(); /** Path -> Remote location. */ private final Cache locationCache; + private final LongAdder locCacheMiss = new LongAdder(); + private final LongAdder locCacheAccess = new LongAdder(); /** Default nameservice when no mount matches the math. */ private String defaultNameService = ""; @@ -104,6 +112,8 @@ public class MountTableResolver private final Lock readLock = readWriteLock.readLock(); private final Lock writeLock = readWriteLock.writeLock(); + /** Trash Current matching pattern. */ + private static final String TRASH_PATTERN = "/(Current|[0-9]+)"; @VisibleForTesting public MountTableResolver(Configuration conf) { @@ -337,6 +347,52 @@ public void refreshEntries(final Collection entries) { this.init = true; } + /** + * Check if PATH is the trail associated with the Trash. + * + * @param path A path. + */ + @VisibleForTesting + public static boolean isTrashPath(String path) throws IOException { + Pattern pattern = Pattern.compile( + "^" + getTrashRoot() + TRASH_PATTERN + "/"); + return pattern.matcher(path).find(); + } + + @VisibleForTesting + public static String getTrashRoot() throws IOException { + // Gets the Trash directory for the current user. 
+ return FileSystem.USER_HOME_PREFIX + "/" + + RouterRpcServer.getRemoteUser().getShortUserName() + "/" + + FileSystem.TRASH_PREFIX; + } + + /** + * Subtract a TrashCurrent to get a new path. + * + * @param path A path. + */ + @VisibleForTesting + public static String subtractTrashCurrentPath(String path) + throws IOException { + return path.replaceAll("^" + + getTrashRoot() + TRASH_PATTERN, ""); + } + + /** + * If path is a path related to the trash can, + * subtract TrashCurrent to return a new path. + * + * @param path A path. + */ + private static String processTrashPath(String path) throws IOException { + if (isTrashPath(path)) { + return subtractTrashCurrentPath(path); + } else { + return path; + } + } + /** * Replaces the current in-memory cached of the mount table with a new * version fetched from the data store. @@ -346,7 +402,9 @@ public boolean loadCache(boolean force) { try { // Our cache depends on the store, update it first MountTableStore mountTable = this.getMountTableStore(); - mountTable.loadCache(force); + if (!mountTable.loadCache(force)) { + return false; + } GetMountTableEntriesRequest request = GetMountTableEntriesRequest.newInstance("/"); @@ -354,6 +412,9 @@ public boolean loadCache(boolean force) { mountTable.getMountTableEntries(request); List records = response.getEntries(); refreshEntries(records); + StateStoreMetrics metrics = this.getMountTableStore().getDriver().getMetrics(); + metrics.setLocationCache("locationCacheMissed", this.getLocCacheMiss().sum()); + metrics.setLocationCache("locationCacheAccessed", this.getLocCacheAccess().sum()); } catch (IOException e) { LOG.error("Cannot fetch mount table entries from State Store", e); return false; @@ -381,18 +442,29 @@ public void clear() { public PathLocation getDestinationForPath(final String path) throws IOException { verifyMountTable(); + PathLocation res; readLock.lock(); try { if (this.locationCache == null) { - return lookupLocation(path); + res = 
lookupLocation(processTrashPath(path)); + } else { + Callable meh = (Callable) () -> { + this.getLocCacheMiss().increment(); + return lookupLocation(processTrashPath(path)); + }; + res = this.locationCache.get(processTrashPath(path), meh); + this.getLocCacheAccess().increment(); } - Callable meh = new Callable() { - @Override - public PathLocation call() throws Exception { - return lookupLocation(path); + if (isTrashPath(path)) { + List remoteLocations = new ArrayList<>(); + for (RemoteLocation remoteLocation : res.getDestinations()) { + remoteLocations.add(new RemoteLocation(remoteLocation, path)); } - }; - return this.locationCache.get(path, meh); + return new PathLocation(path, remoteLocations, + res.getDestinationOrder()); + } else { + return res; + } } catch (ExecutionException e) { Throwable cause = e.getCause(); final IOException ioe; @@ -450,8 +522,11 @@ public MountTable getMountPoint(final String path) throws IOException { @Override public List getMountPoints(final String str) throws IOException { verifyMountTable(); - final String path = RouterAdmin.normalizeFileSystemPath(str); + String path = RouterAdmin.normalizeFileSystemPath(str); + if (isTrashPath(path)) { + path = subtractTrashCurrentPath(path); + } Set children = new TreeSet<>(); readLock.lock(); try { @@ -669,4 +744,12 @@ public void setDefaultNSEnable(boolean defaultNSRWEnable) { public void setDisabled(boolean disable) { this.disabled = disable; } + + public LongAdder getLocCacheMiss() { + return locCacheMiss; + } + + public LongAdder getLocCacheAccess() { + return locCacheAccess; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java index b09a883ebcb77..9beffe757d424 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java @@ -32,7 +32,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Mount table resolver that supports multiple locations for each mount entry. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/RemoteLocation.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/RemoteLocation.java index 77d050062e740..4cb6516e4b267 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/RemoteLocation.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/RemoteLocation.java @@ -61,6 +61,20 @@ public RemoteLocation(String nsId, String nnId, String dPath, String sPath) { this.srcPath = sPath; } + /** + * Use the Ns and Nn of a remote location + * and another path to create a new remote location pointing. + * + * @param remoteLocation A remoteLocation. + * @param path Path in the destination namespace. 
+ */ + public RemoteLocation(RemoteLocation remoteLocation, String path) { + this.nameserviceId = remoteLocation.nameserviceId; + this.namenodeId = remoteLocation.namenodeId; + this.dstPath = path; + this.srcPath = path; + } + @Override public String getNameserviceId() { String ret = this.nameserviceId; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/AvailableSpaceResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/AvailableSpaceResolver.java index 883a126236e6f..88e20649506dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/AvailableSpaceResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/AvailableSpaceResolver.java @@ -39,7 +39,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Order the destinations based on available space. 
This resolver uses a diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/HashResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/HashResolver.java index 455a3edb87d0b..3f8c354913c18 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/HashResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/HashResolver.java @@ -29,7 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Order the destinations based on consistent hashing. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LocalResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LocalResolver.java index 58a8ed278b3dc..3da655e35d094 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LocalResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LocalResolver.java @@ -42,8 +42,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.net.HostAndPort; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.net.HostAndPort; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/RandomResolver.java 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/RandomResolver.java index 13643e5e9f754..d21eef545b3b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/RandomResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/RandomResolver.java @@ -25,7 +25,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.Iterables; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; /** * Order the destinations randomly. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java index 74bbbb572fd27..9ec3b54ed50b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java @@ -32,7 +32,7 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPool.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPool.java index b84848089a319..52e7cebd26017 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPool.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPool.java @@ -31,7 +31,7 @@ import javax.net.SocketFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -47,7 +47,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; @@ -379,7 +379,7 @@ protected static ConnectionContext newConnection(Configuration conf, throw new IllegalStateException(msg); } ProtoImpl classes = PROTO_MAP.get(proto); - RPC.setProtocolEngine(conf, classes.protoPb, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, classes.protoPb, ProtobufRpcEngine2.class); final RetryPolicy defaultPolicy = RetryUtils.getDefaultRetryPolicy(conf, HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY, diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPoolId.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPoolId.java index 7cb343b1d53e8..8b537f9d94c11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPoolId.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPoolId.java @@ -23,7 +23,7 @@ import 
java.util.Collections; import java.util.List; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherService.java index e3ecd266ab3df..0246ea98c29f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherService.java @@ -34,17 +34,19 @@ import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; import org.apache.hadoop.hdfs.server.federation.store.records.RouterState; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.AbstractService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import 
org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalListener; +import org.apache.hadoop.thirdparty.com.google.common.cache.RemovalNotification; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * This service is invoked from {@link MountTableStore} when there is change in @@ -170,7 +172,12 @@ public RouterClient load(String adminAddress) throws IOException { @VisibleForTesting protected RouterClient createRouterClient(InetSocketAddress routerSocket, Configuration config) throws IOException { - return new RouterClient(routerSocket, config); + return SecurityUtil.doAsLoginUser(() -> { + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); + } + return new RouterClient(routerSocket, config); + }); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherThread.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherThread.java index c9967a20736e6..a077c4b3f45a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherThread.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/MountTableRefresherThread.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hdfs.server.federation.resolver.MountTableManager; import org.apache.hadoop.hdfs.server.federation.store.protocol.RefreshMountTableEntriesRequest; import org.apache.hadoop.hdfs.server.federation.store.protocol.RefreshMountTableEntriesResponse; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,10 +63,16 @@ public 
MountTableRefresherThread(MountTableManager manager, @Override public void run() { try { - RefreshMountTableEntriesResponse refreshMountTableEntries = - manager.refreshMountTableEntries( - RefreshMountTableEntriesRequest.newInstance()); - success = refreshMountTableEntries.getResult(); + SecurityUtil.doAsLoginUser(() -> { + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); + } + RefreshMountTableEntriesResponse refreshMountTableEntries = manager + .refreshMountTableEntries( + RefreshMountTableEntriesRequest.newInstance()); + success = refreshMountTableEntries.getResult(); + return true; + }); } catch (IOException e) { LOG.error("Failed to refresh mount table entries cache at router {}", adminAddress, e); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java index ce6db5ce05c0a..ffae90e84895c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java @@ -221,12 +221,16 @@ private void updateState() { LOG.error("Namenode is not operational: {}", getNamenodeDesc()); } else if (report.haStateValid()) { // block and HA status available - LOG.debug("Received service state: {} from HA namenode: {}", - report.getState(), getNamenodeDesc()); + if (LOG.isDebugEnabled()) { + LOG.debug("Received service state: {} from HA namenode: {}", + report.getState(), getNamenodeDesc()); + } } else if (localTarget == null) { // block info available, HA status not expected - LOG.debug( - "Reporting non-HA namenode as operational: " + getNamenodeDesc()); + if (LOG.isDebugEnabled()) { + LOG.debug( + 
"Reporting non-HA namenode as operational: {}", getNamenodeDesc()); + } } else { // block info available, HA status should be available, but was not // fetched do nothing and let the current state stand diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/PeriodicService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/PeriodicService.java index 5e1222247286c..b690b8685c0a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/PeriodicService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/PeriodicService.java @@ -28,7 +28,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * Service to periodically execute a runnable. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java index 2919ddc4c506d..ee938657d2287 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java @@ -42,8 +42,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ListMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ArrayListMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ListMultimap; /** * Module that implements the quota relevant RPC calls @@ -80,6 +80,9 @@ public Quota(Router router, RouterRpcServer server) { */ public void setQuota(String path, long namespaceQuota, long storagespaceQuota, StorageType type, boolean checkMountEntry) throws IOException { + if (!router.isQuotaEnabled()) { + throw new IOException("The quota system is disabled in Router."); + } if (checkMountEntry && isMountEntry(path)) { throw new AccessControlException( "Permission denied: " + RouterRpcServer.getRemoteUser() @@ -101,9 +104,6 @@ void setQuotaInternal(String path, List locations, long namespaceQuota, long storagespaceQuota, StorageType type) throws IOException { rpcServer.checkOperation(OperationCategory.WRITE); - if (!router.isQuotaEnabled()) { - throw new IOException("The quota system is disabled in Router."); - } // Set quota for current path and its children mount table path. 
if (locations == null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java index 64fdabe43b18c..1b637899f329b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java @@ -56,7 +56,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Router that provides a unified view of multiple federated HDFS clusters. It @@ -682,7 +682,7 @@ public ActiveNamenodeResolver getNamenodeResolver() { /** * Get the state store interface for the router heartbeats. * - * @return FederationRouterStateStore state store API handle. + * @return RouterStore state store API handle. 
*/ public RouterStore getRouterStateManager() { if (this.routerStateManager == null && this.stateStore != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java index 5fd7c79f88f1a..4dd0693e97ae2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java @@ -29,7 +29,7 @@ import java.util.Map; import java.util.Set; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -75,14 +75,18 @@ import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateMountTableEntryResponse; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC.Server; +import org.apache.hadoop.ipc.RefreshCallQueueProtocol; import org.apache.hadoop.ipc.RefreshRegistry; import org.apache.hadoop.ipc.RefreshResponse; import org.apache.hadoop.ipc.proto.GenericRefreshProtocolProtos; +import org.apache.hadoop.ipc.proto.RefreshCallQueueProtocolProtos; import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolPB; import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolServerSideTranslatorPB; +import org.apache.hadoop.ipc.protocolPB.RefreshCallQueueProtocolPB; +import org.apache.hadoop.ipc.protocolPB.RefreshCallQueueProtocolServerSideTranslatorPB; import 
org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.ProxyUsers; @@ -90,7 +94,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.BlockingService; /** @@ -98,7 +102,7 @@ * router. It is created, started, and stopped by {@link Router}. */ public class RouterAdminServer extends AbstractService - implements RouterAdminProtocol { + implements RouterAdminProtocol, RefreshCallQueueProtocol { private static final Logger LOG = LoggerFactory.getLogger(RouterAdminServer.class); @@ -136,7 +140,7 @@ public RouterAdminServer(Configuration conf, Router router) RBFConfigKeys.DFS_ROUTER_ADMIN_HANDLER_COUNT_DEFAULT); RPC.setProtocolEngine(this.conf, RouterAdminProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); RouterAdminProtocolServerSideTranslatorPB routerAdminProtocolTranslator = new RouterAdminProtocolServerSideTranslatorPB(this); @@ -184,8 +188,16 @@ public RouterAdminServer(Configuration conf, Router router) GenericRefreshProtocolProtos.GenericRefreshProtocolService. newReflectiveBlockingService(genericRefreshXlator); + RefreshCallQueueProtocolServerSideTranslatorPB refreshCallQueueXlator = + new RefreshCallQueueProtocolServerSideTranslatorPB(this); + BlockingService refreshCallQueueService = + RefreshCallQueueProtocolProtos.RefreshCallQueueProtocolService. 
+ newReflectiveBlockingService(refreshCallQueueXlator); + DFSUtil.addPBProtocol(conf, GenericRefreshProtocolPB.class, genericRefreshService, adminServer); + DFSUtil.addPBProtocol(conf, RefreshCallQueueProtocolPB.class, + refreshCallQueueService, adminServer); } /** @@ -643,4 +655,12 @@ public boolean refreshSuperUserGroupsConfiguration() throws IOException { ProxyUsers.refreshSuperUserGroupsConfiguration(); return true; } + + @Override // RefreshCallQueueProtocol + public void refreshCallQueue() throws IOException { + LOG.info("Refreshing call queue."); + + Configuration configuration = new Configuration(); + router.getRpcServer().getServer().refreshCallQueue(configuration); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClient.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClient.java index 0641c0b82afb6..ee29b7dd2b513 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClient.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hdfs.protocolPB.RouterAdminProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.federation.resolver.MountTableManager; import org.apache.hadoop.hdfs.server.federation.resolver.RouterGenericManager; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; @@ -47,7 +47,7 @@ private static RouterAdminProtocolTranslatorPB createRouterProxy( throws IOException { RPC.setProtocolEngine( - conf, RouterAdminProtocolPB.class, ProtobufRpcEngine.class); + conf, RouterAdminProtocolPB.class, ProtobufRpcEngine2.class); AtomicBoolean fallbackToSimpleAuth = new 
AtomicBoolean(false); final long version = RPC.getProtocolVersion(RouterAdminProtocolPB.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java index 6a28c4f505889..8576eae160e76 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java @@ -96,13 +96,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.FileNotFoundException; import java.io.IOException; import java.net.ConnectException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.Iterator; @@ -771,13 +772,14 @@ public DirectoryListing getListing(String src, byte[] startAfter, List> listings = getListingInt(src, startAfter, needLocation); - Map nnListing = new TreeMap<>(); + TreeMap nnListing = new TreeMap<>(); int totalRemainingEntries = 0; int remainingEntries = 0; boolean namenodeListingExists = false; + // Check the subcluster listing with the smallest name to make sure + // no file is skipped across subclusters + String lastName = null; if (listings != null) { - // Check the subcluster listing with the smallest name - String lastName = null; for (RemoteResult result : listings) { if (result.hasException()) { IOException ioe = result.getException(); @@ -824,6 +826,10 @@ public DirectoryListing getListing(String src, byte[] startAfter, // Add mount points at this level in the tree final List children = 
subclusterResolver.getMountPoints(src); + // Sort the list as the entries from subcluster are also sorted + if (children != null) { + Collections.sort(children); + } if (children != null) { // Get the dates for each mount point Map dates = getMountPointDates(src); @@ -838,9 +844,27 @@ public DirectoryListing getListing(String src, byte[] startAfter, HdfsFileStatus dirStatus = getMountPointStatus(childPath.toString(), 0, date); - // This may overwrite existing listing entries with the mount point - // TODO don't add if already there? - nnListing.put(child, dirStatus); + // if there is no subcluster path, always add mount point + if (lastName == null) { + nnListing.put(child, dirStatus); + } else { + if (shouldAddMountPoint(child, + lastName, startAfter, remainingEntries)) { + // This may overwrite existing listing entries with the mount point + // TODO don't add if already there? + nnListing.put(child, dirStatus); + } + } + } + // Update the remaining count to include left mount points + if (nnListing.size() > 0) { + String lastListing = nnListing.lastKey(); + for (int i = 0; i < children.size(); i++) { + if (children.get(i).compareTo(lastListing) > 0) { + remainingEntries += (children.size() - i); + break; + } + } } } @@ -1155,7 +1179,7 @@ public void setBalancerBandwidth(long bandwidth) throws IOException { rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); RemoteMethod method = new RemoteMethod("setBalancerBandwidth", - new Class[] {Long.class}, bandwidth); + new Class[] {long.class}, bandwidth); final Set nss = namenodeResolver.getNamespaces(); rpcClient.invokeConcurrent(nss, method, true, false); } @@ -1768,10 +1792,17 @@ public void satisfyStoragePolicy(String path) throws IOException { } @Override - public HAServiceProtocol.HAServiceState getHAServiceState() - throws IOException { - rpcServer.checkOperation(NameNode.OperationCategory.READ, false); - return null; + public DatanodeInfo[] getSlowDatanodeReport() throws IOException { + 
rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + return rpcServer.getSlowDatanodeReport(true, 0); + } + + @Override + public HAServiceProtocol.HAServiceState getHAServiceState() { + if (rpcServer.isSafeMode()) { + return HAServiceProtocol.HAServiceState.STANDBY; + } + return HAServiceProtocol.HAServiceState.ACTIVE; } /** @@ -1832,6 +1863,8 @@ private RemoteLocation getFirstMatchingLocation(RemoteLocation location, /** * Aggregate content summaries for each subcluster. + * If the mount point has multiple destinations + * add the quota set value only once. * * @param summaries Collection of individual summaries. * @return Aggregated content summary. @@ -1854,9 +1887,9 @@ private ContentSummary aggregateContentSummary( length += summary.getLength(); fileCount += summary.getFileCount(); directoryCount += summary.getDirectoryCount(); - quota += summary.getQuota(); + quota = summary.getQuota(); spaceConsumed += summary.getSpaceConsumed(); - spaceQuota += summary.getSpaceQuota(); + spaceQuota = summary.getSpaceQuota(); // We return from the first response as we assume that the EC policy // of each sub-cluster is same. if (ecPolicy.isEmpty()) { @@ -1950,7 +1983,8 @@ private static FsPermission getParentPermission(final FsPermission mask) { * @param date Map with the dates. * @return New HDFS file status representing a mount point. 
*/ - private HdfsFileStatus getMountPointStatus( + @VisibleForTesting + HdfsFileStatus getMountPointStatus( String name, int childrenNum, long date) { long modTime = date; long accessTime = date; @@ -2001,6 +2035,8 @@ private HdfsFileStatus getMountPointStatus( } } long inodeId = 0; + Path path = new Path(name); + String nameStr = path.getName(); return new HdfsFileStatus.Builder() .isdir(true) .mtime(modTime) @@ -2009,7 +2045,7 @@ private HdfsFileStatus getMountPointStatus( .owner(owner) .group(group) .symlink(new byte[0]) - .path(DFSUtil.string2Bytes(name)) + .path(DFSUtil.string2Bytes(nameStr)) .fileId(inodeId) .children(childrenNum) .flags(flags) @@ -2107,6 +2143,36 @@ private List> getListingInt( } } + /** + * Check if we should add the mount point into the total listing. + * This should be done under either of the two cases: + * 1) current mount point is between startAfter and cutoff lastEntry. + * 2) there are no remaining entries from subclusters and this mount + * point is bigger than all files from subclusters + * This is to make sure that the following batch of + * getListing call will use the correct startAfter, which is lastEntry from + * subcluster. + * + * @param mountPoint mount point inside the router that is to be added + * @param lastEntry biggest listing from subcluster + * @param startAfter starting listing from client, used to define listing + * start boundary + * @param remainingEntries how many entries left from subcluster + * @return true if the mount point should be added to the listing, false otherwise + */ + private static boolean shouldAddMountPoint( + String mountPoint, String lastEntry, byte[] startAfter, + int remainingEntries) { + if (mountPoint.compareTo(DFSUtil.bytes2String(startAfter)) > 0 && + mountPoint.compareTo(lastEntry) <= 0) { + return true; + } + if (remainingEntries == 0 && mountPoint.compareTo(lastEntry) >= 0) { + return true; + } + return false; + } + /** * Checks if the path is a directory and is supposed to be present in all * subclusters. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsck.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsck.java index 898099c908a03..9a90677644f05 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsck.java @@ -33,6 +33,7 @@ import java.util.Map.Entry; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServiceState; import org.apache.hadoop.hdfs.server.federation.store.MembershipStore; import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; @@ -48,6 +49,7 @@ * Wrapper for the Router to offer the Namenode FSCK. */ @InterfaceAudience.Private +@InterfaceStability.Unstable public class RouterFsck { public static final Logger LOG = @@ -69,6 +71,10 @@ public RouterFsck(Router router, Map pmap, public void fsck() { final long startTime = Time.monotonicNow(); try { + String warnMsg = "Now FSCK to DFSRouter is unstable feature. 
" + + "There may be incompatible changes between releases."; + LOG.warn(warnMsg); + out.println(warnMsg); String msg = "Federated FSCK started by " + UserGroupInformation.getCurrentUser() + " from " + remoteAddress + " at " + new Date(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsckServlet.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsckServlet.java index 41216fce702ba..a439e5c0ce84e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsckServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterFsckServlet.java @@ -25,20 +25,19 @@ import java.util.Map; import javax.servlet.ServletContext; -import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.server.common.JspHelper; +import org.apache.hadoop.hdfs.server.namenode.DfsServlet; import org.apache.hadoop.security.UserGroupInformation; /** * This class is used in Namesystem's web server to do fsck on namenode. */ @InterfaceAudience.Private -public class RouterFsckServlet extends HttpServlet { +public class RouterFsckServlet extends DfsServlet { /** for java.io.Serializable. */ private static final long serialVersionUID = 1L; @@ -67,15 +66,4 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) } } - /** - * Copy from {@link org.apache.hadoop.hdfs.server.namenode.DfsServlet}. 
- * @param request Http request from the user - * @param conf configuration - * @return ugi of the requested user - * @throws IOException failed to get ugi - */ - protected UserGroupInformation getUGI(HttpServletRequest request, - Configuration conf) throws IOException { - return JspHelper.getUGI(getServletContext(), request, conf); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHeartbeatService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHeartbeatService.java index c497d85335922..37407c2a3b8aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHeartbeatService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHeartbeatService.java @@ -21,7 +21,7 @@ import java.util.List; import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.federation.store.CachedRecordStore; import org.apache.hadoop.hdfs.server.federation.store.MembershipStore; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java index b1fcc0c6b4c0b..85044399f9815 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java @@ -125,6 +125,9 @@ private static void setupServlets( RouterFsckServlet.PATH_SPEC, RouterFsckServlet.class, true); + 
httpServer.addInternalServlet(RouterNetworkTopologyServlet.SERVLET_NAME, + RouterNetworkTopologyServlet.PATH_SPEC, + RouterNetworkTopologyServlet.class); } public InetSocketAddress getHttpAddress() { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNetworkTopologyServlet.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNetworkTopologyServlet.java new file mode 100644 index 0000000000000..e517066c81c20 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNetworkTopologyServlet.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.federation.router; + +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.namenode.NetworkTopologyServlet; +import org.apache.hadoop.net.Node; +import org.apache.hadoop.util.StringUtils; + +import javax.servlet.ServletContext; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.io.PrintStream; +import java.util.Arrays; +import java.util.List; + +/** + * A servlet to print out the network topology from router. + */ +public class RouterNetworkTopologyServlet extends NetworkTopologyServlet { + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws IOException { + final ServletContext context = getServletContext(); + + String format = parseAcceptHeader(request); + if (FORMAT_TEXT.equals(format)) { + response.setContentType("text/plain; charset=UTF-8"); + } else if (FORMAT_JSON.equals(format)) { + response.setContentType("application/json; charset=UTF-8"); + } + + Router router = RouterHttpServer.getRouterFromContext(context); + DatanodeInfo[] datanodeReport = + router.getRpcServer().getDatanodeReport( + HdfsConstants.DatanodeReportType.ALL); + List datanodeInfos = Arrays.asList(datanodeReport); + + try (PrintStream out = new PrintStream( + response.getOutputStream(), false, "UTF-8")) { + printTopology(out, datanodeInfos, format); + } catch (Throwable t) { + String errMsg = "Print network topology failed. 
" + + StringUtils.stringifyException(t); + response.sendError(HttpServletResponse.SC_GONE, errMsg); + throw new IOException(errMsg); + } finally { + response.getOutputStream().close(); + } + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java index dae4b9356436c..8f07b4dfba3ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java @@ -18,8 +18,11 @@ package org.apache.hadoop.hdfs.server.federation.router; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_IP_PROXY_USERS; import java.io.EOFException; import java.io.FileNotFoundException; @@ -29,6 +32,7 @@ import java.lang.reflect.Method; import java.net.ConnectException; import java.net.InetSocketAddress; +import java.net.SocketException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -65,8 +69,11 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision; +import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RetriableException; +import 
org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.Server.Call; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.UserGroupInformation; @@ -74,8 +81,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * A client proxy for Router to NN communication using the NN ClientProtocol. @@ -114,11 +121,17 @@ public class RouterRpcClient { private final RetryPolicy retryPolicy; /** Optional perf monitor. */ private final RouterRpcMonitor rpcMonitor; + /** Field separator of CallerContext. */ + private final String contextFieldSeparator; /** Pattern to parse a stack trace line. */ private static final Pattern STACK_TRACE_PATTERN = Pattern.compile("\\tat (.*)\\.(.*)\\((.*):(\\d*)\\)"); + private static final String CLIENT_IP_STR = "clientIp"; + private static final String CLIENT_PORT_STR = "clientPort"; + + private final boolean enableProxyUser; /** * Create a router RPC client to manage remote procedure calls to NNs. 
@@ -135,6 +148,9 @@ public RouterRpcClient(Configuration conf, Router router, this.namenodeResolver = resolver; Configuration clientConf = getClientConfiguration(conf); + this.contextFieldSeparator = + clientConf.get(HADOOP_CALLER_CONTEXT_SEPARATOR_KEY, + HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT); this.connectionManager = new ConnectionManager(clientConf); this.connectionManager.start(); @@ -172,6 +188,8 @@ public RouterRpcClient(Configuration conf, Router router, this.retryPolicy = RetryPolicies.failoverOnNetworkException( RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts, maxRetryAttempts, failoverSleepBaseMillis, failoverSleepMaxMillis); + String[] ipProxyUsers = conf.getStrings(DFS_NAMENODE_IP_PROXY_USERS); + this.enableProxyUser = ipProxyUsers != null && ipProxyUsers.length > 0; } /** @@ -303,7 +321,7 @@ private ConnectionContext getConnection(UserGroupInformation ugi, String nsId, // TODO Add tokens from the federated UGI UserGroupInformation connUGI = ugi; - if (UserGroupInformation.isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled() || this.enableProxyUser) { UserGroupInformation routerUser = UserGroupInformation.getLoginUser(); connUGI = UserGroupInformation.createProxyUser( ugi.getUserName(), routerUser); @@ -404,6 +422,8 @@ private Object invokeMethod( + router.getRouterId()); } + addClientInfoToCallerContext(ugi); + Object ret = null; if (rpcMonitor != null) { rpcMonitor.proxyOp(); @@ -518,6 +538,42 @@ private Object invokeMethod( } } + /** + * For tracking which is the actual client address. + * It adds trace info "clientIp:ip", "clientPort:port" and "realUser:userName" + * in the caller context, removing the old values if they were + * already present. + */ + private void addClientInfoToCallerContext(UserGroupInformation ugi) { + CallerContext ctx = CallerContext.getCurrent(); + String origContext = ctx == null ? null : ctx.getContext(); + byte[] origSignature = ctx == null ? 
null : ctx.getSignature(); + String realUser = null; + if (ugi.getRealUser() != null) { + realUser = ugi.getRealUser().getUserName(); + } + CallerContext.Builder builder = + new CallerContext.Builder("", contextFieldSeparator) + .append(CallerContext.CLIENT_IP_STR, Server.getRemoteAddress()) + .append(CallerContext.CLIENT_PORT_STR, + Integer.toString(Server.getRemotePort())) + .append(CallerContext.REAL_USER_STR, realUser) + .setSignature(origSignature); + // Append the original caller context + if (origContext != null) { + for (String part : origContext.split(contextFieldSeparator)) { + String[] keyValue = + part.split(CallerContext.Builder.KEY_VALUE_SEPARATOR, 2); + if (keyValue.length == 2) { + builder.appendIfAbsent(keyValue[0], keyValue[1]); + } else if (keyValue.length == 1) { + builder.append(keyValue[0]); + } + } + } + CallerContext.setCurrent(builder.build()); + } + /** * Invokes a method on the designated object. Catches exceptions specific to * the invocation. @@ -582,9 +638,9 @@ private Object invoke(String nsId, int retryCount, final Method method, * @return If the exception comes from an unavailable subcluster. */ public static boolean isUnavailableException(IOException ioe) { - if (ioe instanceof ConnectException || - ioe instanceof ConnectTimeoutException || + if (ioe instanceof ConnectTimeoutException || ioe instanceof EOFException || + ioe instanceof SocketException || ioe instanceof StandbyException) { return true; } @@ -1017,25 +1073,17 @@ private static boolean isExpectedValue(Object expectedValue, Object value) { * Invoke method in all locations and return success if any succeeds. * * @param The type of the remote location. - * @param The type of the remote method return. * @param locations List of remote locations to call concurrently. * @param method The remote method and parameters to invoke. * @return If the call succeeds in any location. * @throws IOException If any of the calls return an exception. 
*/ - public boolean invokeAll( + public boolean invokeAll( final Collection locations, final RemoteMethod method) - throws IOException { - boolean anyResult = false; + throws IOException { Map results = invokeConcurrent(locations, method, false, false, Boolean.class); - for (Boolean value : results.values()) { - boolean result = value.booleanValue(); - if (result) { - anyResult = true; - } - } - return anyResult; + return results.containsValue(true); } /** @@ -1239,6 +1287,9 @@ public Map invokeConcurrent( List orderedLocations = new ArrayList<>(); List> callables = new ArrayList<>(); + // transfer originCall & callerContext to worker threads of executor. + final Call originCall = Server.getCurCall().get(); + final CallerContext originContext = CallerContext.getCurrent(); for (final T location : locations) { String nsId = location.getNameserviceId(); final List namenodes = @@ -1256,12 +1307,20 @@ public Map invokeConcurrent( nnLocation = (T)new RemoteLocation(nsId, nnId, location.getDest()); } orderedLocations.add(nnLocation); - callables.add(() -> invokeMethod(ugi, nnList, proto, m, paramList)); + callables.add( + () -> { + transferThreadLocalContext(originCall, originContext); + return invokeMethod(ugi, nnList, proto, m, paramList); + }); } } else { // Call the objectGetter in order of nameservices in the NS list orderedLocations.add(location); - callables.add(() -> invokeMethod(ugi, namenodes, proto, m, paramList)); + callables.add( + () -> { + transferThreadLocalContext(originCall, originContext); + return invokeMethod(ugi, namenodes, proto, m, paramList); + }); } } @@ -1328,6 +1387,20 @@ public Map invokeConcurrent( } } + /** + * Transfer origin thread local context which is necessary to current + * worker thread when invoking method concurrently by executor service. + * + * @param originCall origin Call required for getting remote client ip. + * @param originContext origin CallerContext which should be transferred + * to server side. 
+ */ + private void transferThreadLocalContext( + final Call originCall, final CallerContext originContext) { + Server.getCurCall().set(originCall); + CallerContext.setCurrent(originContext); + } + /** * Get a prioritized list of NNs that share the same nameservice ID (in the * same namespace). NNs that are reported as ACTIVE will be first in the list. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java index 345ec705f2cd8..289848df30925 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java @@ -133,7 +133,7 @@ import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC.Server; import org.apache.hadoop.ipc.RemoteException; @@ -156,7 +156,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.BlockingService; /** @@ -256,7 +256,7 @@ public RouterRpcServer(Configuration configuration, Router router, readerQueueSize); RPC.setProtocolEngine(this.conf, ClientNamenodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); ClientNamenodeProtocolServerSideTranslatorPB clientProtocolServerTranslator = @@ -298,7 +298,7 @@ public RouterRpcServer(Configuration configuration, Router router, 
.setBindAddress(confRpcAddress.getHostName()) .setPort(confRpcAddress.getPort()) .setNumHandlers(handlerCount) - .setnumReaders(readerCount) + .setNumReaders(readerCount) .setQueueSizePerHandler(handlerQueueSize) .setVerbose(false) .setSecretManager(this.securityManager.getSecretManager()) @@ -526,8 +526,7 @@ void checkOperation(OperationCategory op) * client requests. */ private void checkSafeMode() throws StandbyException { - RouterSafemodeService safemodeService = router.getSafemodeService(); - if (safemodeService != null && safemodeService.isInSafeMode()) { + if (isSafeMode()) { // Throw standby exception, router is not available if (rpcMonitor != null) { rpcMonitor.routerFailureSafemode(); @@ -538,6 +537,16 @@ private void checkSafeMode() throws StandbyException { } } + /** + * Return true if the Router is in safe mode. + * + * @return true if the Router is in safe mode. + */ + boolean isSafeMode() { + RouterSafemodeService safemodeService = router.getSafemodeService(); + return (safemodeService != null && safemodeService.isInSafeMode()); + } + /** * Get the name of the method that is calling this function. 
* @@ -889,24 +898,7 @@ public DatanodeInfo[] getDatanodeReport( Map results = rpcClient.invokeConcurrent(nss, method, requireResponse, false, timeOutMs, DatanodeInfo[].class); - for (Entry entry : - results.entrySet()) { - FederationNamespaceInfo ns = entry.getKey(); - DatanodeInfo[] result = entry.getValue(); - for (DatanodeInfo node : result) { - String nodeId = node.getXferAddr(); - DatanodeInfo dn = datanodesMap.get(nodeId); - if (dn == null || node.getLastUpdate() > dn.getLastUpdate()) { - // Add the subcluster as a suffix to the network location - node.setNetworkLocation( - NodeBase.PATH_SEPARATOR_STR + ns.getNameserviceId() + - node.getNetworkLocation()); - datanodesMap.put(nodeId, node); - } else { - LOG.debug("{} is in multiple subclusters", nodeId); - } - } - } + updateDnMap(results, datanodesMap); // Map -> Array Collection datanodes = datanodesMap.values(); return toArray(datanodes, DatanodeInfo.class); @@ -1349,6 +1341,11 @@ public void satisfyStoragePolicy(String path) throws IOException { clientProto.satisfyStoragePolicy(path); } + @Override // ClientProtocol + public DatanodeInfo[] getSlowDatanodeReport() throws IOException { + return clientProto.getSlowDatanodeReport(); + } + @Override // NamenodeProtocol public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size, long minBlockSize) throws IOException { @@ -1748,4 +1745,52 @@ public void refreshSuperUserGroupsConfiguration() throws IOException { public String[] getGroupsForUser(String user) throws IOException { return routerProto.getGroupsForUser(user); } + + /** + * Get the slow running datanodes report with a timeout. + * + * @param requireResponse If we require all the namespaces to report. + * @param timeOutMs Time out for the reply in milliseconds. + * @return List of datanodes. + * @throws IOException If it cannot get the report. 
+ */ + public DatanodeInfo[] getSlowDatanodeReport(boolean requireResponse, long timeOutMs) + throws IOException { + checkOperation(OperationCategory.UNCHECKED); + + Map datanodesMap = new LinkedHashMap<>(); + RemoteMethod method = new RemoteMethod("getSlowDatanodeReport"); + + Set nss = namenodeResolver.getNamespaces(); + Map results = + rpcClient.invokeConcurrent(nss, method, requireResponse, false, + timeOutMs, DatanodeInfo[].class); + updateDnMap(results, datanodesMap); + // Map -> Array + Collection datanodes = datanodesMap.values(); + return toArray(datanodes, DatanodeInfo.class); + } + + private void updateDnMap(Map results, + Map datanodesMap) { + for (Entry entry : + results.entrySet()) { + FederationNamespaceInfo ns = entry.getKey(); + DatanodeInfo[] result = entry.getValue(); + for (DatanodeInfo node : result) { + String nodeId = node.getXferAddr(); + DatanodeInfo dn = datanodesMap.get(nodeId); + if (dn == null || node.getLastUpdate() > dn.getLastUpdate()) { + // Add the subcluster as a suffix to the network location + node.setNetworkLocation( + NodeBase.PATH_SEPARATOR_STR + ns.getNameserviceId() + + node.getNetworkLocation()); + datanodesMap.put(nodeId, node); + } else { + LOG.debug("{} is in multiple subclusters", nodeId); + } + } + } + } + } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java index 9f0d06d7695cd..061a556c2ea9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java @@ -115,20 +115,18 @@ public class RouterWebHdfsMethods extends NamenodeWebHdfsMethods { private static final Logger LOG = 
LoggerFactory.getLogger(RouterWebHdfsMethods.class); - private static final ThreadLocal REMOTE_ADDRESS = - new ThreadLocal(); - private @Context HttpServletRequest request; private String method; private String query; private String reqPath; + private String remoteAddr; public RouterWebHdfsMethods(@Context HttpServletRequest request) { super(request); this.method = request.getMethod(); this.query = request.getQueryString(); this.reqPath = request.getServletPath(); - REMOTE_ADDRESS.set(JspHelper.getRemoteAddr(request)); + this.remoteAddr = JspHelper.getRemoteAddr(request); } @Override @@ -139,7 +137,7 @@ protected void init(final UserGroupInformation ugi, final Param... parameters) { super.init(ugi, delegation, username, doAsUser, path, op, parameters); - REMOTE_ADDRESS.set(JspHelper.getRemoteAddr(request)); + remoteAddr = JspHelper.getRemoteAddr(request); } @Override @@ -153,12 +151,12 @@ protected ClientProtocol getRpcClientProtocol() throws IOException { } private void reset() { - REMOTE_ADDRESS.set(null); + remoteAddr = null; } @Override protected String getRemoteAddr() { - return REMOTE_ADDRESS.get(); + return remoteAddr; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/RouterSecurityManager.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/RouterSecurityManager.java index 8e7a34381cff7..7b0787f0f1613 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/RouterSecurityManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/RouterSecurityManager.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.federation.router.security; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/DistributedSQLCounter.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/DistributedSQLCounter.java new file mode 100644 index 0000000000000..14b232783f5fe --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/DistributedSQLCounter.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.federation.router.security.token; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Distributed counter that relies on a SQL database to synchronize + * between multiple clients. This expects a table with a single int field + * to exist in the database. 
One record must exist on the table at all times, + * representing the last used value reserved by a client. + */ +public class DistributedSQLCounter { + private static final Logger LOG = + LoggerFactory.getLogger(DistributedSQLCounter.class); + + private final String field; + private final String table; + private final SQLConnectionFactory connectionFactory; + + public DistributedSQLCounter(String field, String table, + SQLConnectionFactory connectionFactory) { + this.field = field; + this.table = table; + this.connectionFactory = connectionFactory; + } + + /** + * Obtains the value of the counter. + * @return counter value. + */ + public int selectCounterValue() throws SQLException { + try (Connection connection = connectionFactory.getConnection()) { + return selectCounterValue(false, connection); + } + } + + private int selectCounterValue(boolean forUpdate, Connection connection) throws SQLException { + String query = String.format("SELECT %s FROM %s %s", field, table, + forUpdate ? "FOR UPDATE" : ""); + LOG.debug("Select counter statement: " + query); + try (Statement statement = connection.createStatement(); + ResultSet result = statement.executeQuery(query)) { + if (result.next()) { + return result.getInt(field); + } else { + throw new IllegalStateException("Counter table not initialized: " + table); + } + } + } + + /** + * Sets the counter to the given value. + * @param value Value to assign to counter. + */ + public void updateCounterValue(int value) throws SQLException { + try (Connection connection = connectionFactory.getConnection(true)) { + updateCounterValue(value, connection); + } + } + + /** + * Sets the counter to the given value. + * @param value Value to assign to counter. + * @param connection Connection to database hosting the counter table. 
+ */ + public void updateCounterValue(int value, Connection connection) throws SQLException { + String queryText = String.format("UPDATE %s SET %s = ?", table, field); + LOG.debug("Update counter statement: " + queryText + ". Value: " + value); + try (PreparedStatement statement = connection.prepareStatement(queryText)) { + statement.setInt(1, value); + statement.execute(); + } + } + + /** + * Increments the counter by the given amount and + * returns the previous counter value (0 if an int overflow reset the counter). + * @param amount Amount to increase the counter. + * @return Previous counter value. + */ + public int incrementCounterValue(int amount) throws SQLException { + // Disabling auto-commit to ensure that all statements on this transaction + // are committed at once. + try (Connection connection = connectionFactory.getConnection(false)) { + // Preventing dirty reads and non-repeatable reads to ensure that the + // value read will not be updated by a different connection. + if (connection.getTransactionIsolation() < Connection.TRANSACTION_REPEATABLE_READ) { + connection.setTransactionIsolation(Connection.TRANSACTION_REPEATABLE_READ); + } + + try { + // Reading the counter value "FOR UPDATE" to lock the value record, + // forcing other connections to wait until this transaction is committed. + int lastValue = selectCounterValue(true, connection); + + // Calculate the new counter value and handle overflow by + // resetting the counter to 0. 
+ int newValue = lastValue + amount; + if (newValue < 0) { + lastValue = 0; + newValue = amount; + } + + updateCounterValue(newValue, connection); + connection.commit(); + return lastValue; + } catch (Exception e) { + // Rollback transaction to release table locks + connection.rollback(); + throw e; + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/HikariDataSourceConnectionFactory.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/HikariDataSourceConnectionFactory.java new file mode 100644 index 0000000000000..5510e9f54b996 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/HikariDataSourceConnectionFactory.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.server.federation.router.security.token; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.token.delegation.SQLDelegationTokenSecretManager; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Properties; + +/** + * Class that relies on a HikariDataSource to provide SQL connections. + */ +class HikariDataSourceConnectionFactory implements SQLConnectionFactory { + protected final static String HIKARI_PROPS = SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + + "connection.hikari."; + private final HikariDataSource dataSource; + + HikariDataSourceConnectionFactory(Configuration conf) { + Properties properties = new Properties(); + properties.setProperty("jdbcUrl", conf.get(CONNECTION_URL)); + properties.setProperty("username", conf.get(CONNECTION_USERNAME)); + properties.setProperty("password", conf.get(CONNECTION_PASSWORD)); + properties.setProperty("driverClassName", conf.get(CONNECTION_DRIVER)); + + // Include hikari connection properties + properties.putAll(conf.getPropsWithPrefix(HIKARI_PROPS)); + + HikariConfig hikariConfig = new HikariConfig(properties); + this.dataSource = new HikariDataSource(hikariConfig); + } + + @Override + public Connection getConnection() throws SQLException { + return dataSource.getConnection(); + } + + @Override + public void shutdown() { + // Close database connections + dataSource.close(); + } + + @VisibleForTesting + HikariDataSource getDataSource() { + return dataSource; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLConnectionFactory.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLConnectionFactory.java new file mode 100644 
index 0000000000000..54c9cbcd42318 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLConnectionFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.federation.router.security.token; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Properties; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.token.delegation.SQLDelegationTokenSecretManager; + + +/** + * Interface to provide SQL connections to the {@link SQLDelegationTokenSecretManagerImpl}. 
+ */ +public interface SQLConnectionFactory { + String CONNECTION_URL = SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + + "connection.url"; + String CONNECTION_USERNAME = SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + + "connection.username"; + String CONNECTION_PASSWORD = SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + + "connection.password"; + String CONNECTION_DRIVER = SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + + "connection.driver"; + + Connection getConnection() throws SQLException; + void shutdown(); + + default Connection getConnection(boolean autocommit) throws SQLException { + Connection connection = getConnection(); + connection.setAutoCommit(autocommit); + return connection; + } +} + diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLDelegationTokenSecretManagerImpl.java new file mode 100644 index 0000000000000..7da54778f3127 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLDelegationTokenSecretManagerImpl.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.federation.router.security.token; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; +import org.apache.hadoop.security.token.delegation.SQLDelegationTokenSecretManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * An implementation of {@link SQLDelegationTokenSecretManager} that + * persists TokenIdentifiers and DelegationKeys in a SQL database. + * This implementation obtains JDBC connections from a {@link SQLConnectionFactory} + * (a {@link HikariDataSourceConnectionFactory} by default, set via connection.* keys). 
+ */ +public class SQLDelegationTokenSecretManagerImpl + extends SQLDelegationTokenSecretManager { + + private static final Logger LOG = + LoggerFactory.getLogger(SQLDelegationTokenSecretManagerImpl.class); + private static final String SEQ_NUM_COUNTER_FIELD = "sequenceNum"; + private static final String SEQ_NUM_COUNTER_TABLE = "LastSequenceNum"; + private static final String KEY_ID_COUNTER_FIELD = "keyId"; + private static final String KEY_ID_COUNTER_TABLE = "LastDelegationKeyId"; + + private final SQLConnectionFactory connectionFactory; + private final DistributedSQLCounter sequenceNumCounter; + private final DistributedSQLCounter delegationKeyIdCounter; + private final SQLSecretManagerRetriableHandler retryHandler; + + public SQLDelegationTokenSecretManagerImpl(Configuration conf) { + this(conf, new HikariDataSourceConnectionFactory(conf), + SQLSecretManagerRetriableHandlerImpl.getInstance(conf)); + } + + public SQLDelegationTokenSecretManagerImpl(Configuration conf, + SQLConnectionFactory connectionFactory, SQLSecretManagerRetriableHandler retryHandler) { + super(conf); + + this.connectionFactory = connectionFactory; + this.sequenceNumCounter = new DistributedSQLCounter(SEQ_NUM_COUNTER_FIELD, + SEQ_NUM_COUNTER_TABLE, connectionFactory); + this.delegationKeyIdCounter = new DistributedSQLCounter(KEY_ID_COUNTER_FIELD, + KEY_ID_COUNTER_TABLE, connectionFactory); + this.retryHandler = retryHandler; + + try { + super.startThreads(); + } catch (IOException e) { + throw new RuntimeException("Error starting threads for MySQL secret manager", e); + } + + LOG.info("MySQL delegation token secret manager instantiated"); + } + + @Override + public DelegationTokenIdentifier createIdentifier() { + return new DelegationTokenIdentifier(); + } + + @Override + public void stopThreads() { + super.stopThreads(); + connectionFactory.shutdown(); + } + + @Override + protected void insertToken(int sequenceNum, byte[] tokenIdentifier, byte[] tokenInfo) + throws SQLException { + 
retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement statement = connection.prepareStatement( + "INSERT INTO Tokens (sequenceNum, tokenIdentifier, tokenInfo) VALUES (?, ?, ?)")) { + statement.setInt(1, sequenceNum); + statement.setBytes(2, tokenIdentifier); + statement.setBytes(3, tokenInfo); + statement.execute(); + } + }); + } + + @Override + protected void updateToken(int sequenceNum, byte[] tokenIdentifier, byte[] tokenInfo) + throws SQLException { + retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement statement = connection.prepareStatement( + "UPDATE Tokens SET tokenInfo = ? WHERE sequenceNum = ? AND tokenIdentifier = ?")) { + statement.setBytes(1, tokenInfo); + statement.setInt(2, sequenceNum); + statement.setBytes(3, tokenIdentifier); + statement.execute(); + } + }); + } + + @Override + protected void deleteToken(int sequenceNum, byte[] tokenIdentifier) throws SQLException { + retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement statement = connection.prepareStatement( + "DELETE FROM Tokens WHERE sequenceNum = ? AND tokenIdentifier = ?")) { + statement.setInt(1, sequenceNum); + statement.setBytes(2, tokenIdentifier); + statement.execute(); + } + }); + } + + @Override + protected byte[] selectTokenInfo(int sequenceNum, byte[] tokenIdentifier) throws SQLException { + return retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(); + PreparedStatement statement = connection.prepareStatement( + "SELECT tokenInfo FROM Tokens WHERE sequenceNum = ? 
AND tokenIdentifier = ?")) { + statement.setInt(1, sequenceNum); + statement.setBytes(2, tokenIdentifier); + try (ResultSet result = statement.executeQuery()) { + if (result.next()) { + return result.getBytes("tokenInfo"); + } + } + } + return null; + }); + } + + @Override + protected void insertDelegationKey(int keyId, byte[] delegationKey) throws SQLException { + retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement statement = connection.prepareStatement( + "INSERT INTO DelegationKeys (keyId, delegationKey) VALUES (?, ?)")) { + statement.setInt(1, keyId); + statement.setBytes(2, delegationKey); + statement.execute(); + } + }); + } + + @Override + protected void updateDelegationKey(int keyId, byte[] delegationKey) throws SQLException { + retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement statement = connection.prepareStatement( + "UPDATE DelegationKeys SET delegationKey = ? 
WHERE keyId = ?")) { + statement.setBytes(1, delegationKey); + statement.setInt(2, keyId); + statement.execute(); + } + }); + } + + @Override + protected void deleteDelegationKey(int keyId) throws SQLException { + retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement statement = connection.prepareStatement( + "DELETE FROM DelegationKeys WHERE keyId = ?")) { + statement.setInt(1, keyId); + statement.execute(); + } + }); + } + + @Override + protected byte[] selectDelegationKey(int keyId) throws SQLException { + return retryHandler.execute(() -> { + try (Connection connection = connectionFactory.getConnection(); + PreparedStatement statement = connection.prepareStatement( + "SELECT delegationKey FROM DelegationKeys WHERE keyId = ?")) { + statement.setInt(1, keyId); + try (ResultSet result = statement.executeQuery()) { + if (result.next()) { + return result.getBytes("delegationKey"); + } + } + } + return null; + }); + } + + @Override + protected int selectSequenceNum() throws SQLException { + return retryHandler.execute(() -> sequenceNumCounter.selectCounterValue()); + } + + @Override + protected void updateSequenceNum(int value) throws SQLException { + retryHandler.execute(() -> sequenceNumCounter.updateCounterValue(value)); + } + + @Override + protected int incrementSequenceNum(int amount) throws SQLException { + return retryHandler.execute(() -> sequenceNumCounter.incrementCounterValue(amount)); + } + + @Override + protected int selectKeyId() throws SQLException { + return retryHandler.execute(delegationKeyIdCounter::selectCounterValue); + } + + @Override + protected void updateKeyId(int value) throws SQLException { + retryHandler.execute(() -> delegationKeyIdCounter.updateCounterValue(value)); + } + + @Override + protected int incrementKeyId(int amount) throws SQLException { + return retryHandler.execute(() -> delegationKeyIdCounter.incrementCounterValue(amount)); + } + + @VisibleForTesting + 
protected SQLConnectionFactory getConnectionFactory() { + return connectionFactory; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLSecretManagerRetriableHandler.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLSecretManagerRetriableHandler.java new file mode 100644 index 0000000000000..16151226217bb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/SQLSecretManagerRetriableHandler.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.server.federation.router.security.token; + +import java.sql.SQLException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.security.token.delegation.SQLDelegationTokenSecretManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Interface to handle retries when {@link SQLDelegationTokenSecretManagerImpl} + * throws expected errors. + */ +public interface SQLSecretManagerRetriableHandler { + void execute(SQLCommandVoid command) throws SQLException; + T execute(SQLCommand command) throws SQLException; + + @FunctionalInterface + interface SQLCommandVoid { + void doCall() throws SQLException; + } + + @FunctionalInterface + interface SQLCommand { + T doCall() throws SQLException; + } +} + +/** + * Implementation of {@link SQLSecretManagerRetriableHandler} that uses a + * {@link RetryProxy} to simplify the retryable operations. 
+ */ +class SQLSecretManagerRetriableHandlerImpl implements SQLSecretManagerRetriableHandler { + public final static String MAX_RETRIES = + SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + "max-retries"; + public final static int MAX_RETRIES_DEFAULT = 0; + public final static String RETRY_SLEEP_TIME_MS = + SQLDelegationTokenSecretManager.SQL_DTSM_CONF_PREFIX + "retry-sleep-time-ms"; + public final static long RETRY_SLEEP_TIME_MS_DEFAULT = 100; + + private static final Logger LOG = + LoggerFactory.getLogger(SQLSecretManagerRetriableHandlerImpl.class); + + static SQLSecretManagerRetriableHandler getInstance(Configuration conf) { + return getInstance(conf, new SQLSecretManagerRetriableHandlerImpl()); + } + + static SQLSecretManagerRetriableHandler getInstance(Configuration conf, + SQLSecretManagerRetriableHandlerImpl retryHandler) { + RetryPolicy basePolicy = RetryPolicies.exponentialBackoffRetry( + conf.getInt(MAX_RETRIES, MAX_RETRIES_DEFAULT), + conf.getLong(RETRY_SLEEP_TIME_MS, RETRY_SLEEP_TIME_MS_DEFAULT), + TimeUnit.MILLISECONDS); + + // Configure SQLSecretManagerRetriableException to retry with exponential backoff + Map, RetryPolicy> exceptionToPolicyMap = new HashMap<>(); + exceptionToPolicyMap.put(SQLSecretManagerRetriableException.class, basePolicy); + + // Configure all other exceptions to fail after one attempt + RetryPolicy retryPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + + return (SQLSecretManagerRetriableHandler) RetryProxy.create( + SQLSecretManagerRetriableHandler.class, retryHandler, retryPolicy); + } + + /** + * Executes a SQL command and raises retryable errors as + * {@link SQLSecretManagerRetriableException}s so they are recognized by the + * {@link RetryProxy}. 
+ * @param command SQL command to execute + * @throws SQLException When SQL connection errors occur + */ + @Override + public void execute(SQLCommandVoid command) throws SQLException { + try { + command.doCall(); + } catch (SQLException e) { + LOG.warn("Failed to execute SQL command", e); + throw new SQLSecretManagerRetriableException(e); + } + } + + /** + * Executes a SQL command and raises retryable errors as + * {@link SQLSecretManagerRetriableException}s so they are recognized by the + * {@link RetryProxy}. + * @param command SQL command to execute + * @throws SQLException When SQL connection errors occur + */ + @Override + public T execute(SQLCommand command) throws SQLException { + try { + return command.doCall(); + } catch (SQLException e) { + LOG.warn("Failed to execute SQL command", e); + throw new SQLSecretManagerRetriableException(e); + } + } + + /** + * Class used to identify errors that can be retried. + */ + static class SQLSecretManagerRetriableException extends SQLException { + SQLSecretManagerRetriableException(Throwable cause) { + super(cause); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java index 4a111187ac46a..dcb05159500ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java @@ -19,13 +19,24 @@ package org.apache.hadoop.hdfs.server.federation.router.security.token; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.security.token.Token; 
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; import org.apache.hadoop.security.token.delegation.ZKDelegationTokenSecretManager; +import org.apache.hadoop.util.Time; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; /** * Zookeeper based router delegation token store implementation. @@ -33,24 +44,181 @@ public class ZKDelegationTokenSecretManagerImpl extends ZKDelegationTokenSecretManager { + public static final String ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL = + ZK_CONF_PREFIX + "router.token.sync.interval"; + public static final int ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL_DEFAULT = 5; + private static final Logger LOG = LoggerFactory.getLogger(ZKDelegationTokenSecretManagerImpl.class); - private Configuration conf = null; + private Configuration conf; + + private final ScheduledExecutorService scheduler = + Executors.newSingleThreadScheduledExecutor(); + + // Local cache of delegation tokens, used for deprecating tokens from + // currentTokenMap + private final Set localTokenCache = + new HashSet<>(); + // Native zk client for getting all tokens + private ZooKeeper zookeeper; + private final String TOKEN_PATH = "/" + zkClient.getNamespace() + + ZK_DTSM_TOKENS_ROOT; + // The flag used to issue an extra check before deletion + // Since cancel token and token remover thread use the same + // API here and one router could have a token that is renewed + // by another router, thus token remover should always check ZK + // to confirm whether it has been renewed or not + private ThreadLocal checkAgainstZkBeforeDeletion = + new 
ThreadLocal() { + @Override + protected Boolean initialValue() { + return true; + } + }; public ZKDelegationTokenSecretManagerImpl(Configuration conf) { super(conf); this.conf = conf; try { - super.startThreads(); + startThreads(); } catch (IOException e) { LOG.error("Error starting threads for zkDelegationTokens", e); } LOG.info("Zookeeper delegation token secret manager instantiated"); } + @Override + public void startThreads() throws IOException { + super.startThreads(); + // start token cache related work when watcher is disabled + if (!isTokenWatcherEnabled()) { + LOG.info("Watcher for tokens is disabled in this secret manager"); + try { + // By default set this variable + checkAgainstZkBeforeDeletion.set(true); + // Ensure the token root path exists + if (zkClient.checkExists().forPath(ZK_DTSM_TOKENS_ROOT) == null) { + zkClient.create().creatingParentsIfNeeded() + .withMode(CreateMode.PERSISTENT) + .forPath(ZK_DTSM_TOKENS_ROOT); + } + // Set up zookeeper client + try { + zookeeper = zkClient.getZookeeperClient().getZooKeeper(); + } catch (Exception e) { + LOG.info("Cannot get zookeeper client ", e); + } finally { + if (zookeeper == null) { + throw new IOException("Zookeeper client is null"); + } + } + + LOG.info("Start loading token cache"); + long start = Time.now(); + rebuildTokenCache(true); + LOG.info("Loaded token cache in {} milliseconds", Time.now() - start); + + int syncInterval = conf.getInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, + ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL_DEFAULT); + scheduler.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + try { + rebuildTokenCache(false); + } catch (Exception e) { + // ignore + } + } + }, syncInterval, syncInterval, TimeUnit.SECONDS); + } catch (Exception e) { + LOG.error("Error rebuilding local cache for zkDelegationTokens ", e); + } + } + } + + @Override + public void stopThreads() { + super.stopThreads(); + scheduler.shutdown(); + } + @Override public DelegationTokenIdentifier createIdentifier() 
{ return new DelegationTokenIdentifier(); } + + /** + * This function will rebuild local token cache from zk storage. + * It is first called when the secret manager is initialized and + * then regularly at a configured interval. + * + * @param initial whether this is called during initialization + * @throws IOException + */ + private void rebuildTokenCache(boolean initial) throws IOException { + localTokenCache.clear(); + // Use bare zookeeper client to get all children since curator will + // wrap the same API with a sorting process. This is time consuming given + // millions of tokens + List zkTokens; + try { + zkTokens = zookeeper.getChildren(TOKEN_PATH, false); + } catch (KeeperException | InterruptedException e) { + throw new IOException("Tokens cannot be fetched from path " + + TOKEN_PATH, e); + } + byte[] data; + for (String tokenPath : zkTokens) { + try { + data = zkClient.getData().forPath( + ZK_DTSM_TOKENS_ROOT + "/" + tokenPath); + } catch (KeeperException.NoNodeException e) { + LOG.debug("No node in path [" + tokenPath + "]"); + continue; + } catch (Exception ex) { + throw new IOException(ex); + } + // Store data to currentTokenMap + AbstractDelegationTokenIdentifier ident = processTokenAddOrUpdate(data); + // Store data to localTokenCache for sync + localTokenCache.add(ident); + } + if (!initial) { + // Sync zkTokens with local cache, specifically + // 1) add/update tokens to local cache from zk, which is done through + // processTokenAddOrUpdate above + // 2) remove tokens in local cache but not in zk anymore + for (AbstractDelegationTokenIdentifier ident : currentTokens.keySet()) { + if (!localTokenCache.contains(ident)) { + currentTokens.remove(ident); + } + } + } + } + + @Override + public AbstractDelegationTokenIdentifier cancelToken( + Token token, String canceller) + throws IOException { + checkAgainstZkBeforeDeletion.set(false); + AbstractDelegationTokenIdentifier ident = super.cancelToken(token, + canceller); + 
checkAgainstZkBeforeDeletion.set(true); + return ident; + } + + @Override + protected void removeStoredToken(AbstractDelegationTokenIdentifier ident) + throws IOException { + super.removeStoredToken(ident, checkAgainstZkBeforeDeletion.get()); + } + + @Override + protected void addOrUpdateToken(AbstractDelegationTokenIdentifier ident, + DelegationTokenInformation info, boolean isUpdate) throws Exception { + // Store the data in local memory first + currentTokens.put(ident, info); + super.addOrUpdateToken(ident, info, isUpdate); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/package-info.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/package-info.java index a51e4552955a5..ae65c8fe6755d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/package-info.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/package-info.java @@ -18,7 +18,8 @@ /** * Includes implementations of token secret managers. - * Implementations should extend {@link AbstractDelegationTokenSecretManager}. + * Implementations should extend + * {@link org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager}. 
*/ @InterfaceAudience.Private @InterfaceStability.Evolving diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java index 7b28c03a529ad..613d8a78038ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java @@ -100,7 +100,7 @@ protected CachedRecordStore( * @throws StateStoreUnavailableException If the cache is not initialized. */ private void checkCacheAvailable() throws StateStoreUnavailableException { - if (!this.initialized) { + if (!getDriver().isDriverReady() || !this.initialized) { throw new StateStoreUnavailableException( "Cached State Store not initialized, " + getRecordClass().getSimpleName() + " records not valid"); @@ -125,7 +125,6 @@ public boolean loadCache(boolean force) throws IOException { } catch (IOException e) { LOG.error("Cannot get \"{}\" records from the State Store", getRecordClass().getSimpleName()); - this.initialized = false; return false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/MembershipStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/MembershipStore.java index 4352ae19bde18..c6545d7425c92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/MembershipStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/MembershipStore.java @@ -48,7 +48,7 @@ * StateStoreDriver}, NameNode registrations are cached until the next query. 
* The fetched registration data is aggregated using a quorum to determine the * best/most accurate state for each NameNode. The cache is periodically updated - * by the @{link StateStoreCacheUpdateService}. + * by the {@link StateStoreCacheUpdateService}. */ @InterfaceAudience.Private @InterfaceStability.Evolving diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java index 92aa5843e3d1e..a2e7adc8d74c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java @@ -73,7 +73,7 @@ public Class getRecordClass() { * * @return State Store driver. */ - protected StateStoreDriver getDriver() { + public StateStoreDriver getDriver() { return this.driver; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreService.java index 66c288238eb76..a401805794287 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreService.java @@ -53,7 +53,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A service to initialize a @@ -272,6 +272,15 @@ public > T getRegisteredRecordStore( return null; } + /** + * Get the list of all RecordStores. + * @return a list of each RecordStore. 
+ */ + @SuppressWarnings("unchecked") + public > List getRecordStores() { + return new ArrayList<>((Collection) recordStores.values()); + } + /** * List of records supported by this State Store. * diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java index b5ce8f8d41169..3b781cb485bdd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java @@ -56,7 +56,7 @@ public interface StateStoreRecordOperations { * @param clazz Class of record to fetch. * @param query Query to filter results. * @return A single record matching the query. Null if there are no matching - * records or more than one matching record in the store. + * records. * @throws IOException If multiple records match or if the data store cannot * be queried. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java index 15fc9c1ae9f6c..1ed9f38474a6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java @@ -44,7 +44,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * {@link StateStoreDriver} implementation based on files. In this approach, we @@ -85,7 +85,8 @@ protected abstract BufferedReader getReader( * @param path Path of the record to write. * @return Writer for the record. */ - protected abstract BufferedWriter getWriter( + @VisibleForTesting + public abstract BufferedWriter getWriter( String path); /** @@ -348,25 +349,18 @@ public boolean putAll( for (Entry entry : toWrite.entrySet()) { String recordPath = entry.getKey(); String recordPathTemp = recordPath + "." 
+ now() + TMP_MARK; - BufferedWriter writer = getWriter(recordPathTemp); - try { + boolean recordWrittenSuccessfully = true; + try (BufferedWriter writer = getWriter(recordPathTemp)) { T record = entry.getValue(); String line = serializeString(record); writer.write(line); } catch (IOException e) { LOG.error("Cannot write {}", recordPathTemp, e); + recordWrittenSuccessfully = false; success = false; - } finally { - if (writer != null) { - try { - writer.close(); - } catch (IOException e) { - LOG.error("Cannot close the writer for {}", recordPathTemp, e); - } - } } // Commit - if (!rename(recordPathTemp, recordPath)) { + if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) { LOG.error("Failed committing record into {}", recordPath); success = false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java index 60dbcdc10ef44..6ca2663716162 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java @@ -31,12 +31,13 @@ import java.util.List; import org.apache.commons.lang3.ArrayUtils; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.io.Files; +import org.apache.hadoop.thirdparty.com.google.common.io.Files; /** * StateStoreDriver implementation based on a local file. 
@@ -88,7 +89,18 @@ protected String getRootDir() { if (this.rootDirectory == null) { String dir = getConf().get(FEDERATION_STORE_FILE_DIRECTORY); if (dir == null) { - File tempDir = Files.createTempDir(); + File tempDirBase = + new File(System.getProperty("java.io.tmpdir")); + File tempDir = null; + try { + tempDir = java.nio.file.Files.createTempDirectory( + tempDirBase.toPath(), System.currentTimeMillis() + "-").toFile(); + } catch (IOException e) { + // fallback to the base upon exception. + LOG.debug("Unable to create a temporary directory. Fall back to " + + " the default system temp directory {}", tempDirBase, e); + tempDir = tempDirBase; + } dir = tempDir.getAbsolutePath(); LOG.warn("The root directory is not available, using {}", dir); } @@ -114,7 +126,8 @@ protected BufferedReader getReader(String filename) { } @Override - protected BufferedWriter getWriter(String filename) { + @VisibleForTesting + public BufferedWriter getWriter(String filename) { BufferedWriter writer = null; try { LOG.debug("Writing file: {}", filename); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java index e6bf159e2f597..ee34d8a4cabbb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java @@ -28,13 +28,14 @@ import java.util.Collections; import java.util.List; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import 
org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver; import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord; @@ -82,17 +83,8 @@ protected boolean mkdir(String path) { @Override protected boolean rename(String src, String dst) { try { - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem)fs; - dfs.rename(new Path(src), new Path(dst), Options.Rename.OVERWRITE); - return true; - } else { - // Replace should be atomic but not available - if (fs.exists(new Path(dst))) { - fs.delete(new Path(dst), true); - } - return fs.rename(new Path(src), new Path(dst)); - } + FileUtil.rename(fs, new Path(src), new Path(dst), Options.Rename.OVERWRITE); + return true; } catch (Exception e) { LOG.error("Cannot rename {} to {}", src, dst, e); return false; @@ -148,7 +140,8 @@ protected BufferedReader getReader(String pathName) { } @Override - protected BufferedWriter getWriter(String pathName) { + @VisibleForTesting + public BufferedWriter getWriter(String pathName) { BufferedWriter writer = null; Path path = new Path(pathName); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java index c6441caf82119..45442da0ab570 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java @@ -50,6 +50,7 @@ * |--- MEMBERSHIP 
* |--- REBALANCER * |--- ROUTERS + * |--- DISABLE_NAMESERVICE */ public class StateStoreZooKeeperImpl extends StateStoreSerializableImpl { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MembershipStoreImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MembershipStoreImpl.java index 57b7b618b04fe..5d22b77afe2fd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MembershipStoreImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MembershipStoreImpl.java @@ -185,7 +185,9 @@ public NamenodeHeartbeatResponse namenodeHeartbeat( @Override public boolean loadCache(boolean force) throws IOException { - super.loadCache(force); + if (!super.loadCache(force)) { + return false; + } // Update local cache atomically cacheWriteLock.lock(); @@ -213,12 +215,15 @@ public boolean loadCache(boolean force) throws IOException { nnRegistrations.put(nnId, nnRegistration); } nnRegistration.add(membership); - String bpId = membership.getBlockPoolId(); - String cId = membership.getClusterId(); - String nsId = membership.getNameserviceId(); - FederationNamespaceInfo nsInfo = - new FederationNamespaceInfo(bpId, cId, nsId); - this.activeNamespaces.add(nsInfo); + if (membership.getState() + != FederationNamenodeServiceState.UNAVAILABLE) { + String bpId = membership.getBlockPoolId(); + String cId = membership.getClusterId(); + String nsId = membership.getNameserviceId(); + FederationNamespaceInfo nsInfo = + new FederationNamespaceInfo(bpId, cId, nsId); + this.activeNamespaces.add(nsInfo); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/package-info.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/package-info.java index 
6b3e55f6d98fd..9b45f28396de6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/package-info.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/package-info.java @@ -31,19 +31,21 @@ * StateStoreDriver} to handle querying, updating and deleting data records. The * data storage driver is initialized and maintained by the {@link * org.apache.hadoop.hdfs.server.federation.store.StateStoreService - * FederationStateStoreService}. The state store + * StateStoreService}. The state store * supports fetching all records of a type, filtering by column values or * fetching a single record by its primary key. *

      * The state store contains several API interfaces, one for each data records * type. *

        - *
      • FederationMembershipStateStore: state of all Namenodes in the federation. + *
      • MembershipStore: state of all Namenodes in the federation. * Uses the MembershipState record. - *
      • FederationMountTableStore: Mount table mapping paths in the global + *
      • MountTableStore: Mount table mapping paths in the global * namespace to individual subcluster paths. Uses the MountTable record. - *
      • RouterStateStore: State of all routers in the federation. Uses the + *
      • RouterStore: State of all routers in the federation. Uses the * RouterState record. + *
      • DisabledNameserviceStore: state of all disabled nameservice in the federation. + * Uses the DisabledNameservice record. *
      * Each API is defined in a separate interface. The implementations of these * interfaces are responsible for accessing the {@link diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/BaseRecord.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/BaseRecord.java index 86721eaa476b6..6b39e20bd7ee7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/BaseRecord.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/BaseRecord.java @@ -21,7 +21,7 @@ import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Abstract base of a data record in the StateStore. All StateStore records are diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipState.java index 4add8fa99c6eb..80889b3d4aa4a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipState.java @@ -32,7 +32,7 @@ /** * Data schema for storing NN registration information in the * {@link org.apache.hadoop.hdfs.server.federation.store.StateStoreService - * FederationStateStoreService}. + * StateStoreService}. 
*/ public abstract class MembershipState extends BaseRecord implements FederationNamenodeContext { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java index d1351a340c3cf..a87934610f3c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java @@ -42,9 +42,9 @@ /** * Data schema for {@link * org.apache.hadoop.hdfs.server.federation.store.MountTableStore - * FederationMountTableStore} data stored in the {@link + * MountTableStore} data stored in the {@link * org.apache.hadoop.hdfs.server.federation.store.StateStoreService - * FederationStateStoreService}. Supports string serialization. + * StateStoreService}. Supports string serialization. 
*/ public abstract class MountTable extends BaseRecord { @@ -430,6 +430,8 @@ public int hashCode() { .append(this.isReadOnly()) .append(this.getDestOrder()) .append(this.isFaultTolerant()) + .append(this.getQuota().getQuota()) + .append(this.getQuota().getSpaceQuota()) .toHashCode(); } @@ -443,6 +445,9 @@ public boolean equals(Object obj) { .append(this.isReadOnly(), other.isReadOnly()) .append(this.getDestOrder(), other.getDestOrder()) .append(this.isFaultTolerant(), other.isFaultTolerant()) + .append(this.getQuota().getQuota(), other.getQuota().getQuota()) + .append(this.getQuota().getSpaceQuota(), + other.getQuota().getSpaceQuota()) .isEquals(); } return false; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/RouterState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/RouterState.java index 761e2a4872e9a..337a58c359812 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/RouterState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/RouterState.java @@ -31,7 +31,7 @@ * Entry to log the state of a * {@link org.apache.hadoop.hdfs.server.federation.router.Router Router} in the * {@link org.apache.hadoop.hdfs.server.federation.store.StateStoreService - * FederationStateStoreService}. + * StateStoreService}. 
*/ public abstract class RouterState extends BaseRecord { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/utils/ConsistentHashRing.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/utils/ConsistentHashRing.java index fc3e49ff9dbb0..ab7bfb16cb822 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/utils/ConsistentHashRing.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/utils/ConsistentHashRing.java @@ -33,8 +33,8 @@ * or remove nodes, it minimizes the item migration. */ public class ConsistentHashRing { - private static final String SEPERATOR = "/"; - private static final String VIRTUAL_NODE_FORMAT = "%s" + SEPERATOR + "%d"; + private static final String SEPARATOR = "/"; + private static final String VIRTUAL_NODE_FORMAT = "%s" + SEPARATOR + "%d"; /** Hash ring. */ private SortedMap ring = new TreeMap(); @@ -119,7 +119,7 @@ public String getLocation(String item) { hash = tailMap.isEmpty() ? 
ring.firstKey() : tailMap.firstKey(); } String virtualNode = ring.get(hash); - int index = virtualNode.lastIndexOf(SEPERATOR); + int index = virtualNode.lastIndexOf(SEPARATOR); if (index >= 0) { return virtualNode.substring(0, index); } else { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java index 5ea33237b672c..f7a9424e69f89 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.tools.federation; import java.io.IOException; +import java.io.PrintStream; import java.net.InetSocketAddress; import java.util.Arrays; import java.util.Collection; @@ -26,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.TreeMap; import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -46,6 +48,10 @@ import org.apache.hadoop.hdfs.server.federation.router.RouterClient; import org.apache.hadoop.hdfs.server.federation.router.RouterQuotaUsage; import org.apache.hadoop.hdfs.server.federation.router.RouterStateManager; +import org.apache.hadoop.hdfs.server.federation.store.CachedRecordStore; +import org.apache.hadoop.hdfs.server.federation.store.RecordStore; +import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; +import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; import org.apache.hadoop.hdfs.server.federation.store.protocol.AddMountTableEntryRequest; import org.apache.hadoop.hdfs.server.federation.store.protocol.AddMountTableEntryResponse; import org.apache.hadoop.hdfs.server.federation.store.protocol.DisableNameserviceRequest; @@ -70,13 +76,17 @@ import 
org.apache.hadoop.hdfs.server.federation.store.protocol.RemoveMountTableEntryResponse; import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateMountTableEntryRequest; import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateMountTableEntryResponse; +import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.hdfs.server.federation.store.records.impl.pb.PBRecord; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RefreshResponse; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolClientSideTranslatorPB; import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolPB; +import org.apache.hadoop.ipc.protocolPB.RefreshCallQueueProtocolClientSideTranslatorPB; +import org.apache.hadoop.ipc.protocolPB.RefreshCallQueueProtocolPB; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; @@ -95,6 +105,7 @@ public class RouterAdmin extends Configured implements Tool { private static final Logger LOG = LoggerFactory.getLogger(RouterAdmin.class); + private static final String DUMP_COMMAND = "-dumpState"; private RouterClient client; @@ -131,9 +142,9 @@ private String getUsage(String cmd) { String[] commands = {"-add", "-update", "-rm", "-ls", "-getDestination", "-setQuota", "-setStorageTypeQuota", "-clrQuota", "-clrStorageTypeQuota", - "-safemode", "-nameservice", "-getDisabledNameservices", + DUMP_COMMAND, "-safemode", "-nameservice", "-getDisabledNameservices", "-refresh", "-refreshRouterArgs", - "-refreshSuperUserGroupsConfiguration"}; + "-refreshSuperUserGroupsConfiguration", "-refreshCallQueue"}; StringBuilder usage = new StringBuilder(); usage.append("Usage: hdfs dfsrouteradmin :\n"); for 
(int i = 0; i < commands.length; i++) { @@ -183,6 +194,10 @@ private String getUsage(String cmd) { return "\t[-refreshRouterArgs [arg1..argn]]"; } else if (cmd.equals("-refreshSuperUserGroupsConfiguration")) { return "\t[-refreshSuperUserGroupsConfiguration]"; + } else if (cmd.equals("-refreshCallQueue")) { + return "\t[-refreshCallQueue]"; + } else if (cmd.equals(DUMP_COMMAND)) { + return "\t[" + DUMP_COMMAND + "]"; } return getUsage(null); } @@ -220,6 +235,11 @@ private void validateMax(String[] arg) { if (arg.length > 1) { throw new IllegalArgumentException("No arguments allowed"); } + } else if (arg[0].equals("-refreshCallQueue") || + arg[0].equals(DUMP_COMMAND)) { + if (arg.length > 1) { + throw new IllegalArgumentException("No arguments allowed"); + } } } @@ -278,6 +298,15 @@ private boolean validateMin(String[] argv) { return true; } + /** + * Does this command run in the local process? + * @param cmd the string of the command + * @return is this a local command? + */ + boolean isLocalCommand(String cmd) { + return DUMP_COMMAND.equals(cmd); + } + @Override public int run(String[] argv) throws Exception { if (argv.length < 1) { @@ -295,6 +324,10 @@ public int run(String[] argv) throws Exception { System.err.println("Not enough parameters specificed for cmd " + cmd); printUsage(cmd); return exitCode; + } else if (isLocalCommand(argv[0])) { + if (DUMP_COMMAND.equals(argv[0])) { + return dumpStateStore(getConf(), System.out) ? 
0 : -1; + } } String address = null; // Initialize RouterClient @@ -388,6 +421,8 @@ public int run(String[] argv) throws Exception { exitCode = genericRefresh(argv, i); } else if ("-refreshSuperUserGroupsConfiguration".equals(cmd)) { exitCode = refreshSuperUserGroupsConfiguration(); + } else if ("-refreshCallQueue".equals(cmd)) { + exitCode = refreshCallQueue(); } else { throw new IllegalArgumentException("Unknown Command: " + cmd); } @@ -1222,7 +1257,7 @@ public int genericRefresh(String[] argv, int i) throws IOException { InetSocketAddress address = NetUtils.createSocketAddr(hostport); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine2.class); GenericRefreshProtocolPB proxy = (GenericRefreshProtocolPB)RPC.getProxy( xface, RPC.getProtocolVersion(xface), address, ugi, conf, NetUtils.getDefaultSocketFactory(conf), 0); @@ -1258,6 +1293,82 @@ public int genericRefresh(String[] argv, int i) throws IOException { } } + /** + * Refresh Router's call Queue. + * + * @throws IOException if the operation was not successful. 
+ */ + private int refreshCallQueue() throws IOException { + Configuration conf = getConf(); + String hostport = getConf().getTrimmed( + RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY, + RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_DEFAULT); + + // Create the client + Class xface = RefreshCallQueueProtocolPB.class; + InetSocketAddress address = NetUtils.createSocketAddr(hostport); + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + + RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine2.class); + RefreshCallQueueProtocolPB proxy = (RefreshCallQueueProtocolPB)RPC.getProxy( + xface, RPC.getProtocolVersion(xface), address, ugi, conf, + NetUtils.getDefaultSocketFactory(conf), 0); + + int returnCode = -1; + try (RefreshCallQueueProtocolClientSideTranslatorPB xlator = + new RefreshCallQueueProtocolClientSideTranslatorPB(proxy)) { + xlator.refreshCallQueue(); + System.out.println("Refresh call queue successfully for " + hostport); + returnCode = 0; + } catch (IOException ioe){ + System.out.println("Refresh call queue unsuccessfully for " + hostport); + } + return returnCode; + } + + /** + * Dumps the contents of the StateStore to stdout. 
+ * @return true if it was successful + */ + public static boolean dumpStateStore(Configuration conf, + PrintStream output) throws IOException { + StateStoreService service = new StateStoreService(); + conf.setBoolean(RBFConfigKeys.DFS_ROUTER_METRICS_ENABLE, false); + service.init(conf); + service.loadDriver(); + if (!service.isDriverReady()) { + System.err.println("Can't initialize driver"); + return false; + } + // Get the stores sorted by name + Map> stores = new TreeMap<>(); + for(RecordStore store: service.getRecordStores()) { + String recordName = StateStoreUtils.getRecordName(store.getRecordClass()); + stores.put(recordName, store); + } + for (Entry> pair: stores.entrySet()) { + String recordName = pair.getKey(); + RecordStore store = pair.getValue(); + output.println("---- " + recordName + " ----"); + if (store instanceof CachedRecordStore) { + for (Object record: ((CachedRecordStore) store).getCachedRecords()) { + if (record instanceof BaseRecord && record instanceof PBRecord) { + BaseRecord baseRecord = (BaseRecord) record; + // Generate the pseudo-json format of the protobuf record + String recordString = ((PBRecord) record).getProto().toString(); + // Indent each line + recordString = " " + recordString.replaceAll("\n", "\n "); + output.println(String.format(" %s:", baseRecord.getPrimaryKey())); + output.println(recordString); + } + } + output.println(); + } + } + service.stop(); + return true; + } + /** * Normalize a path for that filesystem. 
* @@ -1298,4 +1409,4 @@ public FsPermission getMode() { return mode; } } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto index 5b6dfd9c9653b..ad391e72eb0fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto @@ -306,3 +306,16 @@ message GetDisabledNameservicesRequestProto { message GetDisabledNameservicesResponseProto { repeated string nameServiceIds = 1; } + +///////////////////////////////////////////////// +// Alignment state for namespaces. +///////////////////////////////////////////////// + +/** + * Clients should receive this message in RPC responses and forward it + * in RPC requests without interpreting it. It should be encoded + * as an obscure byte array when being sent to clients. + */ +message RouterFederatedStateProto { + map namespaceStateIds = 1; // Last seen state IDs for multiple namespaces. +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/explorer.html b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/explorer.html index 1f45f4d16c1f7..15f7d7feb3848 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/explorer.html +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/explorer.html @@ -48,6 +48,7 @@
    3. Metrics
    4. Configuration
    5. Process Thread Dump
    6. +
    7. Network Topology
    8. @@ -310,7 +311,7 @@

      - @@ -437,7 +497,7 @@ - + @@ -445,6 +505,7 @@ + diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/federationhealth.js b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/federationhealth.js index 94ba67c0fd1b6..86eda24540c62 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/federationhealth.js +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/webapps/router/federationhealth.js @@ -278,20 +278,34 @@ for (var i = 0, e = nodes.length; i < e; ++i) { var n = nodes[i]; n.usedPercentage = Math.round((n.used + n.nonDfsUsedSpace) * 1.0 / n.capacity * 100); + var port = n.infoAddr.split(":")[1]; + var securePort = n.infoSecureAddr.split(":")[1]; + var dnHost = n.name.split(":")[0]; + n.dnWebAddress = "http://" + dnHost + ":" + port; + if (securePort != 0) { + n.dnWebAddress = "https://" + dnHost + ":" + securePort; + } + if (n.adminState === "In Service") { n.state = "alive"; } else if (nodes[i].adminState === "Decommission In Progress") { n.state = "decommissioning"; } else if (nodes[i].adminState === "Decommissioned") { n.state = "decommissioned"; + } else if (nodes[i].adminState === "Entering Maintenance") { + n.state = "entering-maintenance"; + } else if (nodes[i].adminState === "In Maintenance") { + n.state = "in-maintenance"; } } } function augment_dead_nodes(nodes) { for (var i = 0, e = nodes.length; i < e; ++i) { - if (nodes[i].decommissioned) { + if (nodes[i].adminState === "Decommissioned") { nodes[i].state = "down-decommissioned"; + } else if (nodes[i].adminState === "In Maintenance") { + nodes[i].state = "down-maintenance"; } else { nodes[i].state = "down"; } @@ -303,9 +317,77 @@ r.DeadNodes = node_map_to_array(JSON.parse(r.DeadNodes)); augment_dead_nodes(r.DeadNodes); r.DecomNodes = node_map_to_array(JSON.parse(r.DecomNodes)); + r.EnteringMaintenanceNodes = node_map_to_array(JSON.parse(r.EnteringMaintenanceNodes)); return r; } + function renderHistogram(dnData) { + var data = 
dnData.LiveNodes.map(function(dn) { + return (dn.usedSpace / dn.capacity) * 100.0; + }); + + var formatCount = d3.format(",.0f"); + + var widthCap = $("div.container").width(); + var heightCap = 150; + + var margin = {top: 10, right: 60, bottom: 30, left: 30}, + width = widthCap * 0.9, + height = heightCap - margin.top - margin.bottom; + + var x = d3.scaleLinear() + .domain([0.0, 100.0]) + .range([0, width]); + + var bins = d3.histogram() + .domain(x.domain()) + .thresholds(x.ticks(20)) + (data); + + var y = d3.scaleLinear() + .domain([0, d3.max(bins, function(d) { return d.length; })]) + .range([height, 0]); + + var svg = d3.select("#datanode-usage-histogram").append("svg") + .attr("width", width + 50.0) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + svg.append("text") + .attr("x", (width / 2)) + .attr("y", heightCap - 6 - (margin.top / 2)) + .attr("text-anchor", "middle") + .style("font-size", "15px") + .text("Disk usage of each DataNode (%)"); + + var bar = svg.selectAll(".bar") + .data(bins) + .enter().append("g") + .attr("class", "bar") + .attr("transform", function(d) { return "translate(" + x(d.x0) + "," + y(d.length) + ")"; }); + + window.liveNodes = dnData.LiveNodes; + + bar.append("rect") + .attr("x", 1) + .attr("width", x(bins[0].x1) - x(bins[0].x0) - 1) + .attr("height", function(d) { return height - y(d.length); }) + .attr("onclick", function (d) { return "open_hostip_list(" + d.x0 + "," + d.x1 + ")"; }); + + bar.append("text") + .attr("dy", ".75em") + .attr("y", 6) + .attr("x", (x(bins[0].x1) - x(bins[0].x0)) / 2) + .attr("text-anchor", "middle") + .text(function(d) { return formatCount(d.length); }); + + svg.append("g") + .attr("class", "axis axis--x") + .attr("transform", "translate(0," + height + ")") + .call(d3.axisBottom(x)); + } + $.get( 'jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo', guard_with_startup_progress(function (resp) { @@ -315,12 
+397,38 @@ $('#tab-datanode').html(out); $('#table-datanodes').dataTable( { 'lengthMenu': [ [25, 50, 100, -1], [25, 50, 100, "All"] ], + 'columnDefs': [ + { 'targets': [ 0 ], 'visible': false, 'searchable': false } + ], 'columns': [ - { 'orderDataType': 'ng-value', 'searchable': true }, - { 'orderDataType': 'ng-value', 'type': 'numeric' }, - { 'orderDataType': 'ng-value', 'type': 'numeric' }, - { 'orderDataType': 'ng-value', 'type': 'numeric'} - ]}); + { 'orderDataType': 'ng-value', 'searchable': true , "defaultContent": "" }, + { 'orderDataType': 'ng-value', 'searchable': true , "defaultContent": "" }, + { 'orderDataType': 'ng-value', 'searchable': true , "defaultContent": ""}, + { 'orderDataType': 'ng-value', 'type': 'num' , "defaultContent": 0}, + { 'orderDataType': 'ng-value', 'type': 'num' , "defaultContent": 0}, + { 'orderDataType': 'ng-value', 'type': 'num' , "defaultContent": 0}, + { 'orderDataType': 'ng-value', 'type': 'num' , "defaultContent": 0}, + { 'orderDataType': 'ng-value', 'type': 'num' , "defaultContent": 0}, + { 'type': 'num' , "defaultContent": 0}, + { 'orderDataType': 'ng-value', 'type': 'num' , "defaultContent": 0}, + { 'type': 'string' , "defaultContent": ""} + ], + initComplete: function () { + var column = this.api().column([0]); + var select = $('') + .appendTo('#datanodefilter') + .on('change', function () { + var val = $.fn.dataTable.util.escapeRegex( + $(this).val()); + column.search(val ? 
'^' + val + '$' : '', true, false).draw(); + }); + console.log(select); + column.data().unique().sort().each(function (d, j) { + select.append(''); + }); + } + }); + renderHistogram(data); $('#ui-tabs a[href="#tab-datanode"]').tab('show'); }); })).fail(ajax_error_handler); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md index de45645db093c..a1da6c0ef4839 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md @@ -316,6 +316,17 @@ To trigger a runtime-refresh of the resource specified by \ on \ [arg1..argn] +### Router state dump + +To diagnose the current state of the routers, you can use the +dumpState command. It generates a text dump of the records in the +State Store. Since it uses the configuration to find and read the +state store, it is often easiest to use the machine where the routers +run. The command runs locally, so the routers do not have to be up to +use this command. + + [hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -dumpState + Client configuration -------------------- @@ -362,6 +373,20 @@ With this setting a user can interact with `ns-fed` as a regular namespace: This federated namespace can also be set as the default one at **core-site.xml** using `fs.defaultFS`. +NameNode configuration +-------------------- + +In order for the system to support data-locality, you must configure your NameNodes so that they will trust the routers to supply the user's client IP address. `dfs.namenode.ip-proxy-users` defines a comma separated list of users that are allowed to provide the client ip address via the caller context. 
+ +```xml + + + dfs.namenode.ip-proxy-users + hdfs + + +``` + Router configuration -------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/fs/contract/router/web/TestRouterWebHDFSContractRootDirectory.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/fs/contract/router/web/TestRouterWebHDFSContractRootDirectory.java index dd2bbff7d8da2..b1e4a05500fda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/fs/contract/router/web/TestRouterWebHDFSContractRootDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/fs/contract/router/web/TestRouterWebHDFSContractRootDirectory.java @@ -71,4 +71,9 @@ public void testRmRootRecursive() { public void testRmEmptyRootDirRecursive() { // It doesn't apply because we still have the mount points here } + + @Override + public void testSimpleRootListing() { + // It doesn't apply because DFSRouter dosn't support LISTSTATUS_BATCH. + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java index 31c1bffe1efd5..2017a45de1299 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java @@ -92,7 +92,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Supplier; +import java.util.function.Supplier; /** * Helper utilities for testing HDFS Federation. 
@@ -138,8 +138,15 @@ public static void verifyException(Object obj, String methodName, public static NamenodeStatusReport createNamenodeReport(String ns, String nn, HAServiceState state) { Random rand = new Random(); - NamenodeStatusReport report = new NamenodeStatusReport(ns, nn, - "localhost:" + rand.nextInt(10000), "localhost:" + rand.nextInt(10000), + return createNamenodeReport(ns, nn, "localhost:" + + rand.nextInt(10000), state); + } + + public static NamenodeStatusReport createNamenodeReport(String ns, String nn, + String rpcAddress, HAServiceState state) { + Random rand = new Random(); + NamenodeStatusReport report = new NamenodeStatusReport(ns, nn, rpcAddress, + "localhost:" + rand.nextInt(10000), "localhost:" + rand.nextInt(10000), "http", "testwebaddress-" + ns + nn); if (state == null) { @@ -187,7 +194,7 @@ public Boolean get() { } return false; } - }, 1000, 20 * 1000); + }, 1000, 60 * 1000); } /** @@ -474,7 +481,10 @@ public static RouterClient getAdminClient( /** * Add a mount table entry in some name services and wait until it is - * available. + * available. If there are multiple routers, + * {@link #createMountTableEntry(List, String, DestinationOrder, Collection)} + * should be used instead because the method does not refresh + * the mount tables of the other routers. * @param router Router to change. * @param mountPoint Name of the mount point. * @param order Order of the mount table entry. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MiniRouterDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MiniRouterDFSCluster.java index 0c9a2e0046c0b..896d08f2c49b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MiniRouterDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MiniRouterDFSCluster.java @@ -152,7 +152,7 @@ public class MiniRouterDFSCluster { /** * Router context. */ - public class RouterContext { + public static class RouterContext { private Router router; private FileContext fileContext; private String nameserviceId; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockNamenode.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockNamenode.java index 699ea92abb598..a4755c20fcae4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockNamenode.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockNamenode.java @@ -46,6 +46,7 @@ import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceStatus; @@ -58,6 +59,7 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage; import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import 
org.apache.hadoop.hdfs.protocol.HdfsFileStatus; @@ -88,7 +90,7 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.io.Text; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC.Server; import org.apache.hadoop.ipc.RemoteException; @@ -172,7 +174,7 @@ public HAServiceStatus answer(InvocationOnMock invocation) */ private void setupRPCServer(final Configuration conf) throws IOException { RPC.setProtocolEngine( - conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class); + conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine2.class); ClientNamenodeProtocolServerSideTranslatorPB clientNNProtoXlator = new ClientNamenodeProtocolServerSideTranslatorPB(mockNn); @@ -533,11 +535,14 @@ private static HdfsFileStatus getMockHdfsFileStatus( */ private static LocatedBlock getMockLocatedBlock(final String nsId) { LocatedBlock lb = mock(LocatedBlock.class); - when(lb.getCachedLocations()).thenReturn(new DatanodeInfo[0]); + when(lb.getCachedLocations()).thenReturn(DatanodeInfo.EMPTY_ARRAY); DatanodeID nodeId = new DatanodeID("localhost", "localhost", "dn0", 1111, 1112, 1113, 1114); DatanodeInfo dnInfo = new DatanodeDescriptor(nodeId); - when(lb.getLocations()).thenReturn(new DatanodeInfo[] {dnInfo}); + DatanodeInfoWithStorage datanodeInfoWithStorage = + new DatanodeInfoWithStorage(dnInfo, "storageID", StorageType.DEFAULT); + when(lb.getLocations()) + .thenReturn(new DatanodeInfoWithStorage[] {datanodeInfoWithStorage}); ExtendedBlock eb = mock(ExtendedBlock.class); when(eb.getBlockPoolId()).thenReturn(nsId); when(lb.getBlock()).thenReturn(eb); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java index 
131dd74b8e087..39334250bc837 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java @@ -327,16 +327,30 @@ public PathLocation getDestinationForPath(String path) throws IOException { @Override public List getMountPoints(String path) throws IOException { - // Mounts only supported under root level - if (!path.equals("/")) { - return null; - } List mounts = new ArrayList<>(); - for (String mount : this.locations.keySet()) { - if (mount.length() > 1) { - // Remove leading slash, this is the behavior of the mount tree, - // return only names. - mounts.add(mount.replace("/", "")); + // for root path search, returning all downstream root level mapping + if (path.equals("/")) { + // Mounts only supported under root level + for (String mount : this.locations.keySet()) { + if (mount.length() > 1) { + // Remove leading slash, this is the behavior of the mount tree, + // return only names. 
+ mounts.add(mount.replace("/", "")); + } + } + } else { + // a simplified version of MountTableResolver implementation + for (String key : this.locations.keySet()) { + if (key.startsWith(path)) { + String child = key.substring(path.length()); + if (child.length() > 0) { + // only take children so remove parent path and / + mounts.add(key.substring(path.length()+1)); + } + } + } + if (mounts.size() == 0) { + mounts = null; } } return mounts; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestMetricsBase.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestMetricsBase.java index 4759d05f820dc..b01e22006d776 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestMetricsBase.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestMetricsBase.java @@ -259,4 +259,15 @@ private MembershipState createRegistration(String ns, String nn, assertTrue(response.getResult()); return record; } + + // refresh namenode registration for new attributes + public boolean refreshNamenodeRegistration(NamenodeHeartbeatRequest request) + throws IOException { + boolean result = membershipStore.namenodeHeartbeat(request).getResult(); + membershipStore.loadCache(true); + MembershipNamenodeResolver resolver = + (MembershipNamenodeResolver) router.getNamenodeResolver(); + resolver.loadCache(true); + return result; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestRBFMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestRBFMetrics.java index e1d1d8ec28c0f..eed41c7ba396b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestRBFMetrics.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestRBFMetrics.java @@ -19,11 +19,13 @@ import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.getBean; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertFalse; import java.io.IOException; +import java.math.BigInteger; import java.util.Iterator; import java.util.List; @@ -31,6 +33,7 @@ import org.apache.commons.collections.ListUtils; import org.apache.hadoop.hdfs.server.federation.router.Router; +import org.apache.hadoop.hdfs.server.federation.store.protocol.NamenodeHeartbeatRequest; import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; import org.apache.hadoop.hdfs.server.federation.store.records.MembershipStats; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; @@ -58,6 +61,7 @@ public void testClusterStatsJMX() FederationMBean federationBean = getBean(FEDERATION_BEAN, FederationMBean.class); validateClusterStatsFederationBean(federationBean); + testCapacity(federationBean); RouterMBean routerBean = getBean(ROUTER_BEAN, RouterMBean.class); validateClusterStatsRouterBean(routerBean); } @@ -326,4 +330,34 @@ private void validateClusterStatsRouterBean(RouterMBean bean) { assertTrue(bean.getHostAndPort().length() > 0); assertFalse(bean.isSecurityEnabled()); } + + private void testCapacity(FederationMBean bean) throws IOException { + List memberships = getActiveMemberships(); + assertTrue(memberships.size() > 1); + + BigInteger availableCapacity = BigInteger.valueOf(0); + BigInteger totalCapacity = BigInteger.valueOf(0); + BigInteger unitCapacity = BigInteger.valueOf(Long.MAX_VALUE); + for (MembershipState mock : memberships) { + MembershipStats stats = mock.getStats(); + 
stats.setTotalSpace(Long.MAX_VALUE); + stats.setAvailableSpace(Long.MAX_VALUE); + // reset stats to make the new value persistent + mock.setStats(stats); + // write back the new namenode information to state store + assertTrue(refreshNamenodeRegistration( + NamenodeHeartbeatRequest.newInstance(mock))); + totalCapacity = totalCapacity.add(unitCapacity); + availableCapacity = availableCapacity.add(unitCapacity); + } + + // for local cache update + assertEquals(totalCapacity, bean.getTotalCapacityBigInt()); + // not equal since overflow happened. + assertNotEquals(totalCapacity, BigInteger.valueOf(bean.getTotalCapacity())); + assertEquals(availableCapacity, bean.getRemainingCapacityBigInt()); + // not equal since overflow happened. + assertNotEquals(availableCapacity, + BigInteger.valueOf(bean.getRemainingCapacity())); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestFederationNamespaceInfo.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestFederationNamespaceInfo.java new file mode 100644 index 0000000000000..72681230c8b0d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestFederationNamespaceInfo.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.resolver; + +import org.junit.Test; + +import java.util.Set; +import java.util.TreeSet; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestFederationNamespaceInfo { + /** + * Regression test for HDFS-15900. + */ + @Test + public void testHashCode() { + Set set = new TreeSet<>(); + // set an empty bpId first + set.add(new FederationNamespaceInfo("", "nn1", "ns1")); + set.add(new FederationNamespaceInfo("bp1", "nn2", "ns1")); + assertThat(set).hasSize(2); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java index 32c54d9cb738c..15d3caa5e4e58 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java @@ -729,4 +729,41 @@ public void testInvalidateCache() throws Exception { assertEquals("2->/testInvalidateCache/foo", mountTable .getDestinationForPath("/testInvalidateCache/foo").toString()); } -} \ No newline at end of file + + /** + * Test location cache hit when get destination for path. 
+ */ + @Test + public void testLocationCacheHitrate() throws Exception { + List entries = new ArrayList<>(); + + // Add entry and test location cache + Map map1 = getMountTableEntry("1", "/testlocationcache"); + MountTable entry1 = MountTable.newInstance("/testlocationcache", map1); + entries.add(entry1); + + Map map2 = getMountTableEntry("2", + "/anothertestlocationcache"); + MountTable entry2 = MountTable.newInstance("/anothertestlocationcache", + map2); + entries.add(entry2); + + mountTable.refreshEntries(entries); + mountTable.getLocCacheAccess().reset(); + mountTable.getLocCacheMiss().reset(); + assertEquals("1->/testlocationcache", + mountTable.getDestinationForPath("/testlocationcache").toString()); + assertEquals("2->/anothertestlocationcache", + mountTable.getDestinationForPath("/anothertestlocationcache") + .toString()); + + assertEquals(2, mountTable.getLocCacheMiss().intValue()); + assertEquals("1->/testlocationcache", + mountTable.getDestinationForPath("/testlocationcache").toString()); + assertEquals(3, mountTable.getLocCacheAccess().intValue()); + + // Cleanup before exit + mountTable.removeEntry("/testlocationcache"); + mountTable.removeEntry("/anothertestlocationcache"); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestNamenodeResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestNamenodeResolver.java index 932c861892e67..df80037c69917 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestNamenodeResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestNamenodeResolver.java @@ -307,6 +307,23 @@ public void testCacheUpdateOnNamenodeStateUpdate() throws IOException { FederationNamenodeServiceState.ACTIVE, namenode1.getState()); } + @Test + public void 
testCacheUpdateOnNamenodeStateUpdateWithIp() + throws IOException { + final String rpcAddress = "127.0.0.1:10000"; + assertTrue(namenodeResolver.registerNamenode( + createNamenodeReport(NAMESERVICES[0], NAMENODES[0], rpcAddress, + HAServiceState.STANDBY))); + stateStore.refreshCaches(true); + + InetSocketAddress inetAddr = getInetSocketAddress(rpcAddress); + namenodeResolver.updateActiveNamenode(NAMESERVICES[0], inetAddr); + FederationNamenodeContext namenode = + namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0]).get(0); + assertEquals("The namenode state should be ACTIVE post update.", + FederationNamenodeServiceState.ACTIVE, namenode.getState()); + } + /** * Creates InetSocketAddress from the given RPC address. * @param rpcAddr RPC address (host:port). diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java index 192c8078d44eb..bd3b60ba9195e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java @@ -70,15 +70,21 @@ public void checkDisableQuota() { public void testSetQuota() throws Exception { long nsQuota = 1024; long ssQuota = 1024; + Quota quotaModule = router.getRpcServer().getQuotaModule(); - try { - Quota quotaModule = router.getRpcServer().getQuotaModule(); - quotaModule.setQuota("/test", nsQuota, ssQuota, null, false); - fail("The setQuota call should fail."); - } catch (IOException ioe) { - GenericTestUtils.assertExceptionContains( - "The quota system is disabled in Router.", ioe); - } + // don't checkMountEntry called by RouterAdminServer#synchronizeQuota + LambdaTestUtils.intercept( + IOException.class, + "The quota 
system is disabled in Router.", + "The setQuota call should fail.", + () -> quotaModule.setQuota("/test", nsQuota, ssQuota, null, false)); + + // do checkMountEntry called by RouterClientProtocol#setQuota + LambdaTestUtils.intercept( + IOException.class, + "The quota system is disabled in Router.", + "The setQuota call should fail.", + () -> quotaModule.setQuota("/test", nsQuota, ssQuota, null, true)); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java index 97cf94dc85f62..44c0fc7ed3095 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java @@ -274,9 +274,18 @@ public void testNamenodeHeartBeatEnableDefault() throws IOException { */ private void checkNamenodeHeartBeatEnableDefault(boolean enable) throws IOException { - final Router router = new Router(); - try { + try (Router router = new Router()) { + // Use default config Configuration config = new HdfsConfiguration(); + // bind to any available port + config.set(RBFConfigKeys.DFS_ROUTER_RPC_BIND_HOST_KEY, "0.0.0.0"); + config.set(RBFConfigKeys.DFS_ROUTER_RPC_ADDRESS_KEY, "127.0.0.1:0"); + config.set(RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY, "127.0.0.1:0"); + config.set(RBFConfigKeys.DFS_ROUTER_ADMIN_BIND_HOST_KEY, "0.0.0.0"); + config.set(RBFConfigKeys.DFS_ROUTER_HTTP_ADDRESS_KEY, "127.0.0.1:0"); + config.set(RBFConfigKeys.DFS_ROUTER_HTTPS_ADDRESS_KEY, "127.0.0.1:0"); + config.set(RBFConfigKeys.DFS_ROUTER_HTTP_BIND_HOST_KEY, "0.0.0.0"); + config.setBoolean(RBFConfigKeys.DFS_ROUTER_HEARTBEAT_ENABLE, enable); router.init(config); if (enable) { @@ -284,8 +293,6 @@ private void checkNamenodeHeartBeatEnableDefault(boolean enable) 
} else { assertNull(router.getNamenodeHeartbeatServices()); } - } finally { - router.close(); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java index 303b3f6433a62..559a827dde5d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -42,16 +42,20 @@ import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster; import org.apache.hadoop.hdfs.server.federation.metrics.RBFMetrics; import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServiceState; import org.apache.hadoop.hdfs.server.federation.resolver.MountTableManager; import org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver; import org.apache.hadoop.hdfs.server.federation.resolver.MultipleDestinationMountTableResolver; import org.apache.hadoop.hdfs.server.federation.resolver.RemoteLocation; import org.apache.hadoop.hdfs.server.federation.resolver.order.DestinationOrder; import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; +import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver; import org.apache.hadoop.hdfs.server.federation.store.impl.DisabledNameserviceStoreImpl; import org.apache.hadoop.hdfs.server.federation.store.impl.MountTableStoreImpl; import 
org.apache.hadoop.hdfs.server.federation.store.protocol.GetMountTableEntriesRequest; import org.apache.hadoop.hdfs.server.federation.store.protocol.GetMountTableEntriesResponse; +import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; +import org.apache.hadoop.hdfs.server.federation.store.records.MockStateStoreDriver; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; import org.apache.hadoop.hdfs.tools.federation.RouterAdmin; import org.apache.hadoop.security.UserGroupInformation; @@ -64,7 +68,7 @@ import org.junit.Test; import org.mockito.Mockito; -import com.google.common.base.Supplier; +import java.util.function.Supplier; /** * Tests Router admin commands. @@ -671,6 +675,7 @@ public void testInvalidArgumentMessage() throws Exception { + " ]\n" + "\t[-clrQuota ]\n" + "\t[-clrStorageTypeQuota ]\n" + + "\t[-dumpState]\n" + "\t[-safemode enter | leave | get]\n" + "\t[-nameservice enable | disable ]\n" + "\t[-getDisabledNameservices]\n" @@ -932,6 +937,10 @@ public void testSafeModeStatus() throws Exception { // ensure the Router become RUNNING state waitState(RouterServiceState.RUNNING); assertFalse(routerContext.getRouter().getSafemodeService().isInSafeMode()); + final RouterClientProtocol clientProtocol = + routerContext.getRouter().getRpcServer().getClientProtocolModule(); + assertEquals(HAServiceState.ACTIVE, clientProtocol.getHAServiceState()); + assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "enter" })); @@ -944,6 +953,7 @@ public void testSafeModeStatus() throws Exception { // verify state using RBFMetrics assertEquals(RouterServiceState.SAFEMODE.toString(), jsonString); assertTrue(routerContext.getRouter().getSafemodeService().isInSafeMode()); + assertEquals(HAServiceState.STANDBY, clientProtocol.getHAServiceState()); System.setOut(new PrintStream(out)); assertEquals(0, @@ -955,6 +965,7 @@ public void testSafeModeStatus() throws Exception { // verify state 
assertEquals(RouterServiceState.RUNNING.toString(), jsonString); assertFalse(routerContext.getRouter().getSafemodeService().isInSafeMode()); + assertEquals(HAServiceState.ACTIVE, clientProtocol.getHAServiceState()); out.reset(); assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "get" })); @@ -1553,6 +1564,91 @@ public void testErrorFaultTolerant() throws Exception { assertEquals(0, ToolRunner.run(admin, argv)); } + @Test + public void testRefreshCallQueue() throws Exception { + + System.setOut(new PrintStream(out)); + System.setErr(new PrintStream(err)); + + String[] argv = new String[]{"-refreshCallQueue"}; + assertEquals(0, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains("Refresh call queue successfully")); + + argv = new String[]{}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains("-refreshCallQueue")); + + argv = new String[]{"-refreshCallQueue", "redundant"}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(err.toString().contains("No arguments allowed")); + } + + @Test + public void testDumpState() throws Exception { + MockStateStoreDriver driver = new MockStateStoreDriver(); + driver.clearAll(); + // Add two records for block1 + driver.put(MembershipState.newInstance("routerId", "ns1", + "ns1-ha1", "cluster1", "block1", "rpc1", + "service1", "lifeline1", "https", "nn01", + FederationNamenodeServiceState.ACTIVE, false), false, false); + driver.put(MembershipState.newInstance("routerId", "ns1", + "ns1-ha2", "cluster1", "block1", "rpc2", + "service2", "lifeline2", "https", "nn02", + FederationNamenodeServiceState.STANDBY, false), false, false); + Configuration conf = new Configuration(); + conf.setClass(RBFConfigKeys.FEDERATION_STORE_DRIVER_CLASS, + MockStateStoreDriver.class, + StateStoreDriver.class); + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + try (PrintStream stream = new PrintStream(buffer)) { + RouterAdmin.dumpStateStore(conf, stream); + } + final 
String expected = + "---- DisabledNameservice ----\n" + + "\n" + + "---- MembershipState ----\n" + + " ns1-ha1-ns1-routerId:\n" + + " dateCreated: XXX\n" + + " dateModified: XXX\n" + + " routerId: \"routerId\"\n" + + " nameserviceId: \"ns1\"\n" + + " namenodeId: \"ns1-ha1\"\n" + + " clusterId: \"cluster1\"\n" + + " blockPoolId: \"block1\"\n" + + " webAddress: \"nn01\"\n" + + " rpcAddress: \"rpc1\"\n" + + " serviceAddress: \"service1\"\n" + + " lifelineAddress: \"lifeline1\"\n" + + " state: \"ACTIVE\"\n" + + " isSafeMode: false\n" + + " webScheme: \"https\"\n" + + " \n" + + " ns1-ha2-ns1-routerId:\n" + + " dateCreated: XXX\n" + + " dateModified: XXX\n" + + " routerId: \"routerId\"\n" + + " nameserviceId: \"ns1\"\n" + + " namenodeId: \"ns1-ha2\"\n" + + " clusterId: \"cluster1\"\n" + + " blockPoolId: \"block1\"\n" + + " webAddress: \"nn02\"\n" + + " rpcAddress: \"rpc2\"\n" + + " serviceAddress: \"service2\"\n" + + " lifelineAddress: \"lifeline2\"\n" + + " state: \"STANDBY\"\n" + + " isSafeMode: false\n" + + " webScheme: \"https\"\n" + + " \n" + + "\n" + + "---- MountTable ----\n" + + "\n" + + "---- RouterState ----"; + // Replace the time values with XXX + assertEquals(expected, + buffer.toString().trim().replaceAll("[0-9]{4,}+", "XXX")); + } + private void addMountTable(String src, String nsId, String dst) throws Exception { String[] argv = new String[] {"-add", src, nsId, dst}; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFaultTolerant.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFaultTolerant.java index 5e0e11752341d..bf571e2ff790c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFaultTolerant.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFaultTolerant.java @@ -248,6 +248,7 @@ 
private void testWriteWithFailedSubcluster(final DestinationOrder order) LOG.info("Setup {} with order {}", mountPoint, order); createMountTableEntry( getRandomRouter(), mountPoint, order, namenodes.keySet()); + refreshRoutersCaches(routers); LOG.info("Write in {} should succeed writing in ns0 and fail for ns1", mountPath); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFederatedState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFederatedState.java new file mode 100644 index 0000000000000..60ab4b2c0bc48 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterFederatedState.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.federation.router; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.ipc.AlignmentContext; +import org.apache.hadoop.ipc.ClientId; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RpcConstants; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos; +import org.apache.hadoop.hdfs.federation.protocol.proto.HdfsServerFederationProtos.RouterFederatedStateProto; +import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.util.ProtoUtil; +import org.junit.Test; + +import static org.junit.Assert.*; + + +public class TestRouterFederatedState { + + @Test + public void testRpcRouterFederatedState() throws InvalidProtocolBufferException { + byte[] uuid = ClientId.getClientId(); + Map<String, Long> expectedStateIds = new HashMap<String, Long>() {{ + put("namespace1", 11L); + put("namespace2", 22L); + }}; + + AlignmentContext alignmentContext = new AlignmentContextWithRouterState(expectedStateIds); + + RpcHeaderProtos.RpcRequestHeaderProto header = ProtoUtil.makeRpcRequestHeader( + RPC.RpcKind.RPC_PROTOCOL_BUFFER, RpcHeaderProtos.RpcRequestHeaderProto.OperationProto.RPC_FINAL_PACKET, 0, + RpcConstants.INVALID_RETRY_COUNT, uuid, alignmentContext); + + Map<String, Long> stateIdsFromHeader = + RouterFederatedStateProto.parseFrom( + header.getRouterFederatedState().toByteArray() + ).getNamespaceStateIdsMap(); + + assertEquals(expectedStateIds, stateIdsFromHeader); + } + + private static class AlignmentContextWithRouterState implements AlignmentContext { + + Map<String, Long> routerFederatedState; + + public AlignmentContextWithRouterState(Map<String, Long> namespaceStates) { + this.routerFederatedState = namespaceStates; + } + + @Override + public void updateRequestState(RpcHeaderProtos.RpcRequestHeaderProto.Builder header) { + RouterFederatedStateProto fedState = RouterFederatedStateProto + .newBuilder() + .putAllNamespaceStateIds(routerFederatedState) + .build(); + 
header.setRouterFederatedState(fedState.toByteString()); + } + + @Override + public void updateResponseState(RpcHeaderProtos.RpcResponseHeaderProto.Builder header) {} + + @Override + public void receiveResponseState(RpcHeaderProtos.RpcResponseHeaderProto header) {} + + @Override + public long receiveRequestState(RpcHeaderProtos.RpcRequestHeaderProto header, long threshold) throws IOException { + return 0; + } + + @Override + public long getLastSeenStateId() { + return 0; + } + + @Override + public boolean isCoordinatedCall(String protocolName, String method) { + return false; + } + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTable.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTable.java index 77ec47a7a7f12..6df94c0fdd29a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTable.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTable.java @@ -290,6 +290,37 @@ public void testGetMountPointStatusWithIOException() } } + /** + * Verify the getMountPointStatus result of passing in different parameters. 
+ */ + @Test + public void testGetMountPointStatus() throws IOException { + MountTable addEntry = MountTable.newInstance("/testA/testB/testC/testD", + Collections.singletonMap("ns0", "/testA/testB/testC/testD")); + assertTrue(addMountTable(addEntry)); + RouterClientProtocol clientProtocol = new RouterClientProtocol( + nnFs0.getConf(), routerContext.getRouter().getRpcServer()); + String src = "/"; + String child = "testA"; + Path childPath = new Path(src, child); + HdfsFileStatus dirStatus = + clientProtocol.getMountPointStatus(childPath.toString(), 0, 0); + assertEquals(child, dirStatus.getLocalName()); + + String src1 = "/testA"; + String child1 = "testB"; + Path childPath1 = new Path(src1, child1); + HdfsFileStatus dirStatus1 = + clientProtocol.getMountPointStatus(childPath1.toString(), 0, 0); + assertEquals(child1, dirStatus1.getLocalName()); + + String src2 = "/testA/testB"; + String child2 = "testC"; + Path childPath2 = new Path(src2, child2); + HdfsFileStatus dirStatus2 = + clientProtocol.getMountPointStatus(childPath2.toString(), 0, 0); + assertEquals(child2, dirStatus2.getLocalName()); + } /** * GetListing of testPath through router. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java new file mode 100644 index 0000000000000..0cfdaea365651 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java @@ -0,0 +1,344 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.router; + +import static org.apache.hadoop.fs.contract.router.SecurityConfUtil.initSecurity; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.hdfs.server.federation.FederationTestUtils; +import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; +import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext; +import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; +import org.apache.hadoop.hdfs.server.federation.resolver.FileSubclusterResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.MountTableManager; +import org.apache.hadoop.hdfs.server.federation.store.RouterStore; +import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver; +import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl; +import org.apache.hadoop.hdfs.server.federation.store.protocol.AddMountTableEntryRequest; +import 
org.apache.hadoop.hdfs.server.federation.store.protocol.AddMountTableEntryResponse; +import org.apache.hadoop.hdfs.server.federation.store.protocol.GetMountTableEntriesRequest; +import org.apache.hadoop.hdfs.server.federation.store.protocol.RemoveMountTableEntryRequest; +import org.apache.hadoop.hdfs.server.federation.store.protocol.RemoveMountTableEntryResponse; +import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateMountTableEntryRequest; +import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateMountTableEntryResponse; +import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.apache.hadoop.service.Service.STATE; +import org.apache.hadoop.util.Time; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This test class verifies that mount table cache is updated on all the routers + * when MountTableRefreshService and security mode are enabled and there is a + * change in mount table entries. 
+ */ +public class TestRouterMountTableCacheRefreshSecure { + + private static final Logger LOG = + LoggerFactory.getLogger(TestRouterMountTableCacheRefreshSecure.class); + + private static TestingServer curatorTestingServer; + private static MiniRouterDFSCluster cluster; + private static RouterContext routerContext; + private static MountTableManager mountTableManager; + + @BeforeClass + public static void setUp() throws Exception { + curatorTestingServer = new TestingServer(); + curatorTestingServer.start(); + final String connectString = curatorTestingServer.getConnectString(); + int numNameservices = 2; + Configuration conf = new RouterConfigBuilder().refreshCache().admin().rpc() + .heartbeat().build(); + conf.addResource(initSecurity()); + conf.setClass(RBFConfigKeys.FEDERATION_STORE_DRIVER_CLASS, + StateStoreZooKeeperImpl.class, StateStoreDriver.class); + conf.setClass(RBFConfigKeys.FEDERATION_FILE_RESOLVER_CLIENT_CLASS, + RBFConfigKeys.FEDERATION_FILE_RESOLVER_CLIENT_CLASS_DEFAULT, + FileSubclusterResolver.class); + conf.set(CommonConfigurationKeys.ZK_ADDRESS, connectString); + conf.setBoolean(RBFConfigKeys.DFS_ROUTER_STORE_ENABLE, true); + cluster = new MiniRouterDFSCluster(false, numNameservices, conf); + cluster.addRouterOverrides(conf); + cluster.startCluster(conf); + cluster.startRouters(); + cluster.waitClusterUp(); + routerContext = cluster.getRandomRouter(); + RouterStore routerStateManager = + routerContext.getRouter().getRouterStateManager(); + mountTableManager = routerContext.getAdminClient().getMountTableManager(); + // wait for one minute for all the routers to get registered + FederationTestUtils.waitRouterRegistered(routerStateManager, + numNameservices, 60000); + } + + @AfterClass + public static void destory() { + try { + curatorTestingServer.close(); + cluster.shutdown(); + } catch (IOException e) { + LOG.error("Found error when destroy, caused by: {}", e.getMessage()); + } + } + + @After + public void tearDown() throws IOException { + 
clearEntries(); + } + + private void clearEntries() throws IOException { + List<MountTable> result = getMountTableEntries(); + for (MountTable mountTable : result) { + RemoveMountTableEntryResponse removeMountTableEntry = mountTableManager. + removeMountTableEntry(RemoveMountTableEntryRequest. + newInstance(mountTable.getSourcePath())); + assertTrue(removeMountTableEntry.getStatus()); + } + } + + /** + * addMountTableEntry API should internally update the cache on all the + * routers. + */ + @Test + public void testMountTableEntriesCacheUpdatedAfterAddAPICall() + throws IOException { + // Add a new mount table entry through one router + String srcPath = "/addPath"; + MountTable newEntry = MountTable.newInstance(srcPath, + Collections.singletonMap("ns0", "/addPathDest"), Time.now(), + Time.now()); + addMountTableEntry(mountTableManager, newEntry); + + // When add entry is done, every router must have updated its mount table + // entry + List<RouterContext> routers = getRouters(); + for (RouterContext rc : routers) { + List<MountTable> result = getMountTableEntries(rc.getAdminClient() + .getMountTableManager()); + assertEquals(1, result.size()); + MountTable mountTableResult = result.get(0); + assertEquals(srcPath, mountTableResult.getSourcePath()); + } + } + + /** + * removeMountTableEntry API should internally update the cache on all the + * routers. 
+ */ + @Test + public void testMountTableEntriesCacheUpdatedAfterRemoveAPICall() + throws IOException { + // add + String srcPath = "/removePathSrc"; + MountTable newEntry = MountTable.newInstance(srcPath, + Collections.singletonMap("ns0", "/removePathDest"), Time.now(), + Time.now()); + addMountTableEntry(mountTableManager, newEntry); + + // When add entry is done, every router must have updated its mount + // table entry + List<RouterContext> routers = getRouters(); + for (RouterContext rc : routers) { + List<MountTable> result = + getMountTableEntries(rc.getAdminClient().getMountTableManager()); + assertEquals(1, result.size()); + MountTable mountTableResult = result.get(0); + assertEquals(srcPath, mountTableResult.getSourcePath()); + } + + // remove + RemoveMountTableEntryResponse removeMountTableEntry = + mountTableManager.removeMountTableEntry( + RemoveMountTableEntryRequest.newInstance(srcPath)); + assertTrue(removeMountTableEntry.getStatus()); + + // When remove entry is done, every router must have removed its mount + // table entry + routers = getRouters(); + for (RouterContext rc : routers) { + List<MountTable> result = + getMountTableEntries(rc.getAdminClient().getMountTableManager()); + assertEquals(0, result.size()); + } + } + + /** + * updateMountTableEntry API should internally update the cache on all the + * routers. 
+ */ + @Test + public void testMountTableEntriesCacheUpdatedAfterUpdateAPICall() + throws IOException { + // add + String srcPath = "/updatePathSrc"; + String dstPath = "/updatePathDest"; + String nameServiceId = "ns0"; + MountTable newEntry = MountTable.newInstance(srcPath, + Collections.singletonMap("ns0", "/updatePathDest"), Time.now(), + Time.now()); + addMountTableEntry(mountTableManager, newEntry); + + // When add entry is done, every router must have updated its mount table + // entry + List<RouterContext> routers = getRouters(); + for (RouterContext rc : routers) { + List<MountTable> result = + getMountTableEntries(rc.getAdminClient().getMountTableManager()); + assertEquals(1, result.size()); + MountTable mountTableResult = result.get(0); + assertEquals(srcPath, mountTableResult.getSourcePath()); + assertEquals(nameServiceId, + mountTableResult.getDestinations().get(0).getNameserviceId()); + assertEquals(dstPath, + mountTableResult.getDestinations().get(0).getDest()); + } + + // update + String key = "ns1"; + String value = "/updatePathDest2"; + MountTable updateEntry = MountTable.newInstance(srcPath, + Collections.singletonMap(key, value), Time.now(), Time.now()); + UpdateMountTableEntryResponse updateMountTableEntry = + mountTableManager.updateMountTableEntry( + UpdateMountTableEntryRequest.newInstance(updateEntry)); + assertTrue(updateMountTableEntry.getStatus()); + MountTable updatedMountTable = getMountTableEntry(srcPath); + assertNotNull("Updated mount table entrty cannot be null", + updatedMountTable); + + // When update entry is done, every router must return the updated mount + // table entry, so verify the entry read through each router itself + routers = getRouters(); + for (RouterContext rc : routers) { + List<MountTable> result = + getMountTableEntries(rc.getAdminClient().getMountTableManager()); + assertEquals(1, result.size()); + MountTable mountTableResult = result.get(0); + assertEquals(srcPath, mountTableResult.getSourcePath()); + assertEquals(key, mountTableResult.getDestinations().get(0) + .getNameserviceId()); + 
assertEquals(value, mountTableResult.getDestinations().get(0).getDest()); + } + } + + /** + * After caching RouterClient if router goes down, refresh should be + * successful on other available router. The router which is not running + * should be ignored. + */ + @Test + public void testCachedRouterClientBehaviourAfterRouterStoped() + throws IOException { + String srcPath = "/addPathClientCache"; + MountTable newEntry = MountTable.newInstance(srcPath, + Collections.singletonMap("ns0", "/addPathClientCacheDest"), Time.now(), + Time.now()); + addMountTableEntry(mountTableManager, newEntry); + + // When add entry is done, every router must have updated its mount table + // entry + List<RouterContext> routers = getRouters(); + for (RouterContext rc : routers) { + List<MountTable> result = + getMountTableEntries(rc.getAdminClient().getMountTableManager()); + assertEquals(1, result.size()); + MountTable mountTableResult = result.get(0); + assertEquals(srcPath, mountTableResult.getSourcePath()); + } + + // Stop one router other than the one this test talks to + for (RouterContext rc : routers) { + InetSocketAddress adminServerAddress = rc.getRouter() + .getAdminServerAddress(); + if (!routerContext.getRouter().getAdminServerAddress() + .equals(adminServerAddress)) { + cluster.stopRouter(rc); + break; + } + } + + srcPath = "/addPathClientCache2"; + newEntry = MountTable.newInstance(srcPath, + Collections.singletonMap("ns0", "/addPathClientCacheDest2"), Time.now(), + Time.now()); + addMountTableEntry(mountTableManager, newEntry); + for (RouterContext rc : getRouters()) { + List<MountTable> result = + getMountTableEntries(rc.getAdminClient().getMountTableManager()); + assertEquals(2, result.size()); + } + } + + private List<RouterContext> getRouters() { + List<RouterContext> result = new ArrayList<>(); + for (RouterContext rc : cluster.getRouters()) { + if (rc.getRouter().getServiceState() == STATE.STARTED) { + result.add(rc); + } + } + return result; + } + + private MountTable getMountTableEntry(String srcPath) throws IOException { + List<MountTable> mountTableEntries = 
getMountTableEntries(); + for (MountTable mountTable : mountTableEntries) { + String sourcePath = mountTable.getSourcePath(); + if (srcPath.equals(sourcePath)) { + return mountTable; + } + } + return null; + } + + private void addMountTableEntry(MountTableManager mountTableMgr, + MountTable newEntry) throws IOException { + AddMountTableEntryRequest addRequest = + AddMountTableEntryRequest.newInstance(newEntry); + AddMountTableEntryResponse addResponse = + mountTableMgr.addMountTableEntry(addRequest); + assertTrue(addResponse.getStatus()); + } + + private List<MountTable> getMountTableEntries() throws IOException { + return getMountTableEntries(mountTableManager); + } + + private List<MountTable> getMountTableEntries( + MountTableManager mountTableManagerParam) throws IOException { + GetMountTableEntriesRequest request = + GetMountTableEntriesRequest.newInstance("/"); + return mountTableManagerParam.getMountTableEntries(request).getEntries(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterNetworkTopologyServlet.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterNetworkTopologyServlet.java new file mode 100644 index 0000000000000..e120c69007ee5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterNetworkTopologyServlet.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.router; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; +import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster; +import org.apache.hadoop.hdfs.server.federation.resolver.MultipleDestinationMountTableResolver; +import org.apache.hadoop.io.IOUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Iterator; +import java.util.Map; + +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_HTTP_ENABLE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestRouterNetworkTopologyServlet { + + private static StateStoreDFSCluster clusterWithDatanodes; + private static StateStoreDFSCluster clusterNoDatanodes; + + @BeforeClass + public static void setUp() throws Exception { + // Builder configuration + Configuration routerConf = + new RouterConfigBuilder().stateStore().admin().quota().rpc().build(); + routerConf.set(DFS_ROUTER_HTTP_ENABLE, "true"); + Configuration hdfsConf = new Configuration(false); + + // Build and start a federated cluster + clusterWithDatanodes = new StateStoreDFSCluster(false, 2, + MultipleDestinationMountTableResolver.class); + clusterWithDatanodes.addNamenodeOverrides(hdfsConf); + 
clusterWithDatanodes.addRouterOverrides(routerConf); + clusterWithDatanodes.setNumDatanodesPerNameservice(9); + clusterWithDatanodes.setIndependentDNs(); + clusterWithDatanodes.setRacks( + new String[] {"/rack1", "/rack1", "/rack1", "/rack2", "/rack2", + "/rack2", "/rack3", "/rack3", "/rack3", "/rack4", "/rack4", + "/rack4", "/rack5", "/rack5", "/rack5", "/rack6", "/rack6", + "/rack6"}); + clusterWithDatanodes.startCluster(); + clusterWithDatanodes.startRouters(); + clusterWithDatanodes.waitClusterUp(); + clusterWithDatanodes.waitActiveNamespaces(); + + // Build and start a federated cluster + clusterNoDatanodes = new StateStoreDFSCluster(false, 2, + MultipleDestinationMountTableResolver.class); + clusterNoDatanodes.addNamenodeOverrides(hdfsConf); + clusterNoDatanodes.addRouterOverrides(routerConf); + clusterNoDatanodes.setNumDatanodesPerNameservice(0); + clusterNoDatanodes.setIndependentDNs(); + clusterNoDatanodes.startCluster(); + clusterNoDatanodes.startRouters(); + clusterNoDatanodes.waitClusterUp(); + clusterNoDatanodes.waitActiveNamespaces(); + } + + @Test + public void testPrintTopologyTextFormat() throws Exception { + // get http Address + String httpAddress = clusterWithDatanodes.getRandomRouter().getRouter() + .getHttpServerAddress().toString(); + + // send http request + URL url = new URL("http:/" + httpAddress + "/topology"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setReadTimeout(20000); + conn.setConnectTimeout(20000); + conn.connect(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copyBytes(conn.getInputStream(), out, 4096, true); + StringBuilder sb = + new StringBuilder("-- Network Topology -- \n"); + sb.append(out); + sb.append("\n-- Network Topology -- "); + String topology = sb.toString(); + + // assert rack info + assertTrue(topology.contains("/ns0/rack1")); + assertTrue(topology.contains("/ns0/rack2")); + assertTrue(topology.contains("/ns0/rack3")); + 
assertTrue(topology.contains("/ns1/rack4")); + assertTrue(topology.contains("/ns1/rack5")); + assertTrue(topology.contains("/ns1/rack6")); + + // assert node number + assertEquals(18, + topology.split("127.0.0.1").length - 1); + } + + @Test + public void testPrintTopologyJsonFormat() throws Exception { + // get http Address + String httpAddress = clusterWithDatanodes.getRandomRouter().getRouter() + .getHttpServerAddress().toString(); + + // send http request + URL url = new URL("http:/" + httpAddress + "/topology"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setReadTimeout(20000); + conn.setConnectTimeout(20000); + conn.setRequestProperty("Accept", "application/json"); + conn.connect(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copyBytes(conn.getInputStream(), out, 4096, true); + String topology = out.toString(); + + // parse json + JsonNode racks = new ObjectMapper().readTree(topology); + + // assert rack number + assertEquals(6, racks.size()); + + // assert rack info + assertTrue(topology.contains("/ns0/rack1")); + assertTrue(topology.contains("/ns0/rack2")); + assertTrue(topology.contains("/ns0/rack3")); + assertTrue(topology.contains("/ns1/rack4")); + assertTrue(topology.contains("/ns1/rack5")); + assertTrue(topology.contains("/ns1/rack6")); + + // assert node number: sum the sizes of every field value of every rack + Iterator<JsonNode> elements = racks.elements(); + int dataNodesCount = 0; + while (elements.hasNext()) { + JsonNode rack = elements.next(); + Iterator<Map.Entry<String, JsonNode>> fields = rack.fields(); + while (fields.hasNext()) { + dataNodesCount += fields.next().getValue().size(); + } + } + assertEquals(18, dataNodesCount); + } + + @Test + public void testPrintTopologyNoDatanodesTextFormat() throws Exception { + // get http Address + String httpAddress = clusterNoDatanodes.getRandomRouter().getRouter() + .getHttpServerAddress().toString(); + + // send http request + URL url = new URL("http:/" + httpAddress + "/topology"); + HttpURLConnection conn = (HttpURLConnection) 
url.openConnection(); + conn.setReadTimeout(20000); + conn.setConnectTimeout(20000); + conn.connect(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copyBytes(conn.getInputStream(), out, 4096, true); + StringBuilder sb = + new StringBuilder("-- Network Topology -- \n"); + sb.append(out); + sb.append("\n-- Network Topology -- "); + String topology = sb.toString(); + + // assert node number + assertTrue(topology.contains("No DataNodes")); + } + + @Test + public void testPrintTopologyNoDatanodesJsonFormat() throws Exception { + // get http Address + String httpAddress = clusterNoDatanodes.getRandomRouter().getRouter() + .getHttpServerAddress().toString(); + + // send http request + URL url = new URL("http:/" + httpAddress + "/topology"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setReadTimeout(20000); + conn.setConnectTimeout(20000); + conn.setRequestProperty("Accept", "application/json"); + conn.connect(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copyBytes(conn.getInputStream(), out, 4096, true); + StringBuilder sb = + new StringBuilder("-- Network Topology -- \n"); + sb.append(out); + sb.append("\n-- Network Topology -- "); + String topology = sb.toString(); + + // assert node number + assertTrue(topology.contains("No DataNodes")); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterPolicyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterPolicyProvider.java index 36a00e507633e..c2577e67a06fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterPolicyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterPolicyProvider.java @@ -41,7 +41,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
-import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; /** * Test suite covering RouterPolicyProvider. We expect that it contains a diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java index 7eb9badaafbcd..5326d48be2d31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java @@ -74,7 +74,7 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.base.Supplier; +import java.util.function.Supplier; /** * Tests quota behaviors in Router-based Federation. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCClientRetries.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCClientRetries.java index cea5212965cec..a16f9d10c85fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCClientRetries.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCClientRetries.java @@ -54,7 +54,7 @@ import org.junit.Test; import org.junit.rules.Timeout; -import com.google.common.base.Supplier; +import java.util.function.Supplier; /** * Test retry behavior of the Router RPC Client. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java index d00b93c43062c..e644dec123b5b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java @@ -25,6 +25,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -34,6 +35,7 @@ import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Options.Rename; @@ -56,7 +58,9 @@ import org.apache.hadoop.hdfs.server.federation.store.protocol.GetDestinationResponse; import org.apache.hadoop.hdfs.server.federation.store.protocol.RemoveMountTableEntryRequest; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.apache.hadoop.hdfs.tools.federation.RouterAdmin; import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.util.ToolRunner; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -509,6 +513,100 @@ public void testRenameMultipleDestDirectories() throws Exception { verifyRenameOnMultiDestDirectories(DestinationOrder.SPACE, true); } + @Test + public void testClearQuota() throws Exception { + long nsQuota = 5; + long ssQuota = 100; + Path path = new Path("/router_test"); + nnFs0.mkdirs(path); + nnFs1.mkdirs(path); + 
MountTable addEntry = MountTable.newInstance("/router_test", + Collections.singletonMap("ns0", "/router_test")); + addEntry.setQuota(new RouterQuotaUsage.Builder().build()); + assertTrue(addMountTable(addEntry)); + RouterQuotaUpdateService updateService = + routerContext.getRouter().getQuotaCacheUpdateService(); + updateService.periodicInvoke(); + + //set quota and validate the quota + RouterAdmin admin = getRouterAdmin(); + String[] argv = new String[] {"-setQuota", path.toString(), "-nsQuota", + String.valueOf(nsQuota), "-ssQuota", String.valueOf(ssQuota)}; + assertEquals(0, ToolRunner.run(admin, argv)); + updateService.periodicInvoke(); + resolver.loadCache(true); + ContentSummary cs = routerFs.getContentSummary(path); + assertEquals(nsQuota, cs.getQuota()); + assertEquals(ssQuota, cs.getSpaceQuota()); + + //clear quota and validate the quota + argv = new String[] {"-clrQuota", path.toString()}; + assertEquals(0, ToolRunner.run(admin, argv)); + updateService.periodicInvoke(); + resolver.loadCache(true); + //quota should be cleared + ContentSummary cs1 = routerFs.getContentSummary(path); + assertEquals(-1, cs1.getQuota()); + assertEquals(-1, cs1.getSpaceQuota()); + } + + @Test + public void testContentSummaryWithMultipleDest() throws Exception { + MountTable addEntry; + long nsQuota = 5; + long ssQuota = 100; + Path path = new Path("/testContentSummaryWithMultipleDest"); + Map destMap = new HashMap<>(); + destMap.put("ns0", "/testContentSummaryWithMultipleDest"); + destMap.put("ns1", "/testContentSummaryWithMultipleDest"); + nnFs0.mkdirs(path); + nnFs1.mkdirs(path); + addEntry = + MountTable.newInstance("/testContentSummaryWithMultipleDest", destMap); + addEntry.setQuota( + new RouterQuotaUsage.Builder().quota(nsQuota).spaceQuota(ssQuota) + .build()); + assertTrue(addMountTable(addEntry)); + RouterQuotaUpdateService updateService = + routerContext.getRouter().getQuotaCacheUpdateService(); + updateService.periodicInvoke(); + ContentSummary cs = 
routerFs.getContentSummary(path); + assertEquals(nsQuota, cs.getQuota()); + assertEquals(ssQuota, cs.getSpaceQuota()); + ContentSummary ns0Cs = nnFs0.getContentSummary(path); + assertEquals(nsQuota, ns0Cs.getQuota()); + assertEquals(ssQuota, ns0Cs.getSpaceQuota()); + ContentSummary ns1Cs = nnFs1.getContentSummary(path); + assertEquals(nsQuota, ns1Cs.getQuota()); + assertEquals(ssQuota, ns1Cs.getSpaceQuota()); + } + + @Test + public void testContentSummaryMultipleDestWithMaxValue() + throws Exception { + MountTable addEntry; + long nsQuota = Long.MAX_VALUE - 2; + long ssQuota = Long.MAX_VALUE - 2; + Path path = new Path("/testContentSummaryMultipleDestWithMaxValue"); + Map destMap = new HashMap<>(); + destMap.put("ns0", "/testContentSummaryMultipleDestWithMaxValue"); + destMap.put("ns1", "/testContentSummaryMultipleDestWithMaxValue"); + nnFs0.mkdirs(path); + nnFs1.mkdirs(path); + addEntry = MountTable + .newInstance("/testContentSummaryMultipleDestWithMaxValue", destMap); + addEntry.setQuota( + new RouterQuotaUsage.Builder().quota(nsQuota).spaceQuota(ssQuota) + .build()); + assertTrue(addMountTable(addEntry)); + RouterQuotaUpdateService updateService = + routerContext.getRouter().getQuotaCacheUpdateService(); + updateService.periodicInvoke(); + ContentSummary cs = routerFs.getContentSummary(path); + assertEquals(nsQuota, cs.getQuota()); + assertEquals(ssQuota, cs.getSpaceQuota()); + } + /** * Test to verify rename operation on directories in case of multiple * destinations. 
@@ -690,4 +788,12 @@ private static FileSystem getFileSystem(final String nsId) { return null; } + private RouterAdmin getRouterAdmin() { + Router router = routerContext.getRouter(); + Configuration configuration = routerContext.getConf(); + InetSocketAddress routerSocket = router.getAdminServerAddress(); + configuration.setSocketAddr(RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY, + routerSocket); + return new RouterAdmin(configuration); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java index fcf742d9dad97..143972ca23e98 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.federation.router; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY; import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.addDirectory; import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.countContents; import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.createFile; @@ -25,6 +26,7 @@ import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.verifyFileExists; import static org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.TEST_STRING; import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -37,6 +39,7 @@ import java.io.IOException; import 
java.lang.reflect.Method; import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -61,6 +64,7 @@ import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.SafeModeAction; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSClient; @@ -83,7 +87,6 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; -import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -95,6 +98,7 @@ import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.NamenodeContext; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext; @@ -116,6 +120,7 @@ import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.io.erasurecode.ErasureCodeConstants; +import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.Service.STATE; import org.apache.hadoop.test.GenericTestUtils; @@ -128,8 +133,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Supplier; -import 
com.google.common.collect.Maps; +import java.util.function.Supplier; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * The the RPC interface of the {@link Router} implemented by @@ -178,6 +183,7 @@ public int compare( private NamenodeProtocol routerNamenodeProtocol; /** NameNodeProtocol interface to the Namenode. */ private NamenodeProtocol nnNamenodeProtocol; + private NamenodeProtocol nnNamenodeProtocol1; /** Filesystem interface to the Router. */ private FileSystem routerFS; @@ -192,10 +198,29 @@ public int compare( @BeforeClass public static void globalSetUp() throws Exception { + Configuration namenodeConf = new Configuration(); + namenodeConf.setBoolean(DFSConfigKeys.HADOOP_CALLER_CONTEXT_ENABLED_KEY, + true); + // It's very easy to become overloaded for some specific dn in this small + // cluster, which will cause the EC file block allocation failure. To avoid + // this issue, we disable considerLoad option. + namenodeConf.setBoolean(DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); cluster = new MiniRouterDFSCluster(false, NUM_SUBCLUSTERS); cluster.setNumDatanodesPerNameservice(NUM_DNS); + cluster.addNamenodeOverrides(namenodeConf); cluster.setIndependentDNs(); + Configuration conf = new Configuration(); + // Setup proxy users. + conf.set("hadoop.proxyuser.testRealUser.groups", "*"); + conf.set("hadoop.proxyuser.testRealUser.hosts", "*"); + String loginUser = UserGroupInformation.getLoginUser().getUserName(); + conf.set(String.format("hadoop.proxyuser.%s.groups", loginUser), "*"); + conf.set(String.format("hadoop.proxyuser.%s.hosts", loginUser), "*"); + // Enable IP proxy users. 
+ conf.set(DFSConfigKeys.DFS_NAMENODE_IP_PROXY_USERS, "placeholder"); + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 5); + cluster.addNamenodeOverrides(conf); // Start NNs and DNs and wait until ready cluster.startCluster(); @@ -338,6 +363,11 @@ protected void setNamenode(NamenodeContext nn) NamenodeContext nn0 = cluster.getNamenode(ns0, null); this.nnNamenodeProtocol = NameNodeProxies.createProxy(nn0.getConf(), nn0.getFileSystem().getUri(), NamenodeProtocol.class).getProxy(); + // Namenode from the other namespace + String ns1 = cluster.getNameservices().get(1); + NamenodeContext nn1 = cluster.getNamenode(ns1, null); + this.nnNamenodeProtocol1 = NameNodeProxies.createProxy(nn1.getConf(), + nn1.getFileSystem().getUri(), NamenodeProtocol.class).getProxy(); } protected String getNs() { @@ -429,6 +459,62 @@ public void testProxyListFiles() throws IOException, InterruptedException, new Object[] {badPath, HdfsFileStatus.EMPTY_NAME, false}); } + @Test + public void testProxyListFilesLargeDir() throws IOException { + // Call listStatus against a dir with many files + // Create a parent point as well as a subfolder mount + // /parent + // ns0 -> /parent + // /parent/file-7 + // ns0 -> /parent/file-7 + // /parent/file-0 + // ns0 -> /parent/file-0 + for (RouterContext rc : cluster.getRouters()) { + MockResolver resolver = + (MockResolver) rc.getRouter().getSubclusterResolver(); + resolver.addLocation("/parent", ns, "/parent"); + // file-0 is only in mount table + resolver.addLocation("/parent/file-0", ns, "/parent/file-0"); + // file-7 is both in mount table and in file system + resolver.addLocation("/parent/file-7", ns, "/parent/file-7"); + } + + // Test the case when there is no subcluster path and only mount point + FileStatus[] result = routerFS.listStatus(new Path("/parent")); + assertEquals(2, result.length); + // this makes sure file[0-8] is added in order + assertEquals("file-0", result[0].getPath().getName()); + assertEquals("file-7", result[1].getPath().getName()); + 
+ // Create files and test full listing in order + NamenodeContext nn = cluster.getNamenode(ns, null); + FileSystem nnFileSystem = nn.getFileSystem(); + for (int i = 1; i < 9; i++) { + createFile(nnFileSystem, "/parent/file-"+i, 32); + } + + result = routerFS.listStatus(new Path("/parent")); + assertEquals(9, result.length); + // this makes sure file[0-8] is added in order + for (int i = 0; i < 9; i++) { + assertEquals("file-"+i, result[i].getPath().getName()); + } + + // Add file-9 and now this listing will be added from mount point + for (RouterContext rc : cluster.getRouters()) { + MockResolver resolver = + (MockResolver) rc.getRouter().getSubclusterResolver(); + resolver.addLocation("/parent/file-9", ns, "/parent/file-9"); + } + assertFalse(verifyFileExists(nnFileSystem, "/parent/file-9")); + result = routerFS.listStatus(new Path("/parent")); + // file-9 will be added by mount point + assertEquals(10, result.length); + for (int i = 0; i < 10; i++) { + assertEquals("file-"+i, result[i].getPath().getName()); + } + } + @Test public void testProxyListFilesWithConflict() throws IOException, InterruptedException { @@ -613,6 +699,7 @@ public void testProxyGetDatanodeReport() throws Exception { DatanodeInfo[] combinedData = routerProtocol.getDatanodeReport(DatanodeReportType.ALL); + assertEquals(0, routerProtocol.getSlowDatanodeReport().length); final Map routerDNMap = new TreeMap<>(); for (DatanodeInfo dn : combinedData) { String subcluster = dn.getNetworkLocation().split("/")[1]; @@ -1137,7 +1224,7 @@ public void testProxyGetAdditionalDatanode() newRouterFile, clientName, null, null, status.getFileId(), null, null); - DatanodeInfo[] exclusions = new DatanodeInfo[0]; + DatanodeInfo[] exclusions = DatanodeInfo.EMPTY_ARRAY; LocatedBlock newBlock = routerProtocol.getAdditionalDatanode( newRouterFile, status.getFileId(), block.getBlock(), block.getLocations(), block.getStorageIDs(), exclusions, 1, clientName); @@ -1205,11 +1292,14 @@ public void testProxyVersionRequest() 
throws Exception { // Check with default namespace specified. NamespaceInfo rVersion = routerNamenodeProtocol.versionRequest(); NamespaceInfo nnVersion = nnNamenodeProtocol.versionRequest(); + NamespaceInfo nnVersion1 = nnNamenodeProtocol1.versionRequest(); compareVersion(rVersion, nnVersion); // Check with default namespace unspecified. resolver.setDisableNamespace(true); - rVersion = routerNamenodeProtocol.versionRequest(); - compareVersion(rVersion, nnVersion); + // Verify the NamespaceInfo is of nn0 or nn1 + boolean isNN0 = + rVersion.getBlockPoolID().equals(nnVersion.getBlockPoolID()); + compareVersion(rVersion, isNN0 ? nnVersion : nnVersion1); } finally { resolver.setDisableNamespace(false); } @@ -1278,11 +1368,13 @@ public void testProxyGetTransactionID() throws IOException { // Check with default namespace specified. long routerTransactionID = routerNamenodeProtocol.getTransactionID(); long nnTransactionID = nnNamenodeProtocol.getTransactionID(); + long nnTransactionID1 = nnNamenodeProtocol1.getTransactionID(); assertEquals(nnTransactionID, routerTransactionID); // Check with default namespace unspecified. 
resolver.setDisableNamespace(true); + // Verify the transaction ID is of nn0 or nn1 routerTransactionID = routerNamenodeProtocol.getTransactionID(); - assertEquals(nnTransactionID, routerTransactionID); + assertThat(routerTransactionID).isIn(nnTransactionID, nnTransactionID1); } finally { resolver.setDisableNamespace(false); } @@ -1309,27 +1401,27 @@ public void testProxyGetMostRecentCheckpointTxId() throws IOException { @Test public void testProxySetSafemode() throws Exception { boolean routerSafemode = - routerProtocol.setSafeMode(SafeModeAction.SAFEMODE_GET, false); + routerProtocol.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, false); boolean nnSafemode = - nnProtocol.setSafeMode(SafeModeAction.SAFEMODE_GET, false); + nnProtocol.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, false); assertEquals(nnSafemode, routerSafemode); routerSafemode = - routerProtocol.setSafeMode(SafeModeAction.SAFEMODE_GET, true); + routerProtocol.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, true); nnSafemode = - nnProtocol.setSafeMode(SafeModeAction.SAFEMODE_GET, true); + nnProtocol.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, true); assertEquals(nnSafemode, routerSafemode); assertFalse(routerProtocol.setSafeMode( - SafeModeAction.SAFEMODE_GET, false)); + HdfsConstants.SafeModeAction.SAFEMODE_GET, false)); assertTrue(routerProtocol.setSafeMode( - SafeModeAction.SAFEMODE_ENTER, false)); + HdfsConstants.SafeModeAction.SAFEMODE_ENTER, false)); assertTrue(routerProtocol.setSafeMode( - SafeModeAction.SAFEMODE_GET, false)); + HdfsConstants.SafeModeAction.SAFEMODE_GET, false)); assertFalse(routerProtocol.setSafeMode( - SafeModeAction.SAFEMODE_LEAVE, false)); + HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, false)); assertFalse(routerProtocol.setSafeMode( - SafeModeAction.SAFEMODE_GET, false)); + HdfsConstants.SafeModeAction.SAFEMODE_GET, false)); } @Test @@ -1561,18 +1653,18 @@ public void testGetCurrentTXIDandRollEdits() throws IOException { @Test public 
void testSaveNamespace() throws IOException { cluster.getCluster().getFileSystem(0) - .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + .setSafeMode(SafeModeAction.ENTER); cluster.getCluster().getFileSystem(1) - .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + .setSafeMode(SafeModeAction.ENTER); Boolean saveNamespace = routerProtocol.saveNamespace(0, 0); assertTrue(saveNamespace); cluster.getCluster().getFileSystem(0) - .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE); + .setSafeMode(SafeModeAction.LEAVE); cluster.getCluster().getFileSystem(1) - .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE); + .setSafeMode(SafeModeAction.LEAVE); } /* @@ -1651,6 +1743,14 @@ public void testRBFMetricsMethodsRelayOnStateStore() { assertEquals(0, metrics.getNumLiveNodes()); } + @Test + public void testNamenodeMetricsEnteringMaintenanceNodes() throws IOException { + final NamenodeBeanMetrics metrics = + router.getRouter().getNamenodeMetrics(); + + assertEquals("{}", metrics.getEnteringMaintenanceNodes()); + } + @Test public void testCacheAdmin() throws Exception { DistributedFileSystem routerDFS = (DistributedFileSystem) routerFS; @@ -1755,4 +1855,107 @@ private DFSClient getFileDFSClient(final String path) { } return null; } -} \ No newline at end of file + + @Test + public void testMkdirsWithCallerContext() throws IOException { + GenericTestUtils.LogCapturer auditlog = + GenericTestUtils.LogCapturer.captureLogs(FSNamesystem.auditLog); + + // Current callerContext is null + assertNull(CallerContext.getCurrent()); + + // Set client context + CallerContext.setCurrent( + new CallerContext.Builder("clientContext").build()); + + // Create a directory via the router + String dirPath = "/test_dir_with_callercontext"; + FsPermission permission = new FsPermission("755"); + routerProtocol.mkdirs(dirPath, permission, false); + + // The audit log should contains "callerContext=clientIp:...,clientContext" + final String logOutput = auditlog.getOutput(); + 
assertTrue(logOutput.contains("callerContext=clientIp:")); + assertTrue(logOutput.contains(",clientContext")); + assertTrue(verifyFileExists(routerFS, dirPath)); + } + + @Test + public void testRealUserPropagationInCallerContext() + throws IOException, InterruptedException { + GenericTestUtils.LogCapturer auditlog = + GenericTestUtils.LogCapturer.captureLogs(FSNamesystem.auditLog); + + // Current callerContext is null + assertNull(CallerContext.getCurrent()); + + UserGroupInformation loginUser = UserGroupInformation.getLoginUser(); + UserGroupInformation realUser = UserGroupInformation + .createUserForTesting("testRealUser", new String[]{"group"}); + UserGroupInformation proxyUser = UserGroupInformation + .createProxyUser("testProxyUser", realUser); + FileSystem proxyFs = proxyUser.doAs( + (PrivilegedExceptionAction) () -> router.getFileSystem()); + proxyFs.listStatus(new Path("/")); + + + final String logOutput = auditlog.getOutput(); + // Login user, which is used as the router's user, is different from the realUser. + assertNotEquals(loginUser.getUserName(), realUser.getUserName()); + // Login user is used in the audit log's ugi field. + assertTrue("The login user is the proxyUser in the UGI field", + logOutput.contains(String.format("ugi=%s (auth:PROXY) via %s (auth:SIMPLE)", + proxyUser.getUserName(), + loginUser.getUserName()))); + // Real user is added to the caller context. 
+ assertTrue("The audit log should contain the real user.", + logOutput.contains(String.format("realUser:%s", realUser.getUserName()))); + } + + @Test + public void testSetBalancerBandwidth() throws Exception { + long defaultBandwidth = + DFSConfigKeys.DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_DEFAULT; + long newBandwidth = defaultBandwidth * 2; + routerProtocol.setBalancerBandwidth(newBandwidth); + ArrayList datanodes = cluster.getCluster().getDataNodes(); + GenericTestUtils.waitFor(() -> { + return datanodes.get(0).getBalancerBandwidth() == newBandwidth; + }, 100, 60 * 1000); + } + + @Test + public void testAddClientIpPortToCallerContext() throws IOException { + GenericTestUtils.LogCapturer auditLog = + GenericTestUtils.LogCapturer.captureLogs(FSNamesystem.auditLog); + + // 1. ClientIp and ClientPort are not set on the client. + // Set client context. + CallerContext.setCurrent( + new CallerContext.Builder("clientContext").build()); + + // Create a directory via the router. + String dirPath = "/test"; + routerProtocol.mkdirs(dirPath, new FsPermission("755"), false); + + // The audit log should contains "clientIp:" and "clientPort:". + assertTrue(auditLog.getOutput().contains("clientIp:")); + assertTrue(auditLog.getOutput().contains("clientPort:")); + assertTrue(verifyFileExists(routerFS, dirPath)); + auditLog.clearOutput(); + + // 2. ClientIp and ClientPort are set on the client. + // Reset client context. + CallerContext.setCurrent( + new CallerContext.Builder( + "clientContext,clientIp:1.1.1.1,clientPort:1234").build()); + + // Create a directory via the router. + routerProtocol.getFileInfo(dirPath); + + // The audit log should not contain the original clientIp and clientPort + // set by client. 
+ assertFalse(auditLog.getOutput().contains("clientIp:1.1.1.1")); + assertFalse(auditLog.getOutput().contains("clientPort:1234")); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcMultiDestination.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcMultiDestination.java index 30a47a45620fc..370a1250a7c11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcMultiDestination.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcMultiDestination.java @@ -32,6 +32,7 @@ import java.io.IOException; import java.lang.reflect.Method; +import java.net.InetAddress; import java.net.URISyntaxException; import java.util.Arrays; import java.util.EnumSet; @@ -67,6 +68,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.test.GenericTestUtils; @@ -312,7 +314,7 @@ testPath, new FsPermission("777"), clientName, assertEquals(1, proxyNumAddBlock2 - proxyNumAddBlock); // Get additionalDatanode via router and block is not null. 
- DatanodeInfo[] exclusions = new DatanodeInfo[0]; + DatanodeInfo[] exclusions = DatanodeInfo.EMPTY_ARRAY; LocatedBlock newBlock = clientProtocol.getAdditionalDatanode( testPath, status.getFileId(), blockTwo.getBlock(), blockTwo.getLocations(), blockTwo.getStorageIDs(), exclusions, @@ -434,4 +436,44 @@ public void testSubclusterDown() throws Exception { setInternalState(ns0, "haContext", nn0haCtx); setInternalState(router0ClientProtocol, "allowPartialList", true); } + + @Test + public void testCallerContextWithMultiDestinations() throws IOException { + GenericTestUtils.LogCapturer auditLog = + GenericTestUtils.LogCapturer.captureLogs(FSNamesystem.auditLog); + + // set client context + CallerContext.setCurrent( + new CallerContext.Builder("clientContext").build()); + // assert the initial caller context as expected + assertEquals("clientContext", CallerContext.getCurrent().getContext()); + + DistributedFileSystem routerFs = + (DistributedFileSystem) getRouterFileSystem(); + // create a directory via the router + Path dirPath = new Path("/test_caller_context_with_multi_destinations"); + routerFs.mkdirs(dirPath); + // invoke concurrently in RouterRpcClient + routerFs.listStatus(dirPath); + // invoke sequentially in RouterRpcClient + routerFs.getFileStatus(dirPath); + + String auditFlag = "src=" + dirPath.toString(); + String clientIpInfo = "clientIp:" + + InetAddress.getLocalHost().getHostAddress(); + for (String line : auditLog.getOutput().split("\n")) { + if (line.contains(auditFlag)) { + // assert origin caller context exist in audit log + String callerContext = line.substring(line.indexOf("callerContext=")); + assertTrue(callerContext.contains("clientContext")); + // assert client ip info exist in caller context + assertTrue(callerContext.contains(clientIpInfo)); + // assert client ip info appears only once in caller context + assertEquals(callerContext.indexOf(clientIpInfo), + callerContext.lastIndexOf(clientIpInfo)); + } + } + // clear client context + 
CallerContext.setCurrent(null); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcSingleNS.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcSingleNS.java index ae0afa42e4bf9..503a4177216f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcSingleNS.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcSingleNS.java @@ -19,9 +19,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.SafeModeAction; import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocol.ClientProtocol; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; @@ -203,7 +203,7 @@ public void testGetCurrentTXIDandRollEdits() throws IOException { @Test public void testSaveNamespace() throws IOException { cluster.getCluster().getFileSystem() - .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + .setSafeMode(SafeModeAction.ENTER); Boolean saveNamespace = routerProtocol.saveNamespace(0, 0); assertTrue(saveNamespace); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcStoragePolicySatisfier.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcStoragePolicySatisfier.java index fa1079a4edeb4..57518c811197f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcStoragePolicySatisfier.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpcStoragePolicySatisfier.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; -import org.apache.hadoop.hdfs.server.federation.metrics.NamenodeBeanMetrics; import org.apache.hadoop.hdfs.server.namenode.sps.Context; import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier; import org.apache.hadoop.hdfs.server.sps.ExternalSPSContext; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterTrash.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterTrash.java new file mode 100644 index 0000000000000..dfb8c33c72d4b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterTrash.java @@ -0,0 +1,335 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.router; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.Trash; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSClient; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; +import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; +import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster; +import org.apache.hadoop.hdfs.server.federation.resolver.MountTableManager; +import org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver; +import org.apache.hadoop.hdfs.server.federation.store.protocol.*; +import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Time; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collections; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +/** + * This is a test through the Router move data to the Trash. 
+ */ +public class TestRouterTrash { + + public static final Logger LOG = + LoggerFactory.getLogger(TestRouterTrash.class); + + private static StateStoreDFSCluster cluster; + private static MiniRouterDFSCluster.RouterContext routerContext; + private static MountTableResolver mountTable; + private static FileSystem routerFs; + private static FileSystem nnFs; + private static final String TEST_USER = "test-trash"; + private static MiniRouterDFSCluster.NamenodeContext nnContext; + private static String ns0; + private static String ns1; + private static final String MOUNT_POINT = "/home/data"; + private static final String FILE = MOUNT_POINT + "/file1"; + private static final String TRASH_ROOT = "/user/" + TEST_USER + "/.Trash"; + private static final String CURRENT = "/Current"; + + @BeforeClass + public static void globalSetUp() throws Exception { + // Build and start a federated cluster + cluster = new StateStoreDFSCluster(false, 2); + Configuration conf = new RouterConfigBuilder() + .stateStore() + .admin() + .rpc() + .http() + .build(); + conf.set(FS_TRASH_INTERVAL_KEY, "100"); + cluster.addRouterOverrides(conf); + cluster.startCluster(); + cluster.startRouters(); + cluster.waitClusterUp(); + + ns0 = cluster.getNameservices().get(0); + ns1 = cluster.getNameservices().get(1); + + routerContext = cluster.getRandomRouter(); + routerFs = routerContext.getFileSystem(); + nnContext = cluster.getNamenode(ns0, null); + nnFs = nnContext.getFileSystem(); + Router router = routerContext.getRouter(); + mountTable = (MountTableResolver) router.getSubclusterResolver(); + } + + @AfterClass + public static void tearDown() { + if (cluster != null) { + cluster.stopRouter(routerContext); + cluster.shutdown(); + cluster = null; + } + } + + @After + public void clearMountTable() throws IOException { + RouterClient client = routerContext.getAdminClient(); + MountTableManager mountTableManager = client.getMountTableManager(); + GetMountTableEntriesRequest req1 = + 
GetMountTableEntriesRequest.newInstance("/"); + GetMountTableEntriesResponse response = + mountTableManager.getMountTableEntries(req1); + for (MountTable entry : response.getEntries()) { + RemoveMountTableEntryRequest req2 = + RemoveMountTableEntryRequest.newInstance(entry.getSourcePath()); + mountTableManager.removeMountTableEntry(req2); + } + } + + @After + public void clearFile() throws IOException { + FileStatus[] fileStatuses = nnFs.listStatus(new Path("/")); + for (FileStatus file : fileStatuses) { + nnFs.delete(file.getPath(), true); + } + } + + private boolean addMountTable(final MountTable entry) throws IOException { + RouterClient client = routerContext.getAdminClient(); + MountTableManager mountTableManager = client.getMountTableManager(); + AddMountTableEntryRequest addRequest = + AddMountTableEntryRequest.newInstance(entry); + AddMountTableEntryResponse addResponse = + mountTableManager.addMountTableEntry(addRequest); + // Reload the Router cache + mountTable.loadCache(true); + return addResponse.getStatus(); + } + + @Test + public void testMoveToTrashNoMountPoint() throws IOException, + URISyntaxException, InterruptedException { + MountTable addEntry = MountTable.newInstance(MOUNT_POINT, + Collections.singletonMap(ns0, MOUNT_POINT)); + assertTrue(addMountTable(addEntry)); + // current user client + DFSClient client = nnContext.getClient(); + client.setOwner("/", TEST_USER, TEST_USER); + UserGroupInformation ugi = UserGroupInformation. 
+ createRemoteUser(TEST_USER); + // test user client + client = nnContext.getClient(ugi); + client.mkdirs(MOUNT_POINT, new FsPermission("777"), true); + assertTrue(client.exists(MOUNT_POINT)); + // create test file + client.create(FILE, true); + Path filePath = new Path(FILE); + + FileStatus[] fileStatuses = routerFs.listStatus(filePath); + assertEquals(1, fileStatuses.length); + assertEquals(TEST_USER, fileStatuses[0].getOwner()); + // move to Trash + Configuration routerConf = routerContext.getConf(); + FileSystem fs = + DFSTestUtil.getFileSystemAs(ugi, routerConf); + Trash trash = new Trash(fs, routerConf); + assertTrue(trash.moveToTrash(filePath)); + fileStatuses = nnFs.listStatus( + new Path(TRASH_ROOT + CURRENT + MOUNT_POINT)); + assertEquals(1, fileStatuses.length); + assertTrue(nnFs.exists(new Path(TRASH_ROOT + CURRENT + FILE))); + assertTrue(nnFs.exists(new Path("/user/" + + TEST_USER + "/.Trash/Current" + FILE))); + // When the target path in Trash already exists. + client.create(FILE, true); + filePath = new Path(FILE); + fileStatuses = routerFs.listStatus(filePath); + assertEquals(1, fileStatuses.length); + assertTrue(trash.moveToTrash(filePath)); + fileStatuses = routerFs.listStatus( + new Path(TRASH_ROOT + CURRENT + MOUNT_POINT)); + assertEquals(2, fileStatuses.length); + } + + @Test + public void testMoveToTrashWithKerberosUser() throws IOException, + URISyntaxException, InterruptedException { + // Build a user name in Kerberos principal form + String kerberosUser = "randomUser/dev@HADOOP.COM"; + UserGroupInformation ugi = UserGroupInformation.createRemoteUser(kerberosUser); + MountTable addEntry = MountTable.newInstance(MOUNT_POINT, + Collections.singletonMap(ns1, MOUNT_POINT)); + assertTrue(addMountTable(addEntry)); + // current user client + MiniRouterDFSCluster.NamenodeContext nn1Context = cluster.getNamenode(ns1, null); + DFSClient currentUserClientNs0 = nnContext.getClient(); + DFSClient currentUserClientNs1 = nn1Context.getClient(); + + 
currentUserClientNs0.setOwner("/", ugi.getShortUserName(), ugi.getShortUserName()); + currentUserClientNs1.setOwner("/", ugi.getShortUserName(), ugi.getShortUserName()); + + // test user client + DFSClient testUserClientNs1 = nn1Context.getClient(ugi); + testUserClientNs1.mkdirs(MOUNT_POINT, new FsPermission("777"), true); + assertTrue(testUserClientNs1.exists(MOUNT_POINT)); + // create test file + testUserClientNs1.create(FILE, true); + Path filePath = new Path(FILE); + + FileStatus[] fileStatuses = routerFs.listStatus(filePath); + assertEquals(1, fileStatuses.length); + assertEquals(ugi.getShortUserName(), fileStatuses[0].getOwner()); + // move to Trash + Configuration routerConf = routerContext.getConf(); + FileSystem fs = DFSTestUtil.getFileSystemAs(ugi, routerConf); + Trash trash = new Trash(fs, routerConf); + assertTrue(trash.moveToTrash(filePath)); + fileStatuses = fs.listStatus( + new Path("/user/" + ugi.getShortUserName() + "/.Trash/Current" + MOUNT_POINT)); + assertEquals(1, fileStatuses.length); + } + + @Test + public void testDeleteToTrashExistMountPoint() throws IOException, + URISyntaxException, InterruptedException { + MountTable addEntry = MountTable.newInstance(MOUNT_POINT, + Collections.singletonMap(ns0, MOUNT_POINT)); + assertTrue(addMountTable(addEntry)); + // add Trash mount point + addEntry = MountTable.newInstance(TRASH_ROOT, + Collections.singletonMap(ns1, TRASH_ROOT)); + assertTrue(addMountTable(addEntry)); + // current user client + DFSClient client = nnContext.getClient(); + client.setOwner("/", TEST_USER, TEST_USER); + UserGroupInformation ugi = UserGroupInformation. 
+ createRemoteUser(TEST_USER); + // test user client + client = nnContext.getClient(ugi); + client.mkdirs(MOUNT_POINT, new FsPermission("777"), true); + assertTrue(client.exists(MOUNT_POINT)); + // create test file + client.create(FILE, true); + Path filePath = new Path(FILE); + + FileStatus[] fileStatuses = routerFs.listStatus(filePath); + assertEquals(1, fileStatuses.length); + assertEquals(TEST_USER, fileStatuses[0].getOwner()); + + // move to Trash + Configuration routerConf = routerContext.getConf(); + FileSystem fs = + DFSTestUtil.getFileSystemAs(ugi, routerConf); + Trash trash = new Trash(fs, routerConf); + assertTrue(trash.moveToTrash(filePath)); + fileStatuses = nnFs.listStatus( + new Path(TRASH_ROOT + CURRENT + MOUNT_POINT)); + assertEquals(1, fileStatuses.length); + assertTrue(nnFs.exists(new Path(TRASH_ROOT + CURRENT + FILE))); + // When the target path in Trash already exists. + client.create(FILE, true); + filePath = new Path(FILE); + + fileStatuses = nnFs.listStatus(filePath); + assertEquals(1, fileStatuses.length); + assertTrue(trash.moveToTrash(filePath)); + fileStatuses = nnFs.listStatus( + new Path(TRASH_ROOT + CURRENT + MOUNT_POINT)); + assertEquals(2, fileStatuses.length); + } + + @Test + public void testIsTrashPath() throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + assertNotNull(ugi); + assertTrue(MountTableResolver.isTrashPath( + "/user/" + ugi.getUserName() + "/.Trash/Current" + MOUNT_POINT)); + assertTrue(MountTableResolver.isTrashPath( + "/user/" + ugi.getUserName() + + "/.Trash/" + Time.now() + MOUNT_POINT)); + assertFalse(MountTableResolver.isTrashPath(MOUNT_POINT)); + + // Contains TrashCurrent but does not begin with TrashCurrent. + assertFalse(MountTableResolver.isTrashPath("/home/user/" + + ugi.getUserName() + "/.Trash/Current" + MOUNT_POINT)); + assertFalse(MountTableResolver.isTrashPath("/home/user/" + + ugi.getUserName() + "/.Trash/" + Time.now() + MOUNT_POINT)); + + // Special cases. 
+ assertFalse(MountTableResolver.isTrashPath("")); + assertFalse(MountTableResolver.isTrashPath( + "/home/user/empty.Trash/Current")); + assertFalse(MountTableResolver.isTrashPath( + "/home/user/.Trash")); + assertFalse(MountTableResolver.isTrashPath( + "/.Trash/Current")); + } + + @Test + public void testSubtractTrashCurrentPath() throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + assertNotNull(ugi); + assertEquals(MOUNT_POINT, MountTableResolver.subtractTrashCurrentPath( + "/user/" + ugi.getUserName() + "/.Trash/Current" + MOUNT_POINT)); + assertEquals(MOUNT_POINT, MountTableResolver.subtractTrashCurrentPath( + "/user/" + ugi.getUserName() + + "/.Trash/" + Time.now() + MOUNT_POINT)); + + // Contains TrashCurrent but does not begin with TrashCurrent. + assertEquals("/home/user/" + ugi.getUserName() + + "/.Trash/Current" + MOUNT_POINT, MountTableResolver. + subtractTrashCurrentPath("/home/user/" + + ugi.getUserName() + "/.Trash/Current" + MOUNT_POINT)); + long time = Time.now(); + assertEquals("/home/user/" + ugi.getUserName() + + "/.Trash/" + time + MOUNT_POINT, MountTableResolver. + subtractTrashCurrentPath("/home/user/" + ugi.getUserName() + + "/.Trash/" + time + MOUNT_POINT)); + // Special cases. + assertEquals("", MountTableResolver.subtractTrashCurrentPath("")); + assertEquals("/home/user/empty.Trash/Current", MountTableResolver. + subtractTrashCurrentPath("/home/user/empty.Trash/Current")); + assertEquals("/home/user/.Trash", MountTableResolver. + subtractTrashCurrentPath("/home/user/.Trash")); + assertEquals("/.Trash/Current", MountTableResolver. 
+ subtractTrashCurrentPath("/.Trash/Current")); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/security/token/TestSQLDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/security/token/TestSQLDelegationTokenSecretManagerImpl.java new file mode 100644 index 0000000000000..569a274042bfa --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/security/token/TestSQLDelegationTokenSecretManagerImpl.java @@ -0,0 +1,471 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.server.federation.router.security.token; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class TestSQLDelegationTokenSecretManagerImpl { + private static final String CONNECTION_URL = "jdbc:derby:memory:TokenStore"; + private static final int TEST_MAX_RETRIES = 3; + private static Configuration conf; + + @Before + public void init() throws SQLException { + createTestDBTables(); + } + + @After + public void cleanup() throws SQLException { + dropTestDBTables(); + } + + @BeforeClass + public static void initDatabase() throws SQLException { + DriverManager.getConnection(CONNECTION_URL + ";create=true"); + + conf = new Configuration(); + conf.set(SQLConnectionFactory.CONNECTION_URL, CONNECTION_URL); + conf.set(SQLConnectionFactory.CONNECTION_USERNAME, "testuser"); + conf.set(SQLConnectionFactory.CONNECTION_PASSWORD, "testpassword"); + conf.set(SQLConnectionFactory.CONNECTION_DRIVER, "org.apache.derby.jdbc.EmbeddedDriver"); + conf.setInt(SQLSecretManagerRetriableHandlerImpl.MAX_RETRIES, TEST_MAX_RETRIES); + conf.setInt(SQLSecretManagerRetriableHandlerImpl.RETRY_SLEEP_TIME_MS, 10); + } + + @AfterClass + public static void cleanupDatabase() { + try { + DriverManager.getConnection(CONNECTION_URL + 
";drop=true"); + } catch (SQLException e) { + // SQLException expected when database is dropped + if (!e.getMessage().contains("dropped")) { + throw new RuntimeException(e); + } + } + } + + @Test + public void testSingleSecretManager() throws Exception { + DelegationTokenManager tokenManager = createTokenManager(); + try { + Token token = + tokenManager.createToken(UserGroupInformation.getCurrentUser(), "foo"); + validateToken(tokenManager, token); + } finally { + stopTokenManager(tokenManager); + } + } + + @Test + public void testMultipleSecretManagers() throws Exception { + DelegationTokenManager tokenManager1 = createTokenManager(); + DelegationTokenManager tokenManager2 = createTokenManager(); + + try { + Token token1 = + tokenManager1.createToken(UserGroupInformation.getCurrentUser(), "foo"); + Token token2 = + tokenManager2.createToken(UserGroupInformation.getCurrentUser(), "foo"); + + validateToken(tokenManager1, token2); + validateToken(tokenManager2, token1); + } finally { + stopTokenManager(tokenManager1); + stopTokenManager(tokenManager2); + } + } + + @Test + public void testSequenceNumAllocation() throws Exception { + int tokensPerManager = SQLDelegationTokenSecretManagerImpl.DEFAULT_SEQ_NUM_BATCH_SIZE * 5; + Set sequenceNums1 = new HashSet<>(); + Set sequenceNums2 = new HashSet<>(); + Set sequenceNums3 = new HashSet<>(); + Set sequenceNums = new HashSet<>(); + DelegationTokenManager tokenManager1 = createTokenManager(); + DelegationTokenManager tokenManager2 = createTokenManager(); + DelegationTokenManager tokenManager3 = createTokenManager(); + + try { + for (int i = 0; i < tokensPerManager; i++) { + allocateSequenceNum(tokenManager1, sequenceNums1); + allocateSequenceNum(tokenManager2, sequenceNums2); + allocateSequenceNum(tokenManager3, sequenceNums3); + sequenceNums.addAll(sequenceNums1); + sequenceNums.addAll(sequenceNums2); + sequenceNums.addAll(sequenceNums3); + } + + Assert.assertEquals("Verify that all tokens were created with unique sequence 
numbers", + tokensPerManager * 3, sequenceNums.size()); + Assert.assertEquals("Verify that tokenManager1 generated unique sequence numbers", + tokensPerManager, sequenceNums1.size()); + Assert.assertEquals("Verify that tokenManager2 generated unique sequence number", + tokensPerManager, sequenceNums2.size()); + Assert.assertEquals("Verify that tokenManager3 generated unique sequence numbers", + tokensPerManager, sequenceNums3.size()); + + // Validate sequence number batches allocated in order to each token manager + int batchSize = SQLDelegationTokenSecretManagerImpl.DEFAULT_SEQ_NUM_BATCH_SIZE; + for (int seqNum = 1; seqNum < tokensPerManager;) { + // First batch allocated tokenManager1 + for (int i = 0; i < batchSize; i++, seqNum++) { + Assert.assertTrue(sequenceNums1.contains(seqNum)); + } + // Second batch allocated tokenManager2 + for (int i = 0; i < batchSize; i++, seqNum++) { + Assert.assertTrue(sequenceNums2.contains(seqNum)); + } + // Third batch allocated tokenManager3 + for (int i = 0; i < batchSize; i++, seqNum++) { + Assert.assertTrue(sequenceNums3.contains(seqNum)); + } + } + + SQLDelegationTokenSecretManagerImpl secretManager = + (SQLDelegationTokenSecretManagerImpl) tokenManager1.getDelegationTokenSecretManager(); + Assert.assertEquals("Verify that the counter is set to the highest sequence number", + tokensPerManager * 3, secretManager.getDelegationTokenSeqNum()); + } finally { + stopTokenManager(tokenManager1); + stopTokenManager(tokenManager2); + stopTokenManager(tokenManager3); + } + } + + @Test + public void testSequenceNumRollover() throws Exception { + int tokenBatch = SQLDelegationTokenSecretManagerImpl.DEFAULT_SEQ_NUM_BATCH_SIZE; + Set sequenceNums = new HashSet<>(); + + DelegationTokenManager tokenManager = createTokenManager(); + + try { + SQLDelegationTokenSecretManagerImpl secretManager = + (SQLDelegationTokenSecretManagerImpl) tokenManager.getDelegationTokenSecretManager(); + secretManager.setDelegationTokenSeqNum(Integer.MAX_VALUE - 
tokenBatch); + + // Allocate sequence numbers before they are rolled over + for (int seqNum = Integer.MAX_VALUE - tokenBatch; seqNum < Integer.MAX_VALUE; seqNum++) { + allocateSequenceNum(tokenManager, sequenceNums); + Assert.assertTrue(sequenceNums.contains(seqNum + 1)); + } + + // Allocate sequence numbers after they are rolled over + for (int seqNum = 0; seqNum < tokenBatch; seqNum++) { + allocateSequenceNum(tokenManager, sequenceNums); + Assert.assertTrue(sequenceNums.contains(seqNum + 1)); + } + } finally { + stopTokenManager(tokenManager); + } + } + + @Test + public void testDelegationKeyAllocation() throws Exception { + DelegationTokenManager tokenManager1 = createTokenManager(); + + try { + SQLDelegationTokenSecretManagerImpl secretManager1 = + (SQLDelegationTokenSecretManagerImpl) tokenManager1.getDelegationTokenSecretManager(); + // Prevent delegation keys from rolling for the rest of the test to avoid race conditions + // between the keys generated and the active keys in the database. 
+ ((TestDelegationTokenSecretManager) secretManager1).lockKeyRoll(); + int keyId1 = secretManager1.getCurrentKeyId(); + + // Validate that latest key1 is assigned to tokenManager1 tokens + Token token1 = + tokenManager1.createToken(UserGroupInformation.getCurrentUser(), "foo"); + validateKeyId(token1, keyId1); + + DelegationTokenManager tokenManager2 = createTokenManager(); + try { + SQLDelegationTokenSecretManagerImpl secretManager2 = + (SQLDelegationTokenSecretManagerImpl) tokenManager2.getDelegationTokenSecretManager(); + // Prevent delegation keys from rolling for the rest of the test + ((TestDelegationTokenSecretManager) secretManager2).lockKeyRoll(); + int keyId2 = secretManager2.getCurrentKeyId(); + + Assert.assertNotEquals("Each secret manager has its own key", keyId1, keyId2); + + // Validate that latest key2 is assigned to tokenManager2 tokens + Token token2 = + tokenManager2.createToken(UserGroupInformation.getCurrentUser(), "foo"); + validateKeyId(token2, keyId2); + + // Validate that key1 is still assigned to tokenManager1 tokens + token1 = tokenManager1.createToken(UserGroupInformation.getCurrentUser(), "foo"); + validateKeyId(token1, keyId1); + + // Validate that key2 is still assigned to tokenManager2 tokens + token2 = tokenManager2.createToken(UserGroupInformation.getCurrentUser(), "foo"); + validateKeyId(token2, keyId2); + } finally { + stopTokenManager(tokenManager2); + } + } finally { + stopTokenManager(tokenManager1); + } + } + + @Test + public void testHikariConfigs() { + HikariDataSourceConnectionFactory factory1 = new HikariDataSourceConnectionFactory(conf); + int defaultMaximumPoolSize = factory1.getDataSource().getMaximumPoolSize(); + factory1.shutdown(); + + // Changing default maximumPoolSize + Configuration hikariConf = new Configuration(conf); + hikariConf.setInt(HikariDataSourceConnectionFactory.HIKARI_PROPS + "maximumPoolSize", + defaultMaximumPoolSize + 1); + + // Verifying property is correctly set in datasource + 
HikariDataSourceConnectionFactory factory2 = new HikariDataSourceConnectionFactory(hikariConf); + Assert.assertEquals(factory2.getDataSource().getMaximumPoolSize(), + defaultMaximumPoolSize + 1); + factory2.shutdown(); + } + + @Test + public void testRetries() throws Exception { + DelegationTokenManager tokenManager = createTokenManager(); + TestDelegationTokenSecretManager secretManager = + (TestDelegationTokenSecretManager) tokenManager.getDelegationTokenSecretManager(); + + try { + // Prevent delegation keys from rolling for the rest of the test + secretManager.lockKeyRoll(); + + // Reset counter and expect a single request when inserting a token + TestRetryHandler.resetExecutionAttemptCounter(); + tokenManager.createToken(UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertEquals(1, TestRetryHandler.getExecutionAttempts()); + + // Breaking database connections to cause retries + secretManager.setReadOnly(true); + + // Reset counter and expect multiple retries when failing to insert a token + TestRetryHandler.resetExecutionAttemptCounter(); + tokenManager.createToken(UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertEquals(TEST_MAX_RETRIES + 1, TestRetryHandler.getExecutionAttempts()); + } finally { + // Fix database connections + secretManager.setReadOnly(false); + stopTokenManager(tokenManager); + } + } + + private DelegationTokenManager createTokenManager() { + DelegationTokenManager tokenManager = new DelegationTokenManager(new Configuration(), null); + tokenManager.setExternalDelegationTokenSecretManager(new TestDelegationTokenSecretManager()); + return tokenManager; + } + + private void allocateSequenceNum(DelegationTokenManager tokenManager, Set sequenceNums) + throws IOException { + Token token = + tokenManager.createToken(UserGroupInformation.getCurrentUser(), "foo"); + AbstractDelegationTokenIdentifier tokenIdentifier = token.decodeIdentifier(); + Assert.assertFalse("Verify sequence number is unique", + 
sequenceNums.contains(tokenIdentifier.getSequenceNumber())); + + sequenceNums.add(tokenIdentifier.getSequenceNumber()); + } + + private void validateToken(DelegationTokenManager tokenManager, + Token token) + throws Exception { + SQLDelegationTokenSecretManagerImpl secretManager = + (SQLDelegationTokenSecretManagerImpl) tokenManager.getDelegationTokenSecretManager(); + AbstractDelegationTokenIdentifier tokenIdentifier = token.decodeIdentifier(); + + // Verify token using token manager + tokenManager.verifyToken(token); + + byte[] tokenInfo1 = secretManager.selectTokenInfo(tokenIdentifier.getSequenceNumber(), + tokenIdentifier.getBytes()); + Assert.assertNotNull("Verify token exists in database", tokenInfo1); + + // Renew token using token manager + tokenManager.renewToken(token, "foo"); + + byte[] tokenInfo2 = secretManager.selectTokenInfo(tokenIdentifier.getSequenceNumber(), + tokenIdentifier.getBytes()); + Assert.assertNotNull("Verify token exists in database", tokenInfo2); + Assert.assertFalse("Verify token has been updated in database", + Arrays.equals(tokenInfo1, tokenInfo2)); + + // Cancel token using token manager + tokenManager.cancelToken(token, "foo"); + byte[] tokenInfo3 = secretManager.selectTokenInfo(tokenIdentifier.getSequenceNumber(), + tokenIdentifier.getBytes()); + Assert.assertNull("Verify token was removed from database", tokenInfo3); + } + + private void validateKeyId(Token token, + int expectedKeyiD) throws IOException { + AbstractDelegationTokenIdentifier tokenIdentifier = token.decodeIdentifier(); + Assert.assertEquals("Verify that keyId is assigned to token", + tokenIdentifier.getMasterKeyId(), expectedKeyiD); + } + + private static Connection getTestDBConnection() { + try { + return DriverManager.getConnection(CONNECTION_URL); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static void createTestDBTables() throws SQLException { + execute("CREATE TABLE Tokens (sequenceNum INT NOT NULL, " + + "tokenIdentifier 
VARCHAR (255) FOR BIT DATA NOT NULL, " + + "tokenInfo VARCHAR (255) FOR BIT DATA NOT NULL, " + + "modifiedTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, " + + "PRIMARY KEY(sequenceNum))"); + execute("CREATE TABLE DelegationKeys (keyId INT NOT NULL, " + + "delegationKey VARCHAR (255) FOR BIT DATA NOT NULL, " + + "modifiedTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, " + + "PRIMARY KEY(keyId))"); + execute("CREATE TABLE LastSequenceNum (sequenceNum INT NOT NULL)"); + execute("INSERT INTO LastSequenceNum VALUES (0)"); + execute("CREATE TABLE LastDelegationKeyId (keyId INT NOT NULL)"); + execute("INSERT INTO LastDelegationKeyId VALUES (0)"); + } + + private static void dropTestDBTables() throws SQLException { + execute("DROP TABLE Tokens"); + execute("DROP TABLE DelegationKeys"); + execute("DROP TABLE LastSequenceNum"); + execute("DROP TABLE LastDelegationKeyId"); + } + + private static void execute(String statement) throws SQLException { + try (Connection connection = getTestDBConnection()) { + connection.createStatement().execute(statement); + } + } + + private void stopTokenManager(DelegationTokenManager tokenManager) { + TestDelegationTokenSecretManager secretManager = + (TestDelegationTokenSecretManager) tokenManager.getDelegationTokenSecretManager(); + // Release any locks on tables + secretManager.unlockKeyRoll(); + // Stop threads to close database connections + secretManager.stopThreads(); + } + + static class TestDelegationTokenSecretManager extends SQLDelegationTokenSecretManagerImpl { + private ReentrantLock keyRollLock; + + private synchronized ReentrantLock getKeyRollLock() { + if (keyRollLock == null) { + keyRollLock = new ReentrantLock(); + } + return keyRollLock; + } + + TestDelegationTokenSecretManager() { + super(conf, new TestConnectionFactory(conf), + SQLSecretManagerRetriableHandlerImpl.getInstance(conf, new TestRetryHandler())); + } + + // Tests can call this method to prevent delegation keys from + // being rolled in the middle of a 
test to prevent race conditions + public void lockKeyRoll() { + getKeyRollLock().lock(); + } + + public void unlockKeyRoll() { + if (getKeyRollLock().isHeldByCurrentThread()) { + getKeyRollLock().unlock(); + } + } + + @Override + protected void rollMasterKey() throws IOException { + try { + lockKeyRoll(); + super.rollMasterKey(); + } finally { + unlockKeyRoll(); + } + } + + public void setReadOnly(boolean readOnly) { + ((TestConnectionFactory) getConnectionFactory()).readOnly = readOnly; + } + } + + static class TestConnectionFactory extends HikariDataSourceConnectionFactory { + private boolean readOnly = false; + TestConnectionFactory(Configuration conf) { + super(conf); + } + + @Override + public Connection getConnection() throws SQLException { + Connection connection = super.getConnection(); + // Change to default schema as derby driver looks for user schema + connection.setSchema("APP"); + connection.setReadOnly(readOnly); + return connection; + } + } + + static class TestRetryHandler extends SQLSecretManagerRetriableHandlerImpl { + // Tracks the number of times that a SQL command is executed, regardless of + // whether it completed successfully or not. 
+ private static AtomicInteger executionAttemptCounter = new AtomicInteger(); + + static void resetExecutionAttemptCounter() { + executionAttemptCounter = new AtomicInteger(); + } + + static int getExecutionAttempts() { + return executionAttemptCounter.get(); + } + + @Override + public void execute(SQLCommandVoid command) throws SQLException { + executionAttemptCounter.incrementAndGet(); + super.execute(command); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java new file mode 100644 index 0000000000000..3c7f8e88a91d1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.server.federation.security.token; + +import static org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl.ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL; +import static org.apache.hadoop.security.token.delegation.ZKDelegationTokenSecretManager.ZK_DTSM_TOKEN_WATCHER_ENABLED; +import static org.apache.hadoop.security.token.delegation.web.DelegationTokenManager.REMOVAL_SCAN_INTERVAL; +import static org.apache.hadoop.security.token.delegation.web.DelegationTokenManager.RENEW_INTERVAL; +import static org.junit.Assert.fail; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.TestZKDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.apache.hadoop.util.Time; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestZKDelegationTokenSecretManagerImpl + extends TestZKDelegationTokenSecretManager { + private static final Logger LOG = + LoggerFactory.getLogger(TestZKDelegationTokenSecretManagerImpl.class); + + @SuppressWarnings("unchecked") + @Test + public void testMultiNodeOperationWithoutWatch() throws Exception { + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + // disable watch + conf.setBoolean(ZK_DTSM_TOKEN_WATCHER_ENABLED, false); + conf.setInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, 3); + + for (int i = 0; i < TEST_RETRIES; i++) { + ZKDelegationTokenSecretManagerImpl dtsm1 = + new 
ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm2 = + new ZKDelegationTokenSecretManagerImpl(conf); + DelegationTokenManager tm1, tm2; + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.setExternalDelegationTokenSecretManager(dtsm1); + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.setExternalDelegationTokenSecretManager(dtsm2); + + // common token operation without watchers should still be working + Token token = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + tm2.renewToken(token, "foo"); + tm1.verifyToken(token); + tm1.cancelToken(token, "foo"); + try { + verifyTokenFail(tm2, token); + fail("Expected InvalidToken"); + } catch (SecretManager.InvalidToken it) { + // Ignore + } + + token = (Token) tm2.createToken( + UserGroupInformation.getCurrentUser(), "bar"); + Assert.assertNotNull(token); + tm1.verifyToken(token); + tm1.renewToken(token, "bar"); + tm2.verifyToken(token); + tm2.cancelToken(token, "bar"); + try { + verifyTokenFail(tm1, token); + fail("Expected InvalidToken"); + } catch (SecretManager.InvalidToken it) { + // Ignore + } + + dtsm1.stopThreads(); + dtsm2.stopThreads(); + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + } + } + + @Test + public void testMultiNodeTokenRemovalShortSyncWithoutWatch() + throws Exception { + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + // disable watch + conf.setBoolean(ZK_DTSM_TOKEN_WATCHER_ENABLED, false); + // make sync quick + conf.setInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, 3); + // set the renew window and removal interval to be a + // short time to trigger the background cleanup + conf.setInt(RENEW_INTERVAL, 10); + conf.setInt(REMOVAL_SCAN_INTERVAL, 10); + + for (int i = 0; i < TEST_RETRIES; i++) { + ZKDelegationTokenSecretManagerImpl dtsm1 = + new ZKDelegationTokenSecretManagerImpl(conf); + 
ZKDelegationTokenSecretManagerImpl dtsm2 = + new ZKDelegationTokenSecretManagerImpl(conf); + DelegationTokenManager tm1, tm2; + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.setExternalDelegationTokenSecretManager(dtsm1); + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.setExternalDelegationTokenSecretManager(dtsm2); + + // time: X + // token expiry time: + // tm1: X + 10 + // tm2: X + 10 + Token token = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + + // time: X + 9 + // token expiry time: + // tm1: X + 10 + // tm2: X + 19 + Thread.sleep(9 * 1000); + tm2.renewToken(token, "foo"); + tm1.verifyToken(token); + + // time: X + 13 + // token expiry time: (sync happened) + // tm1: X + 19 + // tm2: X + 19 + Thread.sleep(4 * 1000); + tm1.verifyToken(token); + tm2.verifyToken(token); + + dtsm1.stopThreads(); + dtsm2.stopThreads(); + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + } + } + + // This is very unlikely to happen in a real deployment, but it is worth + // covering the case + @Test + public void testMultiNodeTokenRemovalLongSyncWithoutWatch() + throws Exception { + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + // disable watch + conf.setBoolean(ZK_DTSM_TOKEN_WATCHER_ENABLED, false); + // make sync slow (interval longer than the renew interval below) + conf.setInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, 20); + // set the renew window and removal interval to be a + // short time to trigger the background cleanup + conf.setInt(RENEW_INTERVAL, 10); + conf.setInt(REMOVAL_SCAN_INTERVAL, 10); + + for (int i = 0; i < TEST_RETRIES; i++) { + ZKDelegationTokenSecretManagerImpl dtsm1 = + new ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm2 = + new ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm3 = + new 
ZKDelegationTokenSecretManagerImpl(conf); + DelegationTokenManager tm1, tm2, 
tm3; + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.setExternalDelegationTokenSecretManager(dtsm1); + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.setExternalDelegationTokenSecretManager(dtsm2); + tm3 = new DelegationTokenManager(conf, new Text("bla")); + tm3.setExternalDelegationTokenSecretManager(dtsm3); + + // time: X + // token expiry time: + // tm1: X + 10 + // tm2: X + 10 + // tm3: No token due to no sync + Token token = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + + // time: X + 9 + // token expiry time: + // tm1: X + 10 + // tm2: X + 19 + // tm3: No token due to no sync + Thread.sleep(9 * 1000); + long renewalTime = tm2.renewToken(token, "foo"); + LOG.info("Renew for token {} at current time {} renewal time {}", + token.getIdentifier(), Time.formatTime(Time.now()), + Time.formatTime(renewalTime)); + tm1.verifyToken(token); + + // time: X + 13 + // token expiry time: (sync didn't happen) + // tm1: X + 10 + // tm2: X + 19 + // tm3: X + 19 due to fetch from zk + Thread.sleep(4 * 1000); + tm2.verifyToken(token); + tm3.verifyToken(token); + + dtsm1.stopThreads(); + dtsm2.stopThreads(); + dtsm3.stopThreads(); + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + verifyDestroy(tm3, conf); + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMembershipState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMembershipState.java index 857cc2362d75d..6b783b94a7b50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMembershipState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMembershipState.java @@ -33,13 +33,17 @@ import java.io.IOException; 
import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServiceState; +import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamespaceInfo; import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; import org.apache.hadoop.hdfs.server.federation.store.protocol.GetNamenodeRegistrationsRequest; import org.apache.hadoop.hdfs.server.federation.store.protocol.GetNamenodeRegistrationsResponse; +import org.apache.hadoop.hdfs.server.federation.store.protocol.GetNamespaceInfoRequest; +import org.apache.hadoop.hdfs.server.federation.store.protocol.GetNamespaceInfoResponse; import org.apache.hadoop.hdfs.server.federation.store.protocol.NamenodeHeartbeatRequest; import org.apache.hadoop.hdfs.server.federation.store.protocol.NamenodeHeartbeatResponse; import org.apache.hadoop.hdfs.server.federation.store.protocol.UpdateNamenodeRegistrationRequest; @@ -473,6 +477,56 @@ public void testRegistrationExpiredAndDeletion() }, 100, 3000); } + @Test + public void testNamespaceInfoWithUnavailableNameNodeRegistration() + throws IOException { + // Populate the state store with one ACTIVE NameNode entry + // and one UNAVAILABLE NameNode entry + // 1) ns0:nn0 - ACTIVE + // 2) ns0:nn1 - UNAVAILABLE + List registrationList = new ArrayList<>(); + String router = ROUTERS[0]; + String ns = NAMESERVICES[0]; + String rpcAddress = "testrpcaddress"; + String serviceAddress = "testserviceaddress"; + String lifelineAddress = "testlifelineaddress"; + String blockPoolId = "testblockpool"; + String clusterId = "testcluster"; + String webScheme = "http"; + String webAddress = "testwebaddress"; + boolean safemode = false; + + MembershipState record = MembershipState.newInstance( + router, ns, NAMENODES[0], clusterId, blockPoolId, + rpcAddress, serviceAddress, lifelineAddress, webScheme, + webAddress, 
FederationNamenodeServiceState.ACTIVE, safemode); + registrationList.add(record); + + // Set empty clusterId and blockPoolId for UNAVAILABLE NameNode + record = MembershipState.newInstance( + router, ns, NAMENODES[1], "", "", + rpcAddress, serviceAddress, lifelineAddress, webScheme, + webAddress, FederationNamenodeServiceState.UNAVAILABLE, safemode); + registrationList.add(record); + + registerAndLoadRegistrations(registrationList); + + GetNamespaceInfoRequest request = GetNamespaceInfoRequest.newInstance(); + GetNamespaceInfoResponse response + = membershipStore.getNamespaceInfo(request); + Set namespaces = response.getNamespaceInfo(); + + // Verify only one namespace is registered + assertEquals(1, namespaces.size()); + + // Verify the registered namespace has a valid pair of clusterId + // and blockPoolId derived from ACTIVE NameNode + FederationNamespaceInfo namespace = namespaces.iterator().next(); + assertEquals(ns, namespace.getNameserviceId()); + assertEquals(clusterId, namespace.getClusterId()); + assertEquals(blockPoolId, namespace.getBlockPoolId()); + } + /** * Get a single namenode membership record from the store. * @@ -532,8 +586,6 @@ private MembershipState getExpiredNamenodeRegistration( /** * Register a namenode heartbeat with the state store. * - * @param store FederationMembershipStateStore instance to retrieve the - * membership data records. * @param namenode A fully populated namenode membership record to be * committed to the data store. * @return True if successful, false otherwise. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMountTable.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMountTable.java index 6e5bd9ca85ffb..d4dfcc4863fdb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMountTable.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreMountTable.java @@ -227,7 +227,6 @@ private MountTable getMountTableEntry(String mount) throws IOException { /** * Fetch all mount table records beneath a root path. * - * @param store FederationMountTableStore instance to commit the data. * @param mount The root search path, enter "/" to return all mount table * records. * diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java index fe1b9a5bfa04c..06b05f45bbe39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java @@ -234,6 +234,25 @@ public void testInsert( assertEquals(11, records2.size()); } + public void testInsertWithErrorDuringWrite( + StateStoreDriver driver, Class recordClass) + throws IllegalArgumentException, IllegalAccessException, IOException { + + assertTrue(driver.removeAll(recordClass)); + QueryResult queryResult0 = driver.get(recordClass); + List records0 = queryResult0.getRecords(); + assertTrue(records0.isEmpty()); + + // Insert single + BaseRecord record = generateFakeRecord(recordClass); + 
driver.put(record, true, false); + + // Verify that no record was inserted. + QueryResult queryResult1 = driver.get(recordClass); + List records1 = queryResult1.getRecords(); + assertEquals(0, records1.size()); + } + public void testFetchErrors(StateStoreDriver driver, Class clazz) throws IllegalAccessException, IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java index 8c4b188cc47e3..dbd4b9bdae2ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java @@ -17,16 +17,26 @@ */ package org.apache.hadoop.hdfs.server.federation.store.driver; +import java.io.BufferedWriter; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils; +import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileBaseImpl; import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl; +import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.stubbing.Answer; + +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.spy; + /** * Test the FileSystem (e.g., HDFS) implementation of the State Store driver. 
@@ -91,4 +101,18 @@ public void testMetrics() throws IllegalArgumentException, IllegalAccessException, IOException { testMetrics(getStateStoreDriver()); } + + @Test + public void testInsertWithErrorDuringWrite() + throws IllegalArgumentException, IllegalAccessException, IOException { + StateStoreFileBaseImpl driver = spy((StateStoreFileBaseImpl)getStateStoreDriver()); + doAnswer((Answer) a -> { + BufferedWriter writer = (BufferedWriter) a.callRealMethod(); + BufferedWriter spyWriter = spy(writer); + doThrow(IOException.class).when(spyWriter).write(any(String.class)); + return spyWriter; + }).when(driver).getWriter(any()); + + testInsertWithErrorDuringWrite(driver, MembershipState.class); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java new file mode 100644 index 0000000000000..9f600cb6f3fde --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.store.records; + +import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; +import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreBaseImpl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * A mock StateStoreDriver that runs in memory that can force IOExceptions + * upon demand. + */ +public class MockStateStoreDriver extends StateStoreBaseImpl { + private boolean giveErrors = false; + private boolean initialized = false; + private static final Map> VALUE_MAP = new HashMap<>(); + + @Override + public boolean initDriver() { + initialized = true; + return true; + } + + @Override + public boolean initRecordStorage(String className, + Class clazz) { + return true; + } + + @Override + public boolean isDriverReady() { + return initialized; + } + + @Override + public void close() throws Exception { + VALUE_MAP.clear(); + initialized = false; + } + + /** + * Should this object throw an IOException on each following call? + * @param value should we throw errors? + */ + public void setGiveErrors(boolean value) { + giveErrors = value; + } + + /** + * Check to see if this StateStore should throw IOException on each call. + * @throws IOException thrown if giveErrors has been set + */ + private void checkErrors() throws IOException { + if (giveErrors) { + throw new IOException("Induced errors"); + } + } + + @Override + @SuppressWarnings("unchecked") + public QueryResult get(Class clazz) throws IOException { + checkErrors(); + Map map = VALUE_MAP.get(StateStoreUtils.getRecordName(clazz)); + List results = + map != null ? 
new ArrayList<>((Collection) map.values()) : new ArrayList<>(); + return new QueryResult<>(results, System.currentTimeMillis()); + } + + @Override + public boolean putAll(List records, + boolean allowUpdate, + boolean errorIfExists) + throws IOException { + checkErrors(); + for (T record : records) { + Map map = + VALUE_MAP.computeIfAbsent(StateStoreUtils.getRecordName(record.getClass()), + k -> new HashMap<>()); + String key = record.getPrimaryKey(); + BaseRecord oldRecord = map.get(key); + if (oldRecord == null || allowUpdate) { + map.put(key, record); + } else if (errorIfExists) { + throw new IOException("Record already exists for " + record.getClass() + + ": " + key); + } + } + return true; + } + + /** + * Clear all records from the store. + */ + public void clearAll() { + VALUE_MAP.clear(); + } + + @Override + public boolean removeAll(Class clazz) throws IOException { + checkErrors(); + return VALUE_MAP.remove(StateStoreUtils.getRecordName(clazz)) != null; + } + + @Override + @SuppressWarnings("unchecked") + public int remove(Class clazz, + Query query) + throws IOException { + checkErrors(); + int result = 0; + Map map = + VALUE_MAP.get(StateStoreUtils.getRecordName(clazz)); + if (map != null) { + for (Iterator itr = map.values().iterator(); itr.hasNext();) { + BaseRecord record = itr.next(); + if (query.matches((T) record)) { + itr.remove(); + result += 1; + } + } + } + return result; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java index dfe2bc98bf40a..8226178fe7691 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java @@ 
-1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,8 +20,16 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext; +import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServiceState; +import org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver; +import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; import org.apache.hadoop.hdfs.server.federation.router.RouterServiceState; +import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; +import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver; import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreSerializer; import org.junit.Test; @@ -40,7 +48,7 @@ public class TestRouterState { private static final RouterServiceState STATE = RouterServiceState.RUNNING; - private RouterState generateRecord() throws IOException { + private RouterState generateRecord() { RouterState record = RouterState.newInstance(ADDRESS, START_TIME, STATE); record.setVersion(VERSION); record.setCompileInfo(COMPILE_INFO); @@ -82,4 +90,46 @@ public void testSerialization() throws IOException { validateRecord(newRecord); } + + @Test + public void testStateStoreResilience() throws Exception { + StateStoreService service = new StateStoreService(); + Configuration conf = new Configuration(); + conf.setClass(RBFConfigKeys.FEDERATION_STORE_DRIVER_CLASS, + MockStateStoreDriver.class, + StateStoreDriver.class); + conf.setBoolean(RBFConfigKeys.DFS_ROUTER_METRICS_ENABLE, false); + service.init(conf); + MockStateStoreDriver driver = (MockStateStoreDriver) service.getDriver(); + driver.clearAll(); + // Add two records for 
block1 + driver.put(MembershipState.newInstance("routerId", "ns1", + "ns1-ha1", "cluster1", "block1", "rpc1", + "service1", "lifeline1", "https", "nn01", + FederationNamenodeServiceState.ACTIVE, false), false, false); + driver.put(MembershipState.newInstance("routerId", "ns1", + "ns1-ha2", "cluster1", "block1", "rpc2", + "service2", "lifeline2", "https", "nn02", + FederationNamenodeServiceState.STANDBY, false), false, false); + // load the cache + service.loadDriver(); + MembershipNamenodeResolver resolver = new MembershipNamenodeResolver(conf, service); + service.refreshCaches(true); + + // look up block1 + List result = + resolver.getNamenodesForBlockPoolId("block1"); + assertEquals(2, result.size()); + + // cause io errors and then reload the cache + driver.setGiveErrors(true); + long previousUpdate = service.getCacheUpdateTime(); + service.refreshCaches(true); + assertEquals(previousUpdate, service.getCacheUpdateTime()); + + // make sure the old cache is still there + result = resolver.getNamenodesForBlockPoolId("block1"); + assertEquals(2, result.size()); + service.stop(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/hdfs.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/hdfs.xml index 261d4ba136508..ae280a8e450c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/hdfs.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/hdfs.xml @@ -111,4 +111,19 @@ true + + fs.contract.supports-hflush + true + + + + fs.contract.supports-hsync + true + + + + fs.contract.metadata_updated_on_hsync + false + + diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/webhdfs.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/webhdfs.xml index 0cb6dd8a6d0f1..45aaa2264250c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/webhdfs.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/webhdfs.xml 
@@ -28,4 +28,19 @@ true + + fs.contract.supports-hflush + false + + + + fs.contract.supports-hsync + false + + + + fs.contract.metadata_updated_on_hsync + false + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.2.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.2.xml new file mode 100644 index 0000000000000..811d305856a5b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.2.xml @@ -0,0 +1,674 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.3.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.3.xml new file mode 100644 index 0000000000000..5454f53be9122 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.3.xml @@ -0,0 +1,674 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.4.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.4.xml new file mode 100644 index 0000000000000..2aa6ef1cdb5be --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.2.4.xml @@ -0,0 +1,674 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.1.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.1.xml new file mode 100644 index 0000000000000..d4444cf5cb065 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.1.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.2.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.2.xml new file mode 100644 index 0000000000000..b4d954cb53ebd --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.2.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.3.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.3.xml new file mode 100644 index 0000000000000..0eaf5019aa6d2 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.3.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.4.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.4.xml new file mode 100644 index 0000000000000..b3978b01a6994 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.4.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml new file mode 100644 index 0000000000000..399b62b301037 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

      + +

      The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

      ]]> +
      +
his method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index be2164fc077eb..22dba0145278a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project-dist hadoop-hdfs - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop HDFS Apache Hadoop HDFS jar @@ -35,11 +35,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> - - org.apache.hadoop - hadoop-annotations - provided - org.apache.hadoop hadoop-auth @@ -68,8 +63,18 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> test - com.google.guava - guava + io.dropwizard.metrics + metrics-core + provided + + + org.xerial.snappy + snappy-java + provided + + + org.apache.hadoop.thirdparty + hadoop-shaded-guava compile @@ -123,8 +128,8 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> compile - log4j - log4j + ch.qos.reload4j + reload4j compile @@ -167,7 +172,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.slf4j - slf4j-log4j12 + slf4j-reload4j provided @@ -180,10 +185,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> netty-all compile - - org.apache.htrace - htrace-core4 - org.apache.hadoop hadoop-kms @@ -219,6 +220,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> assertj-core test + + org.lz4 + lz4-java + test + @@ -307,10 +313,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> run - + - + @@ -349,6 +355,9 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> replace-sources false + + **/DFSUtil.java + @@ -413,7 +422,7 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> src/main/webapps/static/moment.min.js src/main/webapps/static/dust-full-2.0.0.min.js src/main/webapps/static/dust-helpers-1.1.1.min.js - src/main/webapps/static/jquery-3.4.1.min.js + src/main/webapps/static/jquery-3.6.0.min.js src/main/webapps/static/jquery.dataTables.min.js src/main/webapps/static/json-bignum.js 
src/main/webapps/static/dataTables.bootstrap.css @@ -462,7 +471,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} ${testsThreadCount} false ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index b2f8ad2a5a41b..e3f4bfcde8408 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -123,6 +123,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.datanode.data.write.bandwidthPerSec"; // A value of zero indicates no limit public static final long DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_DEFAULT = 0; + public static final String DFS_DATANODE_EC_RECONSTRUCT_READ_BANDWIDTHPERSEC_KEY = + "dfs.datanode.ec.reconstruct.read.bandwidthPerSec"; + public static final long DFS_DATANODE_EC_RECONSTRUCT_READ_BANDWIDTHPERSEC_DEFAULT = + 0; // A value of zero indicates no limit + public static final String DFS_DATANODE_EC_RECONSTRUCT_WRITE_BANDWIDTHPERSEC_KEY = + "dfs.datanode.ec.reconstruct.write.bandwidthPerSec"; + public static final long DFS_DATANODE_EC_RECONSTRUCT_WRITE_BANDWIDTHPERSEC_DEFAULT = + 0; // A value of zero indicates no limit @Deprecated public static final String DFS_DATANODE_READAHEAD_BYTES_KEY = HdfsClientConfigKeys.DFS_DATANODE_READAHEAD_BYTES_KEY; @@ -143,6 +151,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; public static final int DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT = 4; + public static 
final String DFS_DATANODE_FSDATASETASYNCDISK_MAX_THREADS_PER_VOLUME_KEY = + "dfs.datanode.fsdatasetasyncdisk.max.threads.per.volume"; + public static final int DFS_DATANODE_FSDATASETASYNCDISK_MAX_THREADS_PER_VOLUME_DEFAULT = 4; public static final String DFS_DATANODE_LAZY_WRITER_INTERVAL_SEC = "dfs.datanode.lazywriter.interval.sec"; public static final int DFS_DATANODE_LAZY_WRITER_INTERVAL_DEFAULT_SEC = 60; public static final String DFS_DATANODE_RAM_DISK_REPLICA_TRACKER_KEY = "dfs.datanode.ram.disk.replica.tracker"; @@ -239,10 +250,19 @@ public class DFSConfigKeys extends CommonConfigurationKeys { HdfsClientConfigKeys.DeprecatedKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY; public static final boolean DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_DEFAULT = true; + public static final String + DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYSTORAGETYPE_KEY = + "dfs.namenode.redundancy.considerLoadByStorageType"; + public static final boolean + DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYSTORAGETYPE_DEFAULT = false; public static final String DFS_NAMENODE_READ_CONSIDERLOAD_KEY = "dfs.namenode.read.considerLoad"; public static final boolean DFS_NAMENODE_READ_CONSIDERLOAD_DEFAULT = false; + public static final String DFS_NAMENODE_READ_CONSIDERSTORAGETYPE_KEY = + "dfs.namenode.read.considerStorageType"; + public static final boolean DFS_NAMENODE_READ_CONSIDERSTORAGETYPE_DEFAULT = + false; public static final String DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR = "dfs.namenode.redundancy.considerLoad.factor"; public static final double @@ -261,8 +281,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys { = "dfs.namenode.file.close.num-committed-allowed"; public static final int DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_DEFAULT = 0; - public static final String DFS_NAMENODE_STRIPE_MIN_KEY = "dfs.namenode.stripe.min"; - public static final int DFS_NAMENODE_STRIPE_MIN_DEFAULT = 1; public static final String DFS_NAMENODE_SAFEMODE_REPLICATION_MIN_KEY = 
"dfs.namenode.safemode.replication.min"; @@ -284,18 +302,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT = 2; public static final String DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_KEY = "dfs.namenode.replication.max-streams-hard-limit"; public static final int DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT = 4; - public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_KEY - = "dfs.namenode.storageinfo.defragment.interval.ms"; - public static final int - DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_DEFAULT = 10 * 60 * 1000; - public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_KEY - = "dfs.namenode.storageinfo.defragment.timeout.ms"; - public static final int - DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_DEFAULT = 4; - public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_KEY - = "dfs.namenode.storageinfo.defragment.ratio"; - public static final double - DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_DEFAULT = 0.75; public static final String DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_KEY = "dfs.namenode.blockreport.queue.size"; public static final int DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT @@ -367,7 +373,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum"; public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1; public static final String DFS_NAMENODE_QUOTA_INIT_THREADS_KEY = "dfs.namenode.quota.init-threads"; - public static final int DFS_NAMENODE_QUOTA_INIT_THREADS_DEFAULT = 4; + public static final int DFS_NAMENODE_QUOTA_INIT_THREADS_DEFAULT = 12; public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD = "dfs.namenode.edit.log.autoroll.multiplier.threshold"; public static final float @@ -510,6 +516,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { // Whether 
to enable datanode's stale state detection and usage for reads public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode"; public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false; + public static final String DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY = + "dfs.namenode.avoid.read.slow.datanode"; + public static final boolean + DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_DEFAULT = false; // Whether to enable datanode's stale state detection and usage for writes public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY = "dfs.namenode.avoid.write.stale.datanode"; public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_DEFAULT = false; @@ -602,6 +612,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATANODE_LOCK_FAIR_KEY = "dfs.datanode.lock.fair"; public static final boolean DFS_DATANODE_LOCK_FAIR_DEFAULT = true; + public static final String DFS_DATANODE_LOCK_READ_WRITE_ENABLED_KEY = + "dfs.datanode.lock.read.write.enabled"; + public static final Boolean DFS_DATANODE_LOCK_READ_WRITE_ENABLED_DEFAULT = + true; public static final String DFS_DATANODE_LOCK_REPORTING_THRESHOLD_MS_KEY = "dfs.datanode.lock-reporting-threshold-ms"; public static final long @@ -644,6 +658,34 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT = 1000; + public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY = + "dfs.datanode.min.outlier.detection.nodes"; + public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT = + 10L; + public static final String DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY = + "dfs.datanode.slowpeer.low.threshold.ms"; + public static final long DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT = + 5L; + public static final String DFS_DATANODE_MAX_NODES_TO_REPORT_KEY = + 
"dfs.datanode.max.nodes.to.report"; + public static final int DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT = + 5; + public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY = + "dfs.datanode.min.outlier.detection.disks"; + public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT = + 5L; + public static final String DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY = + "dfs.datanode.slowdisk.low.threshold.ms"; + public static final long DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT = + 20L; + public static final String DFS_DATANODE_MAX_DISKS_TO_REPORT_KEY = + "dfs.datanode.max.disks.to.report"; + public static final int DFS_DATANODE_MAX_DISKS_TO_REPORT_DEFAULT = + 5; + public static final String DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY = + "dfs.datanode.max.slowdisks.to.exclude"; + public static final int DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT = + 0; public static final String DFS_DATANODE_HOST_NAME_KEY = HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY; public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY = @@ -660,6 +702,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT = false; public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async"; public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT = false; + public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY = "dfs.namenode.audit.log.async.blocking"; + public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT = true; + public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY = "dfs.namenode.audit.log.async.buffer.size"; + public static final int DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT = 128; public static final String DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST = "dfs.namenode.audit.log.debug.cmdlist"; public static final String DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY = 
"dfs.namenode.metrics.logger.period.seconds"; @@ -669,9 +715,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.datanode.metrics.logger.period.seconds"; public static final int DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT = 600; + public static final String DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_KEY = + "dfs.namenode.audit.log.with.remote.port"; + public static final boolean DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_DEFAULT = + false; /** * The maximum number of getBlocks RPCs data movement utilities can make to - * a NameNode per second. Values <= 0 disable throttling. This affects + * a NameNode per second. Values <= 0 disable throttling. This affects * anything that uses a NameNodeConnector, i.e., the Balancer, Mover, * and StoragePolicySatisfier. */ @@ -808,6 +858,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.datanode.ec.reconstruction.xmits.weight"; public static final float DFS_DN_EC_RECONSTRUCTION_XMITS_WEIGHT_DEFAULT = 0.5f; + public static final String DFS_DN_EC_RECONSTRUCTION_VALIDATION_KEY = + "dfs.datanode.ec.reconstruction.validation"; + public static final boolean DFS_DN_EC_RECONSTRUCTION_VALIDATION_VALUE = false; public static final String DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY = @@ -842,6 +895,18 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT = 21 * 24; // 3 weeks. public static final String DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND = "dfs.block.scanner.volume.bytes.per.second"; public static final long DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT = 1048576L; + /** + * The amount of time in milliseconds that the BlockScanner times out waiting + * for the VolumeScanner thread to join during a shutdown call. 
+ */ + public static final String DFS_BLOCK_SCANNER_VOLUME_JOIN_TIMEOUT_MSEC_KEY = + "dfs.block.scanner.volume.join.timeout.ms"; + public static final long DFS_BLOCK_SCANNER_VOLUME_JOIN_TIMEOUT_MSEC_DEFAULT = + TimeUnit.SECONDS.toMillis(5); + public static final String DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED = + "dfs.block.scanner.skip.recent.accessed"; + public static final boolean DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT = + false; public static final String DFS_DATANODE_TRANSFERTO_ALLOWED_KEY = "dfs.datanode.transferTo.allowed"; public static final boolean DFS_DATANODE_TRANSFERTO_ALLOWED_DEFAULT = true; public static final String DFS_HEARTBEAT_INTERVAL_KEY = "dfs.heartbeat.interval"; @@ -881,6 +946,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.namenode.lifeline.handler.count"; public static final String DFS_NAMENODE_SERVICE_HANDLER_COUNT_KEY = "dfs.namenode.service.handler.count"; public static final int DFS_NAMENODE_SERVICE_HANDLER_COUNT_DEFAULT = 10; + // List of users that can override their client ip + public static final String DFS_NAMENODE_IP_PROXY_USERS = "dfs.namenode.ip-proxy-users"; public static final String DFS_HTTP_POLICY_KEY = "dfs.http.policy"; public static final String DFS_HTTP_POLICY_DEFAULT = HttpConfig.Policy.HTTP_ONLY.name(); public static final String DFS_DATANODE_HTTPSERVER_FILTER_HANDLERS = "dfs.datanode.httpserver.filter.handlers"; @@ -949,6 +1016,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.datanode.outliers.report.interval"; public static final String DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT = "30m"; + public static final String DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY = + "dfs.namenode.max.slowpeer.collect.nodes"; + public static final int DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_DEFAULT = + 5; + public static final String DFS_NAMENODE_SLOWPEER_COLLECT_INTERVAL_KEY = + "dfs.namenode.slowpeer.collect.interval"; + public static final String 
DFS_NAMENODE_SLOWPEER_COLLECT_INTERVAL_DEFAULT = + "30m"; // property for fsimage compression public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress"; @@ -960,7 +1035,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_IMAGE_TRANSFER_RATE_KEY = "dfs.image.transfer.bandwidthPerSec"; - public static final long DFS_IMAGE_TRANSFER_RATE_DEFAULT = 0; //no throttling + public static final long DFS_IMAGE_TRANSFER_RATE_DEFAULT = 52428800; public static final String DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY = "dfs.image.transfer-bootstrap-standby.bandwidthPerSec"; @@ -1086,6 +1161,26 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.namenode.available-space-block-placement-policy.balanced-space-preference-fraction"; public static final float DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT = 0.6f; + public static final String + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY = + "dfs.namenode.available-space-block-placement-policy.balanced-space-tolerance"; + public static final int + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT = + 5; + public static final String + DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY = + "dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy" + + ".balanced-space-preference-fraction"; + public static final float + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT = + 0.6f; + public static final String + DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY = + "dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy" + + ".balanced-space-tolerance"; + public static final int + 
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT = + 5; public static final String DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCE_LOCAL_NODE_KEY = "dfs.namenode.available-space-block-placement-policy.balance-local-node"; @@ -1095,6 +1190,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_BLOCKPLACEMENTPOLICY_DEFAULT_PREFER_LOCAL_NODE_KEY = "dfs.namenode.block-placement-policy.default.prefer-local-node"; public static final boolean DFS_NAMENODE_BLOCKPLACEMENTPOLICY_DEFAULT_PREFER_LOCAL_NODE_DEFAULT = true; + public static final String + DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY = + "dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled"; + public static final boolean + DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT = + false; + public static final String DFS_NAMENODE_GC_TIME_MONITOR_ENABLE = "dfs.namenode.gc.time.monitor.enable"; public static final boolean DFS_NAMENODE_GC_TIME_MONITOR_ENABLE_DEFAULT = @@ -1160,6 +1262,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.ha.nn.not-become-active-in-safemode"; public static final boolean DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT = false; + public static final String DFS_HA_ALLOW_STALE_READ_KEY = + "dfs.ha.allow.stale.reads"; + public static final boolean DFS_HA_ALLOW_STALE_READ_DEFAULT = false; // Security-related configs public static final String DFS_ENCRYPT_DATA_TRANSFER_KEY = "dfs.encrypt.data.transfer"; @@ -1353,7 +1458,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.datanode.parallel.volumes.load.threads.num"; public static final String DFS_DATANODE_BLOCK_ID_LAYOUT_UPGRADE_THREADS_KEY = "dfs.datanode.block.id.layout.upgrade.threads"; - public static final int DFS_DATANODE_BLOCK_ID_LAYOUT_UPGRADE_THREADS = 12; + public static final int DFS_DATANODE_BLOCK_ID_LAYOUT_UPGRADE_THREADS = 6; public 
static final String DFS_NAMENODE_INOTIFY_MAX_EVENTS_PER_RPC_KEY = "dfs.namenode.inotify.max.events.per.rpc"; @@ -1460,6 +1565,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.namenode.state.context.enabled"; public static final boolean DFS_NAMENODE_STATE_CONTEXT_ENABLED_DEFAULT = false; + /** + * whether to protect the subdirectories of directories which + * set on fs.protected.directories. + */ + public static final String DFS_PROTECTED_SUBDIRECTORIES_ENABLE = + "dfs.protected.subdirectories.enable"; + // Default value for DFS_PROTECTED_SUBDIRECTORIES_ENABLE. + public static final boolean DFS_PROTECTED_SUBDIRECTORIES_ENABLE_DEFAULT = + false; + // dfs.client.retry confs are moved to HdfsClientConfigKeys.Retry @Deprecated public static final String DFS_CLIENT_RETRY_POLICY_ENABLED_KEY diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index c5ba8b96cf321..8f5e05f62fcf5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -39,7 +39,6 @@ import java.io.ByteArrayInputStream; import java.io.DataInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.net.InetAddress; @@ -70,8 +69,10 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.FSDirectory; import org.apache.hadoop.hdfs.server.namenode.INodesInPath; +import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.security.AccessControlException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,7 +93,7 @@ import 
org.apache.hadoop.hdfs.web.AuthFilterInitializer; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.http.HttpServer2; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AuthenticationFilterInitializer; @@ -103,11 +104,11 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ToolRunner; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.apache.hadoop.thirdparty.protobuf.BlockingService; @InterfaceAudience.Private @@ -156,23 +157,36 @@ public int compare(DatanodeInfo a, DatanodeInfo b) { /** * Comparator for sorting DataNodeInfo[] based on - * stale, decommissioned and entering_maintenance states. - * Order: live {@literal ->} stale {@literal ->} entering_maintenance - * {@literal ->} decommissioned + * slow, stale, entering_maintenance and decommissioned states. 
+ * Order: live {@literal ->} slow {@literal ->} stale {@literal ->} + * entering_maintenance {@literal ->} decommissioned */ @InterfaceAudience.Private - public static class ServiceAndStaleComparator extends ServiceComparator { + public static class StaleAndSlowComparator extends ServiceComparator { + private final boolean avoidStaleDataNodesForRead; private final long staleInterval; + private final boolean avoidSlowDataNodesForRead; + private final Set slowNodesUuidSet; /** * Constructor of ServiceAndStaleComparator - * + * @param avoidStaleDataNodesForRead + * Whether or not to avoid using stale DataNodes for reading. * @param interval * The time interval for marking datanodes as stale is passed from - * outside, since the interval may be changed dynamically + * outside, since the interval may be changed dynamically. + * @param avoidSlowDataNodesForRead + * Whether or not to avoid using slow DataNodes for reading. + * @param slowNodesUuidSet + * Slow DataNodes UUID set. */ - public ServiceAndStaleComparator(long interval) { + public StaleAndSlowComparator( + boolean avoidStaleDataNodesForRead, long interval, + boolean avoidSlowDataNodesForRead, Set slowNodesUuidSet) { + this.avoidStaleDataNodesForRead = avoidStaleDataNodesForRead; this.staleInterval = interval; + this.avoidSlowDataNodesForRead = avoidSlowDataNodesForRead; + this.slowNodesUuidSet = slowNodesUuidSet; } @Override @@ -183,9 +197,22 @@ public int compare(DatanodeInfo a, DatanodeInfo b) { } // Stale nodes will be moved behind the normal nodes - boolean aStale = a.isStale(staleInterval); - boolean bStale = b.isStale(staleInterval); - return aStale == bStale ? 0 : (aStale ? 1 : -1); + if (avoidStaleDataNodesForRead) { + boolean aStale = a.isStale(staleInterval); + boolean bStale = b.isStale(staleInterval); + ret = aStale == bStale ? 0 : (aStale ? 
1 : -1); + if (ret != 0) { + return ret; + } + } + + // Slow nodes will be moved behind the normal nodes + if (avoidSlowDataNodesForRead) { + boolean aSlow = slowNodesUuidSet.contains(a.getDatanodeUuid()); + boolean bSlow = slowNodesUuidSet.contains(b.getDatanodeUuid()); + ret = aSlow == bSlow ? 0 : (aSlow ? 1 : -1); + } + return ret; } } @@ -1295,6 +1322,27 @@ static URI trimUri(URI uri) { */ public static void addPBProtocol(Configuration conf, Class protocol, BlockingService service, RPC.Server server) throws IOException { + RPC.setProtocolEngine(conf, protocol, ProtobufRpcEngine2.class); + server.addProtocol(RPC.RpcKind.RPC_PROTOCOL_BUFFER, protocol, service); + } + + /** + * Add protobuf based protocol to the {@link RPC.Server}. + * This engine uses Protobuf 2.5.0. Recommended to upgrade to + * Protobuf 3.x from hadoop-thirdparty and use + * {@link DFSUtil#addPBProtocol(Configuration, Class, BlockingService, + * RPC.Server)}. + * @param conf configuration + * @param protocol Protocol interface + * @param service service that implements the protocol + * @param server RPC server to which the protocol & implementation is + * added to + * @throws IOException + */ + @Deprecated + public static void addPBProtocol(Configuration conf, Class protocol, + com.google.protobuf.BlockingService service, RPC.Server server) + throws IOException { RPC.setProtocolEngine(conf, protocol, ProtobufRpcEngine.class); server.addProtocol(RPC.RpcKind.RPC_PROTOCOL_BUFFER, protocol, service); } @@ -1755,7 +1803,6 @@ public static DelegationTokenIdentifier decodeDelegationToken( * was found. 
* @throws ParentNotDirectoryException * @throws UnresolvedLinkException - * @throws FileNotFoundException */ public static void checkProtectedDescendants( FSDirectory fsd, INodesInPath iip) @@ -1787,6 +1834,18 @@ public static void checkProtectedDescendants( + descendant); } } + + if (fsd.isProtectedSubDirectoriesEnable()) { + while (!src.isEmpty()) { + int index = src.lastIndexOf(Path.SEPARATOR_CHAR); + src = src.substring(0, index); + if (protectedDirs.contains(src)) { + throw new AccessControlException( + "Cannot delete/rename subdirectory under protected subdirectory " + + src); + } + } + } } /** @@ -1835,4 +1894,18 @@ public static boolean isParentEntry(final String path, final String parent) { return path.charAt(parent.length()) == Path.SEPARATOR_CHAR || parent.equals(Path.SEPARATOR); } + + /** + * Add transfer rate metrics for valid data read and duration values. + * @param metrics metrics for datanodes + * @param read bytes read + * @param duration read duration + */ + public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) { + if (read >= 0 && duration > 0) { + metrics.addReadTransferRate(read * 1000 / duration); + } else { + LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index aebc28aa793e3..53d3b4b2936cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ALLOW_STALE_READ_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ALLOW_STALE_READ_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY; import 
static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_BIND_HOST_KEY; @@ -54,9 +56,9 @@ import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.LoggerFactory; @InterfaceAudience.Private @@ -220,11 +222,12 @@ public static List getConfForOtherNodes( * @return true if the NN should allow read operations while in standby mode. */ public static boolean shouldAllowStandbyReads(Configuration conf) { - return conf.getBoolean("dfs.ha.allow.stale.reads", false); + return conf.getBoolean(DFS_HA_ALLOW_STALE_READ_KEY, + DFS_HA_ALLOW_STALE_READ_DEFAULT); } public static void setAllowStandbyReads(Configuration conf, boolean val) { - conf.setBoolean("dfs.ha.allow.stale.reads", val); + conf.setBoolean(DFS_HA_ALLOW_STALE_READ_KEY, val); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java index e999375775329..b71a7deebb395 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java @@ -37,7 +37,6 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.ipc.RefreshCallQueueProtocol; import org.apache.hadoop.ipc.GenericRefreshProtocol; -import org.apache.hadoop.tracing.TraceAdminProtocol; /** * {@link PolicyProvider} for HDFS protocols. 
@@ -80,9 +79,6 @@ public class HDFSPolicyProvider extends PolicyProvider { new Service( CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_AUTHORIZATION_GENERIC_REFRESH, GenericRefreshProtocol.class), - new Service( - CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_AUTHORIZATION_TRACING, - TraceAdminProtocol.class), new Service( CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_AUTHORIZATION_DATANODE_LIFELINE, DatanodeLifelineProtocol.class), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index 3063083db8840..2a56ef3e1868b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -48,7 +48,7 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.AlignmentContext; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProxyCombiner; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RefreshCallQueueProtocol; @@ -305,7 +305,7 @@ private static BalancerProtocols createNNProxyWithBalancerProtocol( private static T createNameNodeProxy(InetSocketAddress address, Configuration conf, UserGroupInformation ugi, Class xface, int rpcTimeout, AlignmentContext alignmentContext) throws IOException { - RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine2.class); return RPC.getProtocolProxy(xface, RPC.getProtocolVersion(xface), address, ugi, conf, NetUtils.getDefaultSocketFactory(conf), rpcTimeout, null, null, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java index 9082b910eb45c..c18fdc5b8cf84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.net; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -249,17 +249,10 @@ Node chooseRandomWithStorageType(final String scope, return null; } // to this point, it is guaranteed that there is at least one node - // that satisfies the requirement, keep trying until we found one. - Node chosen; - do { - chosen = chooseRandomWithStorageTypeAndExcludeRoot(root, excludeRoot, - type); - if (excludedNodes == null || !excludedNodes.contains(chosen)) { - break; - } else { - LOG.debug("Node {} is excluded, continuing.", chosen); - } - } while (true); + // that satisfies the requirement. + Node chosen = + chooseRandomWithStorageTypeAndExcludeRoot(root, excludeRoot, type, + excludedNodes); LOG.debug("chooseRandom returning {}", chosen); return chosen; } @@ -277,23 +270,23 @@ private boolean isNodeInScope(Node node, String scope) { * Choose a random node that has the required storage type, under the given * root, with an excluded subtree root (could also just be a leaf node). * - * Note that excludedNode is checked after a random node, so it is not being - * handled here. 
- * * @param root the root node where we start searching for a datanode * @param excludeRoot the root of the subtree what should be excluded * @param type the expected storage type + * @param excludedNodes the list of nodes to be excluded * @return a random datanode, with the storage type, and is not in excluded * scope */ private Node chooseRandomWithStorageTypeAndExcludeRoot( - DFSTopologyNodeImpl root, Node excludeRoot, StorageType type) { + DFSTopologyNodeImpl root, Node excludeRoot, StorageType type, + Collection excludedNodes) { Node chosenNode; if (root.isRack()) { // children are datanode descriptor ArrayList candidates = new ArrayList<>(); for (Node node : root.getChildren()) { - if (node.equals(excludeRoot)) { + if (node.equals(excludeRoot) || (excludedNodes != null && excludedNodes + .contains(node))) { continue; } DatanodeDescriptor dnDescriptor = (DatanodeDescriptor)node; @@ -310,7 +303,7 @@ private Node chooseRandomWithStorageTypeAndExcludeRoot( } else { // the children are inner nodes ArrayList candidates = - getEligibleChildren(root, excludeRoot, type); + getEligibleChildren(root, excludeRoot, type, excludedNodes); if (candidates.size() == 0) { return null; } @@ -339,7 +332,7 @@ private Node chooseRandomWithStorageTypeAndExcludeRoot( } DFSTopologyNodeImpl nextRoot = candidates.get(idxChosen); chosenNode = chooseRandomWithStorageTypeAndExcludeRoot( - nextRoot, excludeRoot, type); + nextRoot, excludeRoot, type, excludedNodes); } return chosenNode; } @@ -352,11 +345,13 @@ private Node chooseRandomWithStorageTypeAndExcludeRoot( * @param root the subtree root we check. * @param excludeRoot the root of the subtree that should be excluded. * @param type the storage type we look for. + * @param excludedNodes the list of excluded nodes. * @return a list of possible nodes, each of them is eligible as the next * level root we search. 
*/ private ArrayList getEligibleChildren( - DFSTopologyNodeImpl root, Node excludeRoot, StorageType type) { + DFSTopologyNodeImpl root, Node excludeRoot, StorageType type, + Collection excludedNodes) { ArrayList candidates = new ArrayList<>(); int excludeCount = 0; if (excludeRoot != null && root.isAncestor(excludeRoot)) { @@ -383,6 +378,24 @@ private ArrayList getEligibleChildren( (dfsNode.isAncestor(excludeRoot) || dfsNode.equals(excludeRoot))) { storageCount -= excludeCount; } + if (excludedNodes != null) { + for (Node excludedNode : excludedNodes) { + if (excludeRoot != null && isNodeInScope(excludedNode, + NodeBase.getPath(excludeRoot))) { + continue; + } + if (isNodeInScope(excludedNode, NodeBase.getPath(node))) { + if (excludedNode instanceof DatanodeDescriptor) { + storageCount -= + ((DatanodeDescriptor) excludedNode).hasStorageType(type) ? + 1 : 0; + } else if (excludedNode instanceof DFSTopologyNodeImpl) { + storageCount -= ((DFSTopologyNodeImpl) excludedNode) + .getSubtreeStorageCount(type); + } + } + } + } if (storageCount > 0) { candidates.add(dfsNode); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSTopologyNodeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSTopologyNodeImpl.java index 7a9afabc1edb9..72c89f57872b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSTopologyNodeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSTopologyNodeImpl.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.net; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import 
org.apache.hadoop.net.InnerNode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java index 6d2c0ac3c0391..d9baa8ff45f74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java @@ -33,8 +33,8 @@ import org.apache.hadoop.hdfs.protocol.BlockListAsLongs.BlockReportReplica; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.datanode.Replica; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.thirdparty.protobuf.ByteString; import org.apache.hadoop.thirdparty.protobuf.CodedInputStream; import org.apache.hadoop.thirdparty.protobuf.CodedOutputStream; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirective.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirective.java index 2c0d26363b3cf..aef009a03a1dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirective.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirective.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.protocol; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; import java.util.Date; @@ -28,7 +28,7 @@ import org.apache.hadoop.util.IntrusiveCollection; import 
org.apache.hadoop.util.IntrusiveCollection.Element; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Namenode class that tracks state related to a cached path. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java index fc1717f26a5f9..cd00c7a962222 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java @@ -23,10 +23,10 @@ import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; /** * LayoutFlags represent features which the FSImage and edit logs can either @@ -36,14 +36,15 @@ */ @InterfaceAudience.Private public class LayoutFlags { + /** - * Load a LayoutFlags object from a stream. + * Read next int from given input stream. If the value is not 0 (unsupported + * feature flags), throw appropriate IOException. * * @param in The stream to read from. - * @throws IOException + * @throws IOException If next byte read from given stream is not 0. 
*/ - public static LayoutFlags read(DataInputStream in) - throws IOException { + public static void read(DataInputStream in) throws IOException { int length = in.readInt(); if (length < 0) { throw new IOException("The length of the feature flag section " + @@ -52,7 +53,6 @@ public static LayoutFlags read(DataInputStream in) throw new IOException("Found feature flags which we can't handle. " + "Please upgrade your software."); } - return new LayoutFlags(); } private LayoutFlags() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotInfo.java index 676e8276f258e..03a2ec7322d35 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotInfo.java @@ -20,7 +20,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.proto.AclProtos.FsPermissionProto; -import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; /** * SnapshotInfo maintains information for a snapshot diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java index 5d2d1f890bc50..8bcfb199ff5a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BaseHeaderProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.CachingStrategyProto; import 
org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ClientOperationHeaderProto; -import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.DataTransferTraceInfoProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockGroupChecksumProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto; @@ -46,9 +45,11 @@ import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; -import org.apache.htrace.core.SpanId; -import org.apache.htrace.core.TraceScope; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.SpanContext; +import org.apache.hadoop.tracing.TraceScope; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.tracing.TraceUtils; +import org.apache.hadoop.thirdparty.protobuf.ByteString; /** Receiver */ @InterfaceAudience.Private @@ -77,12 +78,13 @@ protected final Op readOp() throws IOException { return Op.read(in); } - private TraceScope continueTraceSpan(DataTransferTraceInfoProto proto, + private TraceScope continueTraceSpan(ByteString spanContextBytes, String description) { TraceScope scope = null; - SpanId spanId = fromProto(proto); - if (spanId != null) { - scope = tracer.newScope(description, spanId); + SpanContext spanContext = + TraceUtils.byteStringToSpanContext(spanContextBytes); + if (spanContext != null) { + scope = tracer.newScope(description, spanContext); } return scope; } @@ -94,7 +96,8 @@ private TraceScope continueTraceSpan(ClientOperationHeaderProto header, private TraceScope continueTraceSpan(BaseHeaderProto header, String description) { - return continueTraceSpan(header.getTraceInfo(), description); + return continueTraceSpan(header.getTraceInfo().getSpanContext(), + description); } /** Process op by the corresponding method. 
*/ @@ -243,7 +246,8 @@ private void opReleaseShortCircuitFds(DataInputStream in) throws IOException { final ReleaseShortCircuitAccessRequestProto proto = ReleaseShortCircuitAccessRequestProto.parseFrom(vintPrefixed(in)); - TraceScope traceScope = continueTraceSpan(proto.getTraceInfo(), + TraceScope traceScope = continueTraceSpan( + proto.getTraceInfo().getSpanContext(), proto.getClass().getSimpleName()); try { releaseShortCircuitFds(PBHelperClient.convert(proto.getSlotId())); @@ -256,7 +260,8 @@ private void opReleaseShortCircuitFds(DataInputStream in) private void opRequestShortCircuitShm(DataInputStream in) throws IOException { final ShortCircuitShmRequestProto proto = ShortCircuitShmRequestProto.parseFrom(vintPrefixed(in)); - TraceScope traceScope = continueTraceSpan(proto.getTraceInfo(), + TraceScope traceScope = continueTraceSpan( + proto.getTraceInfo().getSpanContext(), proto.getClass().getSimpleName()); try { requestShortCircuitShm(proto.getClientName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java index ae17761c204ab..059c920c24078 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java @@ -21,7 +21,7 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_CIPHER_SUITES_KEY; import static org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil.*; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import 
java.io.DataOutputStream; @@ -52,15 +52,17 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.DataTransferEncryptorMessageProto.DataTransferEncryptorStatus; import org.apache.hadoop.hdfs.security.token.block.BlockPoolTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.datanode.DNConf; import org.apache.hadoop.security.SaslPropertiesResolver; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Charsets; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Negotiates SASL for DataTransferProtocol on behalf of a server. There are @@ -441,6 +443,14 @@ private IOStreamPair doSaslHandshake(Peer peer, OutputStream underlyingOut, // error, the client will get a new encryption key from the NN and retry // connecting to this DN. 
sendInvalidKeySaslErrorMessage(out, ioe.getCause().getMessage()); + } else if (ioe instanceof SaslException && + ioe.getCause() != null && + (ioe.getCause() instanceof InvalidBlockTokenException || + ioe.getCause() instanceof SecretManager.InvalidToken)) { + // This could be because the client is long-lived and block token is expired + // The client will get new block token from the NN, upon receiving this error + // and retry connecting to this DN + sendInvalidTokenSaslErrorMessage(out, ioe.getCause().getMessage()); } else { sendGenericSaslErrorMessage(out, ioe.getMessage()); } @@ -460,4 +470,16 @@ private static void sendInvalidKeySaslErrorMessage(DataOutputStream out, sendSaslMessage(out, DataTransferEncryptorStatus.ERROR_UNKNOWN_KEY, null, message); } + + /** + * Sends a SASL negotiation message indicating an invalid token error. + * + * @param out stream to receive message + * @param message to send + * @throws IOException for any error + */ + private static void sendInvalidTokenSaslErrorMessage(DataOutputStream out, + String message) throws IOException { + sendSaslMessage(out, DataTransferEncryptorStatus.ERROR, null, message, null, true); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index 9bd82485a508d..ba3b508bb8ddc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -155,6 +155,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetQuotaUsageResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsRequestProto; import 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSlowDatanodeReportRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSlowDatanodeReportResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportListingRequestProto; @@ -688,7 +690,8 @@ public Rename2ResponseProto rename2(RpcController controller, ArrayList optionList = new ArrayList(); if(req.getOverwriteDest()) { optionList.add(Rename.OVERWRITE); - } else if(req.hasMoveToTrash() && req.getMoveToTrash()) { + } + if (req.hasMoveToTrash() && req.getMoveToTrash()) { optionList.add(Rename.TO_TRASH); } @@ -2033,4 +2036,18 @@ public HAServiceStateResponseProto getHAServiceState( throw new ServiceException(e); } } + + @Override + public GetSlowDatanodeReportResponseProto getSlowDatanodeReport(RpcController controller, + GetSlowDatanodeReportRequestProto request) throws ServiceException { + try { + List result = + PBHelperClient.convert(server.getSlowDatanodeReport()); + return GetSlowDatanodeReportResponseProto.newBuilder() + .addAllDatanodeInfoProto(result) + .build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeLifelineProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeLifelineProtocolClientSideTranslatorPB.java index 050073fb952ed..220e9e2835625 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeLifelineProtocolClientSideTranslatorPB.java 
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeLifelineProtocolClientSideTranslatorPB.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; @@ -57,7 +57,7 @@ public class DatanodeLifelineProtocolClientSideTranslatorPB implements public DatanodeLifelineProtocolClientSideTranslatorPB( InetSocketAddress nameNodeAddr, Configuration conf) throws IOException { RPC.setProtocolEngine(conf, DatanodeLifelineProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); rpcProxy = createNamenode(nameNodeAddr, conf, ugi); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java index 6ab98e5880c31..add19e9e102ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java @@ -62,14 +62,14 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.RPC; import 
org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.RpcController; import org.apache.hadoop.thirdparty.protobuf.ServiceException; @@ -99,7 +99,7 @@ public DatanodeProtocolClientSideTranslatorPB(DatanodeProtocolPB rpcProxy) { public DatanodeProtocolClientSideTranslatorPB(InetSocketAddress nameNodeAddr, Configuration conf) throws IOException { RPC.setProtocolEngine(conf, DatanodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); rpcProxy = createNamenode(nameNodeAddr, conf, ugi); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java index 5ecbcb7bcd590..9244b9fef8571 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java @@ -61,7 +61,7 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.thirdparty.protobuf.RpcController; import org.apache.hadoop.thirdparty.protobuf.ServiceException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java index 64d57562a1811..031b0e4512ad3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; @@ -62,7 +62,7 @@ public InterDatanodeProtocolTranslatorPB(InetSocketAddress addr, int socketTimeout) throws IOException { RPC.setProtocolEngine(conf, InterDatanodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); rpcProxy = RPC.getProxy(InterDatanodeProtocolPB.class, RPC.getProtocolVersion(InterDatanodeProtocolPB.class), addr, ugi, conf, factory, socketTimeout); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index dff5fa574091c..b5f7b9c80f25f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -112,6 +112,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; import 
org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus; import org.apache.hadoop.hdfs.server.protocol.RegisterCommand; @@ -853,11 +854,15 @@ public static List convertSlowPeerInfo( List slowPeerInfoProtos = new ArrayList<>(slowPeers.getSlowPeers().size()); - for (Map.Entry entry : - slowPeers.getSlowPeers().entrySet()) { - slowPeerInfoProtos.add(SlowPeerReportProto.newBuilder() + for (Map.Entry entry : slowPeers.getSlowPeers().entrySet()) { + OutlierMetrics outlierMetrics = entry.getValue(); + slowPeerInfoProtos.add( + SlowPeerReportProto.newBuilder() .setDataNodeId(entry.getKey()) - .setAggregateLatency(entry.getValue()) + .setAggregateLatency(outlierMetrics.getActualLatency()) + .setMedian(outlierMetrics.getMedian()) + .setMad(outlierMetrics.getMad()) + .setUpperLimitLatency(outlierMetrics.getUpperLimitLatency()) .build()); } return slowPeerInfoProtos; @@ -871,15 +876,19 @@ public static SlowPeerReports convertSlowPeerInfo( return SlowPeerReports.EMPTY_REPORT; } - Map slowPeersMap = new HashMap<>(slowPeerProtos.size()); + Map slowPeersMap = new HashMap<>(slowPeerProtos.size()); for (SlowPeerReportProto proto : slowPeerProtos) { if (!proto.hasDataNodeId()) { // The DataNodeId should be reported. continue; } - slowPeersMap.put( - proto.getDataNodeId(), - proto.hasAggregateLatency() ? proto.getAggregateLatency() : 0.0); + Double aggregateLatency = proto.hasAggregateLatency() ? proto.getAggregateLatency() : 0.0; + Double medianLatency = proto.hasMedian() ? proto.getMedian() : 0.0; + Double madLatency = proto.hasMad() ? proto.getMad() : 0.0; + Double upperLimitLatency = proto.hasUpperLimitLatency() ? 
proto.getUpperLimitLatency() : 0.0; + OutlierMetrics outlierMetrics = + new OutlierMetrics(medianLatency, madLatency, upperLimitLatency, aggregateLatency); + slowPeersMap.put(proto.getDataNodeId(), outlierMetrics); } return SlowPeerReports.create(slowPeersMap); } @@ -967,8 +976,8 @@ public static JournalInfoProto convert(JournalInfo j) { public static BlockReportContext convert(BlockReportContextProto proto) { - return new BlockReportContext(proto.getTotalRpcs(), proto.getCurRpc(), - proto.getId(), proto.getLeaseId(), proto.getSorted()); + return new BlockReportContext(proto.getTotalRpcs(), + proto.getCurRpc(), proto.getId(), proto.getLeaseId()); } public static BlockReportContextProto convert(BlockReportContext context) { @@ -977,7 +986,6 @@ public static BlockReportContextProto convert(BlockReportContext context) { setCurRpc(context.getCurRpc()). setId(context.getReportId()). setLeaseId(context.getLeaseId()). - setSorted(context.isSorted()). build(); } @@ -1041,11 +1049,17 @@ public static BlockECReconstructionInfo convertBlockECReconstructionInfo( byte[] liveBlkIndices = blockEcReconstructionInfoProto.getLiveBlockIndices() .toByteArray(); + byte[] excludeReconstructedIndices = + blockEcReconstructionInfoProto.hasExcludeReconstructedIndices() ? 
+ blockEcReconstructionInfoProto.getExcludeReconstructedIndices() + .toByteArray() : new byte[0]; ErasureCodingPolicy ecPolicy = PBHelperClient.convertErasureCodingPolicy( blockEcReconstructionInfoProto.getEcPolicy()); - return new BlockECReconstructionInfo(block, sourceDnInfos, targetDnInfos, - targetStorageUuids, convertStorageTypes, liveBlkIndices, ecPolicy); + return new BlockECReconstructionInfo( + block, sourceDnInfos, targetDnInfos, + targetStorageUuids, convertStorageTypes, liveBlkIndices, + excludeReconstructedIndices, ecPolicy); } public static BlockECReconstructionInfoProto convertBlockECRecoveryInfo( @@ -1071,6 +1085,10 @@ public static BlockECReconstructionInfoProto convertBlockECRecoveryInfo( byte[] liveBlockIndices = blockEcRecoveryInfo.getLiveBlockIndices(); builder.setLiveBlockIndices(PBHelperClient.getByteString(liveBlockIndices)); + byte[] excludeReconstructedIndices = blockEcRecoveryInfo.getExcludeReconstructedIndices(); + builder.setExcludeReconstructedIndices( + PBHelperClient.getByteString(excludeReconstructedIndices)); + builder.setEcPolicy(PBHelperClient.convertErasureCodingPolicy( blockEcRecoveryInfo.getErasureCodingPolicy())); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java index 5eead67fa7b5d..bf72723071d26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java @@ -32,7 +32,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; -import com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; /** * Interface for a remote log which is only 
communicated with asynchronously. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java index f024b0e8c267d..684e7dd69fd40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java @@ -34,12 +34,12 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; /** * Wrapper around a set of Loggers, taking care of fanning out diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java index 3b3f89e7d9b5a..94e9456971296 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java @@ 
-52,21 +52,21 @@ import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.util.StopWatch; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.net.InetAddresses; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.common.util.concurrent.UncaughtExceptionHandlers; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.UncaughtExceptionHandlers; /** * Channel to a remote JournalNode using Hadoop IPC. 
@@ -235,13 +235,13 @@ protected QJournalProtocol createProxy() throws IOException { true); RPC.setProtocolEngine(confCopy, - QJournalProtocolPB.class, ProtobufRpcEngine.class); + QJournalProtocolPB.class, ProtobufRpcEngine2.class); return SecurityUtil.doAsLoginUser( new PrivilegedExceptionAction() { @Override public QJournalProtocol run() throws IOException { RPC.setProtocolEngine(confCopy, - QJournalProtocolPB.class, ProtobufRpcEngine.class); + QJournalProtocolPB.class, ProtobufRpcEngine2.class); QJournalProtocolPB pbproxy = RPC.getProxy( QJournalProtocolPB.class, RPC.getProtocolVersion(QJournalProtocolPB.class), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java index fde6b99817674..6eef8ffd38620 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java @@ -29,7 +29,7 @@ import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableQuantiles; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * The metrics for a journal from the writer's perspective. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java index 501a77ec1d738..e2a169aeb3c5f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java @@ -24,17 +24,17 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeUnit; -import com.google.common.util.concurrent.MoreExecutors; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StopWatch; import org.apache.hadoop.util.Timer; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; import org.apache.hadoop.thirdparty.protobuf.Message; import org.apache.hadoop.thirdparty.protobuf.TextFormat; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumException.java index 446092ebe1cc2..1f60e3d468821 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumException.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumException.java @@ -22,8 +22,8 @@ import org.apache.hadoop.util.StringUtils; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Exception thrown when too many exceptions occur while gathering diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java index 11bf46077d794..ff7d6e1b9afa0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java @@ -58,10 +58,10 @@ import org.apache.hadoop.log.LogThrottlingHelper; import org.apache.hadoop.log.LogThrottlingHelper.LogAction; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.protobuf.TextFormat; /** @@ -73,9 +73,9 @@ public class QuorumJournalManager implements JournalManager { static final Logger LOG = LoggerFactory.getLogger(QuorumJournalManager.class); // This config is not publicly exposed - static final String 
QJM_RPC_MAX_TXNS_KEY = + public static final String QJM_RPC_MAX_TXNS_KEY = "dfs.ha.tail-edits.qjm.rpc.max-txns"; - static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000; + public static final int QJM_RPC_MAX_TXNS_DEFAULT = 5000; // Maximum number of transactions to fetch at a time when using the // RPC edit fetch mechanism @@ -586,7 +586,7 @@ private void selectRpcInputStreams(Collection streams, responseCounts.get(responseCounts.size() - loggers.getMajoritySize()); if (maxAllowedTxns == 0) { LOG.debug("No new edits available in logs; requested starting from " + - "ID " + fromTxnId); + "ID {}", fromTxnId); return; } LogAction logAction = selectInputStreamLogHelper.record(fromTxnId); @@ -618,9 +618,10 @@ private void selectStreamingInputStreams( Map resps = loggers.waitForWriteQuorum(q, selectInputStreamsTimeoutMs, "selectStreamingInputStreams"); - - LOG.debug("selectStreamingInputStream manifests:\n" + - Joiner.on("\n").withKeyValueSeparator(": ").join(resps)); + if (LOG.isDebugEnabled()) { + LOG.debug("selectStreamingInputStream manifests:\n {}", + Joiner.on("\n").withKeyValueSeparator(": ").join(resps)); + } final PriorityQueue allStreams = new PriorityQueue(64, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/SegmentRecoveryComparator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/SegmentRecoveryComparator.java index 61b60aaad875d..4b2a518ac0dff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/SegmentRecoveryComparator.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/SegmentRecoveryComparator.java @@ -23,9 +23,9 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto; -import com.google.common.base.Preconditions; -import 
com.google.common.collect.ComparisonChain; -import com.google.common.primitives.Booleans; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Booleans; /** * Compares responses to the prepareRecovery RPC. This is responsible for diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java index 81b3f8c1a1f1f..f726ff8f84de6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java @@ -27,11 +27,11 @@ import javax.servlet.ServletContext; import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.text.StringEscapeUtils; +import org.apache.hadoop.hdfs.server.namenode.DfsServlet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -64,7 +64,7 @@ * */ @InterfaceAudience.Private -public class GetJournalEditServlet extends HttpServlet { +public class GetJournalEditServlet extends DfsServlet { private static final long serialVersionUID = -4635891628211723009L; private static final Logger LOG = @@ -77,17 +77,11 @@ public class GetJournalEditServlet extends HttpServlet { protected boolean isValidRequestor(HttpServletRequest request, Configuration conf) throws IOException { - String remotePrincipal = request.getUserPrincipal().getName(); - String remoteShortName = request.getRemoteUser(); - if (remotePrincipal == null) { // This really shouldn't 
happen... - LOG.warn("Received null remoteUser while authorizing access to " + - "GetJournalEditServlet"); - return false; - } + UserGroupInformation ugi = getUGI(request, conf); if (LOG.isDebugEnabled()) { - LOG.debug("Validating request made by " + remotePrincipal + - " / " + remoteShortName + ". This user is: " + + LOG.debug("Validating request made by " + ugi.getUserName() + + " / " + ugi.getShortUserName() + ". This user is: " + UserGroupInformation.getLoginUser()); } @@ -115,9 +109,9 @@ protected boolean isValidRequestor(HttpServletRequest request, Configuration con for (String v : validRequestors) { if (LOG.isDebugEnabled()) LOG.debug("isValidRequestor is comparing to valid requestor: " + v); - if (v != null && v.equals(remotePrincipal)) { + if (v != null && v.equals(ugi.getUserName())) { if (LOG.isDebugEnabled()) - LOG.debug("isValidRequestor is allowing: " + remotePrincipal); + LOG.debug("isValidRequestor is allowing: " + ugi.getUserName()); return true; } } @@ -125,16 +119,16 @@ protected boolean isValidRequestor(HttpServletRequest request, Configuration con // Additionally, we compare the short name of the requestor to this JN's // username, because we want to allow requests from other JNs during // recovery, but we can't enumerate the full list of JNs. 
- if (remoteShortName.equals( + if (ugi.getShortUserName().equals( UserGroupInformation.getLoginUser().getShortUserName())) { if (LOG.isDebugEnabled()) LOG.debug("isValidRequestor is allowing other JN principal: " + - remotePrincipal); + ugi.getUserName()); return true; } if (LOG.isDebugEnabled()) - LOG.debug("isValidRequestor is rejecting: " + remotePrincipal); + LOG.debug("isValidRequestor is rejecting: " + ugi.getUserName()); return false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java index b7d652395c631..7f82bff3376f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java @@ -38,7 +38,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; /** * A {@link Storage} implementation for the {@link JournalNode}. 
@@ -236,6 +236,10 @@ void format(NamespaceInfo nsInfo, boolean force) throws IOException { void analyzeStorage() throws IOException { this.state = sd.analyzeStorage(StartupOption.REGULAR, this); + refreshStorage(); + } + + void refreshStorage() throws IOException { if (state == StorageState.NORMAL) { readProperties(sd); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java index 70ed4c7159fa4..1f256415ba5a0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java @@ -71,10 +71,10 @@ import org.apache.hadoop.util.StopWatch; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hadoop.thirdparty.protobuf.TextFormat; /** @@ -264,9 +264,9 @@ void format(NamespaceInfo nsInfo, boolean force) throws IOException { */ @Override // Closeable public void close() throws IOException { - storage.close(); IOUtils.closeStream(committedTxnId); IOUtils.closeStream(curSegment); + storage.close(); } JNStorage getStorage() { @@ -773,7 +773,7 @@ public GetJournaledEditsResponseProto getJournaledEdits(long sinceTxId, .setEditLog(output.toByteString()) .build(); } catch (JournaledEditsCache.CacheMissException cme) { - metrics.rpcRequestCacheMissAmount.add(cme.getCacheMissAmount()); + 
metrics.addRpcRequestCacheMissAmount(cme.getCacheMissAmount()); throw cme; } } @@ -1178,6 +1178,8 @@ public synchronized void doRollback() throws IOException { // directory will be renamed. It will be reopened lazily on next access. IOUtils.cleanupWithLogger(LOG, committedTxnId); storage.getJournalManager().doRollback(); + // HADOOP-17142: refresh properties after rollback performed. + storage.refreshStorage(); } synchronized void discardSegments(long startTxId) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalFaultInjector.java index cefb7b5294406..f55933fa3536b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalFaultInjector.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalMetrics.java index 7d271f36653a3..3499da98dca8e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalMetrics.java @@ -51,12 +51,7 @@ class JournalMetrics { @Metric("Number of bytes served via RPC") MutableCounterLong bytesServedViaRpc; - @Metric - MutableStat rpcRequestCacheMissAmount = new MutableStat( - "RpcRequestCacheMissAmount", "Number of RPC requests unable to be " 
+ - "served due to lack of availability in cache, and how many " + - "transactions away the request was from being in the cache.", - "Misses", "Txns"); + private MutableStat rpcRequestCacheMissAmount; @Metric("Number of RPC requests with zero edits returned") MutableCounterLong rpcEmptyResponses; @@ -87,6 +82,11 @@ class JournalMetrics { "syncs" + interval + "s", "Journal sync time", "ops", "latencyMicros", interval); } + rpcRequestCacheMissAmount = registry + .newStat("RpcRequestCacheMissAmount", "Number of RPC requests unable to be " + + "served due to lack of availability in cache, and how many " + + "transactions away the request was from being in the cache.", + "Misses", "Txns"); } public static JournalMetrics create(Journal j) { @@ -149,4 +149,8 @@ public MutableCounterLong getNumEditLogsSynced() { public void incrNumEditLogsSynced() { numEditLogsSynced.incr(); } + + public void addRpcRequestCacheMissAmount(long cacheMissAmount) { + rpcRequestCacheMissAmount.add(cacheMissAmount); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java index 3df69f1448ad5..0b5932d70f146 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hdfs.qjournal.server; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.util.VersionInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -46,7 +48,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Tracer; import org.eclipse.jetty.util.ajax.JSON; import javax.management.ObjectName; @@ -57,7 +59,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * The JournalNode is a daemon which allows namenodes using @@ -118,6 +122,11 @@ synchronized Journal getOrCreateJournal(String jid, return journal; } + @VisibleForTesting + public JournalNodeSyncer getJournalSyncer(String jid) { + return journalSyncersById.get(jid); + } + @VisibleForTesting public boolean getJournalSyncerStatus(String jid) { if (journalSyncersById.get(jid) != null) { @@ -392,7 +401,25 @@ public boolean accept(File file) { return JSON.toString(status); } - + + @Override // JournalNodeMXBean + public String getHostAndPort() { + return NetUtils.getHostPortString(rpcServer.getAddress()); + } + + @Override // JournalNodeMXBean + public List getClusterIds() { + return journalsById.values().stream() + .map(j -> j.getStorage().getClusterID()) + .filter(cid -> !Strings.isNullOrEmpty(cid)) + .distinct().collect(Collectors.toList()); + } + + @Override // JournalNodeMXBean + public String getVersion() { + return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision(); + } + /** * Register JournalNodeMXBean */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeMXBean.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeMXBean.java index 4e8d9da50f9e8..813a9e0a1f731 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeMXBean.java @@ -20,6 +20,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import java.util.Collections; +import java.util.List; + /** * This is the JMX management interface for JournalNode information */ @@ -32,5 +35,33 @@ public interface JournalNodeMXBean { * * @return A string presenting status for each journal */ - public String getJournalsStatus(); + String getJournalsStatus(); + + /** + * Get host and port of JournalNode. + * + * @return colon separated host and port. + */ + default String getHostAndPort() { + return ""; + } + + /** + * Get list of the clusters of JournalNode's journals + * as one JournalNode may support multiple clusters. + * + * @return list of clusters. + */ + default List getClusterIds() { + return Collections.emptyList() ; + } + + /** + * Gets the version of Hadoop. + * + * @return the version of Hadoop. 
+ */ + default String getVersion() { + return ""; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java index 36f7faaedb01e..d13c98f5c0f1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.qjournal.server; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.BlockingService; import org.slf4j.Logger; import org.apache.hadoop.classification.InterfaceAudience; @@ -46,7 +46,7 @@ import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC.Server; import org.apache.hadoop.net.NetUtils; @@ -85,7 +85,7 @@ public class JournalNodeRpcServer implements QJournalProtocol, LOG.info("RPC server is binding to " + bindHost + ":" + addr.getPort()); RPC.setProtocolEngine(confCopy, QJournalProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); QJournalProtocolServerSideTranslatorPB translator = new QJournalProtocolServerSideTranslatorPB(this); BlockingService service = QJournalProtocolService diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java 
index dc352c5d367c2..6e861e62aa892 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java @@ -17,9 +17,10 @@ */ package org.apache.hadoop.hdfs.qjournal.server; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -35,7 +36,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.util.DataTransferThrottler; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -54,6 +55,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Set; /** * A Journal Sync thread runs through the lifetime of the JN. It periodically @@ -153,6 +155,9 @@ private boolean getOtherJournalNodeProxies() { LOG.warn("Could not add proxy for Journal at addresss " + addr, e); } } + // Check if there are any other JournalNodes before starting the sync. Although some proxies + // may be unresolved now, the act of attempting to sync will instigate resolution when the + // servers become available. 
if (otherJNProxies.isEmpty()) { LOG.error("Cannot sync as there is no other JN available for sync."); return false; @@ -175,7 +180,7 @@ private void startSyncJournalsDaemon() { } if (!createEditsSyncDir()) { LOG.error("Failed to create directory for downloading log " + - "segments: %s. Stopping Journal Node Sync.", + "segments: {}. Stopping Journal Node Sync.", journal.getStorage().getEditsSyncDir()); return; } @@ -310,12 +315,24 @@ private List getOtherJournalNodeAddrs() { return null; } - private List getJournalAddrList(String uriStr) throws + @VisibleForTesting + protected List getJournalAddrList(String uriStr) throws URISyntaxException, IOException { URI uri = new URI(uriStr); - return Util.getLoggerAddresses(uri, - Sets.newHashSet(jn.getBoundIpcAddress())); + + InetSocketAddress boundIpcAddress = jn.getBoundIpcAddress(); + Set excluded = Sets.newHashSet(boundIpcAddress); + List addrList = Util.getLoggerAddresses(uri, excluded); + + // Exclude the current JournalNode instance (a local address and the same port). If the address + // is bound to a local address on the same port, then remove it to handle scenarios where a + // wildcard address (e.g. "0.0.0.0") is used. We can't simply exclude all local addresses + // since we may be running multiple servers on the same host. 
+ addrList.removeIf(addr -> !addr.isUnresolved() && addr.getAddress().isAnyLocalAddress() + && boundIpcAddress.getPort() == addr.getPort()); + + return addrList; } private void getMissingLogSegments(List thisJournalEditLogs, @@ -467,7 +484,7 @@ private boolean downloadMissingLogSegment(URL url, RemoteEditLog log) moveSuccess = journal.moveTmpSegmentToCurrent(tmpEditsFile, finalEditsFile, log.getEndTxId()); } catch (IOException e) { - LOG.info("Could not move %s to current directory.", tmpEditsFile); + LOG.info("Could not move {} to current directory.", tmpEditsFile); } finally { if (tmpEditsFile.exists() && !tmpEditsFile.delete()) { LOG.warn("Deleting " + tmpEditsFile + " has failed"); @@ -505,7 +522,7 @@ private class JournalNodeProxy { @Override public InterQJournalProtocol run() throws IOException { RPC.setProtocolEngine(confCopy, InterQJournalProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); InterQJournalProtocolPB interQJournalProtocolPB = RPC.getProxy( InterQJournalProtocolPB.class, RPC.getProtocolVersion(InterQJournalProtocolPB.class), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java index 3cd7fffc587aa..e0b84d75fb212 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.qjournal.server; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockPoolTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockPoolTokenSecretManager.java index e477eee8437f7..d81bc98ff8aea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockPoolTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockPoolTokenSecretManager.java @@ -27,7 +27,7 @@ import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.Token; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.StorageType; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java index c01ab56ca2053..68b8e3c05751d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java @@ -18,10 +18,11 @@ package org.apache.hadoop.hdfs.security.token.block; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; +import java.security.MessageDigest; import java.security.SecureRandom; import java.util.Arrays; import java.util.EnumSet; @@ -44,10 +45,10 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; -import com.google.common.annotations.VisibleForTesting; -import 
com.google.common.base.Preconditions; -import com.google.common.collect.HashMultiset; -import com.google.common.collect.Multiset; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultiset; +import org.apache.hadoop.thirdparty.com.google.common.collect.Multiset; /** * BlockTokenSecretManager can be instantiated in 2 modes, master mode @@ -407,7 +408,7 @@ public void checkAccess(Token token, String userId, + ", block=" + block + ", access mode=" + mode); } checkAccess(id, userId, block, mode, storageTypes, storageIds); - if (!Arrays.equals(retrievePassword(id), token.getPassword())) { + if (!MessageDigest.isEqual(retrievePassword(id), token.getPassword())) { throw new InvalidToken("Block token with " + id + " doesn't have the correct token password"); } @@ -427,7 +428,7 @@ public void checkAccess(Token token, String userId, + ", block=" + block + ", access mode=" + mode); } checkAccess(id, userId, block, mode); - if (!Arrays.equals(retrievePassword(id), token.getPassword())) { + if (!MessageDigest.isEqual(retrievePassword(id), token.getPassword())) { throw new InvalidToken("Block token with " + id + " doesn't have the correct token password"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index 49986e9d9827a..68f3dd6d67f12 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -49,8 +49,8 @@ import 
org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; import org.apache.hadoop.security.token.delegation.DelegationKey; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.protobuf.ByteString; /** @@ -191,7 +191,7 @@ public SecretManagerState( } } - public synchronized void loadSecretManagerState(SecretManagerState state) + public synchronized void loadSecretManagerState(SecretManagerState state, Counter counter) throws IOException { Preconditions.checkState(!running, "Can't load state from image in a running SecretManager."); @@ -211,6 +211,7 @@ public synchronized void loadSecretManagerState(SecretManagerState state) id.setSequenceNumber(t.getSequenceNumber()); id.setMasterKeyId(t.getMasterKeyId()); addPersistedDelegationToken(id, t.getExpiryDate()); + counter.increment(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryAliasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryAliasMap.java index 2810434609acf..4060b78208509 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryAliasMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryAliasMap.java @@ -16,8 +16,8 @@ */ package org.apache.hadoop.hdfs.server.aliasmap; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; 
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; @@ -320,21 +320,15 @@ static File createSnapshot(InMemoryAliasMap aliasMap) throws IOException { private static File getCompressedAliasMap(File aliasMapDir) throws IOException { File outCompressedFile = new File(aliasMapDir.getParent(), TAR_NAME); - BufferedOutputStream bOut = null; - GzipCompressorOutputStream gzOut = null; - TarArchiveOutputStream tOut = null; - try { - bOut = new BufferedOutputStream( - Files.newOutputStream(outCompressedFile.toPath())); - gzOut = new GzipCompressorOutputStream(bOut); - tOut = new TarArchiveOutputStream(gzOut); + + try (BufferedOutputStream bOut = new BufferedOutputStream( + Files.newOutputStream(outCompressedFile.toPath())); + GzipCompressorOutputStream gzOut = new GzipCompressorOutputStream(bOut); + TarArchiveOutputStream tOut = new TarArchiveOutputStream(gzOut)){ + addFileToTarGzRecursively(tOut, aliasMapDir, "", new Configuration()); - } finally { - if (tOut != null) { - tOut.finish(); - } - IOUtils.cleanupWithLogger(null, tOut, gzOut, bOut); } + return outCompressedFile; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java index f6ba4239d7110..2ba22b1a90ba7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java @@ -19,7 +19,7 @@ import org.apache.hadoop.thirdparty.protobuf.BlockingService; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configurable; @@ -71,7 +71,7 @@ public InMemoryLevelDBAliasMapServer( public void start() throws IOException { RPC.setProtocolEngine(getConf(), AliasMapProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); AliasMapProtocolServerSideTranslatorPB aliasMapProtocolXlator = new AliasMapProtocolServerSideTranslatorPB(this); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index e8b49718fedf7..998598ec12f16 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.balancer; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; import static org.apache.hadoop.hdfs.protocol.BlockType.CONTIGUOUS; import java.io.IOException; @@ -35,8 +35,10 @@ import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -68,7 +70,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /**

      The balancer is a tool that balances disk space usage on an HDFS cluster * when some datanodes become full or when new empty nodes join the cluster. @@ -203,8 +205,10 @@ public class Balancer { @VisibleForTesting private static volatile boolean serviceRunning = false; - private static volatile int exceptionsSinceLastBalance = 0; - private static volatile int failedTimesSinceLastSuccessfulBalance = 0; + private static final AtomicInteger EXCEPTIONS_SINCE_LAST_BALANCE = + new AtomicInteger(0); + private static final AtomicInteger + FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE = new AtomicInteger(0); private final Dispatcher dispatcher; private final NameNodeConnector nnc; @@ -265,11 +269,11 @@ static int getInt(Configuration conf, String key, int defaultValue) { } static int getExceptionsSinceLastBalance() { - return exceptionsSinceLastBalance; + return EXCEPTIONS_SINCE_LAST_BALANCE.get(); } static int getFailedTimesSinceLastSuccessfulBalance() { - return failedTimesSinceLastSuccessfulBalance; + return FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE.get(); } /** @@ -281,6 +285,9 @@ static int getFailedTimesSinceLastSuccessfulBalance() { */ Balancer(NameNodeConnector theblockpool, BalancerParameters p, Configuration conf) { + // NameNode configuration parameters for balancing + getInt(conf, DFSConfigKeys.DFS_NAMENODE_GETBLOCKS_MAX_QPS_KEY, + DFSConfigKeys.DFS_NAMENODE_GETBLOCKS_MAX_QPS_DEFAULT); final long movedWinWidth = getLong(conf, DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY, DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT); @@ -290,10 +297,6 @@ static int getFailedTimesSinceLastSuccessfulBalance() { final int dispatcherThreads = getInt(conf, DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY, DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT); - final int maxConcurrentMovesPerNode = getInt(conf, - DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY, - DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT); - final long getBlocksSize = 
getLongBytes(conf, DFSConfigKeys.DFS_BALANCER_GETBLOCKS_SIZE_KEY, DFSConfigKeys.DFS_BALANCER_GETBLOCKS_SIZE_DEFAULT); @@ -310,6 +313,13 @@ static int getFailedTimesSinceLastSuccessfulBalance() { DFSConfigKeys.DFS_BALANCER_MAX_ITERATION_TIME_KEY, DFSConfigKeys.DFS_BALANCER_MAX_ITERATION_TIME_DEFAULT); + // DataNode configuration parameters for balancing + final int maxConcurrentMovesPerNode = getInt(conf, + DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY, + DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT); + getLongBytes(conf, DFSConfigKeys.DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY, + DFSConfigKeys.DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_DEFAULT); + this.nnc = theblockpool; this.dispatcher = new Dispatcher(theblockpool, p.getIncludedNodes(), @@ -589,35 +599,60 @@ void resetData(Configuration conf) { } static class Result { - final ExitStatus exitStatus; - final long bytesLeftToMove; - final long bytesBeingMoved; - final long bytesAlreadyMoved; + private final ExitStatus exitStatus; + private final long bytesLeftToMove; + private final long bytesBeingMoved; + private final long bytesAlreadyMoved; + private final long blocksMoved; Result(ExitStatus exitStatus, long bytesLeftToMove, long bytesBeingMoved, - long bytesAlreadyMoved) { + long bytesAlreadyMoved, long blocksMoved) { this.exitStatus = exitStatus; this.bytesLeftToMove = bytesLeftToMove; this.bytesBeingMoved = bytesBeingMoved; this.bytesAlreadyMoved = bytesAlreadyMoved; + this.blocksMoved = blocksMoved; + } + + public ExitStatus getExitStatus() { + return exitStatus; + } + + public long getBytesLeftToMove() { + return bytesLeftToMove; } - void print(int iteration, PrintStream out) { - out.printf("%-24s %10d %19s %18s %17s%n", + public long getBytesBeingMoved() { + return bytesBeingMoved; + } + + public long getBytesAlreadyMoved() { + return bytesAlreadyMoved; + } + + public long getBlocksMoved() { + return blocksMoved; + } + + void print(int iteration, NameNodeConnector nnc, 
PrintStream out) { + out.printf("%-24s %10d %19s %18s %17s %17s %s%n", DateFormat.getDateTimeInstance().format(new Date()), iteration, StringUtils.byteDesc(bytesAlreadyMoved), StringUtils.byteDesc(bytesLeftToMove), - StringUtils.byteDesc(bytesBeingMoved)); + StringUtils.byteDesc(bytesBeingMoved), + blocksMoved, + nnc.getNameNodeUri()); } } Result newResult(ExitStatus exitStatus, long bytesLeftToMove, long bytesBeingMoved) { return new Result(exitStatus, bytesLeftToMove, bytesBeingMoved, - dispatcher.getBytesMoved()); + dispatcher.getBytesMoved(), dispatcher.getBblocksMoved()); } Result newResult(ExitStatus exitStatus) { - return new Result(exitStatus, -1, -1, dispatcher.getBytesMoved()); + return new Result(exitStatus, -1, -1, dispatcher.getBytesMoved(), + dispatcher.getBblocksMoved()); } /** Run an iteration for all datanodes. */ @@ -652,8 +687,10 @@ Result runOneIteration() { System.out.println("No block can be moved. Exiting..."); return newResult(ExitStatus.NO_MOVE_BLOCK, bytesLeftToMove, bytesBeingMoved); } else { - LOG.info( "Will move " + StringUtils.byteDesc(bytesBeingMoved) + - " in this iteration"); + LOG.info("Will move {} in this iteration for {}", + StringUtils.byteDesc(bytesBeingMoved), nnc.toString()); + LOG.info("Total target DataNodes in this iteration: {}", + dispatcher.moveTasksTotal()); } /* For each pair of , start a thread that repeatedly @@ -688,7 +725,7 @@ Result runOneIteration() { * execute a {@link Balancer} to work through all datanodes once. 
*/ static private int doBalance(Collection namenodes, - final BalancerParameters p, Configuration conf) + Collection nsIds, final BalancerParameters p, Configuration conf) throws IOException, InterruptedException { final long sleeptime = conf.getTimeDuration(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, @@ -704,14 +741,15 @@ static private int doBalance(Collection namenodes, LOG.info("excluded nodes = " + p.getExcludedNodes()); LOG.info("source nodes = " + p.getSourceNodes()); checkKeytabAndInit(conf); - System.out.println("Time Stamp Iteration# Bytes Already Moved Bytes Left To Move Bytes Being Moved"); + System.out.println("Time Stamp Iteration#" + + " Bytes Already Moved Bytes Left To Move Bytes Being Moved" + + " NameNode"); List connectors = Collections.emptyList(); try { - connectors = NameNodeConnector.newNameNodeConnectors(namenodes, - Balancer.class.getSimpleName(), BALANCER_ID_PATH, conf, - p.getMaxIdleIteration()); - + connectors = NameNodeConnector.newNameNodeConnectors(namenodes, nsIds, + Balancer.class.getSimpleName(), BALANCER_ID_PATH, conf, + p.getMaxIdleIteration()); boolean done = false; for(int iteration = 0; !done; iteration++) { done = true; @@ -721,7 +759,7 @@ static private int doBalance(Collection namenodes, || p.getBlockPools().contains(nnc.getBlockpoolID())) { final Balancer b = new Balancer(nnc, p, conf); final Result r = b.runOneIteration(); - r.print(iteration, System.out); + r.print(iteration, nnc, System.out); // clean all lists b.resetData(conf); @@ -751,9 +789,15 @@ static private int doBalance(Collection namenodes, } static int run(Collection namenodes, final BalancerParameters p, - Configuration conf) throws IOException, InterruptedException { + Configuration conf) throws IOException, InterruptedException { + return run(namenodes, null, p, conf); + } + + static int run(Collection namenodes, Collection nsIds, + final BalancerParameters p, Configuration conf) + throws IOException, InterruptedException { if (!p.getRunAsService()) { - 
return doBalance(namenodes, p, conf); + return doBalance(namenodes, nsIds, p, conf); } if (!serviceRunning) { serviceRunning = true; @@ -772,23 +816,24 @@ static int run(Collection namenodes, final BalancerParameters p, while (serviceRunning) { try { - int retCode = doBalance(namenodes, p, conf); + int retCode = doBalance(namenodes, nsIds, p, conf); if (retCode < 0) { LOG.info("Balance failed, error code: " + retCode); - failedTimesSinceLastSuccessfulBalance++; + FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE.incrementAndGet(); } else { LOG.info("Balance succeed!"); - failedTimesSinceLastSuccessfulBalance = 0; + FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE.set(0); } - exceptionsSinceLastBalance = 0; + EXCEPTIONS_SINCE_LAST_BALANCE.set(0); } catch (Exception e) { - if (++exceptionsSinceLastBalance > retryOnException) { + if (EXCEPTIONS_SINCE_LAST_BALANCE.incrementAndGet() + > retryOnException) { // The caller will process and log the exception throw e; } LOG.warn( "Encounter exception while do balance work. Already tried {} times", - exceptionsSinceLastBalance, e); + EXCEPTIONS_SINCE_LAST_BALANCE, e); } // sleep for next round, will retry for next round when it's interrupted @@ -856,7 +901,8 @@ public int run(String[] args) { checkReplicationPolicyCompatibility(conf); final Collection namenodes = DFSUtil.getInternalNsRpcUris(conf); - return Balancer.run(namenodes, parse(args), conf); + final Collection nsIds = DFSUtilClient.getNameServiceIds(conf); + return Balancer.run(namenodes, nsIds, parse(args), conf); } catch (IOException e) { System.out.println(e + ". 
Exiting ..."); return ExitStatus.IO_EXCEPTION.getExitCode(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index c222270882fc0..3937f331e6446 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -83,8 +83,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** Dispatching block replica moves between datanodes. */ @InterfaceAudience.Private @@ -239,7 +239,8 @@ public class PendingMove { private DDatanode proxySource; private StorageGroup target; - private PendingMove(Source source, StorageGroup target) { + @VisibleForTesting + PendingMove(Source source, StorageGroup target) { this.source = source; this.target = target; } @@ -280,12 +281,19 @@ private boolean chooseBlockAndProxy() { /** * @return true if the given block is good for the tentative move. */ - private boolean markMovedIfGoodBlock(DBlock block, StorageType targetStorageType) { + @VisibleForTesting + boolean markMovedIfGoodBlock(DBlock block, StorageType targetStorageType) { synchronized (block) { synchronized (movedBlocks) { if (isGoodBlockCandidate(source, target, targetStorageType, block)) { if (block instanceof DBlockStriped) { reportedBlock = ((DBlockStriped) block).getInternalBlock(source); + if (reportedBlock == null) { + LOG.info( + "No striped internal block on source {}, block {}. 
Skipping.", + source, block); + return false; + } } else { reportedBlock = block; } @@ -392,7 +400,7 @@ private void dispatch() { sendRequest(out, eb, accessToken); receiveResponse(in); - nnc.getBytesMoved().addAndGet(reportedBlock.getNumBytes()); + nnc.addBytesMoved(reportedBlock.getNumBytes()); target.getDDatanode().setHasSuccess(); LOG.info("Successfully moved " + this); } catch (IOException e) { @@ -485,7 +493,7 @@ public long getNumBytes(StorageGroup storage) { public static class DBlockStriped extends DBlock { - final byte[] indices; + private byte[] indices; final short dataBlockNum; final int cellSize; @@ -497,7 +505,7 @@ public DBlockStriped(Block block, byte[] indices, short dataBlockNum, this.cellSize = cellSize; } - public DBlock getInternalBlock(StorageGroup storage) { + DBlock getInternalBlock(StorageGroup storage) { int idxInLocs = locations.indexOf(storage); if (idxInLocs == -1) { return null; @@ -516,7 +524,34 @@ public DBlock getInternalBlock(StorageGroup storage) { @Override public long getNumBytes(StorageGroup storage) { - return getInternalBlock(storage).getNumBytes(); + DBlock block = getInternalBlock(storage); + if (block == null) { + return 0; + } + return block.getNumBytes(); + } + + public void setIndices(byte[] indices) { + this.indices = indices; + } + + /** + * Adjust EC block indices,it will remove the element of adjustList from indices. + * @param adjustList the list will be removed from indices + */ + public void adjustIndices(List adjustList) { + if (adjustList.isEmpty()) { + return; + } + + byte[] newIndices = new byte[indices.length - adjustList.size()]; + for (int i = 0, j = 0; i < indices.length; ++i) { + if (!adjustList.contains(i)) { + newIndices[j] = indices[i]; + ++j; + } + } + this.indices = newIndices; } } @@ -794,7 +829,7 @@ Iterator getBlockIterator() { * * @return the total size of the received blocks in the number of bytes. 
*/ - private long getBlockList() throws IOException { + private long getBlockList() throws IOException, IllegalArgumentException { final long size = Math.min(getBlocksSize, blocksToReceive); final BlocksWithLocations newBlksLocs = nnc.getBlocks(getDatanodeInfo(), size, getBlocksMinBlockSize); @@ -831,7 +866,14 @@ private long getBlockList() throws IOException { synchronized (block) { block.clearLocations(); + if (blkLocs instanceof StripedBlockWithLocations) { + // EC block may adjust indices before, avoid repeated adjustments + ((DBlockStriped) block).setIndices( + ((StripedBlockWithLocations) blkLocs).getIndices()); + } + // update locations + List adjustList = new ArrayList<>(); final String[] datanodeUuids = blkLocs.getDatanodeUuids(); final StorageType[] storageTypes = blkLocs.getStorageTypes(); for (int i = 0; i < datanodeUuids.length; i++) { @@ -839,8 +881,20 @@ private long getBlockList() throws IOException { datanodeUuids[i], storageTypes[i]); if (g != null) { // not unknown block.addLocation(g); + } else if (blkLocs instanceof StripedBlockWithLocations) { + // some datanode may not in storageGroupMap due to decommission operation + // or balancer cli with "-exclude" parameter + adjustList.add(i); } } + + if (!adjustList.isEmpty()) { + // block.locations mismatch with block.indices + // adjust indices to get correct internalBlock for Datanode in #getInternalBlock + ((DBlockStriped) block).adjustIndices(adjustList); + Preconditions.checkArgument(((DBlockStriped) block).indices.length + == block.locations.size()); + } } if (!srcBlocks.contains(block) && isGoodBlockCandidate(block)) { if (LOG.isTraceEnabled()) { @@ -960,7 +1014,7 @@ private void dispatchBlocks() { } blocksToReceive -= received; continue; - } catch (IOException e) { + } catch (IOException | IllegalArgumentException e) { LOG.warn("Exception while getting reportedBlock list", e); return; } @@ -1064,6 +1118,10 @@ long getBytesMoved() { return nnc.getBytesMoved().get(); } + long getBblocksMoved() 
{ + return nnc.getBlocksMoved().get(); + } + long bytesToMove() { Preconditions.checkState( storageGroupMap.size() >= sources.size() + targets.size(), @@ -1083,6 +1141,14 @@ void add(Source source, StorageGroup target) { targets.add(target); } + public int moveTasksTotal() { + int b = 0; + for (Source src : sources) { + b += src.tasks.size(); + } + return b; + } + private boolean shouldIgnore(DatanodeInfo dn) { // ignore out-of-service nodes final boolean outOfService = !dn.isInService(); @@ -1164,12 +1230,13 @@ public boolean dispatchAndCheckContinue() throws InterruptedException { */ private long dispatchBlockMoves() throws InterruptedException { final long bytesLastMoved = getBytesMoved(); + final long blocksLastMoved = getBblocksMoved(); final Future[] futures = new Future[sources.size()]; int concurrentThreads = Math.min(sources.size(), ((ThreadPoolExecutor)dispatchExecutor).getCorePoolSize()); assert concurrentThreads > 0 : "Number of concurrent threads is 0."; - LOG.debug("Balancer concurrent dispatcher threads = {}", concurrentThreads); + LOG.info("Balancer concurrent dispatcher threads = {}", concurrentThreads); // Determine the size of each mover thread pool per target int threadsPerTarget = maxMoverThreads/targets.size(); @@ -1211,6 +1278,9 @@ public void run() { // wait for all reportedBlock moving to be done waitForMoveCompletion(targets); + LOG.info("Total bytes (blocks) moved in this iteration {} ({})", + StringUtils.byteDesc(getBytesMoved() - bytesLastMoved), + (getBblocksMoved() - blocksLastMoved)); return getBytesMoved() - bytesLastMoved; } @@ -1289,7 +1359,8 @@ public static boolean checkForSuccess( * 2. the block does not have a replica/internalBlock on the target; * 3. 
doing the move does not reduce the number of racks that the block has */ - private boolean isGoodBlockCandidate(StorageGroup source, StorageGroup target, + @VisibleForTesting + boolean isGoodBlockCandidate(StorageGroup source, StorageGroup target, StorageType targetStorageType, DBlock block) { if (source.equals(target)) { return false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index 2844ad5a94350..4e2079298d40c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -21,18 +21,25 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; +import java.net.InetSocketAddress; import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.RateLimiter; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.RateLimiter; +import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -57,7 +64,7 @@ import org.apache.hadoop.io.IOUtils; import 
org.apache.hadoop.ipc.RemoteException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * The class provides utilities for accessing a NameNode. @@ -100,6 +107,32 @@ public static List newNameNodeConnectors( return connectors; } + public static List newNameNodeConnectors( + Collection namenodes, Collection nsIds, String name, + Path idPath, Configuration conf, int maxIdleIterations) + throws IOException { + final List connectors = new ArrayList( + namenodes.size()); + Map uriToNsId = new HashMap<>(); + if (nsIds != null) { + for (URI uri : namenodes) { + for (String nsId : nsIds) { + if (uri.getAuthority().equals(nsId)) { + uriToNsId.put(uri, nsId); + } + } + } + } + for (URI uri : namenodes) { + String nsId = uriToNsId.get(uri); + NameNodeConnector nnc = new NameNodeConnector(name, uri, nsId, idPath, + null, conf, maxIdleIterations); + nnc.getKeyManager().startBlockKeyUpdater(); + connectors.add(nnc); + } + return connectors; + } + @VisibleForTesting public static void setWrite2IdFile(boolean write2IdFile) { NameNodeConnector.write2IdFile = write2IdFile; @@ -114,6 +147,14 @@ public static void checkOtherInstanceRunning(boolean toCheck) { private final String blockpoolID; private final BalancerProtocols namenode; + /** + * If set requestToStandby true, Balancer will getBlocks from + * Standby NameNode only and it can reduce the performance impact of Active + * NameNode, especially in a busy HA mode cluster. 
+ */ + private boolean requestToStandby; + private String nsId; + private Configuration config; private final KeyManager keyManager; final AtomicBoolean fallbackToSimpleAuth = new AtomicBoolean(false); @@ -122,6 +163,7 @@ public static void checkOtherInstanceRunning(boolean toCheck) { private OutputStream out; private final List targetPaths; private final AtomicLong bytesMoved = new AtomicLong(); + private final AtomicLong blocksMoved = new AtomicLong(); private final int maxNotChangedIterations; private int notChangedIterations = 0; @@ -149,6 +191,11 @@ public NameNodeConnector(String name, URI nameNodeUri, Path idPath, this.namenode = NameNodeProxies.createProxy(conf, nameNodeUri, BalancerProtocols.class, fallbackToSimpleAuth).getProxy(); + this.requestToStandby = conf.getBoolean( + DFSConfigKeys.DFS_HA_ALLOW_STALE_READ_KEY, + DFSConfigKeys.DFS_HA_ALLOW_STALE_READ_DEFAULT); + this.config = conf; + this.fs = (DistributedFileSystem)FileSystem.get(nameNodeUri, conf); final NamespaceInfo namespaceinfo = namenode.versionRequest(); @@ -167,6 +214,14 @@ public NameNodeConnector(String name, URI nameNodeUri, Path idPath, } } + public NameNodeConnector(String name, URI nameNodeUri, String nsId, + Path idPath, List targetPaths, + Configuration conf, int maxNotChangedIterations) + throws IOException { + this(name, nameNodeUri, idPath, targetPaths, conf, maxNotChangedIterations); + this.nsId = nsId; + } + public DistributedFileSystem getDistributedFileSystem() { return fs; } @@ -180,13 +235,65 @@ AtomicLong getBytesMoved() { return bytesMoved; } + AtomicLong getBlocksMoved() { + return blocksMoved; + } + + public void addBytesMoved(long numBytes) { + bytesMoved.addAndGet(numBytes); + blocksMoved.incrementAndGet(); + } + + public URI getNameNodeUri() { + return nameNodeUri; + } + /** @return blocks with locations. 
*/ public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size, long minBlockSize) throws IOException { if (getBlocksRateLimiter != null) { getBlocksRateLimiter.acquire(); } - return namenode.getBlocks(datanode, size, minBlockSize); + boolean isRequestStandby = false; + NamenodeProtocol nnproxy = null; + InetSocketAddress standbyAddress = null; + try { + if (requestToStandby && nsId != null + && HAUtil.isHAEnabled(config, nsId)) { + List namenodes = + HAUtil.getProxiesForAllNameNodesInNameservice(config, nsId); + for (ClientProtocol proxy : namenodes) { + try { + if (proxy.getHAServiceState().equals( + HAServiceProtocol.HAServiceState.STANDBY)) { + standbyAddress = RPC.getServerAddress(proxy); + NamenodeProtocol sbn = NameNodeProxies.createNonHAProxy( + config, standbyAddress, NamenodeProtocol.class, + UserGroupInformation.getCurrentUser(), false).getProxy(); + nnproxy = sbn; + isRequestStandby = true; + break; + } + } catch (Exception e) { + // Ignore the exception while connecting to a namenode. + LOG.debug("Error while connecting to namenode", e); + } + } + if (nnproxy == null) { + LOG.warn("Request #getBlocks to Standby NameNode but meet exception," + + " will fallback to normal way."); + nnproxy = namenode; + } + } else { + nnproxy = namenode; + } + return nnproxy.getBlocks(datanode, size, minBlockSize); + } finally { + if (isRequestStandby) { + LOG.info("Request #getBlocks to Standby NameNode success. 
" + + "remoteAddress: {}", standbyAddress.getHostString()); + } + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java index 21c110f5ab556..f4bd87542b969 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java @@ -20,6 +20,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY; import java.util.Collection; import java.util.EnumMap; @@ -27,7 +29,7 @@ import java.util.Random; import java.util.Set; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -47,6 +49,8 @@ public class AvailableSpaceBlockPlacementPolicy extends private static final Random RAND = new Random(); private int balancedPreference = (int) (100 * DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); + private int balancedSpaceTolerance = + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT; private 
boolean optimizeLocal; @Override @@ -59,9 +63,14 @@ public void initialize(Configuration conf, FSClusterStats stats, DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); LOG.info("Available space block placement policy initialized: " - + DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + " = " + balancedPreferencePercent); + balancedSpaceTolerance = + conf.getInt( + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY, + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT); + optimizeLocal = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCE_LOCAL_NODE_KEY, DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCE_LOCAL_NODE_DEFAULT); @@ -77,6 +86,16 @@ public void initialize(Configuration conf, FSClusterStats stats, + " is less than 0.5 so datanodes with more used percent will" + " receive more block allocations."); } + + if (balancedSpaceTolerance > 20 || balancedSpaceTolerance < 0) { + LOG.warn("The value of " + + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY + + " is invalid, Current value is " + balancedSpaceTolerance + ", Default value " + + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT + + " will be used instead."); + balancedSpaceTolerance = + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT; + } balancedPreference = (int) (100 * balancedPreferencePercent); } @@ -87,9 +106,9 @@ protected DatanodeDescriptor chooseDataNode(final String scope, Preconditions.checkArgument(clusterMap instanceof DFSNetworkTopology); DFSNetworkTopology dfsClusterMap = (DFSNetworkTopology)clusterMap; DatanodeDescriptor a = (DatanodeDescriptor) dfsClusterMap - 
.chooseRandomWithStorageType(scope, excludedNode, type); + .chooseRandomWithStorageTypeTwoTrial(scope, excludedNode, type); DatanodeDescriptor b = (DatanodeDescriptor) dfsClusterMap - .chooseRandomWithStorageType(scope, excludedNode, type); + .chooseRandomWithStorageTypeTwoTrial(scope, excludedNode, type); return select(a, b, false); } @@ -183,7 +202,7 @@ private DatanodeDescriptor select(DatanodeDescriptor a, DatanodeDescriptor b, protected int compareDataNode(final DatanodeDescriptor a, final DatanodeDescriptor b, boolean isBalanceLocal) { if (a.equals(b) - || Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < 5 || (( + || Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < balancedSpaceTolerance || (( isBalanceLocal && a.getDfsUsedPercent() < 50))) { return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceRackFaultTolerantBlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceRackFaultTolerantBlockPlacementPolicy.java new file mode 100644 index 0000000000000..365990a7ce942 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceRackFaultTolerantBlockPlacementPolicy.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.blockmanagement; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.net.DFSNetworkTopology; +import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.net.Node; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.Random; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY; + +/** + * Space balanced rack fault tolerant block placement policy. 
+ */ +public class AvailableSpaceRackFaultTolerantBlockPlacementPolicy + extends BlockPlacementPolicyRackFaultTolerant { + + private static final Logger LOG = LoggerFactory + .getLogger(AvailableSpaceRackFaultTolerantBlockPlacementPolicy.class); + private static final Random RAND = new Random(); + private int balancedPreference = (int) (100 + * DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); + private int balancedSpaceTolerance = + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT; + @Override + public void initialize(Configuration conf, FSClusterStats stats, + NetworkTopology clusterMap, Host2NodesMap host2datanodeMap) { + super.initialize(conf, stats, clusterMap, host2datanodeMap); + float balancedPreferencePercent = conf.getFloat( + DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY, + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); + + balancedSpaceTolerance = conf.getInt( + DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY, + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT); + + LOG.info("Available space rack fault tolerant block placement policy " + + "initialized: " + + DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + + " = " + balancedPreferencePercent); + + if (balancedPreferencePercent > 1.0) { + LOG.warn("The value of " + + DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + + " is greater than 1.0 but should be in the range 0.0 - 1.0"); + } + if (balancedPreferencePercent < 0.5) { + LOG.warn("The value of " + + 
DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + + " is less than 0.5 so datanodes with more used percent will" + + " receive more block allocations."); + } + + + if (balancedSpaceTolerance > 20 || balancedSpaceTolerance < 0) { + LOG.warn("The value of " + + DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY + + " is invalid, Current value is " + balancedSpaceTolerance + ", Default value " + + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT + + " will be used instead."); + balancedSpaceTolerance = + DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT; + } + + balancedPreference = (int) (100 * balancedPreferencePercent); + } + + @Override + protected DatanodeDescriptor chooseDataNode(final String scope, + final Collection excludedNode, StorageType type) { + // only the code that uses DFSNetworkTopology should trigger this code path. 
+ Preconditions.checkArgument(clusterMap instanceof DFSNetworkTopology); + DFSNetworkTopology dfsClusterMap = (DFSNetworkTopology) clusterMap; + DatanodeDescriptor a = (DatanodeDescriptor) dfsClusterMap + .chooseRandomWithStorageTypeTwoTrial(scope, excludedNode, type); + DatanodeDescriptor b = (DatanodeDescriptor) dfsClusterMap + .chooseRandomWithStorageTypeTwoTrial(scope, excludedNode, type); + return select(a, b); + } + + @Override + protected DatanodeDescriptor chooseDataNode(final String scope, + final Collection excludedNode) { + DatanodeDescriptor a = + (DatanodeDescriptor) clusterMap.chooseRandom(scope, excludedNode); + DatanodeDescriptor b = + (DatanodeDescriptor) clusterMap.chooseRandom(scope, excludedNode); + return select(a, b); + } + + private DatanodeDescriptor select(DatanodeDescriptor a, + DatanodeDescriptor b) { + if (a != null && b != null) { + int ret = compareDataNode(a, b); + if (ret == 0) { + return a; + } else if (ret < 0) { + return (RAND.nextInt(100) < balancedPreference) ? a : b; + } else { + return (RAND.nextInt(100) < balancedPreference) ? b : a; + } + } else { + return a == null ? b : a; + } + } + + /** + * Compare the two data nodes. + */ + protected int compareDataNode(final DatanodeDescriptor a, + final DatanodeDescriptor b) { + if (a.equals(b) + || Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < balancedSpaceTolerance) { + return 0; + } + return a.getDfsUsedPercent() < b.getDfsUsedPercent() ? 
-1 : 1; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockIdManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockIdManager.java index bec6ec8368120..0ac1d53e14267 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockIdManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockIdManager.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockType; import org.apache.hadoop.hdfs.protocol.HdfsConstants; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java index dc6cf3266a5fc..b8047a8d08015 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java @@ -19,10 +19,10 @@ import java.io.IOException; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; -import java.util.NoSuchElementException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.Block; @@ -57,9 +57,19 
@@ public abstract class BlockInfo extends Block /** For implementing {@link LightWeightGSet.LinkedElement} interface. */ private LightWeightGSet.LinkedElement nextLinkedElement; - - // Storages this block is replicated on - protected DatanodeStorageInfo[] storages; + /** + * This array contains triplets of references. For each i-th storage, the + * block belongs to triplets[3*i] is the reference to the + * {@link DatanodeStorageInfo} and triplets[3*i+1] and triplets[3*i+2] are + * references to the previous and the next blocks, respectively, in the list + * of blocks belonging to this storage. + * + * Using previous and next in Object triplets is done instead of a + * {@link LinkedList} list to efficiently use memory. With LinkedList the cost + * per replica is 42 bytes (LinkedList#Entry object per replica) versus 16 + * bytes using the triplets. + */ + protected Object[] triplets; private BlockUnderConstructionFeature uc; @@ -69,14 +79,14 @@ public abstract class BlockInfo extends Block * in the block group */ public BlockInfo(short size) { - this.storages = new DatanodeStorageInfo[size]; + this.triplets = new Object[3 * size]; this.bcId = INVALID_INODE_ID; this.replication = isStriped() ? 0 : size; } public BlockInfo(Block blk, short size) { super(blk); - this.storages = new DatanodeStorageInfo[size]; + this.triplets = new Object[3*size]; this.bcId = INVALID_INODE_ID; this.replication = isStriped() ? 0 : size; } @@ -106,31 +116,7 @@ public boolean isDeleted() { } public Iterator getStorageInfos() { - return new Iterator() { - - private int index = 0; - - @Override - public boolean hasNext() { - while (index < storages.length && storages[index] == null) { - index++; - } - return index < storages.length; - } - - @Override - public DatanodeStorageInfo next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return storages[index++]; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Sorry. 
can't remove."); - } - }; + return new BlocksMap.StorageIterator(this); } public DatanodeDescriptor getDatanode(int index) { @@ -139,18 +125,73 @@ public DatanodeDescriptor getDatanode(int index) { } DatanodeStorageInfo getStorageInfo(int index) { - assert this.storages != null : "BlockInfo is not initialized"; - return storages[index]; + assert this.triplets != null : "BlockInfo is not initialized"; + assert index >= 0 && index*3 < triplets.length : "Index is out of bound"; + return (DatanodeStorageInfo)triplets[index*3]; + } + + BlockInfo getPrevious(int index) { + assert this.triplets != null : "BlockInfo is not initialized"; + assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound"; + BlockInfo info = (BlockInfo)triplets[index*3+1]; + assert info == null || + info.getClass().getName().startsWith(BlockInfo.class.getName()) : + "BlockInfo is expected at " + index*3; + return info; + } + + BlockInfo getNext(int index) { + assert this.triplets != null : "BlockInfo is not initialized"; + assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound"; + BlockInfo info = (BlockInfo)triplets[index*3+2]; + assert info == null || info.getClass().getName().startsWith( + BlockInfo.class.getName()) : + "BlockInfo is expected at " + index*3; + return info; } void setStorageInfo(int index, DatanodeStorageInfo storage) { - assert this.storages != null : "BlockInfo is not initialized"; - this.storages[index] = storage; + assert this.triplets != null : "BlockInfo is not initialized"; + assert index >= 0 && index*3 < triplets.length : "Index is out of bound"; + triplets[index*3] = storage; + } + + /** + * Return the previous block on the block list for the datanode at + * position index. Set the previous block on the list to "to". 
+ * + * @param index - the datanode index + * @param to - block to be set to previous on the list of blocks + * @return current previous block on the list of blocks + */ + BlockInfo setPrevious(int index, BlockInfo to) { + assert this.triplets != null : "BlockInfo is not initialized"; + assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound"; + BlockInfo info = (BlockInfo) triplets[index*3+1]; + triplets[index*3+1] = to; + return info; + } + + /** + * Return the next block on the block list for the datanode at + * position index. Set the next block on the list to "to". + * + * @param index - the datanode index + * @param to - block to be set to next on the list of blocks + * @return current next block on the list of blocks + */ + BlockInfo setNext(int index, BlockInfo to) { + assert this.triplets != null : "BlockInfo is not initialized"; + assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound"; + BlockInfo info = (BlockInfo) triplets[index*3+2]; + triplets[index*3+2] = to; + return info; } public int getCapacity() { - assert this.storages != null : "BlockInfo is not initialized"; - return storages.length; + assert this.triplets != null : "BlockInfo is not initialized"; + assert triplets.length % 3 == 0 : "Malformed BlockInfo"; + return triplets.length / 3; } /** @@ -227,6 +268,80 @@ int findStorageInfo(DatanodeStorageInfo storageInfo) { return -1; } + /** + * Insert this block into the head of the list of blocks + * related to the specified DatanodeStorageInfo. + * If the head is null then form a new list. + * @return current block as the new head of the list. 
+ */ + BlockInfo listInsert(BlockInfo head, DatanodeStorageInfo storage) { + int dnIndex = this.findStorageInfo(storage); + assert dnIndex >= 0 : "Data node is not found: current"; + assert getPrevious(dnIndex) == null && getNext(dnIndex) == null : + "Block is already in the list and cannot be inserted."; + this.setPrevious(dnIndex, null); + this.setNext(dnIndex, head); + if (head != null) { + head.setPrevious(head.findStorageInfo(storage), this); + } + return this; + } + + /** + * Remove this block from the list of blocks + * related to the specified DatanodeStorageInfo. + * If this block is the head of the list then return the next block as + * the new head. + * @return the new head of the list or null if the list becomes + * empy after deletion. + */ + BlockInfo listRemove(BlockInfo head, DatanodeStorageInfo storage) { + if (head == null) { + return null; + } + int dnIndex = this.findStorageInfo(storage); + if (dnIndex < 0) { // this block is not on the data-node list + return head; + } + + BlockInfo next = this.getNext(dnIndex); + BlockInfo prev = this.getPrevious(dnIndex); + this.setNext(dnIndex, null); + this.setPrevious(dnIndex, null); + if (prev != null) { + prev.setNext(prev.findStorageInfo(storage), next); + } + if (next != null) { + next.setPrevious(next.findStorageInfo(storage), prev); + } + if (this == head) { // removing the head + head = next; + } + return head; + } + + /** + * Remove this block from the list of blocks related to the specified + * DatanodeDescriptor. Insert it into the head of the list of blocks. + * + * @return the new head of the list. 
+ */ + public BlockInfo moveBlockToHead(BlockInfo head, DatanodeStorageInfo storage, + int curIndex, int headIndex) { + if (head == this) { + return this; + } + BlockInfo next = this.setNext(curIndex, head); + BlockInfo prev = this.setPrevious(curIndex, null); + + head.setPrevious(headIndex, this); + prev.setNext(prev.findStorageInfo(storage), next); + if (next != null) { + next.setPrevious(next.findStorageInfo(storage), prev); + } + return this; + } + @Override public int hashCode() { // Super implementation is sufficient diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoContiguous.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoContiguous.java index 7378e6f21b765..d68b72d6c6277 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoContiguous.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoContiguous.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.Block; @@ -38,20 +38,20 @@ public BlockInfoContiguous(Block blk, short size) { } /** - * Ensure that there is enough space to include num more storages. - * @return first free storage index. + * Ensure that there is enough space to include num more triplets. + * @return first free triplet index. 
*/ private int ensureCapacity(int num) { - assert this.storages != null : "BlockInfo is not initialized"; + assert this.triplets != null : "BlockInfo is not initialized"; int last = numNodes(); - if (storages.length >= (last+num)) { + if (triplets.length >= (last+num)*3) { return last; } /* Not enough space left. Create a new array. Should normally * happen only when replication is manually increased by the user. */ - DatanodeStorageInfo[] old = storages; - storages = new DatanodeStorageInfo[(last+num)]; - System.arraycopy(old, 0, storages, 0, last); + Object[] old = triplets; + triplets = new Object[(last+num)*3]; + System.arraycopy(old, 0, triplets, 0, last * 3); return last; } @@ -63,6 +63,8 @@ boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) { // find the last null node int lastNode = ensureCapacity(1); setStorageInfo(lastNode, storage); + setNext(lastNode, null); + setPrevious(lastNode, null); return true; } @@ -72,12 +74,18 @@ boolean removeStorage(DatanodeStorageInfo storage) { if (dnIndex < 0) { // the node is not found return false; } + assert getPrevious(dnIndex) == null && getNext(dnIndex) == null : + "Block is still in the list and must be removed first."; // find the last not null node int lastNode = numNodes()-1; - // replace current node entry by the lastNode one + // replace current node triplet by the lastNode one setStorageInfo(dnIndex, getStorageInfo(lastNode)); - // set the last entry to null + setNext(dnIndex, getNext(lastNode)); + setPrevious(dnIndex, getPrevious(lastNode)); + // set the last triplet to null setStorageInfo(lastNode, null); + setNext(lastNode, null); + setPrevious(lastNode, null); return true; } @@ -96,7 +104,8 @@ boolean isProvided() { @Override public int numNodes() { - assert this.storages != null : "BlockInfo is not initialized"; + assert this.triplets != null : "BlockInfo is not initialized"; + assert triplets.length % 3 == 0 : "Malformed BlockInfo"; for (int idx = getCapacity()-1; idx >= 0; idx--) { if 
(getDatanode(idx) != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java index ce186a7b3d1d2..42d0471433414 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockType; @@ -32,20 +32,21 @@ /** * Subclass of {@link BlockInfo}, presenting a block group in erasure coding. * - * We still use a storage array to store DatanodeStorageInfo for each block in - * the block group. For a (m+k) block group, the first (m+k) storage units + * We still use triplets to store DatanodeStorageInfo for each block in the + * block group, as well as the previous/next block in the corresponding + * DatanodeStorageInfo. For a (m+k) block group, the first (m+k) triplet units * are sorted and strictly mapped to the corresponding block. * * Normally each block belonging to group is stored in only one DataNode. - * However, it is possible that some block is over-replicated. Thus the storage + * However, it is possible that some block is over-replicated. Thus the triplet * array's size can be larger than (m+k). Thus currently we use an extra byte - * array to record the block index for each entry. 
+ * array to record the block index for each triplet. */ @InterfaceAudience.Private public class BlockInfoStriped extends BlockInfo { private final ErasureCodingPolicy ecPolicy; /** - * Always the same size with storage. Record the block index for each entry + * Always the same size with triplets. Record the block index for each triplet * TODO: actually this is only necessary for over-replicated block. Thus can * be further optimized to save memory usage. */ @@ -109,7 +110,7 @@ private int findSlot() { return i; } } - // need to expand the storage size + // need to expand the triplet size ensureCapacity(i + 1, true); return i; } @@ -141,6 +142,8 @@ boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) { private void addStorage(DatanodeStorageInfo storage, int index, int blockIndex) { setStorageInfo(index, storage); + setNext(index, null); + setPrevious(index, null); indices[index] = (byte) blockIndex; } @@ -183,22 +186,26 @@ boolean removeStorage(DatanodeStorageInfo storage) { if (dnIndex < 0) { // the node is not found return false; } - // set the entry to null + assert getPrevious(dnIndex) == null && getNext(dnIndex) == null : + "Block is still in the list and must be removed first."; + // set the triplet to null setStorageInfo(dnIndex, null); + setNext(dnIndex, null); + setPrevious(dnIndex, null); indices[dnIndex] = -1; return true; } private void ensureCapacity(int totalSize, boolean keepOld) { if (getCapacity() < totalSize) { - DatanodeStorageInfo[] old = storages; + Object[] old = triplets; byte[] oldIndices = indices; - storages = new DatanodeStorageInfo[totalSize]; + triplets = new Object[totalSize * 3]; indices = new byte[totalSize]; initIndices(); if (keepOld) { - System.arraycopy(old, 0, storages, 0, old.length); + System.arraycopy(old, 0, triplets, 0, old.length); System.arraycopy(oldIndices, 0, indices, 0, oldIndices.length); } } @@ -225,7 +232,8 @@ public BlockType getBlockType() { @Override public int numNodes() { - assert 
this.storages != null : "BlockInfo is not initialized"; + assert this.triplets != null : "BlockInfo is not initialized"; + assert triplets.length % 3 == 0 : "Malformed BlockInfo"; int num = 0; for (int idx = getCapacity()-1; idx >= 0; idx--) { if (getStorageInfo(idx) != null) { @@ -304,7 +312,8 @@ public StorageAndBlockIndex next() { throw new NoSuchElementException(); } int i = index++; - return new StorageAndBlockIndex(storages[i], indices[i]); + return new StorageAndBlockIndex( + (DatanodeStorageInfo) triplets[i * 3], indices[i]); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ad61c716a8019..b786de53ffa2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -35,6 +35,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; @@ -46,6 +47,7 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicLong; import javax.management.ObjectName; @@ -68,7 +70,6 @@ import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -109,7 +110,6 @@ 
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; -import org.apache.hadoop.hdfs.util.FoldedTreeSet; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.server.namenode.CacheManager; @@ -124,10 +124,9 @@ import org.apache.hadoop.util.LightWeightGSet; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import org.apache.hadoop.util.VersionInfo; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -192,6 +191,9 @@ public class BlockManager implements BlockStatsMXBean { private volatile long lowRedundancyBlocksCount = 0L; private volatile long scheduledReplicationBlocksCount = 0L; + private final long deleteBlockLockTimeMs = 500; + private final long deleteBlockUnlockIntervalTimeMs = 100; + /** flag indicating whether replication queues have been initialized */ private boolean initializedReplQueues; @@ -311,11 +313,6 @@ public long getTotalECBlockGroups() { private int replQueueResetToHeadThreshold; private int replQueueCallsSinceReset = 0; - /** How often to check and the limit for the storageinfo efficiency. */ - private final long storageInfoDefragmentInterval; - private final long storageInfoDefragmentTimeout; - private final double storageInfoDefragmentRatio; - /** * Mapping: Block {@literal ->} { BlockCollection, datanodes, self ref } * Updated only in response to client-sent information. @@ -330,10 +327,12 @@ public long getTotalECBlockGroups() { * {@link #redundancyThread} has run at least one full iteration. 
*/ private final AtomicLong lastRedundancyCycleTS = new AtomicLong(-1); - /** StorageInfoDefragmenter thread. */ - private final Daemon storageInfoDefragmenterThread = - new Daemon(new StorageInfoDefragmenter()); - + /** + * markedDeleteBlockScrubber thread for handling async delete blocks. + */ + private final Daemon markedDeleteBlockScrubberThread = + new Daemon(new MarkedDeleteBlockScrubber()); + /** Block report thread for handling async reports. */ private final BlockReportProcessingThread blockReportThread; @@ -387,12 +386,12 @@ public long getTotalECBlockGroups() { * The maximum number of outgoing replication streams a given node should have * at one time considering all but the highest priority replications needed. */ - int maxReplicationStreams; + private volatile int maxReplicationStreams; /** * The maximum number of outgoing replication streams a given node should have * at one time. */ - int replicationStreamsHardLimit; + private volatile int replicationStreamsHardLimit; /** Minimum copies needed or else write is disallowed */ public final short minReplication; /** Default number of replicas */ @@ -401,7 +400,7 @@ public long getTotalECBlockGroups() { final int maxCorruptFilesReturned; final float blocksInvalidateWorkPct; - private int blocksReplWorkMultiplier; + private volatile int blocksReplWorkMultiplier; // whether or not to issue block encryption keys. final boolean encryptDataTransfer; @@ -432,6 +431,12 @@ public long getTotalECBlockGroups() { */ private int numBlocksPerIteration; + /** + * The blocks of deleted files are put into the queue, + * and the cleanup thread processes these blocks periodically. + */ + private final ConcurrentLinkedQueue> markedDeleteQueue; + /** * Progress of the Reconstruction queues initialisation. 
*/ @@ -485,7 +490,7 @@ public BlockManager(final Namesystem namesystem, boolean haEnabled, datanodeManager.getBlockInvalidateLimit(), startupDelayBlockDeletionInMs, blockIdManager); - + markedDeleteQueue = new ConcurrentLinkedQueue<>(); // Compute the map capacity by allocating 2% of total memory blocksMap = new BlocksMap( LightWeightGSet.computeCapacity(2.0, "BlocksMap")); @@ -547,19 +552,6 @@ public BlockManager(final Namesystem namesystem, boolean haEnabled, DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_DEFAULT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); - this.storageInfoDefragmentInterval = - conf.getLong( - DFSConfigKeys.DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_KEY, - DFSConfigKeys.DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_DEFAULT); - this.storageInfoDefragmentTimeout = - conf.getLong( - DFSConfigKeys.DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_KEY, - DFSConfigKeys.DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_DEFAULT); - this.storageInfoDefragmentRatio = - conf.getDouble( - DFSConfigKeys.DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_KEY, - DFSConfigKeys.DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_DEFAULT); - this.encryptDataTransfer = conf.getBoolean(DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY, DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT); @@ -748,8 +740,9 @@ public void activate(Configuration conf, long blockTotal) { datanodeManager.activate(conf); this.redundancyThread.setName("RedundancyMonitor"); this.redundancyThread.start(); - storageInfoDefragmenterThread.setName("StorageInfoMonitor"); - storageInfoDefragmenterThread.start(); + this.markedDeleteBlockScrubberThread. 
+ setName("MarkedDeleteBlockScrubberThread"); + this.markedDeleteBlockScrubberThread.start(); this.blockReportThread.start(); mxBeanName = MBeans.register("NameNode", "BlockStats", this); bmSafeMode.activate(blockTotal); @@ -762,11 +755,11 @@ public void close() { bmSafeMode.close(); try { redundancyThread.interrupt(); - storageInfoDefragmenterThread.interrupt(); blockReportThread.interrupt(); + markedDeleteBlockScrubberThread.interrupt(); redundancyThread.join(3000); - storageInfoDefragmenterThread.join(3000); blockReportThread.join(3000); + markedDeleteBlockScrubberThread.join(3000); } catch (InterruptedException ie) { } datanodeManager.close(); @@ -784,6 +777,11 @@ public BlockPlacementPolicy getBlockPlacementPolicy() { return placementPolicies.getPolicy(CONTIGUOUS); } + @VisibleForTesting + public BlockPlacementPolicy getStriptedBlockPlacementPolicy() { + return placementPolicies.getPolicy(STRIPED); + } + public void refreshBlockPlacementPolicy(Configuration conf) { BlockPlacementPolicies bpp = new BlockPlacementPolicies(conf, datanodeManager.getFSClusterStats(), @@ -908,7 +906,7 @@ private void dumpBlockMeta(Block block, PrintWriter out) { // source node returned is not used chooseSourceDatanodes(blockInfo, containingNodes, containingLiveReplicasNodes, numReplicas, new ArrayList(), - new ArrayList(), LowRedundancyBlocks.LEVEL); + new ArrayList(), new ArrayList(), LowRedundancyBlocks.LEVEL); // containingLiveReplicasNodes can include READ_ONLY_SHARED replicas which are // not included in the numReplicas.liveReplicas() count @@ -976,12 +974,19 @@ static private void ensurePositiveInt(int val, String key) { * * @param newVal - Must be a positive non-zero integer. 
*/ - public void setMaxReplicationStreams(int newVal) { - ensurePositiveInt(newVal, - DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY); + @VisibleForTesting + public void setMaxReplicationStreams(int newVal, boolean ensurePositiveInt) { + if (ensurePositiveInt) { + ensurePositiveInt(newVal, + DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY); + } maxReplicationStreams = newVal; } + public void setMaxReplicationStreams(int newVal) { + setMaxReplicationStreams(newVal, true); + } + /** Returns the current setting for maxReplicationStreamsHardLimit, set by * {@code DFSConfigKeys.DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_KEY}. * @@ -1056,7 +1061,7 @@ public short getMinReplicationToBeInMaintenance() { return minReplicationToBeInMaintenance; } - private short getMinMaintenanceStorageNum(BlockInfo block) { + short getMinMaintenanceStorageNum(BlockInfo block) { if (block.isStriped()) { return ((BlockInfoStriped) block).getRealDataBlockNum(); } else { @@ -1280,7 +1285,14 @@ public LocatedBlock convertLastBlockToUnderConstruction( neededReconstruction.remove(lastBlock, replicas.liveReplicas(), replicas.readOnlyReplicas(), replicas.outOfServiceReplicas(), getExpectedRedundancyNum(lastBlock)); - pendingReconstruction.remove(lastBlock); + PendingBlockInfo remove = pendingReconstruction.remove(lastBlock); + if (remove != null) { + List locations = remove.getTargets(); + DatanodeStorageInfo[] removedBlockTargets = + new DatanodeStorageInfo[locations.size()]; + locations.toArray(removedBlockTargets); + DatanodeStorageInfo.decrementBlocksScheduled(removedBlockTargets); + } // remove this block from the list of pending blocks to be deleted. 
for (DatanodeStorageInfo storage : targets) { @@ -1637,9 +1649,16 @@ public BlocksWithLocations getBlocksWithLocations(final DatanodeID datanode, if(numBlocks == 0) { return new BlocksWithLocations(new BlockWithLocations[0]); } + + // skip stale storage + DatanodeStorageInfo[] storageInfos = Arrays + .stream(node.getStorageInfos()) + .filter(s -> !s.areBlockContentsStale()) + .toArray(DatanodeStorageInfo[]::new); + // starting from a random block int startBlock = ThreadLocalRandom.current().nextInt(numBlocks); - Iterator iter = node.getBlockIterator(startBlock); + Iterator iter = node.getBlockIterator(startBlock, storageInfos); List results = new ArrayList(); long totalSize = 0; BlockInfo curBlock; @@ -1652,8 +1671,8 @@ public BlocksWithLocations getBlocksWithLocations(final DatanodeID datanode, totalSize += addBlock(curBlock, results); } if(totalSize it = storage.getBlockIterator(); - //add the BlockInfos to a new collection as the - //returned iterator is not modifiable. - Collection toRemove = new ArrayList<>(); - while (it.hasNext()) { - toRemove.add(it.next()); - } - - for (BlockInfo b : toRemove) { - removeStoredBlock(b, node); - } + final Iterator it = node.getBlockIterator(); + while(it.hasNext()) { + removeStoredBlock(it.next(), node); } // Remove all pending DN messages referencing this DN. 
pendingDNMessages.removeAllMessagesForDatanode(node); @@ -1696,11 +1706,8 @@ void removeBlocksAssociatedTo(final DatanodeStorageInfo storageInfo) { assert namesystem.hasWriteLock(); final Iterator it = storageInfo.getBlockIterator(); DatanodeDescriptor node = storageInfo.getDatanodeDescriptor(); - Collection toRemove = new ArrayList<>(); - while (it.hasNext()) { - toRemove.add(it.next()); - } - for (BlockInfo block : toRemove) { + while(it.hasNext()) { + BlockInfo block = it.next(); removeStoredBlock(block, node); final Block b = getBlockOnStorage(block, storageInfo); if (b != null) { @@ -1837,25 +1844,41 @@ private void markBlockAsCorrupt(BlockToMarkCorrupt b, b.getReasonCode(), b.getStored().isStriped()); NumberReplicas numberOfReplicas = countNodes(b.getStored()); - boolean hasEnoughLiveReplicas = numberOfReplicas.liveReplicas() >= + final int numUsableReplicas = numberOfReplicas.liveReplicas() + + numberOfReplicas.decommissioning() + + numberOfReplicas.liveEnteringMaintenanceReplicas(); + boolean hasEnoughLiveReplicas = numUsableReplicas >= expectedRedundancies; boolean minReplicationSatisfied = hasMinStorage(b.getStored(), - numberOfReplicas.liveReplicas()); + numUsableReplicas); boolean hasMoreCorruptReplicas = minReplicationSatisfied && (numberOfReplicas.liveReplicas() + numberOfReplicas.corruptReplicas()) > expectedRedundancies; boolean corruptedDuringWrite = minReplicationSatisfied && b.isCorruptedDuringWrite(); - // case 1: have enough number of live replicas - // case 2: corrupted replicas + live replicas > Replication factor + // case 1: have enough number of usable replicas + // case 2: corrupted replicas + usable replicas > Replication factor // case 3: Block is marked corrupt due to failure while writing. In this // case genstamp will be different than that of valid block. // In all these cases we can delete the replica. 
- // In case of 3, rbw block will be deleted and valid block can be replicated + // In case 3, rbw block will be deleted and valid block can be replicated. + // Note NN only becomes aware of corrupt blocks when the block report is sent, + // this means that by default it can take up to 6 hours for a corrupt block to + // be invalidated, after which the valid block can be replicated. if (hasEnoughLiveReplicas || hasMoreCorruptReplicas || corruptedDuringWrite) { + if (b.getStored().isStriped()) { + // If the block is an EC block, the whole block group is marked + // corrupted, so if this block is getting deleted, remove the block + // from corrupt replica map explicitly, since removal of the + // block from corrupt replicas may be delayed if the blocks are on + // stale storage due to failover or any other reason. + corruptReplicas.removeFromCorruptReplicasMap(b.getStored(), node); + BlockInfoStriped blk = (BlockInfoStriped) getStoredBlock(b.getStored()); + storageInfo.removeBlock(blk); + } // the block is over-replicated so invalidate the replicas immediately invalidateBlock(b, node, numberOfReplicas); } else if (isPopulatingReplQueues()) { @@ -2096,9 +2119,10 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block, NumberReplicas numReplicas = new NumberReplicas(); List liveBlockIndices = new ArrayList<>(); List liveBusyBlockIndices = new ArrayList<>(); + List excludeReconstructed = new ArrayList<>(); final DatanodeDescriptor[] srcNodes = chooseSourceDatanodes(block, containingNodes, liveReplicaNodes, numReplicas, - liveBlockIndices, liveBusyBlockIndices, priority); + liveBlockIndices, liveBusyBlockIndices, excludeReconstructed, priority); short requiredRedundancy = getExpectedLiveRedundancyNum(block, numReplicas); if(srcNodes == null || srcNodes.length == 0) { @@ -2108,6 +2132,16 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block, return null; } + // skip if source datanodes for reconstructing ec block are not enough + if 
(block.isStriped()) { + BlockInfoStriped stripedBlock = (BlockInfoStriped) block; + if (stripedBlock.getRealDataBlockNum() > srcNodes.length) { + LOG.debug("Block {} cannot be reconstructed due to shortage of source datanodes ", block); + NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled(); + return null; + } + } + // liveReplicaNodes can include READ_ONLY_SHARED replicas which are // not included in the numReplicas.liveReplicas() count assert liveReplicaNodes.size() >= numReplicas.liveReplicas(); @@ -2156,9 +2190,13 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block, for (int i = 0; i < liveBusyBlockIndices.size(); i++) { busyIndices[i] = liveBusyBlockIndices.get(i); } + byte[] excludeReconstructedIndices = new byte[excludeReconstructed.size()]; + for (int i = 0; i < excludeReconstructed.size(); i++) { + excludeReconstructedIndices[i] = excludeReconstructed.get(i); + } return new ErasureCodingWork(getBlockPoolId(), block, bc, newSrcNodes, containingNodes, liveReplicaNodes, additionalReplRequired, - priority, newIndices, busyIndices); + priority, newIndices, busyIndices, excludeReconstructedIndices); } else { return new ReplicationWork(block, bc, srcNodes, containingNodes, liveReplicaNodes, additionalReplRequired, @@ -2386,6 +2424,10 @@ private DatanodeDescriptor getDatanodeDescriptorFromStorage( * replicas of the given block. 
* @param liveBlockIndices List to be populated with indices of healthy * blocks in a striped block group + * @param liveBusyBlockIndices List to be populated with indices of healthy + * blocks in a striped block group in busy DN, + * which the recovery work have reached their + * replication limits * @param priority integer representing replication priority of the given * block * @return the array of DatanodeDescriptor of the chosen nodes from which to @@ -2396,7 +2438,7 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block, List containingNodes, List nodesContainingLiveReplicas, NumberReplicas numReplicas, List liveBlockIndices, - List liveBusyBlockIndices, int priority) { + List liveBusyBlockIndices, List excludeReconstructed, int priority) { containingNodes.clear(); nodesContainingLiveReplicas.clear(); List srcNodes = new ArrayList<>(); @@ -2462,18 +2504,24 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block, if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY && (!node.isDecommissionInProgress() && !node.isEnteringMaintenance()) - && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) { + && node.getNumberOfBlocksToBeReplicated() + + node.getNumberOfBlocksToBeErasureCoded() >= maxReplicationStreams) { if (isStriped && (state == StoredReplicaState.LIVE || state == StoredReplicaState.DECOMMISSIONING)) { liveBusyBlockIndices.add(blockIndex); + //HDFS-16566 ExcludeReconstructed won't be reconstructed. + excludeReconstructed.add(blockIndex); } continue; // already reached replication limit } - if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) { + if (node.getNumberOfBlocksToBeReplicated() + + node.getNumberOfBlocksToBeErasureCoded() >= replicationStreamsHardLimit) { if (isStriped && (state == StoredReplicaState.LIVE || state == StoredReplicaState.DECOMMISSIONING)) { liveBusyBlockIndices.add(blockIndex); + //HDFS-16566 ExcludeReconstructed won't be reconstructed. 
+ excludeReconstructed.add(blockIndex); } continue; } @@ -2517,7 +2565,7 @@ void processPendingReconstructions() { * with the most up-to-date block information (e.g. genstamp). */ BlockInfo bi = blocksMap.getStoredBlock(timedOutItems[i]); - if (bi == null) { + if (bi == null || bi.isDeleted()) { continue; } NumberReplicas num = countNodes(timedOutItems[i]); @@ -2725,6 +2773,8 @@ public boolean processReport(final DatanodeID nodeID, Collection invalidatedBlocks = Collections.emptyList(); String strBlockReportId = context != null ? Long.toHexString(context.getReportId()) : ""; + String fullBrLeaseId = + context != null ? Long.toHexString(context.getLeaseId()) : ""; try { node = datanodeManager.getDatanode(nodeID); @@ -2745,11 +2795,12 @@ public boolean processReport(final DatanodeID nodeID, storageInfo = node.updateStorage(storage); } if (namesystem.isInStartupSafeMode() + && !StorageType.PROVIDED.equals(storageInfo.getStorageType()) && storageInfo.getBlockReportCount() > 0) { - blockLog.info("BLOCK* processReport 0x{}: " + blockLog.info("BLOCK* processReport 0x{} with lease ID 0x{}: " + "discarded non-initial block report from {}" + " because namenode still in startup phase", - strBlockReportId, nodeID); + strBlockReportId, fullBrLeaseId, nodeID); blockReportLeaseManager.removeLease(node); return !node.hasStaleStorages(); } @@ -2757,17 +2808,17 @@ public boolean processReport(final DatanodeID nodeID, if (storageInfo.getBlockReportCount() == 0) { // The first block report can be processed a lot more efficiently than // ordinary block reports. This shortens restart times. 
- blockLog.info("BLOCK* processReport 0x{}: Processing first " + blockLog.info("BLOCK* processReport 0x{} with lease ID 0x{}: Processing first " + "storage report for {} from datanode {}", - strBlockReportId, + strBlockReportId, fullBrLeaseId, storageInfo.getStorageID(), - nodeID.getDatanodeUuid()); + nodeID); processFirstBlockReport(storageInfo, newReport); } else { // Block reports for provided storage are not // maintained by DN heartbeats if (!StorageType.PROVIDED.equals(storageInfo.getStorageType())) { - invalidatedBlocks = processReport(storageInfo, newReport, context); + invalidatedBlocks = processReport(storageInfo, newReport); } } storageInfo.receivedBlockReport(); @@ -2776,9 +2827,12 @@ public boolean processReport(final DatanodeID nodeID, namesystem.writeUnlock(); } - for (Block b : invalidatedBlocks) { - blockLog.debug("BLOCK* processReport 0x{}: {} on node {} size {} does not" - + " belong to any file", strBlockReportId, b, node, b.getNumBytes()); + if(blockLog.isDebugEnabled()) { + for (Block b : invalidatedBlocks) { + blockLog.debug("BLOCK* processReport 0x{} with lease ID 0x{}: {} on node {} size {} " + + "does not belong to any file.", strBlockReportId, fullBrLeaseId, b, + node, b.getNumBytes()); + } } // Log the block report processing stats from Namenode perspective @@ -2786,9 +2840,9 @@ public boolean processReport(final DatanodeID nodeID, if (metrics != null) { metrics.addStorageBlockReport((int) (endTime - startTime)); } - blockLog.info("BLOCK* processReport 0x{}: from storage {} node {}, " + + blockLog.info("BLOCK* processReport 0x{} with lease ID 0x{}: from storage {} node {}, " + "blocks: {}, hasStaleStorage: {}, processing time: {} msecs, " + - "invalidatedBlocks: {}", strBlockReportId, storage.getStorageID(), + "invalidatedBlocks: {}", strBlockReportId, fullBrLeaseId, storage.getStorageID(), nodeID, newReport.getNumberOfBlocks(), node.hasStaleStorages(), (endTime - startTime), invalidatedBlocks.size()); @@ -2861,8 +2915,7 @@ void 
rescanPostponedMisreplicatedBlocks() { Collection processReport( final DatanodeStorageInfo storageInfo, - final BlockListAsLongs report, - BlockReportContext context) throws IOException { + final BlockListAsLongs report) throws IOException { // Normal case: // Modify the (block-->datanode) map, according to the difference // between the old and new block report. @@ -2872,36 +2925,8 @@ Collection processReport( Collection toInvalidate = new ArrayList<>(); Collection toCorrupt = new ArrayList<>(); Collection toUC = new ArrayList<>(); - - boolean sorted = false; - String strBlockReportId = ""; - if (context != null) { - sorted = context.isSorted(); - strBlockReportId = Long.toHexString(context.getReportId()); - } - - Iterable sortedReport; - if (!sorted) { - blockLog.warn("BLOCK* processReport 0x{}: Report from the DataNode ({}) " - + "is unsorted. This will cause overhead on the NameNode " - + "which needs to sort the Full BR. Please update the " - + "DataNode to the same version of Hadoop HDFS as the " - + "NameNode ({}).", - strBlockReportId, - storageInfo.getDatanodeDescriptor().getDatanodeUuid(), - VersionInfo.getVersion()); - Set set = new FoldedTreeSet<>(); - for (BlockReportReplica iblk : report) { - set.add(new BlockReportReplica(iblk)); - } - sortedReport = set; - } else { - sortedReport = report; - } - - reportDiffSorted(storageInfo, sortedReport, - toAdd, toRemove, toInvalidate, toCorrupt, toUC); - + reportDiff(storageInfo, report, + toAdd, toRemove, toInvalidate, toCorrupt, toUC); DatanodeDescriptor node = storageInfo.getDatanodeDescriptor(); // Process the blocks on each queue @@ -2918,8 +2943,8 @@ Collection processReport( numBlocksLogged++; } if (numBlocksLogged > maxNumBlocksToLog) { - blockLog.info("BLOCK* processReport 0x{}: logged info for {} of {} " + - "reported.", strBlockReportId, maxNumBlocksToLog, numBlocksLogged); + blockLog.info("BLOCK* processReport: logged info for {} of {} " + + "reported.", maxNumBlocksToLog, numBlocksLogged); } for 
(Block b : toInvalidate) { addToInvalidates(b, node); @@ -3051,129 +3076,158 @@ void processFirstBlockReport( } } - private void reportDiffSorted(DatanodeStorageInfo storageInfo, - Iterable newReport, + private void reportDiff(DatanodeStorageInfo storageInfo, + BlockListAsLongs newReport, Collection toAdd, // add to DatanodeDescriptor Collection toRemove, // remove from DatanodeDescriptor Collection toInvalidate, // should be removed from DN Collection toCorrupt, // add to corrupt replicas list Collection toUC) { // add to under-construction list - // The blocks must be sorted and the storagenodes blocks must be sorted - Iterator storageBlocksIterator = storageInfo.getBlockIterator(); + // place a delimiter in the list which separates blocks + // that have been reported from those that have not DatanodeDescriptor dn = storageInfo.getDatanodeDescriptor(); - BlockInfo storageBlock = null; - - for (BlockReportReplica replica : newReport) { - - long replicaID = replica.getBlockId(); - if (BlockIdManager.isStripedBlockID(replicaID) - && (!hasNonEcBlockUsingStripedID || - !blocksMap.containsBlock(replica))) { - replicaID = BlockIdManager.convertToStripedID(replicaID); - } - - ReplicaState reportedState = replica.getState(); - - LOG.debug("Reported block {} on {} size {} replicaState = {}", - replica, dn, replica.getNumBytes(), reportedState); - - if (shouldPostponeBlocksFromFuture - && isGenStampInFuture(replica)) { - queueReportedBlock(storageInfo, replica, reportedState, - QUEUE_REASON_FUTURE_GENSTAMP); - continue; - } - - if (storageBlock == null && storageBlocksIterator.hasNext()) { - storageBlock = storageBlocksIterator.next(); - } - - do { - int cmp; - if (storageBlock == null || - (cmp = Long.compare(replicaID, storageBlock.getBlockId())) < 0) { - // Check if block is available in NN but not yet on this storage - BlockInfo nnBlock = blocksMap.getStoredBlock(new Block(replicaID)); - if (nnBlock != null) { - reportDiffSortedInner(storageInfo, replica, reportedState, 
- nnBlock, toAdd, toCorrupt, toUC); - } else { - // Replica not found anywhere so it should be invalidated - toInvalidate.add(new Block(replica)); - } - break; - } else if (cmp == 0) { - // Replica matched current storageblock - reportDiffSortedInner(storageInfo, replica, reportedState, - storageBlock, toAdd, toCorrupt, toUC); - storageBlock = null; - } else { - // replica has higher ID than storedBlock - // Remove all stored blocks with IDs lower than replica - do { - toRemove.add(storageBlock); - storageBlock = storageBlocksIterator.hasNext() - ? storageBlocksIterator.next() : null; - } while (storageBlock != null && - Long.compare(replicaID, storageBlock.getBlockId()) > 0); + Block delimiterBlock = new Block(); + BlockInfo delimiter = new BlockInfoContiguous(delimiterBlock, + (short) 1); + AddBlockResult result = storageInfo.addBlock(delimiter, delimiterBlock); + assert result == AddBlockResult.ADDED + : "Delimiting block cannot be present in the node"; + int headIndex = 0; //currently the delimiter is in the head of the list + int curIndex; + + if (newReport == null) { + newReport = BlockListAsLongs.EMPTY; + } + // scan the report and process newly reported blocks + for (BlockReportReplica iblk : newReport) { + ReplicaState iState = iblk.getState(); + LOG.debug("Reported block {} on {} size {} replicaState = {}", iblk, dn, + iblk.getNumBytes(), iState); + BlockInfo storedBlock = processReportedBlock(storageInfo, + iblk, iState, toAdd, toInvalidate, toCorrupt, toUC); + + // move block to the head of the list + if (storedBlock != null) { + curIndex = storedBlock.findStorageInfo(storageInfo); + if (curIndex >= 0) { + headIndex = + storageInfo.moveBlockToHead(storedBlock, curIndex, headIndex); } - } while (storageBlock != null); + } } - // Iterate any remaining blocks that have not been reported and remove them - while (storageBlocksIterator.hasNext()) { - toRemove.add(storageBlocksIterator.next()); + // collect blocks that have not been reported + // all of them 
are next to the delimiter + Iterator it = + storageInfo.new BlockIterator(delimiter.getNext(0)); + while (it.hasNext()) { + toRemove.add(it.next()); } + storageInfo.removeBlock(delimiter); } - private void reportDiffSortedInner( + /** + * Process a block replica reported by the data-node. + * No side effects except adding to the passed-in Collections. + * + *

<ol>
+ * <li>If the block is not known to the system (not in blocksMap) then the + * data-node should be notified to invalidate this block.</li>
+ * <li>If the reported replica is valid that is has the same generation stamp + * and length as recorded on the name-node, then the replica location should + * be added to the name-node.</li>
+ * <li>If the reported replica is not valid, then it is marked as corrupt, + * which triggers replication of the existing valid replicas. + * Corrupt replicas are removed from the system when the block + * is fully replicated.</li>
+ * <li>If the reported replica is for a block currently marked "under + * construction" in the NN, then it should be added to the + * BlockUnderConstructionFeature's list of replicas.</li>
+ * </ol>
      + * + * @param storageInfo DatanodeStorageInfo that sent the report. + * @param block reported block replica + * @param reportedState reported replica state + * @param toAdd add to DatanodeDescriptor + * @param toInvalidate missing blocks (not in the blocks map) + * should be removed from the data-node + * @param toCorrupt replicas with unexpected length or generation stamp; + * add to corrupt replicas + * @param toUC replicas of blocks currently under construction + * @return the up-to-date stored block, if it should be kept. + * Otherwise, null. + */ + private BlockInfo processReportedBlock( final DatanodeStorageInfo storageInfo, - final BlockReportReplica replica, final ReplicaState reportedState, - final BlockInfo storedBlock, + final Block block, final ReplicaState reportedState, final Collection toAdd, + final Collection toInvalidate, final Collection toCorrupt, final Collection toUC) { - assert replica != null; - assert storedBlock != null; - DatanodeDescriptor dn = storageInfo.getDatanodeDescriptor(); + + LOG.debug("Reported block {} on {} size {} replicaState = {}", block, dn, + block.getNumBytes(), reportedState); + + if (shouldPostponeBlocksFromFuture && isGenStampInFuture(block)) { + queueReportedBlock(storageInfo, block, reportedState, + QUEUE_REASON_FUTURE_GENSTAMP); + return null; + } + + // find block by blockId + BlockInfo storedBlock = getStoredBlock(block); + if(storedBlock == null) { + // If blocksMap does not contain reported block id, + // the replica should be removed from the data-node. 
+ toInvalidate.add(new Block(block)); + return null; + } BlockUCState ucState = storedBlock.getBlockUCState(); // Block is on the NN LOG.debug("In memory blockUCState = {}", ucState); // Ignore replicas already scheduled to be removed from the DN - if (invalidateBlocks.contains(dn, replica)) { - return; + if(invalidateBlocks.contains(dn, block)) { + return storedBlock; } - BlockToMarkCorrupt c = checkReplicaCorrupt(replica, reportedState, - storedBlock, ucState, dn); + BlockToMarkCorrupt c = checkReplicaCorrupt( + block, reportedState, storedBlock, ucState, dn); if (c != null) { if (shouldPostponeBlocksFromFuture) { // If the block is an out-of-date generation stamp or state, // but we're the standby, we shouldn't treat it as corrupt, // but instead just queue it for later processing. - // TODO: Pretty confident this should be s/storedBlock/block below, - // since we should be postponing the info of the reported block, not - // the stored block. See HDFS-6289 for more context. - queueReportedBlock(storageInfo, storedBlock, reportedState, + // Storing the reported block for later processing, as that is what + // comes from the IBR / FBR and hence what we should use to compare + // against the memory state. + // See HDFS-6289 and HDFS-15422 for more context. + queueReportedBlock(storageInfo, block, reportedState, QUEUE_REASON_CORRUPT_STATE); } else { toCorrupt.add(c); } - } else if (isBlockUnderConstruction(storedBlock, ucState, reportedState)) { - toUC.add(new StatefulBlockInfo(storedBlock, new Block(replica), - reportedState)); - } else if (reportedState == ReplicaState.FINALIZED && - (storedBlock.findStorageInfo(storageInfo) == -1 || - corruptReplicas.isReplicaCorrupt(storedBlock, dn))) { - // Add replica if appropriate. If the replica was previously corrupt - // but now okay, it might need to be updated. 
- toAdd.add(new BlockInfoToAdd(storedBlock, new Block(replica))); + return storedBlock; + } + + if (isBlockUnderConstruction(storedBlock, ucState, reportedState)) { + toUC.add(new StatefulBlockInfo(storedBlock, + new Block(block), reportedState)); + return storedBlock; } + + // Add replica if appropriate. If the replica was previously corrupt + // but now okay, it might need to be updated. + if (reportedState == ReplicaState.FINALIZED + && (storedBlock.findStorageInfo(storageInfo) == -1 || + corruptReplicas.isReplicaCorrupt(storedBlock, dn))) { + toAdd.add(new BlockInfoToAdd(storedBlock, new Block(block))); + } + return storedBlock; } /** @@ -3416,7 +3470,7 @@ private void addStoredBlockImmediate(BlockInfo storedBlock, Block reported, } // just add it - AddBlockResult result = storageInfo.addBlockInitial(storedBlock, reported); + AddBlockResult result = storageInfo.addBlock(storedBlock, reported); // Now check for completion of blocks and safe block count int numCurrentReplica = countLiveNodes(storedBlock); @@ -3541,7 +3595,7 @@ private Block addStoredBlock(final BlockInfo block, ". 
blockMap has {} but corrupt replicas map has {}", storedBlock, numCorruptNodes, corruptReplicasCount); } - if ((corruptReplicasCount > 0) && (numLiveReplicas >= fileRedundancy)) { + if ((corruptReplicasCount > 0) && (numUsableReplicas >= fileRedundancy)) { invalidateCorruptReplicas(storedBlock, reportedBlock, num); } return storedBlock; @@ -3580,9 +3634,24 @@ private void invalidateCorruptReplicas(BlockInfo blk, Block reported, // ConcurrentModificationException, when the block is removed from the node DatanodeDescriptor[] nodesCopy = nodes.toArray(new DatanodeDescriptor[nodes.size()]); + + DatanodeStorageInfo[] storages = null; + if (blk.isStriped()) { + storages = getStorages(blk); + } + for (DatanodeDescriptor node : nodesCopy) { + Block blockToInvalidate = reported; + if (storages != null && blk.isStriped()) { + for (DatanodeStorageInfo s : storages) { + if (s.getDatanodeDescriptor().equals(node)) { + blockToInvalidate = getBlockOnStorage(blk, s); + break; + } + } + } try { - if (!invalidateBlock(new BlockToMarkCorrupt(reported, blk, null, + if (!invalidateBlock(new BlockToMarkCorrupt(blockToInvalidate, blk, null, Reason.ANY), node, numberReplicas)) { removedFromBlocksMap = false; } @@ -3627,7 +3696,7 @@ public void run() { /* * Stop the ongoing initialisation of reconstruction queues */ - private void stopReconstructionInitializer() { + public void stopReconstructionInitializer() { if (reconstructionQueuesInitializer != null) { reconstructionQueuesInitializer.interrupt(); try { @@ -4004,6 +4073,14 @@ private void chooseExcessRedundancyStriped(BlockCollection bc, List replicasToDelete = placementPolicy .chooseReplicasToDelete(nonExcess, candidates, (short) 1, excessTypes, null, null); + if (LOG.isDebugEnabled()) { + LOG.debug("Choose redundant EC replicas to delete from blk_{} which is located in {}", + sblk.getBlockId(), storage2index); + LOG.debug("Storages with candidate blocks to be deleted: {}", candidates); + LOG.debug("Storages with blocks to be deleted: 
{}", replicasToDelete); + } + Preconditions.checkArgument(candidates.containsAll(replicasToDelete), + "The EC replicas to be deleted are not in the candidate list"); for (DatanodeStorageInfo chosen : replicasToDelete) { processChosenExcessRedundancy(nonExcess, chosen, storedBlock); candidates.remove(chosen); @@ -4181,6 +4258,12 @@ private boolean processAndHandleReportedBlock( DatanodeStorageInfo storageInfo, Block block, ReplicaState reportedState, DatanodeDescriptor delHintNode) throws IOException { + // blockReceived reports a finalized block + Collection toAdd = new LinkedList<>(); + Collection toInvalidate = new LinkedList(); + Collection toCorrupt = + new LinkedList(); + Collection toUC = new LinkedList(); final DatanodeDescriptor node = storageInfo.getDatanodeDescriptor(); @@ -4194,57 +4277,33 @@ private boolean processAndHandleReportedBlock( return false; } - // find block by blockId - BlockInfo storedBlock = getStoredBlock(block); - if(storedBlock == null) { - // If blocksMap does not contain reported block id, - // the replica should be removed from the data-node. 
- blockLog.debug("BLOCK* addBlock: block {} on node {} size {} does not " + - "belong to any file", block, node, block.getNumBytes()); - addToInvalidates(new Block(block), node); - return true; - } + processReportedBlock(storageInfo, block, reportedState, toAdd, toInvalidate, + toCorrupt, toUC); + // the block is only in one of the to-do lists + // if it is in none then data-node already has it + assert toUC.size() + toAdd.size() + toInvalidate.size() + toCorrupt + .size() <= 1 : "The block should be only in one of the lists."; - BlockUCState ucState = storedBlock.getBlockUCState(); - // Block is on the NN - LOG.debug("In memory blockUCState = {}", ucState); - - // Ignore replicas already scheduled to be removed from the DN - if(invalidateBlocks.contains(node, block)) { - return true; + for (StatefulBlockInfo b : toUC) { + addStoredBlockUnderConstruction(b, storageInfo); } - - BlockToMarkCorrupt c = checkReplicaCorrupt( - block, reportedState, storedBlock, ucState, node); - if (c != null) { - if (shouldPostponeBlocksFromFuture) { - // If the block is an out-of-date generation stamp or state, - // but we're the standby, we shouldn't treat it as corrupt, - // but instead just queue it for later processing. - // TODO: Pretty confident this should be s/storedBlock/block below, - // since we should be postponing the info of the reported block, not - // the stored block. See HDFS-6289 for more context. 
- queueReportedBlock(storageInfo, storedBlock, reportedState, - QUEUE_REASON_CORRUPT_STATE); - } else { - markBlockAsCorrupt(c, storageInfo, node); - } - return true; + long numBlocksLogged = 0; + for (BlockInfoToAdd b : toAdd) { + addStoredBlock(b.stored, b.reported, storageInfo, delHintNode, + numBlocksLogged < maxNumBlocksToLog); + numBlocksLogged++; } - - if (isBlockUnderConstruction(storedBlock, ucState, reportedState)) { - addStoredBlockUnderConstruction( - new StatefulBlockInfo(storedBlock, new Block(block), reportedState), - storageInfo); - return true; + if (numBlocksLogged > maxNumBlocksToLog) { + blockLog.debug("BLOCK* addBlock: logged info for {} of {} reported.", + maxNumBlocksToLog, numBlocksLogged); } - - // Add replica if appropriate. If the replica was previously corrupt - // but now okay, it might need to be updated. - if (reportedState == ReplicaState.FINALIZED - && (storedBlock.findStorageInfo(storageInfo) == -1 || - corruptReplicas.isReplicaCorrupt(storedBlock, node))) { - addStoredBlock(storedBlock, block, storageInfo, delHintNode, true); + for (Block b : toInvalidate) { + blockLog.debug("BLOCK* addBlock: block {} on node {} size {} does not " + + "belong to any file", b, node, b.getNumBytes()); + addToInvalidates(b, node); + } + for (BlockToMarkCorrupt b : toCorrupt) { + markBlockAsCorrupt(b, storageInfo, node); } return true; } @@ -4539,7 +4598,7 @@ boolean isNodeHealthyForDecommissionOrMaintenance(DatanodeDescriptor node) { if (pendingReconstructionBlocksCount == 0 && lowRedundancyBlocksCount == 0) { LOG.info("Node {} is dead and there are no low redundancy" + - " blocks or blocks pending reconstruction. Safe to decommission or", + " blocks or blocks pending reconstruction. Safe to decommission or" + " put in maintenance.", node); return true; } @@ -4907,6 +4966,77 @@ public long getLastRedundancyMonitorTS() { return lastRedundancyCycleTS.get(); } + /** + * Periodically deletes the marked block. 
+ */ + private class MarkedDeleteBlockScrubber implements Runnable { + private Iterator toDeleteIterator = null; + private boolean isSleep; + private NameNodeMetrics metrics; + + private void remove(long time) { + if (checkToDeleteIterator()) { + namesystem.writeLock(); + try { + while (toDeleteIterator.hasNext()) { + removeBlock(toDeleteIterator.next()); + metrics.decrPendingDeleteBlocksCount(); + if (Time.monotonicNow() - time > deleteBlockLockTimeMs) { + isSleep = true; + break; + } + } + } finally { + namesystem.writeUnlock(); + } + } + } + + private boolean checkToDeleteIterator() { + return toDeleteIterator != null && toDeleteIterator.hasNext(); + } + + @Override + public void run() { + LOG.info("Start MarkedDeleteBlockScrubber thread"); + while (namesystem.isRunning() && + !Thread.currentThread().isInterrupted()) { + if (!markedDeleteQueue.isEmpty() || checkToDeleteIterator()) { + try { + metrics = NameNode.getNameNodeMetrics(); + metrics.setDeleteBlocksQueued(markedDeleteQueue.size()); + isSleep = false; + long startTime = Time.monotonicNow(); + remove(startTime); + while (!isSleep && !markedDeleteQueue.isEmpty() && + !Thread.currentThread().isInterrupted()) { + List markedDeleteList = markedDeleteQueue.poll(); + if (markedDeleteList != null) { + toDeleteIterator = markedDeleteList.listIterator(); + } + remove(startTime); + } + } catch (Exception e){ + LOG.warn("MarkedDeleteBlockScrubber encountered an exception" + + " during the block deletion process, " + + " the deletion of the block will retry in {} millisecond.", + deleteBlockUnlockIntervalTimeMs, e); + } + } + if (isSleep) { + LOG.debug("Clear markedDeleteQueue over {}" + + " millisecond to release the write lock", deleteBlockLockTimeMs); + } + try { + Thread.sleep(deleteBlockUnlockIntervalTimeMs); + } catch (InterruptedException e) { + LOG.info("Stopping MarkedDeleteBlockScrubber."); + break; + } + } + } + } + /** * Periodically calls computeBlockRecoveryWork(). 
*/ @@ -4944,91 +5074,6 @@ public void run() { } } - /** - * Runnable that monitors the fragmentation of the StorageInfo TreeSet and - * compacts it when it falls under a certain threshold. - */ - private class StorageInfoDefragmenter implements Runnable { - - @Override - public void run() { - while (namesystem.isRunning()) { - try { - // Check storage efficiency only when active NN is out of safe mode. - if (isPopulatingReplQueues()) { - scanAndCompactStorages(); - } - Thread.sleep(storageInfoDefragmentInterval); - } catch (Throwable t) { - if (!namesystem.isRunning()) { - LOG.info("Stopping thread."); - if (!(t instanceof InterruptedException)) { - LOG.info("Received an exception while shutting down.", t); - } - break; - } else if (!checkNSRunning && t instanceof InterruptedException) { - LOG.info("Stopping for testing."); - break; - } - LOG.error("Thread received Runtime exception.", t); - terminate(1, t); - } - } - } - - private void scanAndCompactStorages() throws InterruptedException { - ArrayList datanodesAndStorages = new ArrayList<>(); - for (DatanodeDescriptor node - : datanodeManager.getDatanodeListForReport(DatanodeReportType.ALL)) { - for (DatanodeStorageInfo storage : node.getStorageInfos()) { - try { - namesystem.readLock(); - double ratio = storage.treeSetFillRatio(); - if (ratio < storageInfoDefragmentRatio) { - datanodesAndStorages.add(node.getDatanodeUuid()); - datanodesAndStorages.add(storage.getStorageID()); - } - LOG.debug("StorageInfo TreeSet fill ratio {} : {}{}", - storage.getStorageID(), ratio, - (ratio < storageInfoDefragmentRatio) - ? " (queued for defragmentation)" : ""); - } finally { - namesystem.readUnlock(); - } - } - } - if (!datanodesAndStorages.isEmpty()) { - for (int i = 0; i < datanodesAndStorages.size(); i += 2) { - namesystem.writeLock(); - try { - final DatanodeDescriptor dn = datanodeManager. - getDatanode(datanodesAndStorages.get(i)); - if (dn == null) { - continue; - } - final DatanodeStorageInfo storage = dn. 
- getStorageInfo(datanodesAndStorages.get(i + 1)); - if (storage != null) { - boolean aborted = - !storage.treeSetCompact(storageInfoDefragmentTimeout); - if (aborted) { - // Compaction timed out, reset iterator to continue with - // the same storage next iteration. - i -= 2; - } - LOG.info("StorageInfo TreeSet defragmented {} : {}{}", - storage.getStorageID(), storage.treeSetFillRatio(), - aborted ? " (aborted)" : ""); - } - } finally { - namesystem.writeUnlock(); - } - // Wait between each iteration - Thread.sleep(1000); - } - } - } - } /** * Compute block replication and block invalidation work that can be scheduled @@ -5338,6 +5383,17 @@ public BlockIdManager getBlockIdManager() { return blockIdManager; } + @VisibleForTesting + public ConcurrentLinkedQueue> getMarkedDeleteQueue() { + return markedDeleteQueue; + } + + public void addBLocksToMarkedDeleteQueue(List blockInfos) { + markedDeleteQueue.add(blockInfos); + NameNode.getNameNodeMetrics(). + incrPendingDeleteBlocksCount(blockInfos.size()); + } + public long nextGenerationStamp(boolean legacyBlock) throws IOException { return blockIdManager.nextGenerationStamp(legacyBlock); } @@ -5441,4 +5497,14 @@ public void disableSPS() { public StoragePolicySatisfyManager getSPSManager() { return spsManager; } + + public void setExcludeSlowNodesEnabled(boolean enable) { + placementPolicies.getPolicy(CONTIGUOUS).setExcludeSlowNodesEnabled(enable); + placementPolicies.getPolicy(STRIPED).setExcludeSlowNodesEnabled(enable); + } + + @VisibleForTesting + public boolean getExcludeSlowNodesEnabled(BlockType blockType) { + return placementPolicies.getPolicy(blockType).getExcludeSlowNodesEnabled(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerFaultInjector.java index 957c5c0c3733c..a529eca6d6ceb 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerFaultInjector.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.server.protocol.BlockReportContext; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java index aecdb59df066f..cd4c521807ea2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java @@ -35,8 +35,8 @@ import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.util.Daemon; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java index 563183882765c..9f717217da538 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java @@ -25,7 +25,7 @@ import java.util.Map; import java.util.Set; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.AddBlockFlag; @@ -196,8 +196,9 @@ public void adjustSetsWithChosenReplica( if (moreThanOne.remove(cur)) { if (storages.size() == 1) { final DatanodeStorageInfo remaining = storages.get(0); - moreThanOne.remove(remaining); - exactlyOne.add(remaining); + if (moreThanOne.remove(remaining)) { + exactlyOne.add(remaining); + } } } else { exactlyOne.remove(cur); @@ -261,4 +262,16 @@ public void splitNodesWithRack( } } } + + /** + * Updates the value used for excludeSlowNodesEnabled, which is set by + * {@code DFSConfigKeys.DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY} + * initially. + * + * @param enable true, we will filter out slow nodes + * when choosing targets for blocks, otherwise false not filter. 
+ */ + public abstract void setExcludeSlowNodesEnabled(boolean enable); + + public abstract boolean getExcludeSlowNodesEnabled(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index a1ea63de22fd7..9407deae712ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -17,12 +17,16 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYSTORAGETYPE_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYSTORAGETYPE_KEY; import static org.apache.hadoop.util.Time.monotonicNow; import java.util.*; import java.util.concurrent.TimeUnit; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.AddBlockFlag; @@ -35,7 +39,7 @@ import org.apache.hadoop.net.Node; import org.apache.hadoop.net.NodeBase; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * The class is responsible for choosing the desired number of targets @@ -79,7 +83,9 @@ private enum NodeNotChosenReason { NODE_STALE("the 
node is stale"), NODE_TOO_BUSY("the node is too busy"), TOO_MANY_NODES_ON_RACK("the rack has too many chosen nodes"), - NOT_ENOUGH_STORAGE_SPACE("not enough storage space to place the block"); + NOT_ENOUGH_STORAGE_SPACE("not enough storage space to place the block"), + NO_REQUIRED_STORAGE_TYPE("required storage types are unavailable"), + NODE_SLOW("the node is too slow"); private final String text; @@ -92,9 +98,12 @@ private String getText() { } } - protected boolean considerLoad; + protected boolean considerLoad; + private boolean considerLoadByStorageType; protected double considerLoadFactor; private boolean preferLocalNode; + private boolean dataNodePeerStatsEnabled; + private volatile boolean excludeSlowNodesEnabled; protected NetworkTopology clusterMap; protected Host2NodesMap host2datanodeMap; private FSClusterStats stats; @@ -116,6 +125,9 @@ public void initialize(Configuration conf, FSClusterStats stats, this.considerLoad = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_DEFAULT); + this.considerLoadByStorageType = conf.getBoolean( + DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYSTORAGETYPE_KEY, + DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYSTORAGETYPE_DEFAULT); this.considerLoadFactor = conf.getDouble( DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR, DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT); @@ -137,6 +149,12 @@ public void initialize(Configuration conf, FSClusterStats stats, DFS_NAMENODE_BLOCKPLACEMENTPOLICY_DEFAULT_PREFER_LOCAL_NODE_KEY, DFSConfigKeys. 
DFS_NAMENODE_BLOCKPLACEMENTPOLICY_DEFAULT_PREFER_LOCAL_NODE_DEFAULT); + this.dataNodePeerStatsEnabled = conf.getBoolean( + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT); + this.excludeSlowNodesEnabled = conf.getBoolean( + DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY, + DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT); } @Override @@ -286,7 +304,7 @@ private DatanodeStorageInfo[] chooseTarget(int numOfReplicas, && stats.isAvoidingStaleDataNodesForWrite()); boolean avoidLocalRack = (addBlockFlags != null && addBlockFlags.contains(AddBlockFlag.NO_LOCAL_RACK) && writer != null - && clusterMap.getNumOfRacks() > 2); + && clusterMap.getNumOfNonEmptyRacks() > 2); boolean avoidLocalNode = (addBlockFlags != null && addBlockFlags.contains(AddBlockFlag.NO_LOCAL_WRITE) && writer != null @@ -367,7 +385,7 @@ protected int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas) { totalNumOfReplicas = clusterSize; } // No calculation needed when there is only one rack or picking one node. - int numOfRacks = clusterMap.getNumOfRacks(); + int numOfRacks = clusterMap.getNumOfNonEmptyRacks(); // HDFS-14527 return default when numOfRacks = 0 to avoid // ArithmeticException when calc maxNodesPerRack at following logic. 
if (numOfRacks <= 1 || totalNumOfReplicas <= 1) { @@ -417,7 +435,7 @@ private EnumMap getRequiredStorageTypes( * @param storageTypes storage type to be considered for target * @return local node of writer (not chosen node) */ - private Node chooseTarget(int numOfReplicas, + private Node chooseTarget(final int numOfReplicas, Node writer, final Set excludedNodes, final long blocksize, @@ -451,7 +469,7 @@ private Node chooseTarget(int numOfReplicas, LOG.trace("storageTypes={}", storageTypes); try { - if ((numOfReplicas = requiredStorageTypes.size()) == 0) { + if (requiredStorageTypes.size() == 0) { throw new NotEnoughReplicasException( "All required storage types are unavailable: " + " unavailableStorages=" + unavailableStorages @@ -480,10 +498,10 @@ private Node chooseTarget(int numOfReplicas, for (DatanodeStorageInfo resultStorage : results) { addToExcludedNodes(resultStorage.getDatanodeDescriptor(), oldExcludedNodes); } - // Set numOfReplicas, since it can get out of sync with the result list + // Set newNumOfReplicas, since it can get out of sync with the result list // if the NotEnoughReplicasException was thrown in chooseRandom(). 
- numOfReplicas = totalReplicasExpected - results.size(); - return chooseTarget(numOfReplicas, writer, oldExcludedNodes, blocksize, + int newNumOfReplicas = totalReplicasExpected - results.size(); + return chooseTarget(newNumOfReplicas, writer, oldExcludedNodes, blocksize, maxNodesPerRack, results, false, storagePolicy, unavailableStorages, newBlock, null); } @@ -502,8 +520,8 @@ private Node chooseTarget(int numOfReplicas, addToExcludedNodes(resultStorage.getDatanodeDescriptor(), oldExcludedNodes); } - numOfReplicas = totalReplicasExpected - results.size(); - return chooseTarget(numOfReplicas, writer, oldExcludedNodes, blocksize, + int newNumOfReplicas = totalReplicasExpected - results.size(); + return chooseTarget(newNumOfReplicas, writer, oldExcludedNodes, blocksize, maxNodesPerRack, results, false, storagePolicy, unavailableStorages, newBlock, null); } @@ -816,6 +834,9 @@ protected DatanodeStorageInfo chooseRandom(int numOfReplicas, includeType = type; break; } + logNodeIsNotChosen(null, + NodeNotChosenReason.NO_REQUIRED_STORAGE_TYPE, + " for storage type " + type); } } else { chosenNode = chooseDataNode(scope, excludedNodes); @@ -952,7 +973,7 @@ private static void logNodeIsNotChosen(DatanodeDescriptor node, if (LOG.isDebugEnabled()) { // build the error message for later use. 
debugLoggingBuilder.get() - .append("\n Datanode ").append(node) + .append("\n Datanode ").append((node==null)?"None":node) .append(" is not chosen since ").append(reason.getText()); if (reasonDetails != null) { debugLoggingBuilder.get().append(" ").append(reasonDetails); @@ -976,8 +997,9 @@ private static void logNodeIsNotChosen(DatanodeDescriptor node, * @return Return true if the datanode should be excluded, otherwise false */ boolean excludeNodeByLoad(DatanodeDescriptor node){ - final double maxLoad = considerLoadFactor * - stats.getInServiceXceiverAverage(); + double inServiceXceiverCount = getInServiceXceiverAverage(node); + final double maxLoad = considerLoadFactor * inServiceXceiverCount; + final int nodeLoad = node.getXceiverCount(); if ((nodeLoad > maxLoad) && (maxLoad > 0)) { logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY, @@ -987,6 +1009,48 @@ boolean excludeNodeByLoad(DatanodeDescriptor node){ return false; } + /** + * Gets the inServiceXceiver average count for the cluster, if + * considerLoadByStorageType is true, then load is calculated only for the + * storage types present on the datanode. + * @param node the datanode whose storage types are to be taken into account. + * @return the InServiceXceiverAverage count. + */ + private double getInServiceXceiverAverage(DatanodeDescriptor node) { + double inServiceXceiverCount; + if (considerLoadByStorageType) { + inServiceXceiverCount = + getInServiceXceiverAverageByStorageType(node.getStorageTypes()); + } else { + inServiceXceiverCount = stats.getInServiceXceiverAverage(); + } + return inServiceXceiverCount; + } + + /** + * Gets the average xceiver count with respect to the storage types. + * @param storageTypes the storage types. + * @return the average xceiver count wrt the provided storage types. 
+ */ + private double getInServiceXceiverAverageByStorageType( + Set storageTypes) { + double avgLoad = 0; + final Map storageStats = + stats.getStorageTypeStats(); + int numNodes = 0; + int numXceiver = 0; + for (StorageType s : storageTypes) { + StorageTypeStats storageTypeStats = storageStats.get(s); + numNodes += storageTypeStats.getNodesInService(); + numXceiver += storageTypeStats.getNodesInServiceXceiverCount(); + } + if (numNodes != 0) { + avgLoad = (double) numXceiver / numNodes; + } + + return avgLoad; + } + /** * Determine if a datanode is good for placing block. * @@ -1038,6 +1102,15 @@ boolean isGoodDatanode(DatanodeDescriptor node, return false; } + // check if the target is a slow node + if (dataNodePeerStatsEnabled && excludeSlowNodesEnabled) { + Set slowNodesUuidSet = DatanodeManager.getSlowNodesUuidSet(); + if (slowNodesUuidSet.contains(node.getDatanodeUuid())) { + logNodeIsNotChosen(node, NodeNotChosenReason.NODE_SLOW); + return false; + } + } + return true; } @@ -1100,7 +1173,7 @@ public BlockPlacementStatus verifyBlockPlacement(DatanodeInfo[] locs, .map(dn -> dn.getNetworkLocation()).distinct().count(); return new BlockPlacementStatusDefault(Math.toIntExact(rackCount), - minRacks, clusterMap.getNumOfRacks()); + minRacks, clusterMap.getNumOfNonEmptyRacks()); } /** @@ -1286,5 +1359,14 @@ protected Collection pickupReplicaSet( void setPreferLocalNode(boolean prefer) { this.preferLocalNode = prefer; } -} + @Override + public void setExcludeSlowNodesEnabled(boolean enable) { + this.excludeSlowNodesEnabled = enable; + } + + @Override + public boolean getExcludeSlowNodesEnabled() { + return excludeSlowNodesEnabled; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java index b204450491a7b..a3b3f482e8c23 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java @@ -42,7 +42,7 @@ protected int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas) { totalNumOfReplicas = clusterSize; } // No calculation needed when there is only one rack or picking one node. - int numOfRacks = clusterMap.getNumOfRacks(); + int numOfRacks = clusterMap.getNumOfNonEmptyRacks(); // HDFS-14527 return default when numOfRacks = 0 to avoid // ArithmeticException when calc maxNodesPerRack at following logic. if (numOfRacks <= 1 || totalNumOfReplicas <= 1) { @@ -90,38 +90,39 @@ protected Node chooseTargetInOrder(int numOfReplicas, EnumMap storageTypes) throws NotEnoughReplicasException { int totalReplicaExpected = results.size() + numOfReplicas; - int numOfRacks = clusterMap.getNumOfRacks(); - if (totalReplicaExpected < numOfRacks || - totalReplicaExpected % numOfRacks == 0) { - writer = chooseOnce(numOfReplicas, writer, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageTypes); - return writer; - } + int numOfRacks = clusterMap.getNumOfNonEmptyRacks(); - assert totalReplicaExpected > (maxNodesPerRack -1) * numOfRacks; + try { + if (totalReplicaExpected < numOfRacks || + totalReplicaExpected % numOfRacks == 0) { + writer = chooseOnce(numOfReplicas, writer, excludedNodes, blocksize, + maxNodesPerRack, results, avoidStaleNodes, storageTypes); + return writer; + } - // Calculate numOfReplicas for filling each rack exactly (maxNodesPerRack-1) - // replicas. 
- HashMap rackCounts = new HashMap<>(); - for (DatanodeStorageInfo dsInfo : results) { - String rack = dsInfo.getDatanodeDescriptor().getNetworkLocation(); - Integer count = rackCounts.get(rack); - if (count != null) { - rackCounts.put(rack, count + 1); - } else { - rackCounts.put(rack, 1); + assert totalReplicaExpected > (maxNodesPerRack -1) * numOfRacks; + + // Calculate numOfReplicas for filling each rack exactly (maxNodesPerRack-1) + // replicas. + HashMap rackCounts = new HashMap<>(); + for (DatanodeStorageInfo dsInfo : results) { + String rack = dsInfo.getDatanodeDescriptor().getNetworkLocation(); + Integer count = rackCounts.get(rack); + if (count != null) { + rackCounts.put(rack, count + 1); + } else { + rackCounts.put(rack, 1); + } } - } - int excess = 0; // Sum of the above (maxNodesPerRack-1) part of nodes in results - for (int count : rackCounts.values()) { - if (count > maxNodesPerRack -1) { - excess += count - (maxNodesPerRack -1); + int excess = 0; // Sum of the above (maxNodesPerRack-1) part of nodes in results + for (int count : rackCounts.values()) { + if (count > maxNodesPerRack -1) { + excess += count - (maxNodesPerRack -1); + } } - } - numOfReplicas = Math.min(totalReplicaExpected - results.size(), - (maxNodesPerRack -1) * numOfRacks - (results.size() - excess)); + numOfReplicas = Math.min(totalReplicaExpected - results.size(), + (maxNodesPerRack -1) * numOfRacks - (results.size() - excess)); - try { // Try to spread the replicas as evenly as possible across racks. // This is done by first placing with (maxNodesPerRack-1), then spreading // the remainder by calling again with maxNodesPerRack. @@ -237,14 +238,13 @@ public BlockPlacementStatus verifyBlockPlacement(DatanodeInfo[] locs, // only one rack return new BlockPlacementStatusDefault(1, 1, 1); } - // 1. Check that all locations are different. - // 2. Count locations on different racks. - Set racks = new TreeSet<>(); + // Count locations on different racks. 
+ Set racks = new HashSet<>(); for (DatanodeInfo dn : locs) { racks.add(dn.getNetworkLocation()); } return new BlockPlacementStatusDefault(racks.size(), numberOfReplicas, - clusterMap.getNumOfRacks()); + clusterMap.getNumOfNonEmptyRacks()); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java index 194f6ba9c874a..39f15191534d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java @@ -46,6 +46,11 @@ protected BlockPlacementPolicyWithNodeGroup() { public void initialize(Configuration conf, FSClusterStats stats, NetworkTopology clusterMap, Host2NodesMap host2datanodeMap) { + if (!(clusterMap instanceof NetworkTopologyWithNodeGroup)) { + throw new IllegalArgumentException( + "Configured cluster topology should be " + + NetworkTopologyWithNodeGroup.class.getName()); + } super.initialize(conf, stats, clusterMap, host2datanodeMap); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReportLeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReportLeaseManager.java index 2a4b6e8455226..2dc4a2b0e6f04 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReportLeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReportLeaseManager.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.util.Time; @@ -267,7 +267,7 @@ public synchronized long requestLease(DatanodeDescriptor dn) { private synchronized boolean pruneIfExpired(long monotonicNowMs, NodeData node) { - if (monotonicNowMs < node.leaseTimeMs + leaseExpiryMs) { + if (monotonicNowMs - node.leaseTimeMs < leaseExpiryMs) { return false; } LOG.info("Removing expired block report lease 0x{} for DN {}.", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java index 5af1bf281b150..ca08fe9a5398c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.XAttr; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockToMarkCorrupt.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockToMarkCorrupt.java index a871390ae2274..3ce5ef07acdca 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockToMarkCorrupt.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockToMarkCorrupt.java @@ -19,7 +19,7 @@ import static org.apache.hadoop.hdfs.server.blockmanagement.CorruptReplicasMap.Reason; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.protocol.Block; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java index a96c815b0069b..9deeb41a55500 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import java.util.Collections; import java.util.Iterator; import java.util.concurrent.atomic.LongAdder; @@ -32,6 +31,37 @@ * the datanodes that store the block. */ class BlocksMap { + public static class StorageIterator implements Iterator { + private final BlockInfo blockInfo; + private int nextIdx = 0; + + StorageIterator(BlockInfo blkInfo) { + this.blockInfo = blkInfo; + } + + @Override + public boolean hasNext() { + if (blockInfo == null) { + return false; + } + while (nextIdx < blockInfo.getCapacity() && + blockInfo.getDatanode(nextIdx) == null) { + // note that for striped blocks there may be null in the triplets + nextIdx++; + } + return nextIdx < blockInfo.getCapacity(); + } + + @Override + public DatanodeStorageInfo next() { + return blockInfo.getStorageInfo(nextIdx++); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Sorry. 
can't remove."); + } + } /** Constant {@link LightWeightGSet} capacity. */ private final int capacity; @@ -111,16 +141,6 @@ void removeBlock(BlockInfo block) { } } - /** - * Check if BlocksMap contains the block. - * - * @param b Block to check - * @return true if block is in the map, otherwise false - */ - boolean containsBlock(Block b) { - return blocks.contains(b); - } - /** Returns the block object if it exists in the map. */ BlockInfo getStoredBlock(Block b) { return blocks.get(b); @@ -131,9 +151,7 @@ BlockInfo getStoredBlock(Block b) { * returns {@link Iterable} of the storages the block belongs to. */ Iterable getStorages(Block b) { - BlockInfo block = blocks.get(b); - return block != null ? getStorages(block) - : Collections.emptyList(); + return getStorages(blocks.get(b)); } /** @@ -141,16 +159,12 @@ Iterable getStorages(Block b) { * returns {@link Iterable} of the storages the block belongs to. */ Iterable getStorages(final BlockInfo storedBlock) { - if (storedBlock == null) { - return Collections.emptyList(); - } else { - return new Iterable() { - @Override - public Iterator iterator() { - return storedBlock.getStorageInfos(); - } - }; - } + return new Iterable() { + @Override + public Iterator iterator() { + return new StorageIterator(storedBlock); + } + }; } /** counts number of containing nodes. Better than using iterator. */ @@ -169,7 +183,7 @@ boolean removeNode(Block b, DatanodeDescriptor node) { if (info == null) return false; - // remove block from the data-node set and the node from the block info + // remove block from the data-node list and the node from the block info boolean removed = removeBlock(node, info); if (info.hasNoStorage() // no datanodes left @@ -181,7 +195,7 @@ boolean removeNode(Block b, DatanodeDescriptor node) { } /** - * Remove block from the set of blocks belonging to the data-node. Remove + * Remove block from the list of blocks belonging to the data-node. Remove * data-node from the block. 
*/ static boolean removeBlock(DatanodeDescriptor dn, BlockInfo b) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java index 35e4a2e92b89b..eab58124cb11c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java @@ -55,7 +55,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Scans the namesystem, scheduling blocks to be cached as appropriate. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java index d607789420c20..662e2e471dfbc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java @@ -17,13 +17,12 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; -import com.google.common.collect.UnmodifiableIterator; -import com.google.common.collect.Iterables; -import com.google.common.collect.Collections2; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.UnmodifiableIterator; +import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -40,7 +39,7 @@ import java.util.Iterator; import java.util.Map; -import com.google.common.base.Predicate; + import org.apache.hadoop.hdfs.util.CombinedHostsFileReader; @@ -82,37 +81,26 @@ synchronized void add(InetAddress addr, // If the includes list is empty, act as if everything is in the // includes list. synchronized boolean isIncluded(final InetSocketAddress address) { - return emptyInServiceNodeLists || Iterables.any( - allDNs.get(address.getAddress()), - new Predicate() { - public boolean apply(DatanodeAdminProperties input) { - return input.getPort() == 0 || - input.getPort() == address.getPort(); - } - }); + return emptyInServiceNodeLists || allDNs.get(address.getAddress()) + .stream().anyMatch( + input -> input.getPort() == 0 || + input.getPort() == address.getPort()); } synchronized boolean isExcluded(final InetSocketAddress address) { - return Iterables.any(allDNs.get(address.getAddress()), - new Predicate() { - public boolean apply(DatanodeAdminProperties input) { - return input.getAdminState().equals( - AdminStates.DECOMMISSIONED) && - (input.getPort() == 0 || - input.getPort() == address.getPort()); - } - }); + return allDNs.get(address.getAddress()).stream().anyMatch( + input -> input.getAdminState().equals( + AdminStates.DECOMMISSIONED) && + (input.getPort() == 0 || + input.getPort() == address.getPort())); } synchronized String getUpgradeDomain(final InetSocketAddress address) { - Iterable datanode = Iterables.filter( - allDNs.get(address.getAddress()), - new Predicate() { - public boolean apply(DatanodeAdminProperties input) { - return (input.getPort() == 0 || - input.getPort() == address.getPort()); - } - }); + Iterable datanode = + 
allDNs.get(address.getAddress()).stream().filter( + input -> (input.getPort() == 0 || + input.getPort() == address.getPort())).collect( + Collectors.toList()); return datanode.iterator().hasNext() ? datanode.iterator().next().getUpgradeDomain() : null; } @@ -127,36 +115,22 @@ public Iterator iterator() { } Iterable getExcludes() { - return new Iterable() { - @Override - public Iterator iterator() { - return new HostIterator( - Collections2.filter(allDNs.entries(), - new Predicate>() { - public boolean apply(java.util.Map.Entry entry) { - return entry.getValue().getAdminState().equals( - AdminStates.DECOMMISSIONED); - } - } - )); - } - }; + return () -> new HostIterator( + allDNs.entries().stream().filter( + entry -> entry.getValue().getAdminState().equals( + AdminStates.DECOMMISSIONED)).collect( + Collectors.toList())); } synchronized long getMaintenanceExpireTimeInMS( final InetSocketAddress address) { - Iterable datanode = Iterables.filter( - allDNs.get(address.getAddress()), - new Predicate() { - public boolean apply(DatanodeAdminProperties input) { - return input.getAdminState().equals( + Iterable datanode = + allDNs.get(address.getAddress()).stream().filter( + input -> input.getAdminState().equals( AdminStates.IN_MAINTENANCE) && (input.getPort() == 0 || - input.getPort() == address.getPort()); - } - }); + input.getPort() == address.getPort())).collect( + Collectors.toList()); // if DN isn't set to maintenance state, ignore MaintenanceExpireTimeInMS // set in the config. return datanode.iterator().hasNext() ? 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java index fc31584ae694c..fdc8bb7491c15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.Server; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Stores information about all corrupt blocks in the File System. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminBackoffMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminBackoffMonitor.java index af2c12f35c78a..a4eb1776fbef0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminBackoffMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminBackoffMonitor.java @@ -17,13 +17,14 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.collect.Iterables; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.INodeId; import org.apache.hadoop.hdfs.util.LightWeightHashSet; import org.apache.hadoop.hdfs.util.LightWeightLinkedSet; +import 
org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.HashMap; @@ -32,8 +33,7 @@ import java.util.List; import java.util.Iterator; import java.util.LinkedList; -import java.util.ArrayDeque; -import java.util.Queue; +import java.util.stream.Collectors; /** * This class implements the logic to track decommissioning and entering @@ -71,16 +71,10 @@ public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase outOfServiceNodeBlocks = new HashMap<>(); /** - * Any nodes where decommission or maintenance has been cancelled are added - * to this queue for later processing. - */ - private final Queue cancelledNodes = new ArrayDeque<>(); - - /** - * The numbe of blocks to process when moving blocks to pendingReplication + * The number of blocks to process when moving blocks to pendingReplication * before releasing and reclaiming the namenode lock. */ - private int blocksPerLock; + private volatile int blocksPerLock; /** * The number of blocks that have been checked on this tick. @@ -89,7 +83,7 @@ public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase /** * The maximum number of blocks to hold in PendingRep at any time. */ - private int pendingRepLimit; + private volatile int pendingRepLimit; /** * The list of blocks which have been placed onto the replication queue @@ -149,8 +143,8 @@ protected void processConf() { */ @Override public void stopTrackingNode(DatanodeDescriptor dn) { - pendingNodes.remove(dn); - cancelledNodes.add(dn); + getPendingNodes().remove(dn); + getCancelledNodes().add(dn); } @Override @@ -189,6 +183,29 @@ public void run() { * node will be removed from tracking by the pending cancel. 
*/ processCancelledNodes(); + + // Having more nodes decommissioning than can be tracked will impact decommissioning + // performance due to queueing delay + int numTrackedNodes = outOfServiceNodeBlocks.size(); + int numQueuedNodes = getPendingNodes().size(); + int numDecommissioningNodes = numTrackedNodes + numQueuedNodes; + if (numDecommissioningNodes > maxConcurrentTrackedNodes) { + LOG.warn( + "{} nodes are decommissioning but only {} nodes will be tracked at a time. " + + "{} nodes are currently queued waiting to be decommissioned.", + numDecommissioningNodes, maxConcurrentTrackedNodes, numQueuedNodes); + + // Re-queue unhealthy nodes to make space for decommissioning healthy nodes + final List unhealthyDns = outOfServiceNodeBlocks.keySet().stream() + .filter(dn -> !blockManager.isNodeHealthyForDecommissionOrMaintenance(dn)) + .collect(Collectors.toList()); + getUnhealthyNodesToRequeue(unhealthyDns, numDecommissioningNodes).forEach(dn -> { + getPendingNodes().add(dn); + outOfServiceNodeBlocks.remove(dn); + pendingRep.remove(dn); + }); + } + processPendingNodes(); } finally { namesystem.writeUnlock(); @@ -207,8 +224,8 @@ public void run() { LOG.info("Checked {} blocks this tick. {} nodes are now " + "in maintenance or transitioning state. {} nodes pending. {} " + "nodes waiting to be cancelled.", - numBlocksChecked, outOfServiceNodeBlocks.size(), pendingNodes.size(), - cancelledNodes.size()); + numBlocksChecked, outOfServiceNodeBlocks.size(), getPendingNodes().size(), + getCancelledNodes().size()); } } @@ -220,10 +237,10 @@ public void run() { * the pendingNodes list from being modified externally. 
*/ private void processPendingNodes() { - while (!pendingNodes.isEmpty() && + while (!getPendingNodes().isEmpty() && (maxConcurrentTrackedNodes == 0 || outOfServiceNodeBlocks.size() < maxConcurrentTrackedNodes)) { - outOfServiceNodeBlocks.put(pendingNodes.poll(), null); + outOfServiceNodeBlocks.put(getPendingNodes().poll(), null); } } @@ -235,8 +252,8 @@ private void processPendingNodes() { * write lock to prevent the cancelledNodes list being modified externally. */ private void processCancelledNodes() { - while(!cancelledNodes.isEmpty()) { - DatanodeDescriptor dn = cancelledNodes.poll(); + while(!getCancelledNodes().isEmpty()) { + DatanodeDescriptor dn = getCancelledNodes().poll(); outOfServiceNodeBlocks.remove(dn); pendingRep.remove(dn); } @@ -785,6 +802,26 @@ private boolean isBlockReplicatedOk(DatanodeDescriptor datanode, return false; } + @VisibleForTesting + @Override + public int getPendingRepLimit() { + return pendingRepLimit; + } + + public void setPendingRepLimit(int pendingRepLimit) { + this.pendingRepLimit = pendingRepLimit; + } + + @VisibleForTesting + @Override + public int getBlocksPerLock() { + return blocksPerLock; + } + + public void setBlocksPerLock(int blocksPerLock) { + this.blocksPerLock = blocksPerLock; + } + static class BlockStats { private LightWeightHashSet openFiles = new LightWeightLinkedSet<>(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminDefaultMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminDefaultMonitor.java index a5650d1c4865b..f43f8cf10d83d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminDefaultMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminDefaultMonitor.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; 
-import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.server.namenode.INode; @@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.util.LightWeightHashSet; import org.apache.hadoop.hdfs.util.LightWeightLinkedSet; import org.apache.hadoop.util.ChunkedArrayList; +import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,7 +41,7 @@ /** * Checks to see if datanodes have finished DECOMMISSION_INPROGRESS or * ENTERING_MAINTENANCE state. - *

      + *

      * Since this is done while holding the namesystem lock, * the amount of work per monitor tick is limited. */ @@ -123,8 +124,8 @@ private boolean exceededNumBlocksPerCheck() { @Override public void stopTrackingNode(DatanodeDescriptor dn) { - pendingNodes.remove(dn); - outOfServiceNodeBlocks.remove(dn); + getPendingNodes().remove(dn); + getCancelledNodes().add(dn); } @Override @@ -137,6 +138,28 @@ public int getNumNodesChecked() { return numNodesChecked; } + @VisibleForTesting + @Override + public int getPendingRepLimit() { + return 0; + } + + @Override + public void setPendingRepLimit(int pendingRepLimit) { + // nothing. + } + + @VisibleForTesting + @Override + public int getBlocksPerLock() { + return 0; + } + + @Override + public void setBlocksPerLock(int blocksPerLock) { + // nothing. + } + @Override public void run() { LOG.debug("DatanodeAdminMonitor is running."); @@ -152,6 +175,7 @@ public void run() { // Check decommission or maintenance progress. namesystem.writeLock(); try { + processCancelledNodes(); processPendingNodes(); check(); } catch (Exception e) { @@ -164,19 +188,33 @@ public void run() { LOG.info("Checked {} blocks and {} nodes this tick. {} nodes are now " + "in maintenance or transitioning state. {} nodes pending.", numBlocksChecked, numNodesChecked, outOfServiceNodeBlocks.size(), - pendingNodes.size()); + getPendingNodes().size()); } } /** - * Pop datanodes off the pending list and into decomNodeBlocks, + * Pop datanodes off the pending priority queue and into decomNodeBlocks, * subject to the maxConcurrentTrackedNodes limit. 
*/ private void processPendingNodes() { - while (!pendingNodes.isEmpty() && + while (!getPendingNodes().isEmpty() && (maxConcurrentTrackedNodes == 0 || outOfServiceNodeBlocks.size() < maxConcurrentTrackedNodes)) { - outOfServiceNodeBlocks.put(pendingNodes.poll(), null); + outOfServiceNodeBlocks.put(getPendingNodes().poll(), null); + } + } + + /** + * Process any nodes which have had their decommission or maintenance mode + * cancelled by an administrator. + * + * This method must be executed under the write lock to prevent the + * internal structures being modified concurrently. + */ + private void processCancelledNodes() { + while(!getCancelledNodes().isEmpty()) { + DatanodeDescriptor dn = getCancelledNodes().poll(); + outOfServiceNodeBlocks.remove(dn); } } @@ -185,6 +223,7 @@ private void check() { it = new CyclicIteration<>(outOfServiceNodeBlocks, iterkey).iterator(); final List toRemove = new ArrayList<>(); + final List unhealthyDns = new ArrayList<>(); while (it.hasNext() && !exceededNumBlocksPerCheck() && namesystem .isRunning()) { @@ -221,6 +260,10 @@ private void check() { LOG.debug("Processing {} node {}", dn.getAdminState(), dn); pruneReliableBlocks(dn, blocks); } + final boolean isHealthy = blockManager.isNodeHealthyForDecommissionOrMaintenance(dn); + if (!isHealthy) { + unhealthyDns.add(dn); + } if (blocks.size() == 0) { if (!fullScan) { // If we didn't just do a full scan, need to re-check with the @@ -236,8 +279,6 @@ private void check() { } // If the full scan is clean AND the node liveness is okay, // we can finally mark as DECOMMISSIONED or IN_MAINTENANCE. - final boolean isHealthy = - blockManager.isNodeHealthyForDecommissionOrMaintenance(dn); if (blocks.size() == 0 && isHealthy) { if (dn.isDecommissionInProgress()) { dnAdmin.setDecommissioned(dn); @@ -270,12 +311,32 @@ private void check() { // an invalid state. 
LOG.warn("DatanodeAdminMonitor caught exception when processing node " + "{}.", dn, e); - pendingNodes.add(dn); + getPendingNodes().add(dn); toRemove.add(dn); + unhealthyDns.remove(dn); } finally { iterkey = dn; } } + + // Having more nodes decommissioning than can be tracked will impact decommissioning + // performance due to queueing delay + int numTrackedNodes = outOfServiceNodeBlocks.size() - toRemove.size(); + int numQueuedNodes = getPendingNodes().size(); + int numDecommissioningNodes = numTrackedNodes + numQueuedNodes; + if (numDecommissioningNodes > maxConcurrentTrackedNodes) { + LOG.warn( + "{} nodes are decommissioning but only {} nodes will be tracked at a time. " + + "{} nodes are currently queued waiting to be decommissioned.", + numDecommissioningNodes, maxConcurrentTrackedNodes, numQueuedNodes); + + // Re-queue unhealthy nodes to make space for decommissioning healthy nodes + getUnhealthyNodesToRequeue(unhealthyDns, numDecommissioningNodes).forEach(dn -> { + getPendingNodes().add(dn); + outOfServiceNodeBlocks.remove(dn); + }); + } + // Remove the datanodes that are DECOMMISSIONED or in service after // maintenance expiration. for (DatanodeDescriptor dn : toRemove) { @@ -367,8 +428,10 @@ private void processBlocksInternal( // Remove the block from the list if it's no longer in the block map, // e.g. 
the containing file has been deleted if (blockManager.blocksMap.getStoredBlock(block) == null) { - LOG.trace("Removing unknown block {}", block); - it.remove(); + if (pruneReliableBlocks) { + LOG.trace("Removing unknown block {}", block); + it.remove(); + } continue; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java index 0771c28243a3c..421d15f04d674 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkArgument; import static org.apache.hadoop.util.Time.monotonicNow; import java.util.Queue; @@ -33,8 +33,8 @@ import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * Manages decommissioning and maintenance state for DataNodes. 
A background @@ -148,7 +148,7 @@ void activate(Configuration conf) { throw new RuntimeException("Unable to create the Decommission monitor " + "from "+cls, e); } - executor.scheduleAtFixedRate(monitor, intervalSecs, intervalSecs, + executor.scheduleWithFixedDelay(monitor, intervalSecs, intervalSecs, TimeUnit.SECONDS); LOG.debug("Activating DatanodeAdminManager with interval {} seconds, " + @@ -176,6 +176,8 @@ public void startDecommission(DatanodeDescriptor node) { if (!node.isDecommissionInProgress() && !node.isDecommissioned()) { // Update DN stats maintained by HeartbeatManager hbManager.startDecommission(node); + // Update cluster's emptyRack + blockManager.getDatanodeManager().getNetworkTopology().decommissionNode(node); // hbManager.startDecommission will set dead node to decommissioned. if (node.isDecommissionInProgress()) { for (DatanodeStorageInfo storage : node.getStorageInfos()) { @@ -200,6 +202,8 @@ public void stopDecommission(DatanodeDescriptor node) { if (node.isDecommissionInProgress() || node.isDecommissioned()) { // Update DN stats maintained by HeartbeatManager hbManager.stopDecommission(node); + // Update cluster's emptyRack + blockManager.getDatanodeManager().getNetworkTopology().recommissionNode(node); // extra redundancy blocks will be detected and processed when // the dead node comes back and send in its full block report. 
if (node.isAlive()) { @@ -345,8 +349,7 @@ protected boolean isSufficient(BlockInfo block, BlockCollection bc, } } } - if (isMaintenance - && numLive >= blockManager.getMinReplicationToBeInMaintenance()) { + if (isMaintenance && numLive >= blockManager.getMinMaintenanceStorageNum(block)) { return true; } return false; @@ -376,6 +379,8 @@ protected void logBlockReplicationInfo(BlockInfo block, + ", maintenance replicas: " + num.maintenanceReplicas() + ", live entering maintenance replicas: " + num.liveEnteringMaintenanceReplicas() + + ", replicas on stale nodes: " + num.replicasOnStaleNodes() + + ", readonly replicas: " + num.readOnlyReplicas() + ", excess replicas: " + num.excessReplicas() + ", Is Open File: " + bc.isUnderConstruction() + ", Datanodes having this block: " + nodeList + ", Current Datanode: " @@ -410,4 +415,30 @@ void runMonitorForTest() throws ExecutionException, InterruptedException { executor.submit(monitor).get(); } -} \ No newline at end of file + public void refreshPendingRepLimit(int pendingRepLimit, String key) { + ensurePositiveInt(pendingRepLimit, key); + this.monitor.setPendingRepLimit(pendingRepLimit); + } + + @VisibleForTesting + public int getPendingRepLimit() { + return this.monitor.getPendingRepLimit(); + } + + public void refreshBlocksPerLock(int blocksPerLock, String key) { + ensurePositiveInt(blocksPerLock, key); + this.monitor.setBlocksPerLock(blocksPerLock); + } + + @VisibleForTesting + public int getBlocksPerLock() { + return this.monitor.getBlocksPerLock(); + } + + private void ensurePositiveInt(int val, String key) { + checkArgument( + (val > 0), + key + " = '" + val + "' is invalid. 
" + + "It should be a positive, non-zero integer value."); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorBase.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorBase.java index 9eee241edddf8..5aab1b4a8a18c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorBase.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorBase.java @@ -25,7 +25,11 @@ import org.slf4j.LoggerFactory; import java.util.ArrayDeque; +import java.util.Comparator; +import java.util.List; +import java.util.PriorityQueue; import java.util.Queue; +import java.util.stream.Stream; /** * This abstract class provides some base methods which are inherited by @@ -35,12 +39,26 @@ public abstract class DatanodeAdminMonitorBase implements DatanodeAdminMonitorInterface, Configurable { + /** + * Sort by lastUpdate time descending order, such that unhealthy + * nodes are de-prioritized given they cannot be decommissioned. + */ + static final Comparator PENDING_NODES_QUEUE_COMPARATOR = + (dn1, dn2) -> Long.compare(dn2.getLastUpdate(), dn1.getLastUpdate()); + protected BlockManager blockManager; protected Namesystem namesystem; protected DatanodeAdminManager dnAdmin; protected Configuration conf; - protected final Queue pendingNodes = new ArrayDeque<>(); + private final PriorityQueue pendingNodes = new PriorityQueue<>( + PENDING_NODES_QUEUE_COMPARATOR); + + /** + * Any nodes where decommission or maintenance has been cancelled are added + * to this queue for later processing. + */ + private final Queue cancelledNodes = new ArrayDeque<>(); /** * The maximum number of nodes to track in outOfServiceNodeBlocks. 
@@ -151,4 +169,39 @@ public int getPendingNodeCount() { public Queue getPendingNodes() { return pendingNodes; } + + @Override + public Queue getCancelledNodes() { + return cancelledNodes; + } + + /** + * If node "is dead while in Decommission In Progress", it cannot be decommissioned + * until it becomes healthy again. If there are more pendingNodes than can be tracked + * & some unhealthy tracked nodes, then re-queue the unhealthy tracked nodes + * to avoid blocking decommissioning of healthy nodes. + * + * @param unhealthyDns The unhealthy datanodes which may be re-queued + * @param numDecommissioningNodes The total number of nodes being decommissioned + * @return Stream of unhealthy nodes to be re-queued + */ + Stream getUnhealthyNodesToRequeue( + final List unhealthyDns, int numDecommissioningNodes) { + if (!unhealthyDns.isEmpty()) { + // Compute the number of unhealthy nodes to re-queue + final int numUnhealthyNodesToRequeue = + Math.min(numDecommissioningNodes - maxConcurrentTrackedNodes, unhealthyDns.size()); + + LOG.warn("{} limit has been reached, re-queueing {} " + + "nodes which are dead while in Decommission In Progress.", + DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_MAX_CONCURRENT_TRACKED_NODES, + numUnhealthyNodesToRequeue); + + // Order unhealthy nodes by lastUpdate descending such that nodes + // which have been unhealthy the longest are preferred to be re-queued + return unhealthyDns.stream().sorted(PENDING_NODES_QUEUE_COMPARATOR.reversed()) + .limit(numUnhealthyNodesToRequeue); + } + return Stream.empty(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorInterface.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorInterface.java index f34c00587c78d..a4774742108fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorInterface.java 
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorInterface.java @@ -32,8 +32,17 @@ public interface DatanodeAdminMonitorInterface extends Runnable { int getTrackedNodeCount(); int getNumNodesChecked(); Queue getPendingNodes(); + Queue getCancelledNodes(); void setBlockManager(BlockManager bm); void setDatanodeAdminManager(DatanodeAdminManager dnm); void setNameSystem(Namesystem ns); + + int getPendingRepLimit(); + + void setPendingRepLimit(int pendingRepLimit); + + int getBlocksPerLock(); + + void setBlocksPerLock(int blocksPerLock); } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index 3fa9b3ad51dd2..69779a62dadd6 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -30,7 +30,7 @@ import java.util.Queue; import java.util.Set; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -237,7 +237,8 @@ public Type getType() { */ public DatanodeDescriptor(DatanodeID nodeID) { super(nodeID); - updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0, null); + setLastUpdate(Time.now()); + setLastUpdateMonotonic(Time.monotonicNow()); } /** @@ -248,7 +249,8 @@ public DatanodeDescriptor(DatanodeID nodeID) { public DatanodeDescriptor(DatanodeID nodeID, String networkLocation) { super(nodeID, networkLocation); - updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0, 
null); + setLastUpdate(Time.now()); + setLastUpdateMonotonic(Time.monotonicNow()); } public CachedBlocksList getPendingCached() { @@ -634,6 +636,17 @@ Iterator getBlockIterator(final int startBlock) { return new BlockIterator(startBlock, getStorageInfos()); } + /** + * Get iterator, which starts iterating from the specified block and storages. + * + * @param startBlock on which blocks are start iterating + * @param storageInfos specified storages + */ + Iterator getBlockIterator( + final int startBlock, final DatanodeStorageInfo[] storageInfos) { + return new BlockIterator(startBlock, storageInfos); + } + @VisibleForTesting public void incrementPendingReplicationWithoutTargets() { pendingReplicationWithoutTargets++; @@ -659,10 +672,10 @@ public void addBlockToBeReplicated(Block block, */ void addBlockToBeErasureCoded(ExtendedBlock block, DatanodeDescriptor[] sources, DatanodeStorageInfo[] targets, - byte[] liveBlockIndices, ErasureCodingPolicy ecPolicy) { + byte[] liveBlockIndices, byte[] excludeReconstrutedIndices, ErasureCodingPolicy ecPolicy) { assert (block != null && sources != null && sources.length > 0); BlockECReconstructionInfo task = new BlockECReconstructionInfo(block, - sources, targets, liveBlockIndices, ecPolicy); + sources, targets, liveBlockIndices, excludeReconstrutedIndices, ecPolicy); erasurecodeBlocks.offer(task); BlockManager.LOG.debug("Adding block reconstruction task " + task + "to " + getName() + ", current queue size is " + erasurecodeBlocks.size()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index f5cf1d5744b07..07381fc696fae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -20,10 +20,14 @@ import static org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol.DNA_ERASURE_CODING_RECONSTRUCTION; import static org.apache.hadoop.util.Time.monotonicNow; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.net.InetAddresses; - +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; + +import org.apache.hadoop.fs.StorageType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -52,6 +56,7 @@ import org.apache.hadoop.net.*; import org.apache.hadoop.net.NetworkTopology.InvalidTopologyException; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Timer; @@ -134,9 +139,15 @@ public class DatanodeManager { /** Whether or not to avoid using stale DataNodes for reading */ private final boolean avoidStaleDataNodesForRead; + /** Whether or not to avoid using slow DataNodes for reading. */ + private volatile boolean avoidSlowDataNodesForRead; + /** Whether or not to consider lad for reading. */ private final boolean readConsiderLoad; + /** Whether or not to consider storageType for reading. */ + private final boolean readConsiderStorageType; + /** * Whether or not to avoid using stale DataNodes for writing. 
* Note that, even if this is configured, the policy may be @@ -183,10 +194,6 @@ public class DatanodeManager { private final HashMap datanodesSoftwareVersions = new HashMap<>(4, 0.75f); - /** - * True if we should process latency metrics from downstream peers. - */ - private final boolean dataNodePeerStatsEnabled; /** * True if we should process latency metrics from individual DN disks. */ @@ -197,8 +204,15 @@ public class DatanodeManager { */ private final boolean useDfsNetworkTopology; + private static final String IP_PORT_SEPARATOR = ":"; + @Nullable - private final SlowPeerTracker slowPeerTracker; + private SlowPeerTracker slowPeerTracker; + private static Set slowNodesUuidSet = Sets.newConcurrentHashSet(); + private Daemon slowPeerCollectorDaemon; + private final long slowPeerCollectionInterval; + private volatile int maxSlowPeerReportNodes; + @Nullable private final SlowDiskTracker slowDiskTracker; @@ -225,27 +239,32 @@ public class DatanodeManager { } else { networktopology = NetworkTopology.getInstance(conf); } - this.heartbeatManager = new HeartbeatManager(namesystem, blockManager, conf); this.datanodeAdminManager = new DatanodeAdminManager(namesystem, blockManager, heartbeatManager); this.fsClusterStats = newFSClusterStats(); - this.dataNodePeerStatsEnabled = conf.getBoolean( - DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, - DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT); this.dataNodeDiskStatsEnabled = Util.isDiskStatsEnabled(conf.getInt( DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, DFSConfigKeys. DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT)); - final Timer timer = new Timer(); - this.slowPeerTracker = dataNodePeerStatsEnabled ? 
- new SlowPeerTracker(conf, timer) : null; - + final boolean dataNodePeerStatsEnabledVal = + conf.getBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT); + initSlowPeerTracker(conf, timer, dataNodePeerStatsEnabledVal); + this.maxSlowPeerReportNodes = conf.getInt( + DFSConfigKeys.DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY, + DFSConfigKeys.DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_DEFAULT); + this.slowPeerCollectionInterval = conf.getTimeDuration( + DFSConfigKeys.DFS_NAMENODE_SLOWPEER_COLLECT_INTERVAL_KEY, + DFSConfigKeys.DFS_NAMENODE_SLOWPEER_COLLECT_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS); + if (slowPeerTracker.isSlowPeerTrackerEnabled()) { + startSlowPeerCollector(); + } this.slowDiskTracker = dataNodeDiskStatsEnabled ? new SlowDiskTracker(conf, timer) : null; - this.defaultXferPort = NetUtils.createSocketAddr( conf.getTrimmed(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, DFSConfigKeys.DFS_DATANODE_ADDRESS_DEFAULT)).getPort(); @@ -266,11 +285,9 @@ public class DatanodeManager { } catch (IOException e) { LOG.error("error reading hosts files: ", e); } - this.dnsToSwitchMapping = ReflectionUtils.newInstance( conf.getClass(DFSConfigKeys.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, ScriptBasedMapping.class, DNSToSwitchMapping.class), conf); - this.rejectUnresolvedTopologyDN = conf.getBoolean( DFSConfigKeys.DFS_REJECT_UNRESOLVED_DN_TOPOLOGY_MAPPING_KEY, DFSConfigKeys.DFS_REJECT_UNRESOLVED_DN_TOPOLOGY_MAPPING_DEFAULT); @@ -285,7 +302,6 @@ public class DatanodeManager { } dnsToSwitchMapping.resolve(locations); } - heartbeatIntervalSeconds = conf.getTimeDuration( DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.SECONDS); @@ -294,32 +310,37 @@ public class DatanodeManager { DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT); // 5 minutes this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval + 10 * 1000 * heartbeatIntervalSeconds; - - // Effected block 
invalidate limit is the bigger value between - // value configured in hdfs-site.xml, and 20 * HB interval. final int configuredBlockInvalidateLimit = conf.getInt( DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_KEY, DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT); - final int countedBlockInvalidateLimit = 20*(int)(heartbeatIntervalSeconds); - this.blockInvalidateLimit = Math.max(countedBlockInvalidateLimit, - configuredBlockInvalidateLimit); - LOG.info(DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_KEY - + ": configured=" + configuredBlockInvalidateLimit - + ", counted=" + countedBlockInvalidateLimit - + ", effected=" + blockInvalidateLimit); - + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); this.checkIpHostnameInRegistration = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY + "=" + checkIpHostnameInRegistration); - this.avoidStaleDataNodesForRead = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT); + this.avoidSlowDataNodesForRead = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY, + DFSConfigKeys.DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_DEFAULT); this.readConsiderLoad = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERLOAD_KEY, DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERLOAD_DEFAULT); + this.readConsiderStorageType = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERSTORAGETYPE_KEY, + DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERSTORAGETYPE_DEFAULT); + if (readConsiderLoad && readConsiderStorageType) { + LOG.warn( + "{} and {} are incompatible and only one can be enabled. " + + "Both are currently enabled. 
{} will be ignored.", + DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERLOAD_KEY, + DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERSTORAGETYPE_KEY, + DFSConfigKeys.DFS_NAMENODE_READ_CONSIDERSTORAGETYPE_KEY); + } this.avoidStaleDataNodesForWrite = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_DEFAULT); @@ -341,6 +362,59 @@ public class DatanodeManager { DFSConfigKeys.DFS_NAMENODE_BLOCKS_PER_POSTPONEDBLOCKS_RESCAN_KEY_DEFAULT); } + /** + * Determines whether slow peer tracker should be enabled. If dataNodePeerStatsEnabledVal is + * true, slow peer tracker is initialized. + * + * @param conf The configuration to use while initializing slowPeerTracker. + * @param timer Timer object for slowPeerTracker. + * @param dataNodePeerStatsEnabled To determine whether slow peer tracking should be enabled. + */ + public void initSlowPeerTracker(Configuration conf, Timer timer, + boolean dataNodePeerStatsEnabled) { + this.slowPeerTracker = dataNodePeerStatsEnabled ? 
+ new SlowPeerTracker(conf, timer) : + new SlowPeerDisabledTracker(conf, timer); + } + + private void startSlowPeerCollector() { + if (slowPeerCollectorDaemon != null) { + return; + } + slowPeerCollectorDaemon = new Daemon(new Runnable() { + @Override + public void run() { + while (true) { + try { + slowNodesUuidSet = getSlowPeersUuidSet(); + } catch (Exception e) { + LOG.error("Failed to collect slow peers", e); + } + + try { + Thread.sleep(slowPeerCollectionInterval); + } catch (InterruptedException e) { + LOG.error("Slow peers collection thread interrupted", e); + return; + } + } + } + }); + slowPeerCollectorDaemon.start(); + } + + public void stopSlowPeerCollector() { + if (slowPeerCollectorDaemon == null) { + return; + } + slowPeerCollectorDaemon.interrupt(); + try { + slowPeerCollectorDaemon.join(); + } catch (InterruptedException e) { + LOG.error("Slow peers collection thread did not shutdown", e); + } + } + private static long getStaleIntervalFromConf(Configuration conf, long heartbeatExpireInterval) { long staleInterval = conf.getLong( @@ -386,6 +460,7 @@ void activate(final Configuration conf) { void close() { datanodeAdminManager.close(); heartbeatManager.close(); + stopSlowPeerCollector(); } /** @return the network topology. 
*/ @@ -428,16 +503,37 @@ public DatanodeStatistics getDatanodeStatistics() { } private boolean isInactive(DatanodeInfo datanode) { - return datanode.isDecommissioned() || + return datanode.isDecommissioned() || datanode.isEnteringMaintenance() || (avoidStaleDataNodesForRead && datanode.isStale(staleInterval)); + } + private boolean isSlowNode(String dnUuid) { + return avoidSlowDataNodesForRead && slowNodesUuidSet.contains(dnUuid); } - + + public void setAvoidSlowDataNodesForReadEnabled(boolean enable) { + this.avoidSlowDataNodesForRead = enable; + } + + @VisibleForTesting + public boolean getEnableAvoidSlowDataNodesForRead() { + return this.avoidSlowDataNodesForRead; + } + + public void setMaxSlowpeerCollectNodes(int maxNodes) { + this.maxSlowPeerReportNodes = maxNodes; + } + + @VisibleForTesting + public int getMaxSlowpeerCollectNodes() { + return this.maxSlowPeerReportNodes; + } + /** * Sort the non-striped located blocks by the distance to the target host. * - * For striped blocks, it will only move decommissioned/stale nodes to the - * bottom. For example, assume we have storage list: + * For striped blocks, it will only move decommissioned/stale/slow + * nodes to the bottom. For example, assume we have storage list: * d0, d1, d2, d3, d4, d5, d6, d7, d8, d9 * mapping to block indices: * 0, 1, 2, 3, 4, 5, 6, 7, 8, 2 @@ -449,8 +545,11 @@ private boolean isInactive(DatanodeInfo datanode) { */ public void sortLocatedBlocks(final String targetHost, final List locatedBlocks) { - Comparator comparator = avoidStaleDataNodesForRead ? - new DFSUtil.ServiceAndStaleComparator(staleInterval) : + Comparator comparator = + avoidStaleDataNodesForRead || avoidSlowDataNodesForRead ? 
+ new DFSUtil.StaleAndSlowComparator( + avoidStaleDataNodesForRead, staleInterval, + avoidSlowDataNodesForRead, slowNodesUuidSet) : new DFSUtil.ServiceComparator(); // sort located block for (LocatedBlock lb : locatedBlocks) { @@ -463,7 +562,8 @@ public void sortLocatedBlocks(final String targetHost, } /** - * Move decommissioned/stale datanodes to the bottom. After sorting it will + * Move decommissioned/entering_maintenance/stale/slow + * datanodes to the bottom. After sorting it will * update block indices and block tokens respectively. * * @param lb located striped block @@ -494,8 +594,9 @@ private void sortLocatedStripedBlock(final LocatedBlock lb, } /** - * Move decommissioned/stale datanodes to the bottom. Also, sort nodes by - * network distance. + * Move decommissioned/entering_maintenance/stale/slow + * datanodes to the bottom. Also, sort nodes by network + * distance. * * @param lb located block * @param targetHost target host @@ -524,13 +625,16 @@ private void sortLocatedBlock(final LocatedBlock lb, String targetHost, } } - DatanodeInfo[] di = lb.getLocations(); - // Move decommissioned/stale datanodes to the bottom + DatanodeInfoWithStorage[] di = lb.getLocations(); + // Move decommissioned/entering_maintenance/stale/slow + // datanodes to the bottom Arrays.sort(di, comparator); // Sort nodes by network distance only for located blocks int lastActiveIndex = di.length - 1; - while (lastActiveIndex > 0 && isInactive(di[lastActiveIndex])) { + while (lastActiveIndex > 0 && ( + isSlowNode(di[lastActiveIndex].getDatanodeUuid()) || + isInactive(di[lastActiveIndex]))) { --lastActiveIndex; } int activeLen = lastActiveIndex + 1; @@ -547,11 +651,15 @@ private void sortLocatedBlock(final LocatedBlock lb, String targetHost, lb.updateCachedStorageInfo(); } - private Consumer> createSecondaryNodeSorter() { - Consumer> secondarySort = - list -> Collections.shuffle(list); + private Consumer> createSecondaryNodeSorter() { + Consumer> secondarySort = null; + if 
(readConsiderStorageType) { + Comparator comp = + Comparator.comparing(DatanodeInfoWithStorage::getStorageType); + secondarySort = list -> Collections.sort(list, comp); + } if (readConsiderLoad) { - Comparator comp = + Comparator comp = Comparator.comparingInt(DatanodeInfo::getXceiverCount); secondarySort = list -> Collections.sort(list, comp); } @@ -1064,6 +1172,7 @@ public void registerDatanode(DatanodeRegistration nodeReg) DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr( nodeReg.getIpAddr(), nodeReg.getXferPort()); + // Non-change. REMOVE. Number 2 if (nodeN != null && nodeN != nodeS) { NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN); // nodeN previously served a different data storage, @@ -1085,14 +1194,14 @@ public void registerDatanode(DatanodeRegistration nodeReg) } } else { // nodeS is found - /* The registering datanode is a replacement node for the existing + /* The registering datanode is a replacement node for the existing data storage, which from now on will be served by a new node. If this message repeats, both nodes might have same storageID by (insanely rare) random chance. User needs to restart one of the nodes with its data cleared (or user can just remove the StorageID value in "VERSION" file under the data directory of the datanode, but this is might not work if VERSION file format has changed - */ + */ NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS + " is replaced by " + nodeReg + " with the same storageID " + nodeReg.getDatanodeUuid()); @@ -1543,7 +1652,17 @@ public List getDatanodeListForReport( } return nodes; } - + + public List getAllSlowDataNodes() { + if (slowPeerTracker == null) { + LOG.debug("{} is disabled. 
Try enabling it first to capture slow peer outliers.", + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY); + return ImmutableList.of(); + } + List slowNodes = slowPeerTracker.getSlowNodes(getNumOfDataNodes()); + return getDnDescriptorsFromIpAddr(slowNodes); + } + /** * Checks if name resolution was successful for the given address. If IP * address and host name are the same, then it means name resolution has @@ -1759,15 +1878,17 @@ public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg, nodeinfo.setBalancerBandwidth(0); } - if (slowPeerTracker != null) { - final Map slowPeersMap = slowPeers.getSlowPeers(); + Preconditions.checkNotNull(slowPeerTracker, "slowPeerTracker should not be un-assigned"); + + if (slowPeerTracker.isSlowPeerTrackerEnabled()) { + final Map slowPeersMap = slowPeers.getSlowPeers(); if (!slowPeersMap.isEmpty()) { if (LOG.isDebugEnabled()) { - LOG.debug("DataNode " + nodeReg + " reported slow peers: " + - slowPeersMap); + LOG.debug("DataNode " + nodeReg + " reported slow peers: " + slowPeersMap); } - for (String slowNodeId : slowPeersMap.keySet()) { - slowPeerTracker.addReport(slowNodeId, nodeReg.getIpcAddr(false)); + for (Map.Entry slowNodeEntry : slowPeersMap.entrySet()) { + slowPeerTracker.addReport(slowNodeEntry.getKey(), nodeReg.getIpcAddr(false), + slowNodeEntry.getValue()); } } } @@ -1795,18 +1916,16 @@ public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg, * * @param nodeReg registration info for DataNode sending the lifeline * @param reports storage reports from DataNode - * @param blockPoolId block pool ID * @param cacheCapacity cache capacity at DataNode * @param cacheUsed cache used at DataNode * @param xceiverCount estimated count of transfer threads running at DataNode - * @param maxTransfers count of transfers running at DataNode * @param failedVolumes count of failed volumes at DataNode * @param volumeFailureSummary info on failed volumes at DataNode * @throws IOException if there is an error */ 
public void handleLifeline(DatanodeRegistration nodeReg, - StorageReport[] reports, String blockPoolId, long cacheCapacity, - long cacheUsed, int xceiverCount, int maxTransfers, int failedVolumes, + StorageReport[] reports, long cacheCapacity, + long cacheUsed, int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("Received handleLifeline from nodeReg = " + nodeReg); @@ -1960,6 +2079,11 @@ public double getInServiceXceiverAverage() { } return avgLoad; } + + @Override + public Map getStorageTypeStats() { + return heartbeatManager.getStorageTypeStats(); + } }; } @@ -1982,8 +2106,25 @@ private void setHeartbeatInterval(long intervalSeconds, this.heartbeatRecheckInterval = recheckInterval; this.heartbeatExpireInterval = 2L * recheckInterval + 10 * 1000 * intervalSeconds; - this.blockInvalidateLimit = Math.max(20 * (int) (intervalSeconds), - blockInvalidateLimit); + this.blockInvalidateLimit = getBlockInvalidateLimit(blockInvalidateLimit); + } + + private int getBlockInvalidateLimitFromHBInterval() { + return 20 * (int) heartbeatIntervalSeconds; + } + + private int getBlockInvalidateLimit(int configuredBlockInvalidateLimit) { + return Math.max(getBlockInvalidateLimitFromHBInterval(), configuredBlockInvalidateLimit); + } + + public void setBlockInvalidateLimit(int configuredBlockInvalidateLimit) { + final int countedBlockInvalidateLimit = getBlockInvalidateLimitFromHBInterval(); + // Effected block invalidate limit is the bigger value between + // value configured in hdfs-site.xml, and 20 * HB interval. 
+ this.blockInvalidateLimit = getBlockInvalidateLimit(configuredBlockInvalidateLimit); + LOG.info("{} : configured={}, counted={}, effected={}", + DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_KEY, configuredBlockInvalidateLimit, + countedBlockInvalidateLimit, this.blockInvalidateLimit); } /** @@ -1992,7 +2133,55 @@ private void setHeartbeatInterval(long intervalSeconds, * @return */ public String getSlowPeersReport() { - return slowPeerTracker != null ? slowPeerTracker.getJson() : null; + Preconditions.checkNotNull(slowPeerTracker, "slowPeerTracker should not be un-assigned"); + return slowPeerTracker.getJson(); + } + + /** + * Returns all tracking slow peers. + * @return + */ + public Set getSlowPeersUuidSet() { + Set slowPeersUuidSet = Sets.newConcurrentHashSet(); + List slowNodes; + Preconditions.checkNotNull(slowPeerTracker, "slowPeerTracker should not be un-assigned"); + slowNodes = slowPeerTracker.getSlowNodes(maxSlowPeerReportNodes); + List datanodeDescriptors = getDnDescriptorsFromIpAddr(slowNodes); + datanodeDescriptors.forEach( + datanodeDescriptor -> slowPeersUuidSet.add(datanodeDescriptor.getDatanodeUuid())); + return slowPeersUuidSet; + } + + private List getDnDescriptorsFromIpAddr(List nodes) { + List datanodeDescriptors = new ArrayList<>(); + for (String node : nodes) { + if (StringUtils.isBlank(node) || !node.contains(IP_PORT_SEPARATOR)) { + continue; + } + String ipAddr = node.split(IP_PORT_SEPARATOR)[0]; + DatanodeDescriptor datanodeByHost = + host2DatanodeMap.getDatanodeByHost(ipAddr); + if (datanodeByHost != null) { + datanodeDescriptors.add(datanodeByHost); + } + } + return datanodeDescriptors; + } + + /** + * Returns all tracking slow datanodes uuids. + * @return + */ + public static Set getSlowNodesUuidSet() { + return slowNodesUuidSet; + } + + /** + * Use only for testing. 
+ */ + @VisibleForTesting + public SlowPeerTracker getSlowPeerTracker() { + return slowPeerTracker; } /** @@ -2002,6 +2191,12 @@ public String getSlowPeersReport() { public SlowDiskTracker getSlowDiskTracker() { return slowDiskTracker; } + + @VisibleForTesting + public void addSlowPeers(String dnUuid) { + slowNodesUuidSet.add(dnUuid); + } + /** * Retrieve information about slow disks as a JSON. * Returns null if we are not tracking slow disks. @@ -2028,7 +2223,8 @@ public DatanodeStorageReport[] getDatanodeStorageReport( for (int i = 0; i < reports.length; i++) { final DatanodeDescriptor d = datanodes.get(i); reports[i] = new DatanodeStorageReport( - new DatanodeInfoBuilder().setFrom(d).build(), d.getStorageReports()); + new DatanodeInfoBuilder().setFrom(d).setNumBlocks(d.numBlocks()).build(), + d.getStorageReports()); } return reports; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java index 3a56ef16c8efd..e3a19fb41911a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import java.util.Arrays; -import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -28,9 +27,8 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage.State; import org.apache.hadoop.hdfs.server.protocol.StorageReport; -import org.apache.hadoop.hdfs.util.FoldedTreeSet; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A Datanode has one or 
more storages. A storage in the Datanode is represented @@ -87,6 +85,32 @@ public void updateFromStorage(DatanodeStorage storage) { storageType = storage.getStorageType(); } + /** + * Iterates over the list of blocks belonging to the data-node. + */ + class BlockIterator implements Iterator { + private BlockInfo current; + + BlockIterator(BlockInfo head) { + this.current = head; + } + + public boolean hasNext() { + return current != null; + } + + public BlockInfo next() { + BlockInfo res = current; + current = + current.getNext(current.findStorageInfo(DatanodeStorageInfo.this)); + return res; + } + + public void remove() { + throw new UnsupportedOperationException("Sorry. can't remove."); + } + } + private final DatanodeDescriptor dn; private final String storageID; private StorageType storageType; @@ -98,7 +122,8 @@ public void updateFromStorage(DatanodeStorage storage) { private volatile long remaining; private long blockPoolUsed; - private final FoldedTreeSet blocks = new FoldedTreeSet<>(); + private volatile BlockInfo blockList = null; + private int numBlocks = 0; /** The number of block reports received */ private int blockReportCount = 0; @@ -143,6 +168,11 @@ public boolean areBlockContentsStale() { return blockContentsStale; } + @VisibleForTesting + public void setBlockContentsStale(boolean value) { + blockContentsStale = value; + } + void markStaleAfterFailover() { heartbeatedSinceFailover = false; blockContentsStale = true; @@ -182,7 +212,7 @@ void setHeartbeatedSinceFailover(boolean value) { } boolean areBlocksOnFailedStorage() { - return getState() == State.FAILED && !blocks.isEmpty(); + return getState() == State.FAILED && numBlocks != 0; } @VisibleForTesting @@ -213,36 +243,6 @@ long getRemaining() { long getBlockPoolUsed() { return blockPoolUsed; } - /** - * For use during startup. 
Expects block to be added in sorted order - * to enable fast insert in to the DatanodeStorageInfo - * - * @param b Block to add to DatanodeStorageInfo - * @param reportedBlock The reported replica - * @return Enum describing if block was added, replaced or already existed - */ - public AddBlockResult addBlockInitial(BlockInfo b, Block reportedBlock) { - // First check whether the block belongs to a different storage - // on the same DN. - AddBlockResult result = AddBlockResult.ADDED; - DatanodeStorageInfo otherStorage = - b.findStorageInfo(getDatanodeDescriptor()); - - if (otherStorage != null) { - if (otherStorage != this) { - // The block belongs to a different storage. Remove it first. - otherStorage.removeBlock(b); - result = AddBlockResult.REPLACED; - } else { - // The block is already associated with this storage. - return AddBlockResult.ALREADY_EXIST; - } - } - - b.addStorage(this, reportedBlock); - blocks.addSortedLast(b); - return result; - } public AddBlockResult addBlock(BlockInfo b, Block reportedBlock) { // First check whether the block belongs to a different storage @@ -262,8 +262,9 @@ public AddBlockResult addBlock(BlockInfo b, Block reportedBlock) { } } + // add to the head of the data-node list b.addStorage(this, reportedBlock); - blocks.add(b); + insertToList(b); return result; } @@ -271,21 +272,45 @@ AddBlockResult addBlock(BlockInfo b) { return addBlock(b, b); } + public void insertToList(BlockInfo b) { + blockList = b.listInsert(blockList, this); + numBlocks++; + } boolean removeBlock(BlockInfo b) { - blocks.remove(b); - return b.removeStorage(this); + blockList = b.listRemove(blockList, this); + if (b.removeStorage(this)) { + numBlocks--; + return true; + } else { + return false; + } } int numBlocks() { - return blocks.size(); + return numBlocks; } - + + Iterator getBlockIterator() { + return new BlockIterator(blockList); + } + /** - * @return iterator to an unmodifiable set of blocks - * related to this {@link DatanodeStorageInfo} + * Move 
block to the head of the list of blocks belonging to the data-node. + * @return the index of the head of the blockList */ - Iterator getBlockIterator() { - return Collections.unmodifiableSet(blocks).iterator(); + int moveBlockToHead(BlockInfo b, int curIndex, int headIndex) { + blockList = b.moveBlockToHead(blockList, this, curIndex, headIndex); + return curIndex; + } + + + /** + * Used for testing only. + * @return the head of the blockList + */ + @VisibleForTesting + BlockInfo getBlockListHeadForTesting(){ + return blockList; } void updateState(StorageReport r) { @@ -344,27 +369,6 @@ StorageReport toStorageReport() { false, capacity, dfsUsed, remaining, blockPoolUsed, nonDfsUsed); } - /** - * The fill ratio of the underlying TreeSet holding blocks. - * - * @return the fill ratio of the tree - */ - public double treeSetFillRatio() { - return blocks.fillRatio(); - } - - /** - * Compact the underlying TreeSet holding blocks. - * - * @param timeout Maximum time to spend compacting the tree set in - * milliseconds. 
- * - * @return true if compaction completed, false if aborted - */ - public boolean treeSetCompact(long timeout) { - return blocks.compact(timeout); - } - static Iterable toStorageTypes( final Iterable infos) { return new Iterable() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java index 8de3f381ddffe..e5303a28d714e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java @@ -30,8 +30,9 @@ import java.util.Set; class ErasureCodingWork extends BlockReconstructionWork { - private final byte[] liveBlockIndicies; - private final byte[] liveBusyBlockIndicies; + private final byte[] liveBlockIndices; + private final byte[] liveBusyBlockIndices; + private final byte[] excludeReconstructedIndices; private final String blockPoolId; public ErasureCodingWork(String blockPoolId, BlockInfo block, @@ -40,18 +41,20 @@ public ErasureCodingWork(String blockPoolId, BlockInfo block, List containingNodes, List liveReplicaStorages, int additionalReplRequired, int priority, - byte[] liveBlockIndicies, byte[] liveBusyBlockIndicies) { + byte[] liveBlockIndices, byte[] liveBusyBlockIndices, + byte[] excludeReconstrutedIndices) { super(block, bc, srcNodes, containingNodes, liveReplicaStorages, additionalReplRequired, priority); this.blockPoolId = blockPoolId; - this.liveBlockIndicies = liveBlockIndicies; - this.liveBusyBlockIndicies = liveBusyBlockIndicies; + this.liveBlockIndices = liveBlockIndices; + this.liveBusyBlockIndices = liveBusyBlockIndices; + this.excludeReconstructedIndices=excludeReconstrutedIndices; LOG.debug("Creating an ErasureCodingWork to {} reconstruct ", block); } - byte[] 
getLiveBlockIndicies() { - return liveBlockIndicies; + byte[] getLiveBlockIndices() { + return liveBlockIndices; } @Override @@ -72,15 +75,15 @@ void chooseTargets(BlockPlacementPolicy blockplacement, */ private boolean hasAllInternalBlocks() { final BlockInfoStriped block = (BlockInfoStriped) getBlock(); - if (liveBlockIndicies.length - + liveBusyBlockIndicies.length < block.getRealTotalBlockNum()) { + if (liveBlockIndices.length + + liveBusyBlockIndices.length < block.getRealTotalBlockNum()) { return false; } BitSet bitSet = new BitSet(block.getTotalBlockNum()); - for (byte index : liveBlockIndicies) { + for (byte index : liveBlockIndices) { bitSet.set(index); } - for (byte busyIndex: liveBusyBlockIndicies) { + for (byte busyIndex: liveBusyBlockIndices) { bitSet.set(busyIndex); } for (int i = 0; i < block.getRealDataBlockNum(); i++) { @@ -147,14 +150,14 @@ void addTaskToDatanode(NumberReplicas numberReplicas) { } else { targets[0].getDatanodeDescriptor().addBlockToBeErasureCoded( new ExtendedBlock(blockPoolId, stripedBlk), getSrcNodes(), targets, - getLiveBlockIndicies(), stripedBlk.getErasureCodingPolicy()); + liveBlockIndices, excludeReconstructedIndices, stripedBlk.getErasureCodingPolicy()); } } private void createReplicationWork(int sourceIndex, DatanodeStorageInfo target) { BlockInfoStriped stripedBlk = (BlockInfoStriped) getBlock(); - final byte blockIndex = liveBlockIndicies[sourceIndex]; + final byte blockIndex = liveBlockIndices[sourceIndex]; final DatanodeDescriptor source = getSrcNodes()[sourceIndex]; final long internBlkLen = StripedBlockUtil.getInternalBlockLength( stripedBlk.getNumBytes(), stripedBlk.getCellSize(), @@ -173,7 +176,7 @@ private List findLeavingServiceSources() { BitSet bitSet = new BitSet(block.getRealTotalBlockNum()); for (int i = 0; i < getSrcNodes().length; i++) { if (getSrcNodes()[i].isInService()) { - bitSet.set(liveBlockIndicies[i]); + bitSet.set(liveBlockIndices[i]); } } // If the block is on the node which is decommissioning 
or @@ -184,7 +187,7 @@ private List findLeavingServiceSources() { if ((getSrcNodes()[i].isDecommissionInProgress() || (getSrcNodes()[i].isEnteringMaintenance() && getSrcNodes()[i].isAlive())) && - !bitSet.get(liveBlockIndicies[i])) { + !bitSet.get(liveBlockIndices[i])) { srcIndices.add(i); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ExcessRedundancyMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ExcessRedundancyMap.java index ccdcf5451ccd8..41bb7d3428c39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ExcessRedundancyMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ExcessRedundancyMap.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hdfs.util.LightWeightHashSet; import org.slf4j.Logger; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Maps a datnode to the set of excess redundancy details. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java index 556b7fcaad73c..14122952bb18d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.StorageType; + +import java.util.Map; /** * This interface is used for retrieving the load related statistics of @@ -57,4 +60,10 @@ public interface FSClusterStats { * writes that are currently occurring on the cluster. */ public double getInServiceXceiverAverage(); + + /** + * Indicates the storage statistics per storage type. + * @return storage statistics per storage type. + */ + Map getStorageTypeStats(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java index 9e4d867a0bfc4..5da47c4b2a8a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java @@ -36,7 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Manage the heartbeats received from datanodes. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java index 4ead0ba6f704a..57b690262a6d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java index cf7cfac95ab8e..af0c92df5e272 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java @@ -17,15 +17,12 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterators; -import com.google.common.collect.Multimap; -import com.google.common.collect.UnmodifiableIterator; -import javax.annotation.Nullable; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.UnmodifiableIterator; + import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.Collection; @@ -101,14 +98,16 @@ public InetSocketAddress next() { @Override public String toString() { StringBuilder sb = new StringBuilder("HostSet("); - Joiner.on(",").appendTo(sb, Iterators.transform(iterator(), - new Function() { - @Override - public String apply(@Nullable InetSocketAddress addr) { - assert addr != null; - return addr.getAddress().getHostAddress() + ":" + addr.getPort(); - } - })); - return sb.append(")").toString(); + Iterator iter = iterator(); + String sep = ""; + while (iter.hasNext()) { + InetSocketAddress addr = iter.next(); + sb.append(sep); + sb.append(addr.getAddress().getHostAddress()); + sb.append(':'); + sb.append(addr.getPort()); + sep = ","; + } + return sb.append(')').toString(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java index bbe729c3332cc..1ce967a4f3703 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java @@ -39,7 +39,7 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.hdfs.DFSUtil; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Keeps a Collection for every named machine containing blocks diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java index 
8cf9dd40ca6d8..77480db71ed8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -85,10 +86,10 @@ class LowRedundancyBlocks implements Iterable { private final List> priorityQueues = new ArrayList<>(LEVEL); - /** The number of corrupt blocks with replication factor 1 */ private final LongAdder lowRedundancyBlocks = new LongAdder(); private final LongAdder corruptBlocks = new LongAdder(); + /** The number of corrupt blocks with replication factor 1 */ private final LongAdder corruptReplicationOneBlocks = new LongAdder(); private final LongAdder lowRedundancyECBlockGroups = new LongAdder(); private final LongAdder corruptECBlockGroups = new LongAdder(); @@ -245,8 +246,8 @@ private int getPriorityContiguous(int curReplicas, int readOnlyReplicas, // highest priority return QUEUE_HIGHEST_PRIORITY; } else if ((curReplicas * 3) < expectedReplicas) { - //can only afford one replica loss - //this is considered very insufficiently redundant blocks. + //there is less than a third as many blocks as requested; + //this is considered very under-replicated. 
return QUEUE_VERY_LOW_REDUNDANCY; } else { //add to the normal queue for insufficiently redundant blocks @@ -366,11 +367,11 @@ synchronized boolean remove(BlockInfo block, * @return true if the block was found and removed from one of the priority * queues */ - boolean remove(BlockInfo block, int priLevel) { + synchronized boolean remove(BlockInfo block, int priLevel) { return remove(block, priLevel, block.getReplication()); } - boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) { + synchronized boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) { if(priLevel >= 0 && priLevel < LEVEL && priorityQueues.get(priLevel).remove(block)) { NameNode.blockStateChangeLog.debug( @@ -382,17 +383,18 @@ boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) { } else { // Try to remove the block from all queues if the block was // not found in the queue for the given priority level. + boolean found = false; for (int i = 0; i < LEVEL; i++) { if (i != priLevel && priorityQueues.get(i).remove(block)) { NameNode.blockStateChangeLog.debug( "BLOCK* NameSystem.LowRedundancyBlock.remove: Removing block" + " {} from priority queue {}", block, i); decrementBlockStat(block, i, oldExpectedReplicas); - return true; + found = true; } } + return found; } - return false; } private void decrementBlockStat(BlockInfo blockInfo, int priLevel, @@ -499,6 +501,8 @@ synchronized List> chooseLowRedundancyBlocks( * the block count is met or iteration reaches the end of the lowest priority * list, in which case bookmarks for each block list are reset to the heads * of their respective lists. + * If a block is deleted (has invalid bcId), it will be removed from the low + * redundancy queues. * * @param blocksToProcess - number of blocks to fetch from low redundancy * blocks. 
@@ -514,21 +518,32 @@ synchronized List> chooseLowRedundancyBlocks( int count = 0; int priority = 0; + HashSet toRemove = new HashSet<>(); for (; count < blocksToProcess && priority < LEVEL; priority++) { - if (priority == QUEUE_WITH_CORRUPT_BLOCKS) { - // do not choose corrupted blocks. - continue; - } - // Go through all blocks that need reconstructions with current priority. // Set the iterator to the first unprocessed block at this priority level + // We do not want to skip QUEUE_WITH_CORRUPT_BLOCKS because we still need + // to look for deleted blocks if any. + final boolean inCorruptLevel = (QUEUE_WITH_CORRUPT_BLOCKS == priority); final Iterator i = priorityQueues.get(priority).getBookmark(); final List blocks = new LinkedList<>(); - blocksToReconstruct.add(blocks); - // Loop through all remaining blocks in the list. + if (!inCorruptLevel) { + blocksToReconstruct.add(blocks); + } for(; count < blocksToProcess && i.hasNext(); count++) { - blocks.add(i.next()); + BlockInfo block = i.next(); + if (block.isDeleted()) { + toRemove.add(block); + continue; + } + if (!inCorruptLevel) { + blocks.add(block); + } + } + for (BlockInfo bInfo : toRemove) { + remove(bInfo, priority); } + toRemove.clear(); } if (priority == LEVEL || resetIterators) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java index 6e9dfa2c0089e..8a8501f4aeee5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java @@ -24,8 +24,8 @@ import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; -import com.google.common.collect.Lists; 
-import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * In the Standby Node, we can receive messages about blocks diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java index 6e1af5729aae5..322d2696e84c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReconstructionBlocks.java @@ -29,7 +29,7 @@ import java.util.List; import java.util.Map; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.util.Daemon; @@ -59,7 +59,7 @@ class PendingReconstructionBlocks { // It might take anywhere between 5 to 10 minutes before // a request is timed out. 
// - private long timeout = + private volatile long timeout = DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_DEFAULT * 1000; private final static long DEFAULT_RECHECK_INTERVAL = 5 * 60 * 1000; @@ -333,7 +333,7 @@ List getTargets(BlockInfo block) { synchronized (pendingReconstructions) { PendingBlockInfo found = pendingReconstructions.get(block); if (found != null) { - return found.targets; + return new ArrayList<>(found.targets); } } return null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingRecoveryBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingRecoveryBlocks.java index 3f5f27c819000..8a432cead742b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingRecoveryBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingRecoveryBlocks.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdfs.util.LightWeightHashSet; import org.apache.hadoop.util.Time; import org.slf4j.Logger; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java index c8c9bb5d1cc74..31ab66d93f6b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java @@ -29,7 +29,7 @@ import java.util.UUID; import java.util.concurrent.ConcurrentSkipListMap; -import 
com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java index 894121eeee7d0..782340185c986 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java @@ -23,10 +23,10 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.primitives.Doubles; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Doubles; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -77,7 +77,7 @@ public class SlowDiskTracker { * Number of disks to include in JSON report per operation. We will return * disks with the highest latency. 
*/ - private static final int MAX_DISKS_TO_REPORT = 5; + private final int maxDisksToReport; private static final String DATANODE_DISK_SEPARATOR = ":"; private final long reportGenerationIntervalMs; @@ -107,6 +107,9 @@ public SlowDiskTracker(Configuration conf, Timer timer) { DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + this.maxDisksToReport = conf.getInt( + DFSConfigKeys.DFS_DATANODE_MAX_DISKS_TO_REPORT_KEY, + DFSConfigKeys.DFS_DATANODE_MAX_DISKS_TO_REPORT_DEFAULT); this.reportValidityMs = reportGenerationIntervalMs * 3; } @@ -153,7 +156,7 @@ public void updateSlowDiskReportAsync(long now) { @Override public void run() { slowDisksReport = getSlowDisks(diskIDLatencyMap, - MAX_DISKS_TO_REPORT, now); + maxDisksToReport, now); cleanUpOldReports(now); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerDisabledTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerDisabledTracker.java new file mode 100644 index 0000000000000..ac109e0c90ae3 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerDisabledTracker.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.blockmanagement; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; + +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; +import org.apache.hadoop.util.Preconditions; +import org.apache.hadoop.util.Timer; + +/** + * Disabled tracker for slow peers. To be used when dfs.datanode.peer.stats.enabled is disabled. 
+ */ +@InterfaceAudience.Private +public class SlowPeerDisabledTracker extends SlowPeerTracker { + + private static final Logger LOG = LoggerFactory.getLogger(SlowPeerDisabledTracker.class); + + public SlowPeerDisabledTracker(Configuration conf, Timer timer) { + super(conf, timer); + final boolean dataNodePeerStatsEnabledVal = + conf.getBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT); + Preconditions.checkArgument(!dataNodePeerStatsEnabledVal, + "SlowPeerDisabledTracker should only be used for disabled slow peer stats."); + } + + @Override + public boolean isSlowPeerTrackerEnabled() { + return false; + } + + @Override + public void addReport(String slowNode, String reportingNode, OutlierMetrics slowNodeMetrics) { + LOG.trace("Adding slow peer report is disabled. To enable it, please enable config {}.", + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY); + } + + @Override + public Set getReportsForNode(String slowNode) { + LOG.trace("Retrieval of slow peer report is disabled. To enable it, please enable config {}.", + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY); + return ImmutableSet.of(); + } + + @Override + public Map> getReportsForAllDataNodes() { + LOG.trace("Retrieval of slow peer report for all nodes is disabled. " + + "To enable it, please enable config {}.", + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY); + return ImmutableMap.of(); + } + + @Override + public String getJson() { + LOG.trace("Retrieval of slow peer reports as json string is disabled. 
" + + "To enable it, please enable config {}.", + DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY); + return null; + } + + @Override + public List getSlowNodes(int numNodes) { + return ImmutableList.of(); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerJsonReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerJsonReport.java new file mode 100644 index 0000000000000..b9b741e9d611c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerJsonReport.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.blockmanagement; + +import java.util.SortedSet; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * This structure is a thin wrapper over slow peer reports to make Json + * [de]serialization easy. + */ +@InterfaceAudience.Private +final class SlowPeerJsonReport { + + @JsonProperty("SlowNode") + private final String slowNode; + + @JsonProperty("SlowPeerLatencyWithReportingNodes") + private final SortedSet slowPeerLatencyWithReportingNodes; + + SlowPeerJsonReport( + @JsonProperty("SlowNode") + String slowNode, + @JsonProperty("SlowPeerLatencyWithReportingNodes") + SortedSet slowPeerLatencyWithReportingNodes) { + this.slowNode = slowNode; + this.slowPeerLatencyWithReportingNodes = slowPeerLatencyWithReportingNodes; + } + + public String getSlowNode() { + return slowNode; + } + + public SortedSet getSlowPeerLatencyWithReportingNodes() { + return slowPeerLatencyWithReportingNodes; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + SlowPeerJsonReport that = (SlowPeerJsonReport) o; + + return new EqualsBuilder() + .append(slowNode, that.slowNode) + .append(slowPeerLatencyWithReportingNodes, that.slowPeerLatencyWithReportingNodes) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(17, 37) + .append(slowNode) + .append(slowPeerLatencyWithReportingNodes) + 
.toHashCode(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerLatencyWithReportingNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerLatencyWithReportingNode.java new file mode 100644 index 0000000000000..b90f809f1ed8c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerLatencyWithReportingNode.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

      + * http://www.apache.org/licenses/LICENSE-2.0 + *

      + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.blockmanagement; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * This class represents the reporting node and the slow node's latency as observed by the + * reporting node. This class is used by SlowPeerJsonReport class. + */ +@InterfaceAudience.Private +final class SlowPeerLatencyWithReportingNode + implements Comparable { + + @JsonProperty("ReportingNode") + private final String reportingNode; + + @JsonProperty("ReportedLatency") + private final Double reportedLatency; + + @JsonProperty("MedianLatency") + private final Double medianLatency; + + @JsonProperty("MadLatency") + private final Double madLatency; + + @JsonProperty("UpperLimitLatency") + private final Double upperLimitLatency; + + SlowPeerLatencyWithReportingNode( + @JsonProperty("ReportingNode") + String reportingNode, + @JsonProperty("ReportedLatency") + Double reportedLatency, + @JsonProperty("MedianLatency") + Double medianLatency, + @JsonProperty("MadLatency") + Double madLatency, + @JsonProperty("UpperLimitLatency") + Double upperLimitLatency) { + this.reportingNode = reportingNode; + this.reportedLatency = reportedLatency; + this.medianLatency = medianLatency; + this.madLatency = madLatency; + this.upperLimitLatency = upperLimitLatency; + } + + public String getReportingNode() { + return reportingNode; + } + + public Double getReportedLatency() { + return reportedLatency; + } + + public Double getMedianLatency() { + return 
medianLatency; + } + + public Double getMadLatency() { + return madLatency; + } + + public Double getUpperLimitLatency() { + return upperLimitLatency; + } + + @Override + public int compareTo(SlowPeerLatencyWithReportingNode o) { + return this.reportingNode.compareTo(o.getReportingNode()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + SlowPeerLatencyWithReportingNode that = (SlowPeerLatencyWithReportingNode) o; + + return new EqualsBuilder() + .append(reportingNode, that.reportingNode) + .append(reportedLatency, that.reportedLatency) + .append(medianLatency, that.medianLatency) + .append(madLatency, that.madLatency) + .append(upperLimitLatency, that.upperLimitLatency) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(17, 37) + .append(reportingNode) + .append(reportedLatency) + .append(medianLatency) + .append(madLatency) + .append(upperLimitLatency) + .toHashCode(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java index 03a6918152f40..492287f023888 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java @@ -18,26 +18,27 @@ package org.apache.hadoop.hdfs.server.blockmanagement; -import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; -import 
com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.PriorityQueue; import java.util.Set; @@ -79,7 +80,7 @@ public class SlowPeerTracker { * Number of nodes to include in JSON report. We will return nodes with * the highest number of votes from peers. */ - private static final int MAX_NODES_TO_REPORT = 5; + private final int maxNodesToReport; /** * Information about peers that have reported a node as being slow. @@ -93,7 +94,7 @@ public class SlowPeerTracker { * Stale reports are not evicted proactively and can potentially * hang around forever. */ - private final ConcurrentMap> + private final ConcurrentMap> allReports; public SlowPeerTracker(Configuration conf, Timer timer) { @@ -103,18 +104,31 @@ public SlowPeerTracker(Configuration conf, Timer timer) { DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS) * 3; + this.maxNodesToReport = conf.getInt( + DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_KEY, + DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT); + } + + /** + * If SlowPeerTracker is enabled, return true, else returns false. 
+ * + * @return true if slow peer tracking is enabled, else false. + */ + public boolean isSlowPeerTrackerEnabled() { + return true; } /** * Add a new report. DatanodeIds can be the DataNodeIds or addresses * We don't care as long as the caller is consistent. * - * @param reportingNode DataNodeId of the node reporting on its peer. * @param slowNode DataNodeId of the peer suspected to be slow. + * @param reportingNode DataNodeId of the node reporting on its peer. + * @param slowNodeMetrics Aggregate latency metrics of slownode as reported by the + * reporting node. */ - public void addReport(String slowNode, - String reportingNode) { - ConcurrentMap nodeEntries = allReports.get(slowNode); + public void addReport(String slowNode, String reportingNode, OutlierMetrics slowNodeMetrics) { + ConcurrentMap nodeEntries = allReports.get(slowNode); if (nodeEntries == null) { // putIfAbsent guards against multiple writers. @@ -123,7 +137,8 @@ public void addReport(String slowNode, } // Replace the existing entry from this node, if any. - nodeEntries.put(reportingNode, timer.monotonicNow()); + nodeEntries.put(reportingNode, + new LatencyWithLastReportTime(timer.monotonicNow(), slowNodeMetrics)); } /** @@ -133,8 +148,8 @@ public void addReport(String slowNode, * @param slowNode target node Id. * @return set of reports which implicate the target node as being slow. */ - public Set getReportsForNode(String slowNode) { - final ConcurrentMap nodeEntries = + public Set getReportsForNode(String slowNode) { + final ConcurrentMap nodeEntries = allReports.get(slowNode); if (nodeEntries == null || nodeEntries.isEmpty()) { @@ -149,17 +164,19 @@ public Set getReportsForNode(String slowNode) { * * @return map from SlowNodeId {@literal ->} (set of nodes reporting peers). 
*/ - public Map> getReportsForAllDataNodes() { + public Map> getReportsForAllDataNodes() { if (allReports.isEmpty()) { return ImmutableMap.of(); } - final Map> allNodesValidReports = new HashMap<>(); + final Map> allNodesValidReports = + new HashMap<>(); final long now = timer.monotonicNow(); - for (Map.Entry> entry : - allReports.entrySet()) { - SortedSet validReports = filterNodeReports(entry.getValue(), now); + for (Map.Entry> entry + : allReports.entrySet()) { + SortedSet validReports = + filterNodeReports(entry.getValue(), now); if (!validReports.isEmpty()) { allNodesValidReports.put(entry.getKey(), validReports); } @@ -170,17 +187,21 @@ public Map> getReportsForAllDataNodes() { /** * Filter the given reports to return just the valid ones. * - * @param reports - * @param now - * @return + * @param reports Current set of reports. + * @param now Current time. + * @return Set of valid reports that were created within last reportValidityMs millis. */ - private SortedSet filterNodeReports( - ConcurrentMap reports, long now) { - final SortedSet validReports = new TreeSet<>(); - - for (Map.Entry entry : reports.entrySet()) { - if (now - entry.getValue() < reportValidityMs) { - validReports.add(entry.getKey()); + private SortedSet filterNodeReports( + ConcurrentMap reports, long now) { + final SortedSet validReports = new TreeSet<>(); + + for (Map.Entry entry : reports.entrySet()) { + if (now - entry.getValue().getTime() < reportValidityMs) { + OutlierMetrics outlierMetrics = entry.getValue().getLatency(); + validReports.add( + new SlowPeerLatencyWithReportingNode(entry.getKey(), outlierMetrics.getActualLatency(), + outlierMetrics.getMedian(), outlierMetrics.getMad(), + outlierMetrics.getUpperLimitLatency())); } } return validReports; @@ -192,8 +213,8 @@ private SortedSet filterNodeReports( * serialization failed. 
*/ public String getJson() { - Collection validReports = getJsonReports( - MAX_NODES_TO_REPORT); + Collection validReports = getJsonReports( + maxNodesToReport); try { return WRITER.writeValueAsString(validReports); } catch (JsonProcessingException e) { @@ -204,30 +225,20 @@ public String getJson() { } /** - * This structure is a thin wrapper over reports to make Json - * [de]serialization easy. + * Returns all tracking slow peers. + * @param numNodes + * @return */ - public static class ReportForJson { - @JsonProperty("SlowNode") - final private String slowNode; - - @JsonProperty("ReportingNodes") - final private SortedSet reportingNodes; - - public ReportForJson( - @JsonProperty("SlowNode") String slowNode, - @JsonProperty("ReportingNodes") SortedSet reportingNodes) { - this.slowNode = slowNode; - this.reportingNodes = reportingNodes; - } - - public String getSlowNode() { - return slowNode; + public List getSlowNodes(int numNodes) { + Collection jsonReports = getJsonReports(numNodes); + ArrayList slowNodes = new ArrayList<>(); + for (SlowPeerJsonReport jsonReport : jsonReports) { + slowNodes.add(jsonReport.getSlowNode()); } - - public SortedSet getReportingNodes() { - return reportingNodes; + if (!slowNodes.isEmpty()) { + LOG.warn("Slow nodes list: " + slowNodes); } + return slowNodes; } /** @@ -236,35 +247,30 @@ public SortedSet getReportingNodes() { * @param numNodes number of nodes to return. This is to limit the * size of the generated JSON. 
*/ - private Collection getJsonReports(int numNodes) { + private Collection getJsonReports(int numNodes) { if (allReports.isEmpty()) { return Collections.emptyList(); } - final PriorityQueue topNReports = - new PriorityQueue<>(allReports.size(), - new Comparator() { - @Override - public int compare(ReportForJson o1, ReportForJson o2) { - return Ints.compare(o1.reportingNodes.size(), - o2.reportingNodes.size()); - } - }); + final PriorityQueue topNReports = new PriorityQueue<>(allReports.size(), + (o1, o2) -> Ints.compare(o1.getSlowPeerLatencyWithReportingNodes().size(), + o2.getSlowPeerLatencyWithReportingNodes().size())); final long now = timer.monotonicNow(); - for (Map.Entry> entry : - allReports.entrySet()) { - SortedSet validReports = filterNodeReports( - entry.getValue(), now); + for (Map.Entry> entry + : allReports.entrySet()) { + SortedSet validReports = + filterNodeReports(entry.getValue(), now); if (!validReports.isEmpty()) { if (topNReports.size() < numNodes) { - topNReports.add(new ReportForJson(entry.getKey(), validReports)); - } else if (topNReports.peek().getReportingNodes().size() < - validReports.size()){ + topNReports.add(new SlowPeerJsonReport(entry.getKey(), validReports)); + } else if (topNReports.peek() != null + && topNReports.peek().getSlowPeerLatencyWithReportingNodes().size() + < validReports.size()) { // Remove the lowest element topNReports.poll(); - topNReports.add(new ReportForJson(entry.getKey(), validReports)); + topNReports.add(new SlowPeerJsonReport(entry.getKey(), validReports)); } } } @@ -275,4 +281,23 @@ public int compare(ReportForJson o1, ReportForJson o2) { long getReportValidityMs() { return reportValidityMs; } + + private static class LatencyWithLastReportTime { + private final Long time; + private final OutlierMetrics latency; + + LatencyWithLastReportTime(Long time, OutlierMetrics latency) { + this.time = time; + this.latency = latency; + } + + public Long getTime() { + return time; + } + + public OutlierMetrics 
getLatency() { + return latency; + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/StorageTypeStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/StorageTypeStats.java index c335ec6a7ef89..f90dbad6980ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/StorageTypeStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/StorageTypeStats.java @@ -20,6 +20,7 @@ import java.beans.ConstructorProperties; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.StorageType; @@ -39,6 +40,15 @@ public class StorageTypeStats { private int nodesInService = 0; private StorageType storageType; + @VisibleForTesting + void setDataNodesInServiceXceiverCount(int avgXceiverPerDatanode, + int numNodesInService) { + this.nodesInService = numNodesInService; + this.nodesInServiceXceiverCount = numNodesInService * avgXceiverPerDatanode; + } + + private int nodesInServiceXceiverCount; + @ConstructorProperties({"capacityTotal", "capacityUsed", "capacityNonDfsUsed", "capacityRemaining", "blockPoolUsed", "nodesInService"}) public StorageTypeStats( @@ -101,6 +111,10 @@ public int getNodesInService() { return nodesInService; } + public int getNodesInServiceXceiverCount() { + return nodesInServiceXceiverCount; + } + StorageTypeStats(StorageType storageType) { this.storageType = storageType; } @@ -131,6 +145,7 @@ void addStorage(final DatanodeStorageInfo info, void addNode(final DatanodeDescriptor node) { if (node.isInService()) { nodesInService++; + nodesInServiceXceiverCount += node.getXceiverCount(); } } @@ -151,6 +166,7 @@ void subtractStorage(final DatanodeStorageInfo info, void 
subtractNode(final DatanodeDescriptor node) { if (node.isInService()) { nodesInService--; + nodesInServiceXceiverCount -= node.getXceiverCount(); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index 78d4289a047cb..03b180121fcf7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -23,6 +23,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang3.Validate; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtil; @@ -31,7 +32,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSDirectory; import org.apache.hadoop.hdfs.server.namenode.MetaRecoveryContext; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion; import org.apache.hadoop.util.StringUtils; @@ -294,6 +295,10 @@ enum ReplicaState { /** Temporary replica: created for replication and relocation only. */ TEMPORARY(4); + // Since ReplicaState (de)serialization depends on ordinal, either adding + // new value should be avoided to this enum or newly appended value should + // be handled by NameNodeLayoutVersion#Feature. + private static final ReplicaState[] cachedValues = ReplicaState.values(); private final int value; @@ -306,13 +311,32 @@ public int getValue() { return value; } + /** + * Retrieve ReplicaState corresponding to given index. + * + * @param v Index to retrieve {@link ReplicaState}. + * @return {@link ReplicaState} object. 
+ * @throws IndexOutOfBoundsException if the index is invalid. + */ public static ReplicaState getState(int v) { + Validate.validIndex(cachedValues, v, "Index Expected range: [0, " + + (cachedValues.length - 1) + "]. Actual value: " + v); return cachedValues[v]; } - /** Read from in */ + /** + * Retrieve ReplicaState corresponding to index provided in binary stream. + * + * @param in Index value provided as bytes in given binary stream. + * @return {@link ReplicaState} object. + * @throws IOException if an I/O error occurs while reading bytes. + * @throws IndexOutOfBoundsException if the index is invalid. + */ public static ReplicaState read(DataInput in) throws IOException { - return cachedValues[in.readByte()]; + byte idx = in.readByte(); + Validate.validIndex(cachedValues, idx, "Index Expected range: [0, " + + (cachedValues.length - 1) + "]. Actual value: " + idx); + return cachedValues[idx]; } /** Write to out */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HostRestrictingAuthorizationFilter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HostRestrictingAuthorizationFilter.java index 1a51b46e585ea..0eb999039cb81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HostRestrictingAuthorizationFilter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HostRestrictingAuthorizationFilter.java @@ -229,9 +229,13 @@ public void handleInteraction(HttpInteraction interaction) throws IOException, ServletException { final String address = interaction.getRemoteAddr(); final String query = interaction.getQueryString(); - final String path = - interaction.getRequestURI() - .substring(WebHdfsFileSystem.PATH_PREFIX.length()); + final String uri = interaction.getRequestURI(); + if (!uri.startsWith(WebHdfsFileSystem.PATH_PREFIX)) { + LOG.trace("Proceeding with interaction since the request doesn't access 
WebHDFS API"); + interaction.proceed(); + return; + } + final String path = uri.substring(WebHdfsFileSystem.PATH_PREFIX.length()); String user = interaction.getRemoteUser(); LOG.trace("Got request user: {}, remoteIp: {}, query: {}, path: {}", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/IncorrectVersionException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/IncorrectVersionException.java index 43b8c3c05d102..7a14e38f5505f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/IncorrectVersionException.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/IncorrectVersionException.java @@ -21,7 +21,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; /** * The exception is thrown when external version does not match diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index 2f249655eed8e..4265c288e88d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -198,6 +198,9 @@ public static String getRemoteAddr(HttpServletRequest request) { return remoteAddr; } + public static int getRemotePort(HttpServletRequest request) { + return request.getRemotePort(); + } /** * Expected user name should be a short name. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java index e7da44e689a60..83a82566f6c3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java @@ -53,8 +53,8 @@ import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.util.VersionInfo; -import com.google.common.base.Charsets; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -801,8 +801,7 @@ public void doRecover(StorageState curState) throws IOException { case RECOVER_UPGRADE: // mv previous.tmp -> current LOG.info("Recovering storage directory {} from previous upgrade", rootPath); - if (curDir.exists()) - deleteDir(curDir); + deleteAsync(curDir); rename(getPreviousTmp(), curDir); return; case COMPLETE_ROLLBACK: // rm removed.tmp @@ -818,21 +817,19 @@ public void doRecover(StorageState curState) throws IOException { case COMPLETE_FINALIZE: // rm finalized.tmp LOG.info("Completing previous finalize for storage directory {}", rootPath); - deleteDir(getFinalizedTmp()); + deleteAsync(getFinalizedTmp()); return; case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint LOG.info("Completing previous checkpoint for storage directory {}", rootPath); File prevCkptDir = getPreviousCheckpoint(); - if (prevCkptDir.exists()) - deleteDir(prevCkptDir); + deleteAsync(prevCkptDir); rename(getLastCheckpointTmp(), prevCkptDir); return; case RECOVER_CHECKPOINT: // mv lastcheckpoint.tmp -> current LOG.info("Recovering storage directory {} from failed checkpoint", rootPath); - if (curDir.exists()) - 
deleteDir(curDir); + deleteAsync(curDir); rename(getLastCheckpointTmp(), curDir); return; default: @@ -840,7 +837,30 @@ public void doRecover(StorageState curState) throws IOException { + " for storage directory: " + rootPath); } } - + + /** + * Rename the curDir to curDir.tmp and delete the curDir.tmp parallely. + * @throws IOException + */ + private void deleteAsync(File curDir) throws IOException { + if (curDir.exists()) { + File curTmp = new File(curDir.getParent(), curDir.getName() + ".tmp"); + if (curTmp.exists()) { + deleteDir(curTmp); + } + rename(curDir, curTmp); + new Thread("Async Delete Current.tmp") { + public void run() { + try { + deleteDir(curTmp); + } catch (IOException e) { + LOG.warn("Deleting storage directory {} failed", curTmp); + } + } + }.start(); + } + } + /** * @return true if the storage directory should prompt the user prior * to formatting (i.e if the directory appears to contain some data) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java index 28871e5da32a0..23911c0ac74ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeLayoutVersion; import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; /** * Common class for storage information. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java index e9f9bfb830cdc..be487bed25763 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java @@ -36,8 +36,8 @@ import java.util.Map; import java.util.Set; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -285,7 +285,7 @@ public static MD5Hash receiveFile(String url, List localPaths, fos.getChannel().force(true); fos.close(); double writeSec = Math.max(((float) - (flushStartTime - Time.monotonicNow())) / 1000.0, 0.001); + (Time.monotonicNow() - flushStartTime)) / 1000.0, 0.001); xferCombined += writeSec; xferStats.append(String .format(" Synchronous (fsync) write to disk of " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/InMemoryLevelDBAliasMapClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/InMemoryLevelDBAliasMapClient.java index cacf8f102fa40..6cac72af82ca3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/InMemoryLevelDBAliasMapClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/InMemoryLevelDBAliasMapClient.java @@ -129,7 +129,7 @@ public Iterator iterator() { } } - class LevelDbWriter extends BlockAliasMap.Writer { + static class LevelDbWriter extends 
BlockAliasMap.Writer { private InMemoryAliasMapProtocol aliasMap; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TextFileRegionAliasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TextFileRegionAliasMap.java index 4d65142c95848..0ca7118b0d391 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TextFileRegionAliasMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TextFileRegionAliasMap.java @@ -26,7 +26,6 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; -import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Base64; import java.util.Iterator; @@ -54,7 +53,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class is used for block maps stored as text files, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 3199e53890f30..b8a52aa79fed1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -17,10 +17,10 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; 
+import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; @@ -206,6 +206,7 @@ String getBlockPoolId(boolean quiet) { if (id != null) { return id; } + DataNodeFaultInjector.get().delayWhenOfferServiceHoldLock(); readLock(); try { if (bpNSInfo != null) { @@ -382,6 +383,7 @@ void verifyAndSetNamespaceInfo(BPServiceActor actor, NamespaceInfo nsInfo) } try { + DataNodeFaultInjector.get().delayWhenOfferServiceHoldLock(); if (setNamespaceInfo(nsInfo) == null) { boolean success = false; @@ -676,15 +678,20 @@ boolean processCommandFromActor(DatanodeCommand cmd, actor.reRegister(); return false; } - writeLock(); + boolean isActiveActor; + InetSocketAddress nnSocketAddress; + readLock(); try { - if (actor == bpServiceToActive) { - return processCommandFromActive(cmd, actor); - } else { - return processCommandFromStandby(cmd, actor); - } + isActiveActor = (actor == bpServiceToActive); + nnSocketAddress = actor.getNNSocketAddress(); } finally { - writeUnlock(); + readUnlock(); + } + + if (isActiveActor) { + return processCommandFromActive(cmd, nnSocketAddress); + } else { + return processCommandFromStandby(cmd, nnSocketAddress); } } @@ -712,7 +719,7 @@ private String blockIdArrayToString(long ids[]) { * @throws IOException */ private boolean processCommandFromActive(DatanodeCommand cmd, - BPServiceActor actor) throws IOException { + InetSocketAddress nnSocketAddress) throws IOException { final BlockCommand bcmd = cmd instanceof BlockCommand? 
(BlockCommand)cmd: null; final BlockIdCommand blockIdCmd = @@ -766,7 +773,7 @@ assert getBlockPoolId().equals(bp) : dn.finalizeUpgradeForPool(bp); break; case DatanodeProtocol.DNA_RECOVERBLOCK: - String who = "NameNode at " + actor.getNNSocketAddress(); + String who = "NameNode at " + nnSocketAddress; dn.getBlockRecoveryWorker().recoverBlocks(who, ((BlockRecoveryCommand)cmd).getRecoveringBlocks()); break; @@ -808,10 +815,11 @@ assert getBlockPoolId().equals(bp) : * DNA_REGISTER which should be handled earlier itself. */ private boolean processCommandFromStandby(DatanodeCommand cmd, - BPServiceActor actor) throws IOException { + InetSocketAddress nnSocketAddress) throws IOException { switch(cmd.getAction()) { case DatanodeProtocol.DNA_ACCESSKEYUPDATE: - LOG.info("DatanodeCommand action from standby: DNA_ACCESSKEYUPDATE"); + LOG.info("DatanodeCommand action from standby NN {}: DNA_ACCESSKEYUPDATE", + nnSocketAddress); if (dn.isBlockTokenEnabled) { dn.blockPoolTokenSecretManager.addKeys( getBlockPoolId(), @@ -827,10 +835,12 @@ private boolean processCommandFromStandby(DatanodeCommand cmd, case DatanodeProtocol.DNA_CACHE: case DatanodeProtocol.DNA_UNCACHE: case DatanodeProtocol.DNA_ERASURE_CODING_RECONSTRUCTION: - LOG.warn("Got a command from standby NN - ignoring command:" + cmd.getAction()); + LOG.warn("Got a command from standby NN {} - ignoring command: {}", + nnSocketAddress, cmd.getAction()); break; default: - LOG.warn("Unknown DatanodeCommand action: " + cmd.getAction()); + LOG.warn("Unknown DatanodeCommand action: {} from standby NN {}", + cmd.getAction(), nnSocketAddress); } return true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index a436c94dc206d..71834f8965f5e 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java 
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.datanode; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY; import static org.apache.hadoop.util.Time.monotonicNow; import java.io.Closeable; @@ -34,9 +36,12 @@ import java.util.TreeSet; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; @@ -57,6 +62,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports; @@ -66,13 +72,15 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.VersionUtil; import org.slf4j.Logger; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; /** * A thread per active or standby namenode to perform: @@ -94,6 +102,8 @@ class BPServiceActor implements Runnable { volatile long lastCacheReport = 0; private final Scheduler scheduler; + private final Object sendIBRLock; + private final ExecutorService ibrExecutorService; Thread bpThread; DatanodeProtocolClientSideTranslatorPB bpNamenode; @@ -149,6 +159,10 @@ enum RunningState { } commandProcessingThread = new CommandProcessingThread(this); commandProcessingThread.start(); + sendIBRLock = new Object(); + ibrExecutorService = Executors.newSingleThreadExecutor( + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("ibr-executor-%d").build()); } public DatanodeRegistration getBpRegistration() { @@ -187,10 +201,13 @@ private String getNameNodeAddress() { Map getActorInfoMap() { final Map info = new HashMap(); info.put("NamenodeAddress", getNameNodeAddress()); + info.put("NamenodeHaState", state != null ? 
state.toString() : "Unknown"); info.put("BlockPoolID", bpos.getBlockPoolId()); info.put("ActorState", getRunningState()); info.put("LastHeartbeat", String.valueOf(getScheduler().getLastHearbeatTime())); + info.put("LastHeartbeatResponseTime", + String.valueOf(getScheduler().getLastHeartbeatResponseTime())); info.put("LastBlockReport", String.valueOf(getScheduler().getLastBlockReportTime())); info.put("maxBlockReportSize", String.valueOf(getMaxBlockReportSize())); @@ -309,10 +326,10 @@ private void connectToNNAndHandshake() throws IOException { void triggerBlockReportForTests() { synchronized (ibrManager) { scheduler.scheduleHeartbeat(); - long oldBlockReportTime = scheduler.nextBlockReportTime; + long oldBlockReportTime = scheduler.getNextBlockReportTime(); scheduler.forceFullBlockReportNow(); ibrManager.notifyAll(); - while (oldBlockReportTime == scheduler.nextBlockReportTime) { + while (oldBlockReportTime == scheduler.getNextBlockReportTime()) { try { ibrManager.wait(100); } catch (InterruptedException e) { @@ -368,8 +385,10 @@ List blockReport(long fullBrLeaseId) throws IOException { // we have a chance that we will miss the delHint information // or we will report an RBW replica after the BlockReport already reports // a FINALIZED one. - ibrManager.sendIBRs(bpNamenode, bpRegistration, - bpos.getBlockPoolId(), getRpcMetricSuffix()); + synchronized (sendIBRLock) { + ibrManager.sendIBRs(bpNamenode, bpRegistration, + bpos.getBlockPoolId(), getRpcMetricSuffix()); + } long brCreateStartTime = monotonicNow(); Map perVolumeBlockLists = @@ -402,7 +421,7 @@ List blockReport(long fullBrLeaseId) throws IOException { // Below split threshold, send all reports in a single message. 
DatanodeCommand cmd = bpNamenode.blockReport( bpRegistration, bpos.getBlockPoolId(), reports, - new BlockReportContext(1, 0, reportId, fullBrLeaseId, true)); + new BlockReportContext(1, 0, reportId, fullBrLeaseId)); blockReportSizes.add( calculateBlockReportPBSize(useBlocksBuffer, reports)); numRPCs = 1; @@ -417,7 +436,7 @@ List blockReport(long fullBrLeaseId) throws IOException { DatanodeCommand cmd = bpNamenode.blockReport( bpRegistration, bpos.getBlockPoolId(), singleReport, new BlockReportContext(reports.length, r, reportId, - fullBrLeaseId, true)); + fullBrLeaseId)); blockReportSizes.add( calculateBlockReportPBSize(useBlocksBuffer, singleReport)); numReportsSent++; @@ -435,8 +454,9 @@ List blockReport(long fullBrLeaseId) throws IOException { dn.getMetrics().addBlockReport(brSendCost, getRpcMetricSuffix()); final int nCmds = cmds.size(); LOG.info((success ? "S" : "Uns") + - "uccessfully sent block report 0x" + - Long.toHexString(reportId) + ", containing " + reports.length + + "uccessfully sent block report 0x" + Long.toHexString(reportId) + + " with lease ID 0x" + Long.toHexString(fullBrLeaseId) + " to namenode: " + nnAddr + + ", containing " + reports.length + " storage report(s), of which we sent " + numReportsSent + "." + " The reports had " + totalBlockCount + " total blocks and used " + numRPCs + @@ -531,11 +551,11 @@ HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) volumeFailureSummary.getFailedStorageLocations().length : 0; final boolean outliersReportDue = scheduler.isOutliersReportDue(now); final SlowPeerReports slowPeers = - outliersReportDue && dn.getPeerMetrics() != null ? + outliersReportDue && dnConf.peerStatsEnabled && dn.getPeerMetrics() != null ? SlowPeerReports.create(dn.getPeerMetrics().getOutliers()) : SlowPeerReports.EMPTY_REPORT; final SlowDiskReports slowDisks = - outliersReportDue && dn.getDiskMetrics() != null ? + outliersReportDue && dnConf.diskStatsEnabled && dn.getDiskMetrics() != null ? 
SlowDiskReports.create(dn.getDiskMetrics().getDiskOutliersStats()) : SlowDiskReports.EMPTY_REPORT; @@ -544,13 +564,15 @@ HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) dn.getFSDataset().getCacheCapacity(), dn.getFSDataset().getCacheUsed(), dn.getXmitsInProgress(), - dn.getXceiverCount(), + dn.getActiveTransferThreadCount(), numFailedVolumes, volumeFailureSummary, requestBlockReportLease, slowPeers, slowDisks); + scheduler.updateLastHeartbeatResponseTime(monotonicNow()); + if (outliersReportDue) { // If the report was due and successfully sent, schedule the next one. scheduler.scheduleNextOutlierReport(); @@ -572,11 +594,11 @@ void start() { } bpThread = new Thread(this); bpThread.setDaemon(true); // needed for JUnit testing - bpThread.start(); if (lifelineSender != null) { lifelineSender.start(); } + bpThread.start(); } private String formatThreadName( @@ -599,6 +621,9 @@ void stop() { if (commandProcessingThread != null) { commandProcessingThread.interrupt(); } + if (ibrExecutorService != null && !ibrExecutorService.isShutdown()) { + ibrExecutorService.shutdownNow(); + } } //This must be called only by blockPoolManager @@ -613,13 +638,18 @@ void join() { } catch (InterruptedException ie) { } } - //Cleanup method to be called by current thread before exiting. + // Cleanup method to be called by current thread before exiting. + // Any Thread / ExecutorService started by BPServiceActor can be shutdown + // here. 
private synchronized void cleanUp() { shouldServiceRun = false; - IOUtils.cleanup(null, bpNamenode); - IOUtils.cleanup(null, lifelineSender); + IOUtils.cleanupWithLogger(null, bpNamenode); + IOUtils.cleanupWithLogger(null, lifelineSender); bpos.shutdownActor(this); + if (!ibrExecutorService.isShutdown()) { + ibrExecutorService.shutdownNow(); + } } private void handleRollingUpgradeStatus(HeartbeatResponse resp) throws IOException { @@ -661,6 +691,8 @@ private void offerService() throws Exception { // Every so often, send heartbeat or block-report // final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime); + LOG.debug("BP offer service run start time: {}, sendHeartbeat: {}", startTime, + sendHeartbeat); HeartbeatResponse resp = null; if (sendHeartbeat) { // @@ -673,6 +705,8 @@ private void offerService() throws Exception { boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) && scheduler.isBlockReportDue(startTime); if (!dn.areHeartbeatsDisabledForTests()) { + LOG.debug("Before sending heartbeat to namenode {}, the state of the namenode known" + + " to datanode so far is {}", this.getNameNodeAddress(), state); resp = sendHeartBeat(requestBlockReportLease); assert resp != null; if (resp.getFullBlockReportLeaseId() != 0) { @@ -697,7 +731,12 @@ private void offerService() throws Exception { // that we should actually process. 
bpos.updateActorStatesFromHeartbeat( this, resp.getNameNodeHaState()); - state = resp.getNameNodeHaState().getState(); + HAServiceState stateFromResp = resp.getNameNodeHaState().getState(); + if (state != stateFromResp) { + LOG.info("After receiving heartbeat response, updating state of namenode {} to {}", + this.getNameNodeAddress(), stateFromResp); + } + state = stateFromResp; if (state == HAServiceState.ACTIVE) { handleRollingUpgradeStatus(resp); @@ -705,11 +744,6 @@ private void offerService() throws Exception { commandProcessingThread.enqueue(resp.getCommands()); } } - if (!dn.areIBRDisabledForTests() && - (ibrManager.sendImmediately()|| sendHeartbeat)) { - ibrManager.sendIBRs(bpNamenode, bpRegistration, - bpos.getBlockPoolId(), getRpcMetricSuffix()); - } List cmds = null; boolean forceFullBr = @@ -745,6 +779,9 @@ private void offerService() throws Exception { shouldServiceRun = false; return; } + if (InvalidBlockReportLeaseException.class.getName().equals(reClass)) { + fullBlockReportLeaseId = 0; + } LOG.warn("RemoteException in offerService", re); sleepAfterException(); } catch (IOException e) { @@ -762,6 +799,7 @@ private void sleepAfterException() { long sleepTime = Math.min(1000, dnConf.heartBeatInterval); Thread.sleep(sleepTime); } catch (InterruptedException ie) { + LOG.info("BPServiceActor {} is interrupted", this); Thread.currentThread().interrupt(); } } @@ -873,6 +911,10 @@ public void run() { initialRegistrationComplete.countDown(); } + // IBR tasks to be handled separately from offerService() in order to + // improve performance of offerService(), which can now focus only on + // FBR and heartbeat. 
+ ibrExecutorService.submit(new IBRTaskHandler()); while (shouldRun()) { try { offerService(); @@ -930,10 +972,12 @@ void reRegister() throws IOException { void triggerBlockReport(BlockReportOptions options) { if (options.isIncremental()) { - LOG.info(bpos.toString() + ": scheduling an incremental block report."); + LOG.info(bpos.toString() + ": scheduling an incremental block report " + + "to namenode: " + nnAddr + "."); ibrManager.triggerIBR(true); } else { - LOG.info(bpos.toString() + ": scheduling a full block report."); + LOG.info(bpos.toString() + ": scheduling a full block report " + + "to namenode: " + nnAddr + "."); synchronized(ibrManager) { scheduler.forceFullBlockReportNow(); ibrManager.notifyAll(); @@ -958,6 +1002,8 @@ private void processQueueMessages() { while (!duplicateQueue.isEmpty()) { BPServiceActorAction actionItem = duplicateQueue.remove(); try { + LOG.debug("BPServiceActor ( {} ) processing queued messages. Action item: {}", this, + actionItem); actionItem.reportTo(bpNamenode, bpRegistration); } catch (BPServiceActorActionException baae) { LOG.warn(baae.getMessage() + nnAddr , baae); @@ -989,7 +1035,7 @@ public void close() { } catch (InterruptedException e) { Thread.currentThread().interrupt(); } - IOUtils.cleanup(null, lifelineNamenode); + IOUtils.cleanupWithLogger(null, lifelineNamenode); } @Override @@ -1101,6 +1147,34 @@ private void sendLifeline() throws IOException { } } + class IBRTaskHandler implements Runnable { + + @Override + public void run() { + LOG.info("Starting IBR Task Handler."); + while (shouldRun()) { + try { + final long startTime = scheduler.monotonicNow(); + final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime); + if (!dn.areIBRDisabledForTests() && + (ibrManager.sendImmediately() || sendHeartbeat)) { + synchronized (sendIBRLock) { + ibrManager.sendIBRs(bpNamenode, bpRegistration, + bpos.getBlockPoolId(), getRpcMetricSuffix()); + } + } + // There is no work to do; sleep until heartbeat timer elapses, + // 
or work arrives, and then iterate again. + ibrManager.waitTillNextIBR(scheduler.getHeartbeatWaitTime()); + } catch (Throwable t) { + LOG.error("Exception in IBRTaskHandler.", t); + sleepAndLogInterrupts(5000, "offering IBR service"); + } + } + } + + } + /** * Utility class that wraps the timestamp computations for scheduling * heartbeats and block reports. @@ -1109,8 +1183,8 @@ static class Scheduler { // nextBlockReportTime and nextHeartbeatTime may be assigned/read // by testing threads (through BPServiceActor#triggerXXX), while also // assigned/read by the actor thread. - @VisibleForTesting - volatile long nextBlockReportTime = monotonicNow(); + private final AtomicLong nextBlockReportTime = + new AtomicLong(monotonicNow()); @VisibleForTesting volatile long nextHeartbeatTime = monotonicNow(); @@ -1124,6 +1198,9 @@ static class Scheduler { @VisibleForTesting volatile long lastHeartbeatTime = monotonicNow(); + @VisibleForTesting + private volatile long lastHeartbeatResponseTime = -1; + @VisibleForTesting boolean resetBlockReportTime = true; @@ -1135,8 +1212,8 @@ static class Scheduler { private final long heartbeatIntervalMs; private final long lifelineIntervalMs; - private final long blockReportIntervalMs; - private final long outliersReportIntervalMs; + private volatile long blockReportIntervalMs; + private volatile long outliersReportIntervalMs; Scheduler(long heartbeatIntervalMs, long lifelineIntervalMs, long blockReportIntervalMs, long outliersReportIntervalMs) { @@ -1172,6 +1249,10 @@ void updateLastHeartbeatTime(long heartbeatTime) { lastHeartbeatTime = heartbeatTime; } + void updateLastHeartbeatResponseTime(long heartbeatTime) { + this.lastHeartbeatResponseTime = heartbeatTime; + } + void updateLastBlockReportTime(long blockReportTime) { lastBlockReportTime = blockReportTime; } @@ -1184,6 +1265,10 @@ long getLastHearbeatTime() { return (monotonicNow() - lastHeartbeatTime)/1000; } + private long getLastHeartbeatResponseTime() { + return (monotonicNow() - 
lastHeartbeatResponseTime) / 1000; + } + long getLastBlockReportTime() { return (monotonicNow() - lastBlockReportTime)/1000; } @@ -1203,7 +1288,7 @@ boolean isLifelineDue(long startTime) { } boolean isBlockReportDue(long curTime) { - return nextBlockReportTime - curTime <= 0; + return nextBlockReportTime.get() - curTime <= 0; } boolean isOutliersReportDue(long curTime) { @@ -1212,6 +1297,7 @@ boolean isOutliersReportDue(long curTime) { void forceFullBlockReportNow() { forceFullBlockReport.set(true); + resetBlockReportTime = true; } /** @@ -1227,15 +1313,15 @@ void forceFullBlockReportNow() { long scheduleBlockReport(long delay, boolean isRegistration) { if (delay > 0) { // send BR after random delay // Numerical overflow is possible here and is okay. - nextBlockReportTime = - monotonicNow() + ThreadLocalRandom.current().nextInt((int) (delay)); + nextBlockReportTime.getAndSet( + monotonicNow() + ThreadLocalRandom.current().nextInt((int) (delay))); } else { // send at next heartbeat - nextBlockReportTime = monotonicNow(); + nextBlockReportTime.getAndSet(monotonicNow()); } resetBlockReportTime = isRegistration; // reset future BRs for // randomness, post first block report to avoid regular BRs from all // DN's coming at one time. - return nextBlockReportTime; + return nextBlockReportTime.get(); } /** @@ -1248,8 +1334,8 @@ void scheduleNextBlockReport() { // If we have sent the first set of block reports, then wait a random // time before we start the periodic block reports. if (resetBlockReportTime) { - nextBlockReportTime = monotonicNow() + - ThreadLocalRandom.current().nextInt((int)(blockReportIntervalMs)); + nextBlockReportTime.getAndSet(monotonicNow() + + ThreadLocalRandom.current().nextInt((int) (blockReportIntervalMs))); resetBlockReportTime = false; } else { /* say the last block report was at 8:20:14. The current report @@ -1259,17 +1345,16 @@ void scheduleNextBlockReport() { * 2) unexpected like 21:35:43, next report should be at 2:20:14 * on the next day. 
*/ - long factor = - (monotonicNow() - nextBlockReportTime + blockReportIntervalMs) - / blockReportIntervalMs; + long factor = (monotonicNow() - nextBlockReportTime.get() + + blockReportIntervalMs) / blockReportIntervalMs; if (factor != 0) { - nextBlockReportTime += factor * blockReportIntervalMs; + nextBlockReportTime.getAndAdd(factor * blockReportIntervalMs); } else { // If the difference between the present time and the scheduled // time is very less, the factor can be 0, so in that case, we can // ignore that negligible time, spent while sending the BRss and // schedule the next BR after the blockReportInterval. - nextBlockReportTime += blockReportIntervalMs; + nextBlockReportTime.getAndAdd(blockReportIntervalMs); } } } @@ -1279,7 +1364,39 @@ long getHeartbeatWaitTime() { } long getLifelineWaitTime() { - return nextLifelineTime - monotonicNow(); + long waitTime = nextLifelineTime - monotonicNow(); + return waitTime > 0 ? waitTime : 0; + } + + @VisibleForTesting + long getNextBlockReportTime() { + return nextBlockReportTime.get(); + } + + @VisibleForTesting + void setNextBlockReportTime(long nextBlockReportTime) { + this.nextBlockReportTime.getAndSet(nextBlockReportTime); + } + + long getBlockReportIntervalMs() { + return this.blockReportIntervalMs; + } + + void setBlockReportIntervalMs(long intervalMs) { + Preconditions.checkArgument(intervalMs > 0, + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY + " should be larger than 0"); + this.blockReportIntervalMs = intervalMs; + } + + void setOutliersReportIntervalMs(long intervalMs) { + Preconditions.checkArgument(intervalMs > 0, + DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY + " should be larger than 0"); + this.outliersReportIntervalMs = intervalMs; + } + + @VisibleForTesting + long getOutliersReportIntervalMs() { + return this.outliersReportIntervalMs; } /** @@ -1312,6 +1429,10 @@ public void run() { processQueue(); } catch (Throwable t) { LOG.error("{} encountered fatal exception and exit.", getName(), t); + runningState = 
RunningState.FAILED; + } finally { + LOG.warn("Ending command processor service for: " + this); + shouldServiceRun = false; } } @@ -1327,6 +1448,7 @@ private void processQueue() { dn.getMetrics().incrNumProcessedCommands(); } catch (InterruptedException e) { LOG.error("{} encountered interrupt and exit.", getName()); + Thread.currentThread().interrupt(); // ignore unless thread was specifically interrupted. if (Thread.interrupted()) { break; @@ -1369,7 +1491,7 @@ private boolean processCommand(DatanodeCommand[] cmds) { dn.getMetrics().addNumProcessedCommands(processCommandsMs); } if (processCommandsMs > dnConf.getProcessCommandsThresholdMs()) { - LOG.info("Took {} ms to process {} commands from NN", + LOG.warn("Took {} ms to process {} commands from NN", processCommandsMs, cmds.length); } } @@ -1398,4 +1520,11 @@ void enqueue(DatanodeCommand[] cmds) throws InterruptedException { dn.getMetrics().incrActorCmdQueueLength(1); } } + + @VisibleForTesting + void stopCommandProcessingThread() { + if (commandProcessingThread != null) { + commandProcessingThread.interrupt(); + } + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java index 3388855f8fbdf..265267da887d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSUtilClient; @@ -480,8 +480,9 @@ void compute() throws IOException { // Before populating 
the blockChecksum at this index, record the byte // offset where it will begin. blockChecksumPositions[idx] = blockChecksumBuf.getLength(); + ExtendedBlock block = null; try { - ExtendedBlock block = getInternalBlock(numDataUnits, idx); + block = getInternalBlock(numDataUnits, idx); LiveBlockInfo liveBlkInfo = liveDns.get((byte) idx); if (liveBlkInfo == null) { @@ -502,7 +503,9 @@ void compute() throws IOException { break; // done with the computation, simply return. } } catch (IOException e) { - LOG.warn("Failed to get the checksum", e); + LOG.warn("Failed to get the checksum for block {} at index {} " + + "in blockGroup {}", block, idx, blockGroup, e); + throw e; } } @@ -594,7 +597,7 @@ private ExtendedBlock getInternalBlock(int numDataUnits, int idx) { private void checksumBlock(ExtendedBlock block, int blockIdx, Token blockToken, DatanodeInfo targetDatanode) throws IOException { - int timeout = 3000; + int timeout = getDatanode().getDnConf().getEcChecksumSocketTimeout(); try (IOStreamPair pair = getDatanode().connectToDN(targetDatanode, timeout, block, blockToken)) { @@ -700,24 +703,25 @@ private void recalculateChecksum(int errBlkIndex, long blockLength) blockGroup, ecPolicy, blockIndices, datanodes, errIndices); BlockChecksumType groupChecksumType = getBlockChecksumOptions().getBlockChecksumType(); - final StripedBlockChecksumReconstructor checksumRecon = + try (StripedBlockChecksumReconstructor checksumRecon = groupChecksumType == BlockChecksumType.COMPOSITE_CRC ? new StripedBlockChecksumCompositeCrcReconstructor( getDatanode().getErasureCodingWorker(), stripedReconInfo, blockChecksumBuf, blockLength) : new StripedBlockChecksumMd5CrcReconstructor( getDatanode().getErasureCodingWorker(), stripedReconInfo, - blockChecksumBuf, blockLength); - checksumRecon.reconstruct(); - - DataChecksum checksum = checksumRecon.getChecksum(); - long crcPerBlock = checksum.getChecksumSize() <= 0 ? 
0 - : checksumRecon.getChecksumDataLen() / checksum.getChecksumSize(); - setOrVerifyChecksumProperties(errBlkIndex, - checksum.getBytesPerChecksum(), crcPerBlock, - checksum.getChecksumType()); - LOG.debug("Recalculated checksum for the block index:{}, checksum={}", - errBlkIndex, checksumRecon.getDigestObject()); + blockChecksumBuf, blockLength)) { + checksumRecon.reconstruct(); + + DataChecksum checksum = checksumRecon.getChecksum(); + long crcPerBlock = checksum.getChecksumSize() <= 0 ? 0 + : checksumRecon.getChecksumDataLen() / checksum.getChecksumSize(); + setOrVerifyChecksumProperties(errBlkIndex, + checksum.getBytesPerChecksum(), crcPerBlock, + checksum.getChecksumType()); + LOG.debug("Recalculated checksum for the block index:{}, checksum={}", + errBlkIndex, checksumRecon.getDigestObject()); + } } private void setOrVerifyChecksumProperties(int blockIdx, int bpc, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java index 9a7b6bcf7e52e..95bc2d6c23856 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java @@ -29,11 +29,11 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.slf4j.Logger; /** @@ -158,7 +158,7 @@ void refreshNamenodes(Configuration conf) newLifelineAddressMap = DFSUtil.getNNLifelineRpcAddressesForCluster(conf); } catch (IOException ioe) { - LOG.warn("Unable to get NameNode addresses."); + LOG.warn("Unable to get NameNode addresses.", ioe); } if (newAddressMap == null || newAddressMap.isEmpty()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java index 539baf114d6fe..5c8e6f48ce70d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java @@ -47,9 +47,9 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.util.Daemon; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Manages storage for the set of BlockPoolSlices which share a particular diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index 2231aeac300a8..edc22e8bbcb5d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -58,13 
+58,13 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.Span; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Span; +import org.apache.hadoop.tracing.Tracer; import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_DONTNEED; import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.SYNC_FILE_RANGE_WRITE; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; /** A class that receives a block and writes to its own disk, meanwhile @@ -307,6 +307,17 @@ Replica getReplica() { return replicaInfo; } + public void releaseAnyRemainingReservedSpace() { + if (replicaInfo != null) { + if (replicaInfo.getReplicaInfo().getBytesReserved() > 0) { + LOG.warn("Block {} has not released the reserved bytes. " + + "Releasing {} bytes as part of close.", replicaInfo.getBlockId(), + replicaInfo.getReplicaInfo().getBytesReserved()); + replicaInfo.releaseAllBytesReserved(); + } + } + } + /** * close files and release volume reference. 
*/ @@ -369,7 +380,7 @@ public void close() throws IOException { streams.close(); } if (replicaHandler != null) { - IOUtils.cleanup(null, replicaHandler); + IOUtils.cleanupWithLogger(null, replicaHandler); replicaHandler = null; } if (measuredFlushTime) { @@ -878,7 +889,7 @@ private int receivePacket() throws IOException { */ private void trackSendPacketToLastNodeInPipeline(final long elapsedMs) { final DataNodePeerMetrics peerMetrics = datanode.getPeerMetrics(); - if (peerMetrics != null && isPenultimateNode) { + if (datanode.getDnConf().peerStatsEnabled && peerMetrics != null && isPenultimateNode) { peerMetrics.addSendPacketDownstream(mirrorNameForMetrics, elapsedMs); } } @@ -1093,7 +1104,7 @@ private void initPerfMonitoring(DatanodeInfo[] downstreams) { if (downstreams != null && downstreams.length > 0) { downstreamDNs = downstreams; isPenultimateNode = (downstreams.length == 1); - if (isPenultimateNode && datanode.getPeerMetrics() != null) { + if (isPenultimateNode && datanode.getDnConf().peerStatsEnabled) { mirrorNameForMetrics = (downstreams[0].getInfoSecurePort() != 0 ? downstreams[0].getInfoSecureAddr() : downstreams[0].getInfoAddr()); LOG.debug("Will collect peer metrics for downstream node {}", @@ -1368,6 +1379,7 @@ public void close() { */ @Override public void run() { + datanode.metrics.incrDataNodePacketResponderCount(); boolean lastPacketInBlock = false; final long startTime = ClientTraceLog.isInfoEnabled() ? 
System.nanoTime() : 0; while (isRunning() && !lastPacketInBlock) { @@ -1505,6 +1517,9 @@ public void run() { } } } + // Any exception will be caught and processed in the previous loop, so we + // will always arrive here when the thread exiting + datanode.metrics.decrDataNodePacketResponderCount(); LOG.info(myString + " terminating"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java index db52d073fe387..d4687e8331adf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -119,6 +119,7 @@ protected void recover() throws IOException { List syncList = new ArrayList<>(locs.length); int errorCount = 0; int candidateReplicaCnt = 0; + DataNodeFaultInjector.get().delay(); // Check generation stamps, replica size and state. 
Replica must satisfy // the following criteria to be included in syncList for recovery: @@ -600,17 +601,22 @@ public Daemon recoverBlocks(final String who, Daemon d = new Daemon(datanode.threadGroup, new Runnable() { @Override public void run() { - for(RecoveringBlock b : blocks) { - try { - logRecoverBlock(who, b); - if (b.isStriped()) { - new RecoveryTaskStriped((RecoveringStripedBlock) b).recover(); - } else { - new RecoveryTaskContiguous(b).recover(); + datanode.metrics.incrDataNodeBlockRecoveryWorkerCount(); + try { + for (RecoveringBlock b : blocks) { + try { + logRecoverBlock(who, b); + if (b.isStriped()) { + new RecoveryTaskStriped((RecoveringStripedBlock) b).recover(); + } else { + new RecoveryTaskContiguous(b).recover(); + } + } catch (IOException e) { + LOG.warn("recover Block: {} FAILED: {}", b, e); } - } catch (IOException e) { - LOG.warn("recoverBlocks FAILED: " + b, e); } + } finally { + datanode.metrics.decrDataNodeBlockRecoveryWorkerCount(); } } }); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java index 6b1b96fb02655..6dcfad418474a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java @@ -18,8 +18,12 @@ package org.apache.hadoop.hdfs.server.datanode; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_JOIN_TIMEOUT_MSEC_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_JOIN_TIMEOUT_MSEC_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT; @@ -28,11 +32,11 @@ import java.util.TreeMap; import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.datanode.VolumeScanner.ScanResultHandler; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference; @@ -66,6 +70,12 @@ public class BlockScanner { */ private Conf conf; + /** + * Timeout duration in milliseconds waiting for {@link VolumeScanner} to stop + * inside {@link #removeAllVolumeScanners}. 
+ */ + private long joinVolumeScannersTimeOutMs; + @VisibleForTesting void setConf(Conf conf) { this.conf = conf; @@ -112,6 +122,7 @@ static class Conf { final long maxStalenessMs; final long scanPeriodMs; final long cursorSaveMs; + final boolean skipRecentAccessed; final Class resultHandler; private static long getUnitTestLong(Configuration conf, String key, @@ -163,6 +174,9 @@ private static long getConfiguredScanPeriodMs(Configuration conf) { this.cursorSaveMs = Math.max(0L, getUnitTestLong(conf, INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS_DEFAULT)); + this.skipRecentAccessed = conf.getBoolean( + DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED, + DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT); if (allowUnitTestSettings) { this.resultHandler = (Class) conf.getClass(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER, @@ -179,6 +193,9 @@ public BlockScanner(DataNode datanode) { public BlockScanner(DataNode datanode, Configuration conf) { this.datanode = datanode; + setJoinVolumeScannersTimeOutMs( + conf.getLong(DFS_BLOCK_SCANNER_VOLUME_JOIN_TIMEOUT_MSEC_KEY, + DFS_BLOCK_SCANNER_VOLUME_JOIN_TIMEOUT_MSEC_DEFAULT)); this.conf = new Conf(conf); if (isEnabled()) { LOG.info("Initialized block scanner with targetBytesPerSec {}", @@ -198,6 +215,13 @@ public boolean isEnabled() { return (conf.scanPeriodMs > 0) && (conf.targetBytesPerSec > 0); } + /** + * Returns true if there is any scanner thread registered. + */ + public synchronized boolean hasAnyRegisteredScanner() { + return !scanners.isEmpty(); + } + /** * Set up a scanner for the given block pool and volume. * @@ -228,7 +252,7 @@ public synchronized void addVolumeScanner(FsVolumeReference ref) { if (!success) { // If we didn't create a new VolumeScanner object, we don't // need this reference to the volume. 
- IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); } } } @@ -262,7 +286,10 @@ public synchronized void removeVolumeScanner(FsVolumeSpi volume) { /** * Stops and removes all volume scanners. * - * This function will block until all the volume scanners have stopped. + * This function is called on shutdown. It will return even if some of + * the scanners don't terminate in time. Since the scanners are daemon + * threads and do not alter the block content, it is safe to ignore + * such conditions on shutdown. */ public synchronized void removeAllVolumeScanners() { for (Entry entry : scanners.entrySet()) { @@ -270,7 +297,7 @@ public synchronized void removeAllVolumeScanners() { } for (Entry entry : scanners.entrySet()) { Uninterruptibles.joinUninterruptibly(entry.getValue(), - 5, TimeUnit.MINUTES); + getJoinVolumeScannersTimeOutMs(), TimeUnit.MILLISECONDS); } scanners.clear(); } @@ -346,6 +373,14 @@ synchronized void markSuspectBlock(String storageId, ExtendedBlock block) { scanner.markSuspectBlock(block); } + public long getJoinVolumeScannersTimeOutMs() { + return joinVolumeScannersTimeOutMs; + } + + public void setJoinVolumeScannersTimeOutMs(long joinScannersTimeOutMs) { + this.joinVolumeScannersTimeOutMs = joinScannersTimeOutMs; + } + @InterfaceAudience.Private public static class Servlet extends HttpServlet { private static final long serialVersionUID = 1L; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java index 6102a592c2661..f2724d062cea3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java @@ -34,6 +34,7 @@ import org.apache.commons.logging.Log; import org.apache.hadoop.fs.ChecksumException; +import 
org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.Block; @@ -50,13 +51,13 @@ import org.apache.hadoop.net.SocketOutputStream; import org.apache.hadoop.util.AutoCloseableLock; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.core.TraceScope; +import org.apache.hadoop.tracing.TraceScope; import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_DONTNEED; import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_SEQUENTIAL; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; /** @@ -255,7 +256,7 @@ class BlockSender implements java.io.Closeable { // the append write. ChunkChecksum chunkChecksum = null; final long replicaVisibleLength; - try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) { + try(AutoCloseableLock lock = datanode.data.acquireDatasetReadLock()) { replica = getReplica(block, datanode); replicaVisibleLength = replica.getVisibleLength(); } @@ -431,6 +432,7 @@ class BlockSender implements java.io.Closeable { ris = new ReplicaInputStreams( blockIn, checksumIn, volumeRef, fileIoProvider); } catch (IOException ioe) { + IOUtils.cleanupWithLogger(null, volumeRef); IOUtils.closeStream(this); org.apache.commons.io.IOUtils.closeQuietly(blockIn); org.apache.commons.io.IOUtils.closeQuietly(checksumIn); @@ -630,6 +632,7 @@ private int sendPacket(ByteBuffer pkt, int maxChunks, OutputStream out, * * Reporting of this case is done in DataXceiver#run */ + LOG.warn("Sending packets timed out.", e); } else { /* Exception while writing to the client. 
Connection closure from * the other end is mostly the case and we do not care much about @@ -650,8 +653,12 @@ private int sendPacket(ByteBuffer pkt, int maxChunks, OutputStream out, if (ioem.startsWith(EIO_ERROR)) { throw new DiskFileCorruptException("A disk IO error occurred", e); } + String causeMessage = e.getCause() != null ? e.getCause().getMessage() : ""; + causeMessage = causeMessage != null ? causeMessage : ""; if (!ioem.startsWith("Broken pipe") - && !ioem.startsWith("Connection reset")) { + && !ioem.startsWith("Connection reset") + && !causeMessage.startsWith("Broken pipe") + && !causeMessage.startsWith("Connection reset")) { LOG.error("BlockSender.sendChunks() exception: ", e); datanode.getBlockScanner().markSuspectBlock( ris.getVolumeRef().getVolume().getStorageID(), block); @@ -750,8 +757,8 @@ public void verifyChecksum(final byte[] buf, final int dataOffset, */ long sendBlock(DataOutputStream out, OutputStream baseStream, DataTransferThrottler throttler) throws IOException { - final TraceScope scope = datanode.getTracer(). 
- newScope("sendBlock_" + block.getBlockId()); + final TraceScope scope = FsTracer.get(null) + .newScope("sendBlock_" + block.getBlockId()); try { return doSendBlock(out, baseStream, throttler); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java index b56dd4ec223fa..9b5343321d30b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java @@ -62,6 +62,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BP_READY_TIMEOUT_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BP_READY_TIMEOUT_DEFAULT; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_EC_SOCKET_TIMEOUT_KEY; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CHECKSUM_EC_SOCKET_TIMEOUT_DEFAULT; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; @@ -72,6 +74,7 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil; import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.security.SaslPropertiesResolver; +import org.apache.hadoop.util.Preconditions; import java.util.concurrent.TimeUnit; @@ -84,6 +87,7 @@ public class DNConf { final int socketTimeout; final int socketWriteTimeout; final int socketKeepaliveTimeout; + final int ecChecksumSocketTimeout; private final int transferSocketSendBufferSize; private final int transferSocketRecvBufferSize; private final boolean tcpNoDelay; @@ -102,14 +106,14 @@ public class DNConf { final long readaheadLength; final long heartBeatInterval; private final long lifelineIntervalMs; - final long 
blockReportInterval; - final long blockReportSplitThreshold; - final boolean peerStatsEnabled; - final boolean diskStatsEnabled; - final long outliersReportIntervalMs; + volatile long blockReportInterval; + volatile long blockReportSplitThreshold; + volatile boolean peerStatsEnabled; + volatile boolean diskStatsEnabled; + volatile long outliersReportIntervalMs; final long ibrInterval; - final long initialBlockReportDelayMs; - final long cacheReportInterval; + volatile long initialBlockReportDelayMs; + volatile long cacheReportInterval; final long datanodeSlowIoWarningThresholdMs; final String minimumNameNodeVersion; @@ -145,6 +149,9 @@ public DNConf(final Configurable dn) { socketKeepaliveTimeout = getConf().getInt( DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT); + ecChecksumSocketTimeout = getConf().getInt( + DFS_CHECKSUM_EC_SOCKET_TIMEOUT_KEY, + DFS_CHECKSUM_EC_SOCKET_TIMEOUT_DEFAULT); this.transferSocketSendBufferSize = getConf().getInt( DFSConfigKeys.DFS_DATANODE_TRANSFER_SOCKET_SEND_BUFFER_SIZE_KEY, DFSConfigKeys.DFS_DATANODE_TRANSFER_SOCKET_SEND_BUFFER_SIZE_DEFAULT); @@ -208,19 +215,7 @@ public DNConf(final Configurable dn) { this.datanodeSlowIoWarningThresholdMs = getConf().getLong( DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_KEY, DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_DEFAULT); - - long initBRDelay = getConf().getTimeDuration( - DFS_BLOCKREPORT_INITIAL_DELAY_KEY, - DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT, - TimeUnit.SECONDS, TimeUnit.MILLISECONDS); - if (initBRDelay >= blockReportInterval) { - initBRDelay = 0; - DataNode.LOG.info(DFS_BLOCKREPORT_INITIAL_DELAY_KEY + " is " - + "greater than or equal to" + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY - + ". 
Setting initial delay to 0 msec:"); - } - initialBlockReportDelayMs = initBRDelay; - + initBlockReportDelay(); heartBeatInterval = getConf().getTimeDuration(DFS_HEARTBEAT_INTERVAL_KEY, DFS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); @@ -304,6 +299,19 @@ public DNConf(final Configurable dn) { ); } + private void initBlockReportDelay() { + long initBRDelay = getConf().getTimeDuration( + DFS_BLOCKREPORT_INITIAL_DELAY_KEY, + DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + if (initBRDelay >= blockReportInterval || initBRDelay < 0) { + initBRDelay = 0; + DataNode.LOG.info(DFS_BLOCKREPORT_INITIAL_DELAY_KEY + + " is greater than or equal to " + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY + + ". Setting initial delay to 0 msec."); + } + initialBlockReportDelayMs = initBRDelay; + } + // We get minimumNameNodeVersion via a method so it can be mocked out in tests. String getMinimumNameNodeVersion() { return this.minimumNameNodeVersion; @@ -372,6 +380,15 @@ public int getSocketWriteTimeout() { return socketWriteTimeout; } + /** + * Returns socket timeout for computing the checksum of EC blocks + * + * @return int socket timeout + */ + public int getEcChecksumSocketTimeout() { + return ecChecksumSocketTimeout; + } + /** * Returns the SaslPropertiesResolver configured for use with * DataTransferProtocol, or null if not configured. 
@@ -459,4 +476,50 @@ public boolean getPmemCacheRecoveryEnabled() { public long getProcessCommandsThresholdMs() { return processCommandsThresholdMs; } + + void setBlockReportInterval(long intervalMs) { + Preconditions.checkArgument(intervalMs > 0, + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY + " should be larger than 0"); + blockReportInterval = intervalMs; + } + + public long getBlockReportInterval() { + return blockReportInterval; + } + + void setCacheReportInterval(long intervalMs) { + Preconditions.checkArgument(intervalMs > 0, + DFS_CACHEREPORT_INTERVAL_MSEC_KEY + " should be larger than 0"); + cacheReportInterval = intervalMs; + } + + public long getCacheReportInterval() { + return cacheReportInterval; + } + + void setBlockReportSplitThreshold(long threshold) { + Preconditions.checkArgument(threshold >= 0, + DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY + " should be larger than or equal to 0"); + blockReportSplitThreshold = threshold; + } + + void setInitBRDelayMs(String delayMs) { + dn.getConf().set(DFS_BLOCKREPORT_INITIAL_DELAY_KEY, delayMs); + initBlockReportDelay(); + } + + void setPeerStatsEnabled(boolean enablePeerStats) { + peerStatsEnabled = enablePeerStats; + } + + public void setFileIoProfilingSamplingPercentage(int samplingPercentage) { + diskStatsEnabled = Util.isDiskStatsEnabled(samplingPercentage); + } + + public void setOutliersReportIntervalMs(String reportIntervalMs) { + dn.getConf().set(DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, reportIntervalMs); + outliersReportIntervalMs = getConf().getTimeDuration( + DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, + DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index d390c1e54232a..96c4ad9ae28d8 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -18,6 +18,19 @@ package org.apache.hadoop.hdfs.server.datanode; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DU_INTERVAL_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DU_INTERVAL_KEY; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_CLASSNAME; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_JITTER_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_JITTER_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; @@ -25,6 +38,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_INTERFACE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_NAMESERVER_KEY; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY; @@ -34,11 +49,27 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KERBEROS_PRINCIPAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_NETWORK_COUNTS_CACHE_MAX_SIZE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_NETWORK_COUNTS_CACHE_MAX_SIZE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OOB_TIMEOUT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OOB_TIMEOUT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PLUGINS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; @@ -50,8 +81,13 @@ import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_CREATE; import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_STREAMING_RECOVERY; import static org.apache.hadoop.util.ExitUtil.terminate; +import static org.apache.hadoop.util.Preconditions.checkNotNull; +import static org.apache.hadoop.util.Time.now; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.DU; +import org.apache.hadoop.fs.GetSpaceUsed; +import org.apache.hadoop.fs.WindowsGetSpaceUsed; import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.ReconfigurationProtocolService; import java.io.BufferedOutputStream; @@ -85,6 +121,7 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.Callable; +import 
java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -115,6 +152,8 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.datanode.checker.DatasetVolumeChecker; import org.apache.hadoop.hdfs.server.datanode.checker.StorageLocationChecker; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.BlockPoolSlice; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.util.AutoCloseableLock; import org.apache.hadoop.hdfs.client.BlockReportOptions; @@ -188,7 +227,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.ReadaheadPool; import org.apache.hadoop.io.nativeio.NativeIO; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.util.MBeans; @@ -202,33 +241,28 @@ import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.tracing.SpanReceiverInfo; -import org.apache.hadoop.tracing.TraceAdminPB.TraceAdminService; -import org.apache.hadoop.tracing.TraceAdminProtocol; -import org.apache.hadoop.tracing.TraceAdminProtocolPB; -import org.apache.hadoop.tracing.TraceAdminProtocolServerSideTranslatorPB; import org.apache.hadoop.tracing.TraceUtils; -import org.apache.hadoop.tracing.TracerConfigurationManager; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.util.ServicePlugin; +import org.apache.hadoop.util.Shell; 
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.concurrent.HadoopExecutors; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Tracer; import org.eclipse.jetty.util.ajax.JSON; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.protobuf.BlockingService; import org.slf4j.Logger; @@ -268,7 +302,7 @@ @InterfaceAudience.Private public class DataNode extends ReconfigurableBase implements InterDatanodeProtocol, ClientDatanodeProtocol, - TraceAdminProtocol, DataNodeMXBean, ReconfigurationProtocol { + DataNodeMXBean, ReconfigurationProtocol { public static final Logger LOG = LoggerFactory.getLogger(DataNode.class); static{ @@ -308,13 +342,31 @@ public class DataNode extends ReconfigurableBase Collections.unmodifiableList( Arrays.asList( DFS_DATANODE_DATA_DIR_KEY, - DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY)); + DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, + DFS_BLOCKREPORT_INITIAL_DELAY_KEY, + 
DFS_DATANODE_MAX_RECEIVER_THREADS_KEY, + DFS_CACHEREPORT_INTERVAL_MSEC_KEY, + DFS_DATANODE_PEER_STATS_ENABLED_KEY, + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY, + DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, + DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, + DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, + DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, + FS_DU_INTERVAL_KEY, + FS_GETSPACEUSED_JITTER_KEY, + FS_GETSPACEUSED_CLASSNAME)); public static final Log METRICS_LOG = LogFactory.getLog("DataNodeMetricsLog"); private static final String DATANODE_HTRACE_PREFIX = "datanode.htrace."; private final FileIoProvider fileIoProvider; + private static final String NETWORK_ERRORS = "networkErrors"; + /** * Use {@link NetUtils#createSocketAddr(String)} instead. */ @@ -348,11 +400,10 @@ public static InetSocketAddress createSocketAddr(String target) { DataNodeMetrics metrics; @Nullable - private DataNodePeerMetrics peerMetrics; - private DataNodeDiskMetrics diskMetrics; + private volatile DataNodePeerMetrics peerMetrics; + private volatile DataNodeDiskMetrics diskMetrics; private InetSocketAddress streamingAddr; - - // See the note below in incrDatanodeNetworkErrors re: concurrency. + private LoadingCache> datanodeNetworkCounts; private String hostName; @@ -395,7 +446,6 @@ public static InetSocketAddress createSocketAddr(String target) { private BlockRecoveryWorker blockRecoveryWorker; private ErasureCodingWorker ecWorker; private final Tracer tracer; - private final TracerConfigurationManager tracerConfigurationManager; private static final int NUM_CORES = Runtime.getRuntime() .availableProcessors(); private static final double CONGESTION_RATIO = 1.5; @@ -412,7 +462,7 @@ public static InetSocketAddress createSocketAddr(String target) { private static Tracer createTracer(Configuration conf) { return new Tracer.Builder("DataNode"). 
- conf(TraceUtils.wrapHadoopConf(DATANODE_HTRACE_PREFIX , conf)). + conf(TraceUtils.wrapHadoopConf(DATANODE_HTRACE_PREFIX, conf)). build(); } @@ -420,6 +470,11 @@ private static Tracer createTracer(Configuration conf) { private ScheduledThreadPoolExecutor metricsLoggerTimer; + private long startTime = 0; + + private DataTransferThrottler ecReconstuctReadThrottler; + private DataTransferThrottler ecReconstuctWriteThrottler; + /** * Creates a dummy DataNode for testing purpose. */ @@ -428,8 +483,6 @@ private static Tracer createTracer(Configuration conf) { DataNode(final Configuration conf) throws DiskErrorException { super(conf); this.tracer = createTracer(conf); - this.tracerConfigurationManager = - new TracerConfigurationManager(DATANODE_HTRACE_PREFIX, conf); this.fileIoProvider = new FileIoProvider(conf, this); this.fileDescriptorPassingDisabledReason = null; this.maxNumberOfBlocksToLog = 0; @@ -457,8 +510,6 @@ private static Tracer createTracer(Configuration conf) { final SecureResources resources) throws IOException { super(conf); this.tracer = createTracer(conf); - this.tracerConfigurationManager = - new TracerConfigurationManager(DATANODE_HTRACE_PREFIX, conf); this.fileIoProvider = new FileIoProvider(conf, this); this.blockScanner = new BlockScanner(this); this.lastDiskErrorCheck = 0; @@ -523,15 +574,25 @@ private static Tracer createTracer(Configuration conf) { .maximumSize(dncCacheMaxSize) .build(new CacheLoader>() { @Override - public Map load(String key) throws Exception { - final Map ret = new HashMap(); - ret.put("networkErrors", 0L); + public Map load(String key) { + final Map ret = new ConcurrentHashMap<>(); + ret.put(NETWORK_ERRORS, 0L); return ret; } }); initOOBTimeout(); this.storageLocationChecker = storageLocationChecker; + long ecReconstuctReadBandwidth = conf.getLongBytes( + DFSConfigKeys.DFS_DATANODE_EC_RECONSTRUCT_READ_BANDWIDTHPERSEC_KEY, + DFSConfigKeys.DFS_DATANODE_EC_RECONSTRUCT_READ_BANDWIDTHPERSEC_DEFAULT); + long 
ecReconstuctWriteBandwidth = conf.getLongBytes( + DFSConfigKeys.DFS_DATANODE_EC_RECONSTRUCT_WRITE_BANDWIDTHPERSEC_KEY, + DFSConfigKeys.DFS_DATANODE_EC_RECONSTRUCT_WRITE_BANDWIDTHPERSEC_DEFAULT); + this.ecReconstuctReadThrottler = ecReconstuctReadBandwidth > 0 ? + new DataTransferThrottler(100, ecReconstuctReadBandwidth) : null; + this.ecReconstuctWriteThrottler = ecReconstuctWriteBandwidth > 0 ? + new DataTransferThrottler(100, ecReconstuctWriteBandwidth) : null; } @Override // ReconfigurableBase @@ -546,83 +607,336 @@ protected Configuration getNewConf() { public String reconfigurePropertyImpl(String property, String newVal) throws ReconfigurationException { switch (property) { - case DFS_DATANODE_DATA_DIR_KEY: { - IOException rootException = null; + case DFS_DATANODE_DATA_DIR_KEY: { + IOException rootException = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + this.refreshVolumes(newVal); + return getConf().get(DFS_DATANODE_DATA_DIR_KEY); + } catch (IOException e) { + rootException = e; + } finally { + // Send a full block report to let NN acknowledge the volume changes. try { - LOG.info("Reconfiguring {} to {}", property, newVal); - this.refreshVolumes(newVal); - return getConf().get(DFS_DATANODE_DATA_DIR_KEY); + triggerBlockReport( + new BlockReportOptions.Factory().setIncremental(false).build()); } catch (IOException e) { - rootException = e; + LOG.warn("Exception while sending the block report after refreshing" + + " volumes {} to {}", property, newVal, e); + if (rootException == null) { + rootException = e; + } } finally { - // Send a full block report to let NN acknowledge the volume changes. 
- try { - triggerBlockReport( - new BlockReportOptions.Factory().setIncremental(false).build()); - } catch (IOException e) { - LOG.warn("Exception while sending the block report after refreshing" - + " volumes {} to {}", property, newVal, e); - if (rootException == null) { - rootException = e; - } - } finally { - if (rootException != null) { - throw new ReconfigurationException(property, newVal, - getConf().get(property), rootException); - } + if (rootException != null) { + throw new ReconfigurationException(property, newVal, + getConf().get(property), rootException); } } - break; } - case DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY: { - ReconfigurationException rootException = null; - try { - LOG.info("Reconfiguring {} to {}", property, newVal); - int movers; - if (newVal == null) { - // set to default - movers = DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; - } else { - movers = Integer.parseInt(newVal); - if (movers <= 0) { - rootException = new ReconfigurationException( - property, - newVal, - getConf().get(property), - new IllegalArgumentException( - "balancer max concurrent movers must be larger than 0")); - } - } - boolean success = xserver.updateBalancerMaxConcurrentMovers(movers); - if (!success) { + break; + } + case DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY: { + ReconfigurationException rootException = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + int movers; + if (newVal == null) { + // set to default + movers = DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; + } else { + movers = Integer.parseInt(newVal); + if (movers <= 0) { rootException = new ReconfigurationException( property, newVal, getConf().get(property), new IllegalArgumentException( - "Could not modify concurrent moves thread count")); + "balancer max concurrent movers must be larger than 0")); } - return Integer.toString(movers); - } catch (NumberFormatException nfe) { + } + boolean success = 
xserver.updateBalancerMaxConcurrentMovers(movers); + if (!success) { rootException = new ReconfigurationException( - property, newVal, getConf().get(property), nfe); - } finally { - if (rootException != null) { - LOG.warn(String.format( - "Exception in updating balancer max concurrent movers %s to %s", - property, newVal), rootException); - throw rootException; - } + property, + newVal, + getConf().get(property), + new IllegalArgumentException( + "Could not modify concurrent moves thread count")); + } + return Integer.toString(movers); + } catch (NumberFormatException nfe) { + rootException = new ReconfigurationException( + property, newVal, getConf().get(property), nfe); + } finally { + if (rootException != null) { + LOG.warn(String.format( + "Exception in updating balancer max concurrent movers %s to %s", + property, newVal), rootException); + throw rootException; } - break; } - default: - break; + break; + } + case DFS_BLOCKREPORT_INTERVAL_MSEC_KEY: + case DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY: + case DFS_BLOCKREPORT_INITIAL_DELAY_KEY: + return reconfBlockReportParameters(property, newVal); + case DFS_DATANODE_MAX_RECEIVER_THREADS_KEY: + return reconfDataXceiverParameters(property, newVal); + case DFS_CACHEREPORT_INTERVAL_MSEC_KEY: + return reconfCacheReportParameters(property, newVal); + case DFS_DATANODE_PEER_STATS_ENABLED_KEY: + case DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY: + case DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY: + case DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY: + return reconfSlowPeerParameters(property, newVal); + case DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY: + case DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY: + case DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY: + case DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY: + return reconfSlowDiskParameters(property, newVal); + case FS_DU_INTERVAL_KEY: + case FS_GETSPACEUSED_JITTER_KEY: + case FS_GETSPACEUSED_CLASSNAME: + return reconfDfsUsageParameters(property, newVal); + 
default: + break; } throw new ReconfigurationException( property, newVal, getConf().get(property)); } + private String reconfDataXceiverParameters(String property, String newVal) + throws ReconfigurationException { + String result; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + Preconditions.checkNotNull(getXferServer(), "DataXceiverServer has not been initialized."); + int threads = (newVal == null ? DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT : + Integer.parseInt(newVal)); + result = Integer.toString(threads); + getXferServer().setMaxXceiverCount(threads); + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + + private String reconfCacheReportParameters(String property, String newVal) + throws ReconfigurationException { + String result; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + Preconditions.checkNotNull(dnConf, "DNConf has not been initialized."); + long reportInterval = (newVal == null ? DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(reportInterval); + dnConf.setCacheReportInterval(reportInterval); + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + + private String reconfBlockReportParameters(String property, String newVal) + throws ReconfigurationException { + String result = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + if (property.equals(DFS_BLOCKREPORT_INTERVAL_MSEC_KEY)) { + Preconditions.checkNotNull(dnConf, "DNConf has not been initialized."); + long intervalMs = newVal == null ? 
DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT : + Long.parseLong(newVal); + result = Long.toString(intervalMs); + dnConf.setBlockReportInterval(intervalMs); + for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) { + if (bpos != null) { + for (BPServiceActor actor : bpos.getBPServiceActors()) { + actor.getScheduler().setBlockReportIntervalMs(intervalMs); + } + } + } + } else if (property.equals(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY)) { + Preconditions.checkNotNull(dnConf, "DNConf has not been initialized."); + long threshold = newVal == null ? DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT : + Long.parseLong(newVal); + result = Long.toString(threshold); + dnConf.setBlockReportSplitThreshold(threshold); + } else if (property.equals(DFS_BLOCKREPORT_INITIAL_DELAY_KEY)) { + Preconditions.checkNotNull(dnConf, "DNConf has not been initialized."); + int initialDelay = newVal == null ? DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT : + Integer.parseInt(newVal); + result = Integer.toString(initialDelay); + dnConf.setInitBRDelayMs(result); + } + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + + private String reconfSlowPeerParameters(String property, String newVal) + throws ReconfigurationException { + String result = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + if (property.equals(DFS_DATANODE_PEER_STATS_ENABLED_KEY)) { + Preconditions.checkNotNull(dnConf, "DNConf has not been initialized."); + if (newVal != null && !newVal.equalsIgnoreCase("true") + && !newVal.equalsIgnoreCase("false")) { + throw new IllegalArgumentException("Not a valid Boolean value for " + property + + " in reconfSlowPeerParameters"); + } + boolean enable = (newVal == null ? 
DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT : + Boolean.parseBoolean(newVal)); + result = Boolean.toString(enable); + dnConf.setPeerStatsEnabled(enable); + if (enable) { + // Create if it doesn't exist, overwrite if it does. + peerMetrics = DataNodePeerMetrics.create(getDisplayName(), getConf()); + } + } else if (property.equals(DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY)) { + Preconditions.checkNotNull(peerMetrics, "DataNode peer stats may be disabled."); + long minNodes = (newVal == null ? DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(minNodes); + peerMetrics.setMinOutlierDetectionNodes(minNodes); + } else if (property.equals(DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY)) { + Preconditions.checkNotNull(peerMetrics, "DataNode peer stats may be disabled."); + long threshold = (newVal == null ? DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(threshold); + peerMetrics.setLowThresholdMs(threshold); + } else if (property.equals(DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY)) { + Preconditions.checkNotNull(peerMetrics, "DataNode peer stats may be disabled."); + long minSamples = (newVal == null ? 
+ DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(minSamples); + peerMetrics.setMinOutlierDetectionSamples(minSamples); + } + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + + private String reconfSlowDiskParameters(String property, String newVal) + throws ReconfigurationException { + String result = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + if (property.equals(DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY)) { + checkNotNull(dnConf, "DNConf has not been initialized."); + String reportInterval = (newVal == null ? DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT : + newVal); + result = reportInterval; + dnConf.setOutliersReportIntervalMs(reportInterval); + for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) { + if (bpos != null) { + for (BPServiceActor actor : bpos.getBPServiceActors()) { + actor.getScheduler().setOutliersReportIntervalMs( + dnConf.outliersReportIntervalMs); + } + } + } + } else if (property.equals(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY)) { + checkNotNull(dnConf, "DNConf has not been initialized."); + int samplingPercentage = (newVal == null ? 
+ DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT : + Integer.parseInt(newVal)); + result = Integer.toString(samplingPercentage); + dnConf.setFileIoProfilingSamplingPercentage(samplingPercentage); + if (fileIoProvider != null) { + fileIoProvider.getProfilingEventHook().setSampleRangeMax(samplingPercentage); + } + if (samplingPercentage > 0 && diskMetrics == null) { + diskMetrics = new DataNodeDiskMetrics(this, + dnConf.outliersReportIntervalMs, getConf()); + } else if (samplingPercentage <= 0 && diskMetrics != null) { + diskMetrics.shutdownAndWait(); + } + } else if (property.equals(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY)) { + checkNotNull(diskMetrics, "DataNode disk stats may be disabled."); + long minDisks = (newVal == null ? DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(minDisks); + diskMetrics.setMinOutlierDetectionDisks(minDisks); + } else if (property.equals(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY)) { + checkNotNull(diskMetrics, "DataNode disk stats may be disabled."); + long threshold = (newVal == null ? DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(threshold); + diskMetrics.setLowThresholdMs(threshold); + } + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + + private String reconfDfsUsageParameters(String property, String newVal) + throws ReconfigurationException { + String result = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + if (property.equals(FS_DU_INTERVAL_KEY)) { + Preconditions.checkNotNull(data, "FsDatasetSpi has not been initialized."); + long interval = (newVal == null ? 
FS_DU_INTERVAL_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(interval); + List volumeList = data.getVolumeList(); + for (FsVolumeImpl fsVolume : volumeList) { + Map blockPoolSlices = fsVolume.getBlockPoolSlices(); + for (BlockPoolSlice value : blockPoolSlices.values()) { + value.updateDfsUsageConfig(interval, null, null); + } + } + } else if (property.equals(FS_GETSPACEUSED_JITTER_KEY)) { + Preconditions.checkNotNull(data, "FsDatasetSpi has not been initialized."); + long jitter = (newVal == null ? FS_GETSPACEUSED_JITTER_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(jitter); + List volumeList = data.getVolumeList(); + for (FsVolumeImpl fsVolume : volumeList) { + Map blockPoolSlices = fsVolume.getBlockPoolSlices(); + for (BlockPoolSlice value : blockPoolSlices.values()) { + value.updateDfsUsageConfig(null, jitter, null); + } + } + } else if (property.equals(FS_GETSPACEUSED_CLASSNAME)) { + Preconditions.checkNotNull(data, "FsDatasetSpi has not been initialized."); + Class klass; + if (newVal == null) { + if (Shell.WINDOWS) { + klass = DU.class; + } else { + klass = WindowsGetSpaceUsed.class; + } + } else { + klass = Class.forName(newVal).asSubclass(GetSpaceUsed.class); + } + result = klass.getName(); + List volumeList = data.getVolumeList(); + for (FsVolumeImpl fsVolume : volumeList) { + Map blockPoolSlices = fsVolume.getBlockPoolSlices(); + for (BlockPoolSlice value : blockPoolSlices.values()) { + value.updateDfsUsageConfig(null, null, klass); + } + } + } + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException | IOException | ClassNotFoundException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + /** * Get a list of the keys of the re-configurable properties in configuration. 
*/ @@ -797,6 +1111,7 @@ private void refreshVolumes(String newVolumes) throws IOException { .newFixedThreadPool(changedVolumes.newLocations.size()); List> exceptions = Lists.newArrayList(); + Preconditions.checkNotNull(data, "Storage not yet initialized"); for (final StorageLocation location : changedVolumes.newLocations) { exceptions.add(service.submit(new Callable() { @Override @@ -896,6 +1211,7 @@ private synchronized void removeVolumes( clearFailure, Joiner.on(",").join(storageLocations))); IOException ioe = null; + Preconditions.checkNotNull(data, "Storage not yet initialized"); // Remove volumes and block infos from FsDataset. data.removeVolumes(storageLocations, clearFailure); @@ -1015,7 +1331,7 @@ private void initIpcServer() throws IOException { // Add all the RPC protocols that the Datanode implements RPC.setProtocolEngine(getConf(), ClientDatanodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); ClientDatanodeProtocolServerSideTranslatorPB clientDatanodeProtocolXlator = new ClientDatanodeProtocolServerSideTranslatorPB(this); BlockingService service = ClientDatanodeProtocolService @@ -1044,16 +1360,6 @@ private void initIpcServer() throws IOException { DFSUtil.addPBProtocol(getConf(), InterDatanodeProtocolPB.class, service, ipcServer); - TraceAdminProtocolServerSideTranslatorPB traceAdminXlator = - new TraceAdminProtocolServerSideTranslatorPB(this); - BlockingService traceAdminService = TraceAdminService - .newReflectiveBlockingService(traceAdminXlator); - DFSUtil.addPBProtocol( - getConf(), - TraceAdminProtocolPB.class, - traceAdminService, - ipcServer); - LOG.info("Opened IPC server at {}", ipcServer.getListenerAddress()); // set service-level authorization security policy @@ -1113,7 +1419,7 @@ private synchronized void initDirectoryScanner(Configuration conf) { directoryScanner = new DirectoryScanner(data, conf); directoryScanner.start(); } else { - LOG.info("Periodic Directory Tree Verification scan " + + LOG.warn("Periodic 
Directory Tree Verification scan " + "is disabled because {}", reason); } @@ -1315,21 +1621,6 @@ public void reportCorruptedBlocks( } } - /** - * Try to send an error report to the NNs associated with the given - * block pool. - * @param bpid the block pool ID - * @param errCode error code to send - * @param errMsg textual message to send - */ - void trySendErrorReport(String bpid, int errCode, String errMsg) { - BPOfferService bpos = blockPoolManager.get(bpid); - if (bpos == null) { - throw new IllegalArgumentException("Bad block pool: " + bpid); - } - bpos.trySendErrorReport(errCode, errMsg); - } - /** * Return the BPOfferService instance corresponding to the given block. * @return the BPOS @@ -1380,7 +1671,7 @@ boolean areCacheReportsDisabledForTests() { /** * This method starts the data node with the specified conf. * - * If conf's CONFIG_PROPERTY_SIMULATED property is set + * If conf's DFS_DATANODE_FSDATASET_FACTORY_KEY property is set * then a simulated storage based data node is created. 
* * @param dataDirectories - only for a non-simulated storage data node @@ -1473,7 +1764,7 @@ void startDataNode(List dataDirectories, if (dnConf.diskStatsEnabled) { diskMetrics = new DataNodeDiskMetrics(this, - dnConf.outliersReportIntervalMs); + dnConf.outliersReportIntervalMs, getConf()); } } @@ -1661,7 +1952,9 @@ void shutdownBlockPool(BPOfferService bpos) { // a block pool id String bpId = bpos.getBlockPoolId(); - blockScanner.disableBlockPoolId(bpId); + if (blockScanner.hasAnyRegisteredScanner()) { + blockScanner.disableBlockPoolId(bpId); + } if (data != null) { data.shutdownBlockPool(bpId); @@ -1997,6 +2290,7 @@ FileInputStream[] requestShortCircuitFdsForRead(final ExtendedBlock blk, FileInputStream fis[] = new FileInputStream[2]; try { + Preconditions.checkNotNull(data, "Storage not yet initialized"); fis[0] = (FileInputStream)data.getBlockInputStream(blk, 0); fis[1] = DatanodeUtil.getMetaDataInputStream(blk, data); } catch (ClassCastException e) { @@ -2015,7 +2309,7 @@ private void checkBlockToken(ExtendedBlock block, ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); DataInputStream in = new DataInputStream(buf); id.readFields(in); - LOG.debug("Got: {}", id); + LOG.debug("BlockTokenIdentifier id: {}", id); blockPoolTokenSecretManager.checkAccess(id, null, block, accessMode, null, null); } @@ -2146,6 +2440,8 @@ public void shutdown() { } if (metrics != null) { metrics.setDataNodeActiveXceiversCount(0); + metrics.setDataNodePacketResponderCount(0); + metrics.setDataNodeBlockRecoveryWorkerCount(0); } // IPC server needs to be shutdown late in the process, otherwise @@ -2179,7 +2475,7 @@ public void shutdown() { if (metrics != null) { metrics.shutdown(); } - if (diskMetrics != null) { + if (dnConf.diskStatsEnabled && diskMetrics != null) { diskMetrics.shutdownAndWait(); } if (dataNodeInfoBeanName != null) { @@ -2236,15 +2532,28 @@ private void handleDiskError(String failedVolumes, int failedNumber) { return; // do not shutdown } - 
LOG.warn("DataNode is shutting down due to failed volumes: [" - + failedVolumes + "]"); + LOG.warn("DataNode is shutting down due to failed volumes: [{}]", + failedVolumes); shouldRun = false; } /** Number of concurrent xceivers per node. */ @Override // DataNodeMXBean public int getXceiverCount() { - return threadGroup == null ? 0 : threadGroup.activeCount(); + if (metrics == null) { + return 0; + } + return metrics.getDataNodeActiveXceiverCount(); + } + + @Override // DataNodeMXBean + public int getActiveTransferThreadCount() { + if (metrics == null) { + return 0; + } + return metrics.getDataNodeActiveXceiverCount() + + metrics.getDataNodePacketResponderCount() + + metrics.getDataNodeBlockRecoveryWorkerCount(); } @Override // DataNodeMXBean @@ -2255,19 +2564,11 @@ public Map> getDatanodeNetworkCounts() { void incrDatanodeNetworkErrors(String host) { metrics.incrDatanodeNetworkErrors(); - /* - * Synchronizing on the whole cache is a big hammer, but since it's only - * accumulating errors, it should be ok. If this is ever expanded to include - * non-error stats, then finer-grained concurrency should be applied. - */ - synchronized (datanodeNetworkCounts) { - try { - final Map curCount = datanodeNetworkCounts.get(host); - curCount.put("networkErrors", curCount.get("networkErrors") + 1L); - datanodeNetworkCounts.put(host, curCount); - } catch (ExecutionException e) { - LOG.warn("failed to increment network error counts for " + host); - } + try { + datanodeNetworkCounts.get(host).compute(NETWORK_ERRORS, + (key, errors) -> errors == null ? 
1L : errors + 1L); + } catch (ExecutionException e) { + LOG.warn("Failed to increment network error counts for host: {}", host); } } @@ -2316,7 +2617,7 @@ private void reportBadBlock(final BPOfferService bpos, final ExtendedBlock block, final String msg) { FsVolumeSpi volume = getFSDataset().getVolume(block); if (volume == null) { - LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block); + LOG.warn("Cannot find FsVolumeSpi to report bad block: {}", block); return; } bpos.reportBadBlocks( @@ -2397,7 +2698,7 @@ void transferBlocks(String poolId, Block blocks[], transferBlock(new ExtendedBlock(poolId, blocks[i]), xferTargets[i], xferTargetStorageTypes[i], xferTargetStorageIDs[i]); } catch (IOException ie) { - LOG.warn("Failed to transfer block " + blocks[i], ie); + LOG.warn("Failed to transfer block {}", blocks[i], ie); } } } @@ -2516,15 +2817,13 @@ private class DataTransfer implements Runnable { DataTransfer(DatanodeInfo targets[], StorageType[] targetStorageTypes, String[] targetStorageIds, ExtendedBlock b, BlockConstructionStage stage, final String clientname) { - if (DataTransferProtocol.LOG.isDebugEnabled()) { - DataTransferProtocol.LOG.debug("{}: {} (numBytes={}), stage={}, " + - "clientname={}, targets={}, target storage types={}, " + - "target storage IDs={}", getClass().getSimpleName(), b, - b.getNumBytes(), stage, clientname, Arrays.asList(targets), - targetStorageTypes == null ? "[]" : - Arrays.asList(targetStorageTypes), - targetStorageIds == null ? "[]" : Arrays.asList(targetStorageIds)); - } + DataTransferProtocol.LOG.debug("{}: {} (numBytes={}), stage={}, " + + "clientname={}, targets={}, target storage types={}, " + + "target storage IDs={}", getClass().getSimpleName(), b, + b.getNumBytes(), stage, clientname, Arrays.asList(targets), + targetStorageTypes == null ? "[]" : + Arrays.asList(targetStorageTypes), + targetStorageIds == null ? 
"[]" : Arrays.asList(targetStorageIds)); this.targets = targets; this.targetStorageTypes = targetStorageTypes; this.targetStorageIds = targetStorageIds; @@ -2628,7 +2927,7 @@ public void run() { LOG.warn("{}:Failed to transfer {} to {} got", bpReg, b, targets[0], ie); } catch (Throwable t) { - LOG.error("Failed to transfer block " + b, t); + LOG.error("Failed to transfer block {}", b, t); } finally { decrementXmitsInProgress(); IOUtils.closeStream(blockSender); @@ -2705,6 +3004,7 @@ public void runDatanodeDaemon() throws IOException { } ipcServer.setTracer(tracer); ipcServer.start(); + startTime = now(); startPlugins(getConf()); } @@ -2970,6 +3270,7 @@ public static void main(String args[]) { @Override // InterDatanodeProtocol public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock) throws IOException { + Preconditions.checkNotNull(data, "Storage not yet initialized"); return data.initReplicaRecovery(rBlock); } @@ -2980,6 +3281,7 @@ public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock) public String updateReplicaUnderRecovery(final ExtendedBlock oldBlock, final long recoveryId, final long newBlockId, final long newLength) throws IOException { + Preconditions.checkNotNull(data, "Storage not yet initialized"); final Replica r = data.updateReplicaUnderRecovery(oldBlock, recoveryId, newBlockId, newLength); // Notify the namenode of the updated block info. 
This is important @@ -3020,7 +3322,7 @@ private void checkReadAccess(final ExtendedBlock block) throws IOException { } for (TokenIdentifier tokenId : tokenIds) { BlockTokenIdentifier id = (BlockTokenIdentifier) tokenId; - LOG.debug("Got: {}", id); + LOG.debug("BlockTokenIdentifier: {}", id); blockPoolTokenSecretManager.checkAccess(id, null, block, BlockTokenIdentifier.AccessMode.READ, null, null); } @@ -3045,7 +3347,7 @@ void transferReplicaForPipelineRecovery(final ExtendedBlock b, final BlockConstructionStage stage; //get replica information - try(AutoCloseableLock lock = data.acquireDatasetLock()) { + try(AutoCloseableLock lock = data.acquireDatasetReadLock()) { Block storedBlock = data.getStoredBlock(b.getBlockPoolId(), b.getBlockId()); if (null == storedBlock) { @@ -3060,8 +3362,10 @@ void transferReplicaForPipelineRecovery(final ExtendedBlock b, b.setGenerationStamp(storedGS); if (data.isValidRbw(b)) { stage = BlockConstructionStage.TRANSFER_RBW; + LOG.debug("Replica is being written!"); } else if (data.isValidBlock(b)) { stage = BlockConstructionStage.TRANSFER_FINALIZED; + LOG.debug("Replica is finalized!"); } else { final String r = data.getReplicaString(b.getBlockPoolId(), b.getBlockId()); throw new IOException(b + " is neither a RBW nor a Finalized, r=" + r); @@ -3131,7 +3435,12 @@ public String getDataPort(){ @Override // DataNodeMXBean public String getHttpPort(){ - return this.getConf().get("dfs.datanode.info.port"); + return String.valueOf(infoPort); + } + + @Override // DataNodeMXBean + public long getDNStartedTimeInMillis() { + return this.startTime; } public String getRevision() { @@ -3187,8 +3496,12 @@ public String getDatanodeHostname() { */ @Override // DataNodeMXBean public String getBPServiceActorInfo() { - final ArrayList> infoArray = - new ArrayList>(); + return JSON.toString(getBPServiceActorInfoMap()); + } + + @VisibleForTesting + public List> getBPServiceActorInfoMap() { + final List> infoArray = new ArrayList<>(); for (BPOfferService 
bpos : blockPoolManager.getAllNamenodeThreads()) { if (bpos != null) { for (BPServiceActor actor : bpos.getBPServiceActors()) { @@ -3196,7 +3509,7 @@ public String getBPServiceActorInfo() { } } } - return JSON.toString(infoArray); + return infoArray; } /** @@ -3220,7 +3533,7 @@ public String getDiskBalancerStatus() { try { return getDiskBalancer().queryWorkStatus().toJsonString(); } catch (IOException ex) { - LOG.debug("Reading diskbalancer Status failed. ex:{}", ex); + LOG.debug("Reading diskbalancer Status failed.", ex); return ""; } } @@ -3254,7 +3567,7 @@ public void deleteBlockPool(String blockPoolId, boolean force) "The block pool is still running. First do a refreshNamenodes to " + "shutdown the block pool service"); } - + Preconditions.checkNotNull(data, "Storage not yet initialized"); data.deleteBlockPool(blockPoolId, force); } @@ -3375,6 +3688,29 @@ boolean isRestarting() { * @return true - if the data node is fully started */ public boolean isDatanodeFullyStarted() { + return isDatanodeFullyStarted(false); + } + + /** + * A datanode is considered to be fully started if all the BP threads are + * alive and all the block pools are initialized. If checkConnectionToActiveNamenode is true, + * the datanode is considered to be fully started if it is also heartbeating to + * active namenode in addition to the above-mentioned conditions. + * + * @param checkConnectionToActiveNamenode if true, performs additional check of whether datanode + * is heartbeating to active namenode. + * @return true if the datanode is fully started and also conditionally connected to active + * namenode, false otherwise. 
+ */ + public boolean isDatanodeFullyStarted(boolean checkConnectionToActiveNamenode) { + if (checkConnectionToActiveNamenode) { + for (BPOfferService bp : blockPoolManager.getAllNamenodeThreads()) { + if (!bp.isInitialized() || !bp.isAlive() || bp.getActiveNN() == null) { + return false; + } + } + return true; + } for (BPOfferService bp : blockPoolManager.getAllNamenodeThreads()) { if (!bp.isInitialized() || !bp.isAlive()) { return false; @@ -3382,7 +3718,7 @@ public boolean isDatanodeFullyStarted() { } return true; } - + @VisibleForTesting public DatanodeID getDatanodeId() { return id; @@ -3421,6 +3757,14 @@ public ShortCircuitRegistry getShortCircuitRegistry() { return shortCircuitRegistry; } + public DataTransferThrottler getEcReconstuctReadThrottler() { + return ecReconstuctReadThrottler; + } + + public DataTransferThrottler getEcReconstuctWriteThrottler() { + return ecReconstuctWriteThrottler; + } + /** * Check the disk error synchronously. */ @@ -3512,24 +3856,6 @@ public long getLastDiskErrorCheck() { return lastDiskErrorCheck; } - @Override - public SpanReceiverInfo[] listSpanReceivers() throws IOException { - checkSuperuserPrivilege(); - return tracerConfigurationManager.listSpanReceivers(); - } - - @Override - public long addSpanReceiver(SpanReceiverInfo info) throws IOException { - checkSuperuserPrivilege(); - return tracerConfigurationManager.addSpanReceiver(info); - } - - @Override - public void removeSpanReceiver(long id) throws IOException { - checkSuperuserPrivilege(); - tracerConfigurationManager.removeSpanReceiver(id); - } - public BlockRecoveryWorker getBlockRecoveryWorker(){ return blockRecoveryWorker; } @@ -3698,13 +4024,13 @@ void setBlockScanner(BlockScanner blockScanner) { @Override // DataNodeMXBean public String getSendPacketDownstreamAvgInfo() { - return peerMetrics != null ? + return dnConf.peerStatsEnabled && peerMetrics != null ? 
peerMetrics.dumpSendPacketDownstreamAvgInfoAsJson() : null; } @Override // DataNodeMXBean public String getSlowDisks() { - if (diskMetrics == null) { + if (!dnConf.diskStatsEnabled || diskMetrics == null) { //Disk Stats not enabled return null; } @@ -3716,6 +4042,7 @@ public String getSlowDisks() { @Override public List getVolumeReport() throws IOException { checkSuperuserPrivilege(); + Preconditions.checkNotNull(data, "Storage not yet initialized"); Map volumeInfoMap = data.getVolumeInfoMap(); if (volumeInfoMap == null) { LOG.warn("DataNode volume info not available."); @@ -3771,4 +4098,9 @@ private static boolean isWrite(BlockConstructionStage stage) { return (stage == PIPELINE_SETUP_STREAMING_RECOVERY || stage == PIPELINE_SETUP_APPEND_RECOVERY); } + + @VisibleForTesting + public BlockPoolManager getBlockPoolManager() { + return blockPoolManager; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java index c031f6c4c57e9..8ff3e30d70731 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java @@ -17,12 +17,13 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.ByteBuffer; /** * Used for injecting faults in DFSClient and DFSOutputStream tests. 
@@ -96,6 +97,28 @@ public void throwTooManyOpenFiles() throws FileNotFoundException { */ public void stripedBlockReconstruction() throws IOException {} + /** + * Used as a hook to inject failure in erasure coding checksum reconstruction + * process. + */ + public void stripedBlockChecksumReconstruction() throws IOException {} + + /** + * Used as a hook to inject latency when read block + * in erasure coding reconstruction process. + */ + public void delayBlockReader() {} + + /** + * Used as a hook to inject intercept when free the block reader buffer. + */ + public void interceptFreeBlockReaderBuffer() {} + + /** + * Used as a hook to inject intercept When finish reading from block. + */ + public void interceptBlockReader() {} + /** * Used as a hook to inject intercept when BPOfferService hold lock. */ @@ -105,4 +128,15 @@ public void delayWhenOfferServiceHoldLock() {} * Used as a hook to inject intercept when re-register. */ public void blockUtilSendFullBlockReport() {} + + /** + * Just delay a while. + */ + public void delay() {} + + /** + * Used as a hook to inject data pollution + * into an erasure coding reconstruction. + */ + public void badDecoding(ByteBuffer[] outputs) {} } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java index 9d11e1488479b..65537754741cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java @@ -104,11 +104,15 @@ public interface DataNodeMXBean { public String getClusterId(); /** - * Returns an estimate of the number of Datanode threads - * actively transferring blocks. + * Returns the number of active xceivers. 
*/ public int getXceiverCount(); + /** + * Returns the number of Datanode threads actively transferring blocks. + */ + int getActiveTransferThreadCount(); + /** * Returns an estimate of the number of data replication/reconstruction tasks * running currently. @@ -153,4 +157,11 @@ public interface DataNodeMXBean { * @return true, if security is enabled. */ boolean isSecurityEnabled(); + + /** + * Get the start time of the DataNode. + * + * @return Start time of the DataNode. + */ + long getDNStartedTimeInMillis(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java index 2447fd7137236..1d2f10f41dbad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java @@ -62,10 +62,10 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.util.Daemon; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * Data storage information file. 
@@ -1064,12 +1064,26 @@ private static void linkAllBlocks(File fromDir, File fromBbwDir, File toDir, } private static class LinkArgs { - File src; - File dst; + private File srcDir; + private File dstDir; + private String blockFile; + + LinkArgs(File srcDir, File dstDir, String blockFile) { + this.srcDir = srcDir; + this.dstDir = dstDir; + this.blockFile = blockFile; + } + + public File src() { + return new File(srcDir, blockFile); + } - LinkArgs(File src, File dst) { - this.src = src; - this.dst = dst; + public File dst() { + return new File(dstDir, blockFile); + } + + public String blockFile() { + return blockFile; } } @@ -1095,8 +1109,9 @@ private static void linkBlocks(File from, File to, int oldLV, } final ArrayList idBasedLayoutSingleLinks = Lists.newArrayList(); - linkBlocksHelper(from, to, oldLV, hl, upgradeToIdBasedLayout, to, - idBasedLayoutSingleLinks); + final Map pathCache = new HashMap<>(); + linkBlocksHelper(from, to, hl, upgradeToIdBasedLayout, to, + idBasedLayoutSingleLinks, pathCache); // Detect and remove duplicate entries. final ArrayList duplicates = @@ -1122,7 +1137,7 @@ public Void call() throws IOException { idBasedLayoutSingleLinks.size()); for (int j = iCopy; j < upperBound; j++) { LinkArgs cur = idBasedLayoutSingleLinks.get(j); - HardLink.createHardLink(cur.src, cur.dst); + HardLink.createHardLink(cur.src(), cur.dst()); } return null; } @@ -1155,9 +1170,9 @@ static ArrayList findDuplicateEntries(ArrayList all) { @Override public int compare(LinkArgs a, LinkArgs b) { return ComparisonChain.start(). - compare(a.src.getName(), b.src.getName()). - compare(a.src, b.src). - compare(a.dst, b.dst). + compare(a.blockFile(), b.blockFile()). + compare(a.src(), b.src()). + compare(a.dst(), b.dst()). 
result(); } }); @@ -1167,8 +1182,8 @@ public int compare(LinkArgs a, LinkArgs b) { boolean addedPrev = false; for (int i = 0; i < all.size(); i++) { LinkArgs args = all.get(i); - long blockId = Block.getBlockId(args.src.getName()); - boolean isMeta = Block.isMetaFilename(args.src.getName()); + long blockId = Block.getBlockId(args.blockFile()); + boolean isMeta = Block.isMetaFilename(args.blockFile()); if ((prevBlockId == null) || (prevBlockId.longValue() != blockId)) { prevBlockId = blockId; @@ -1207,10 +1222,10 @@ private static void removeDuplicateEntries(ArrayList all, TreeMap> highestGenstamps = new TreeMap>(); for (LinkArgs duplicate : duplicates) { - if (!Block.isMetaFilename(duplicate.src.getName())) { + if (!Block.isMetaFilename(duplicate.blockFile())) { continue; } - long blockId = Block.getBlockId(duplicate.src.getName()); + long blockId = Block.getBlockId(duplicate.blockFile()); List prevHighest = highestGenstamps.get(blockId); if (prevHighest == null) { List highest = new LinkedList(); @@ -1219,8 +1234,8 @@ private static void removeDuplicateEntries(ArrayList all, continue; } long prevGenstamp = - Block.getGenerationStamp(prevHighest.get(0).src.getName()); - long genstamp = Block.getGenerationStamp(duplicate.src.getName()); + Block.getGenerationStamp(prevHighest.get(0).blockFile()); + long genstamp = Block.getGenerationStamp(duplicate.blockFile()); if (genstamp < prevGenstamp) { continue; } @@ -1234,19 +1249,19 @@ private static void removeDuplicateEntries(ArrayList all, // from the duplicates list. 
for (Iterator iter = duplicates.iterator(); iter.hasNext(); ) { LinkArgs duplicate = iter.next(); - long blockId = Block.getBlockId(duplicate.src.getName()); + long blockId = Block.getBlockId(duplicate.blockFile()); List highest = highestGenstamps.get(blockId); if (highest != null) { boolean found = false; for (LinkArgs high : highest) { - if (high.src.getParent().equals(duplicate.src.getParent())) { + if (high.src().getParent().equals(duplicate.src().getParent())) { found = true; break; } } if (!found) { LOG.warn("Unexpectedly low genstamp on {}.", - duplicate.src.getAbsolutePath()); + duplicate.src().getAbsolutePath()); iter.remove(); } } @@ -1257,25 +1272,25 @@ private static void removeDuplicateEntries(ArrayList all, // preserving one block file / metadata file pair. TreeMap longestBlockFiles = new TreeMap(); for (LinkArgs duplicate : duplicates) { - if (Block.isMetaFilename(duplicate.src.getName())) { + if (Block.isMetaFilename(duplicate.blockFile())) { continue; } - long blockId = Block.getBlockId(duplicate.src.getName()); + long blockId = Block.getBlockId(duplicate.blockFile()); LinkArgs prevLongest = longestBlockFiles.get(blockId); if (prevLongest == null) { longestBlockFiles.put(blockId, duplicate); continue; } - long blockLength = duplicate.src.length(); - long prevBlockLength = prevLongest.src.length(); + long blockLength = duplicate.src().length(); + long prevBlockLength = prevLongest.src().length(); if (blockLength < prevBlockLength) { LOG.warn("Unexpectedly short length on {}.", - duplicate.src.getAbsolutePath()); + duplicate.src().getAbsolutePath()); continue; } if (blockLength > prevBlockLength) { LOG.warn("Unexpectedly short length on {}.", - prevLongest.src.getAbsolutePath()); + prevLongest.src().getAbsolutePath()); } longestBlockFiles.put(blockId, duplicate); } @@ -1284,21 +1299,22 @@ private static void removeDuplicateEntries(ArrayList all, // arbitrarily selected by us. 
for (Iterator iter = all.iterator(); iter.hasNext(); ) { LinkArgs args = iter.next(); - long blockId = Block.getBlockId(args.src.getName()); + long blockId = Block.getBlockId(args.blockFile()); LinkArgs bestDuplicate = longestBlockFiles.get(blockId); if (bestDuplicate == null) { continue; // file has no duplicates } - if (!bestDuplicate.src.getParent().equals(args.src.getParent())) { - LOG.warn("Discarding {}.", args.src.getAbsolutePath()); + if (!bestDuplicate.src().getParent().equals(args.src().getParent())) { + LOG.warn("Discarding {}.", args.src().getAbsolutePath()); iter.remove(); } } } - static void linkBlocksHelper(File from, File to, int oldLV, HardLink hl, - boolean upgradeToIdBasedLayout, File blockRoot, - List idBasedLayoutSingleLinks) throws IOException { + static void linkBlocksHelper(File from, File to, HardLink hl, + boolean upgradeToIdBasedLayout, File blockRoot, + List idBasedLayoutSingleLinks, Map pathCache) + throws IOException { if (!from.exists()) { return; } @@ -1338,8 +1354,18 @@ public boolean accept(File dir, String name) { throw new IOException("Failed to mkdirs " + blockLocation); } } - idBasedLayoutSingleLinks.add(new LinkArgs(new File(from, blockName), - new File(blockLocation, blockName))); + /** + * The destination path is 32x32, so 1024 distinct paths. Therefore + * we cache the destination path and reuse the same File object on + * potentially thousands of blocks located on this volume. + * This method is called recursively so the cache is passed through + * each recursive call. There is one cache per volume, and it is only + * accessed by a single thread so no locking is needed. 
+ */ + File cachedDest = pathCache + .computeIfAbsent(blockLocation, k -> blockLocation); + idBasedLayoutSingleLinks.add(new LinkArgs(from, + cachedDest, blockName)); hl.linkStats.countSingleLinks++; } } else { @@ -1362,8 +1388,8 @@ public boolean accept(File dir, String name) { if (otherNames != null) { for (int i = 0; i < otherNames.length; i++) { linkBlocksHelper(new File(from, otherNames[i]), - new File(to, otherNames[i]), oldLV, hl, upgradeToIdBasedLayout, - blockRoot, idBasedLayoutSingleLinks); + new File(to, otherNames[i]), hl, upgradeToIdBasedLayout, + blockRoot, idBasedLayoutSingleLinks, pathCache); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 9c885fc4ab255..00a40bafbfe3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.thirdparty.protobuf.ByteString; import javax.crypto.SecretKey; import org.apache.commons.logging.Log; +import org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.ExtendedBlockId; import org.apache.hadoop.hdfs.net.Peer; @@ -135,7 +137,7 @@ public static DataXceiver create(Peer peer, DataNode dn, private DataXceiver(Peer peer, DataNode datanode, DataXceiverServer dataXceiverServer) throws IOException { - 
super(datanode.getTracer()); + super(FsTracer.get(null)); this.peer = peer; this.dnConf = datanode.getDnConf(); this.socketIn = peer.getInputStream(); @@ -340,7 +342,7 @@ public void run() { * the thread dies away. */ private void collectThreadLocalStates() { - if (datanode.getPeerMetrics() != null) { + if (datanode.getDnConf().peerStatsEnabled && datanode.getPeerMetrics() != null) { datanode.getPeerMetrics().collectThreadLocalStates(); } } @@ -431,7 +433,7 @@ public void requestShortCircuitFds(final ExtendedBlock blk, blk.getBlockId(), dnR.getDatanodeUuid(), success)); } if (fis != null) { - IOUtils.cleanup(null, fis); + IOUtils.cleanupWithLogger(null, fis); } } } @@ -554,7 +556,7 @@ public void requestShortCircuitShm(String clientName) throws IOException { LOG.warn("Failed to shut down socket in error handler", e); } } - IOUtils.cleanup(null, shmInfo); + IOUtils.cleanupWithLogger(null, shmInfo); } } @@ -632,6 +634,7 @@ public void readBlock(final ExtendedBlock block, datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); datanode.metrics.incrTotalReadTime(duration); + DFSUtil.addTransferRateMetric(datanode.metrics, read, duration); } catch ( SocketException ignored ) { LOG.trace("{}:Ignoring exception while serving {} to {}", dnR, block, remoteAddress, ignored); @@ -946,6 +949,9 @@ public void writeBlock(final ExtendedBlock block, IOUtils.closeStream(mirrorIn); IOUtils.closeStream(replyOut); IOUtils.closeSocket(mirrorSock); + if (blockReceiver != null) { + blockReceiver.releaseAnyRemainingReservedSpace(); + } IOUtils.closeStream(blockReceiver); setCurrentBlockReceiver(null); } @@ -1091,7 +1097,7 @@ public void copyBlock(final ExtendedBlock block, if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start String msg = "Not able to copy block " + block.getBlockId() + " " + "to " + peer.getRemoteAddressString() + " because threads " + - "quota is exceeded."; + "quota=" + 
dataXceiverServer.balanceThrottler.getMaxConcurrentMovers() + " is exceeded."; LOG.info(msg); sendResponse(ERROR, msg); return; @@ -1118,6 +1124,7 @@ public void copyBlock(final ExtendedBlock block, datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); datanode.metrics.incrTotalReadTime(duration); + DFSUtil.addTransferRateMetric(datanode.metrics, read, duration); LOG.info("Copied {} to {}", block, peer.getRemoteAddressString()); } catch (IOException ioe) { @@ -1165,7 +1172,7 @@ public void replaceBlock(final ExtendedBlock block, if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start String msg = "Not able to receive block " + block.getBlockId() + " from " + peer.getRemoteAddressString() + " because threads " + - "quota is exceeded."; + "quota=" + dataXceiverServer.balanceThrottler.getMaxConcurrentMovers() + " is exceeded."; LOG.warn(msg); sendResponse(ERROR, msg); return; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java index ea85a476a429a..c139e14777e03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java @@ -35,8 +35,8 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.util.Daemon; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -68,8 +68,7 @@ class DataXceiverServer implements Runnable { * Enforcing the limit is required in order to avoid data-node * running out of memory. 
*/ - int maxXceiverCount = - DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT; + volatile int maxXceiverCount; /** * A manager to make sure that cluster balancing does not take too much @@ -188,6 +187,9 @@ void release() { this.maxXceiverCount = conf.getInt(DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_KEY, DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT); + Preconditions.checkArgument(this.maxXceiverCount >= 1, + DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_KEY + + " should not be less than 1."); this.estimateBlockSize = conf.getLongBytes(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT); @@ -511,4 +513,15 @@ public boolean updateBalancerMaxConcurrentMovers(final int movers) { void setMaxReconfigureWaitTime(int max) { this.maxReconfigureWaitTime = max; } + + public void setMaxXceiverCount(int xceiverCount) { + Preconditions.checkArgument(xceiverCount > 0, + "dfs.datanode.max.transfer.threads should be larger than 0"); + maxXceiverCount = xceiverCount; + } + + @VisibleForTesting + public int getMaxXceiverCount() { + return maxXceiverCount; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 35625ce121d94..9749097ba189c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -46,15 +46,14 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi.ScanInfo; -import org.apache.hadoop.util.AutoCloseableLock; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.StopWatch; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ListMultimap; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ArrayListMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ListMultimap; /** * Periodically scans the data directories for block and block metadata files. @@ -141,7 +140,8 @@ public String toString() { + ", missing metadata files: " + missingMetaFile + ", missing block files: " + missingBlockFile + ", missing blocks in memory: " + missingMemoryBlocks - + ", mismatched blocks: " + mismatchBlocks; + + ", mismatched blocks: " + mismatchBlocks + + ", duplicated blocks: " + duplicateBlocks; } } @@ -356,7 +356,7 @@ private void clear() { } /** - * Main program loop for DirectoryScanner. Runs {@link reconcile()} and + * Main program loop for DirectoryScanner. Runs {@link #reconcile()} and * handles any exceptions. 
*/ @Override @@ -472,88 +472,84 @@ private void scan() { // Pre-sort the reports outside of the lock blockPoolReport.sortBlocks(); - // Hold FSDataset lock to prevent further changes to the block map - try (AutoCloseableLock lock = dataset.acquireDatasetLock()) { - for (final String bpid : blockPoolReport.getBlockPoolIds()) { - List blockpoolReport = blockPoolReport.getScanInfo(bpid); - - Stats statsRecord = new Stats(bpid); - stats.put(bpid, statsRecord); - Collection diffRecord = new ArrayList<>(); - - statsRecord.totalBlocks = blockpoolReport.size(); - final List bl = dataset.getFinalizedBlocks(bpid); - Collections.sort(bl); // Sort based on blockId - - int d = 0; // index for blockpoolReport - int m = 0; // index for memReprot - while (m < bl.size() && d < blockpoolReport.size()) { - ReplicaInfo memBlock = bl.get(m); - ScanInfo info = blockpoolReport.get(d); - if (info.getBlockId() < memBlock.getBlockId()) { - if (!dataset.isDeletingBlock(bpid, info.getBlockId())) { - // Block is missing in memory - statsRecord.missingMemoryBlocks++; - addDifference(diffRecord, statsRecord, info); - } - d++; - continue; - } - if (info.getBlockId() > memBlock.getBlockId()) { - // Block is missing on the disk - addDifference(diffRecord, statsRecord, memBlock.getBlockId(), - info.getVolume()); - m++; - continue; - } - // Block file and/or metadata file exists on the disk - // Block exists in memory - if (info.getVolume().getStorageType() != StorageType.PROVIDED - && info.getBlockFile() == null) { - // Block metadata file exits and block file is missing - addDifference(diffRecord, statsRecord, info); - } else if (info.getGenStamp() != memBlock.getGenerationStamp() - || info.getBlockLength() != memBlock.getNumBytes()) { - // Block metadata file is missing or has wrong generation stamp, - // or block file length is different than expected - statsRecord.mismatchBlocks++; - addDifference(diffRecord, statsRecord, info); - } else if (memBlock.compareWith(info) != 0) { - // volumeMap 
record and on-disk files do not match. - statsRecord.duplicateBlocks++; + for (final String bpid : blockPoolReport.getBlockPoolIds()) { + List blockpoolReport = blockPoolReport.getScanInfo(bpid); + + Stats statsRecord = new Stats(bpid); + stats.put(bpid, statsRecord); + Collection diffRecord = new ArrayList<>(); + + statsRecord.totalBlocks = blockpoolReport.size(); + final List bl = dataset.getFinalizedBlocks(bpid); + Collections.sort(bl); // Sort based on blockId + + int d = 0; // index for blockpoolReport + int m = 0; // index for memReprot + while (m < bl.size() && d < blockpoolReport.size()) { + ReplicaInfo memBlock = bl.get(m); + ScanInfo info = blockpoolReport.get(d); + if (info.getBlockId() < memBlock.getBlockId()) { + if (!dataset.isDeletingBlock(bpid, info.getBlockId())) { + // Block is missing in memory + statsRecord.missingMemoryBlocks++; addDifference(diffRecord, statsRecord, info); } d++; - - if (d < blockpoolReport.size()) { - // There may be multiple on-disk records for the same block, do not - // increment the memory record pointer if so. 
- ScanInfo nextInfo = blockpoolReport.get(d); - if (nextInfo.getBlockId() != info.getBlockId()) { - ++m; - } - } else { - ++m; - } + continue; } - while (m < bl.size()) { - ReplicaInfo current = bl.get(m++); - addDifference(diffRecord, statsRecord, current.getBlockId(), - current.getVolume()); + if (info.getBlockId() > memBlock.getBlockId()) { + // Block is missing on the disk + addDifference(diffRecord, statsRecord, memBlock.getBlockId(), + info.getVolume()); + m++; + continue; } - while (d < blockpoolReport.size()) { - if (!dataset.isDeletingBlock(bpid, - blockpoolReport.get(d).getBlockId())) { - statsRecord.missingMemoryBlocks++; - addDifference(diffRecord, statsRecord, blockpoolReport.get(d)); + // Block file and/or metadata file exists on the disk + // Block exists in memory + if (info.getBlockFile() == null) { + // Block metadata file exits and block file is missing + addDifference(diffRecord, statsRecord, info); + } else if (info.getGenStamp() != memBlock.getGenerationStamp() + || info.getBlockLength() != memBlock.getNumBytes()) { + // Block metadata file is missing or has wrong generation stamp, + // or block file length is different than expected + statsRecord.mismatchBlocks++; + addDifference(diffRecord, statsRecord, info); + } else if (memBlock.compareWith(info) != 0) { + // volumeMap record and on-disk files do not match. + statsRecord.duplicateBlocks++; + addDifference(diffRecord, statsRecord, info); + } + d++; + + if (d < blockpoolReport.size()) { + // There may be multiple on-disk records for the same block, do not + // increment the memory record pointer if so. 
+ ScanInfo nextInfo = blockpoolReport.get(d); + if (nextInfo.getBlockId() != info.getBlockId()) { + ++m; } - d++; + } else { + ++m; } - synchronized (diffs) { - diffs.addAll(bpid, diffRecord); + } + while (m < bl.size()) { + ReplicaInfo current = bl.get(m++); + addDifference(diffRecord, statsRecord, current.getBlockId(), + current.getVolume()); + } + while (d < blockpoolReport.size()) { + if (!dataset.isDeletingBlock(bpid, + blockpoolReport.get(d).getBlockId())) { + statsRecord.missingMemoryBlocks++; + addDifference(diffRecord, statsRecord, blockpoolReport.get(d)); } - LOG.info("Scan Results: {}", statsRecord); + d++; + } + synchronized (diffs) { + diffs.addAll(bpid, diffRecord); } + LOG.info("Scan Results: {}", statsRecord); } } @@ -587,7 +583,7 @@ private void addDifference(Collection diffRecord, Stats statsRecord, long blockId, FsVolumeSpi vol) { statsRecord.missingBlockFile++; statsRecord.missingMetaFile++; - diffRecord.add(new ScanInfo(blockId, null, null, vol)); + diffRecord.add(new ScanInfo(blockId, null, null, null, vol)); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancer.java index ffa8524a2f967..7b58d9bca5433 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancer.java @@ -18,8 +18,8 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.classification.InterfaceAudience; import 
org.apache.hadoop.conf.Configuration; @@ -504,7 +504,7 @@ private Map getStorageIDToVolumeBasePathMap() Map storageIDToVolBasePathMap = new HashMap<>(); FsDatasetSpi.FsVolumeReferences references; try { - try(AutoCloseableLock lock = this.dataset.acquireDatasetLock()) { + try(AutoCloseableLock lock = this.dataset.acquireDatasetReadLock()) { references = this.dataset.getFsVolumeReferences(); for (int ndx = 0; ndx < references.size(); ndx++) { FsVolumeSpi vol = references.get(ndx); @@ -808,7 +808,7 @@ private boolean isLessThanNeeded(long blockSize, long bytesToCopy = item.getBytesToCopy() - item.getBytesCopied(); bytesToCopy = bytesToCopy + ((bytesToCopy * getBlockTolerancePercentage(item)) / 100); - return (blockSize <= bytesToCopy) ? true : false; + return blockSize <= bytesToCopy; } /** @@ -833,7 +833,7 @@ private long getBlockTolerancePercentage(DiskBalancerWorkItem item) { private boolean isCloseEnough(DiskBalancerWorkItem item) { long temp = item.getBytesCopied() + ((item.getBytesCopied() * getBlockTolerancePercentage(item)) / 100); - return (item.getBytesToCopy() >= temp) ? false : true; + return item.getBytesToCopy() < temp; } /** @@ -902,7 +902,7 @@ private long getMaxError(DiskBalancerWorkItem item) { */ private ExtendedBlock getBlockToCopy(FsVolumeSpi.BlockIterator iter, DiskBalancerWorkItem item) { - while (!iter.atEnd() && item.getErrorCount() < getMaxError(item)) { + while (!iter.atEnd() && item.getErrorCount() <= getMaxError(item)) { try { ExtendedBlock block = iter.nextBlock(); if(null == block){ @@ -923,7 +923,7 @@ private ExtendedBlock getBlockToCopy(FsVolumeSpi.BlockIterator iter, item.incErrorCount(); } } - if (item.getErrorCount() >= getMaxError(item)) { + if (item.getErrorCount() > getMaxError(item)) { item.setErrMsg("Error count exceeded."); LOG.info("Maximum error count exceeded. 
Error count: {} Max error:{} ", item.getErrorCount(), item.getMaxDiskErrors()); @@ -989,7 +989,7 @@ private void closePoolIters(List poolIters) { try { iter.close(); } catch (IOException ex) { - LOG.error("Error closing a block pool iter. ex: {}", ex); + LOG.error("Error closing a block pool iter. ex: ", ex); } } } @@ -1124,7 +1124,7 @@ public void copyBlocks(VolumePair pair, DiskBalancerWorkItem item) { startTime); item.setSecondsElapsed(secondsElapsed); } catch (IOException ex) { - LOG.error("Exception while trying to copy blocks. error: {}", ex); + LOG.error("Exception while trying to copy blocks. error: ", ex); item.incErrorCount(); } catch (InterruptedException e) { LOG.error("Copy Block Thread interrupted, exiting the copy."); @@ -1133,7 +1133,7 @@ public void copyBlocks(VolumePair pair, DiskBalancerWorkItem item) { this.setExitFlag(); } catch (RuntimeException ex) { // Exiting if any run time exceptions. - LOG.error("Got an unexpected Runtime Exception {}", ex); + LOG.error("Got an unexpected Runtime Exception ", ex); item.incErrorCount(); this.setExitFlag(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java index 26498d4a0fc45..38581bee2286b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.ipc.RemoteException; @@ -84,4 +85,12 @@ public boolean equals(Object obj) { } return true; } + + @Override + public String toString() { 
+ return new ToStringBuilder(this) + .append("errorCode", errorCode) + .append("errorMessage", errorMessage) + .toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FileIoProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FileIoProvider.java index fc98d3a6b7a8e..6907f3aad731e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FileIoProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FileIoProvider.java @@ -280,7 +280,12 @@ public void transferToSocketFully( profilingEventHook.afterFileIo(volume, TRANSFER, begin, count); } catch (Exception e) { String em = e.getMessage(); - if (!em.startsWith("Broken pipe") && !em.startsWith("Connection reset")) { + if (em != null) { + if (!em.startsWith("Broken pipe") + && !em.startsWith("Connection reset")) { + onFailure(volume, begin); + } + } else { onFailure(volume, begin); } throw e; @@ -1065,4 +1070,8 @@ private void onFailure(@Nullable FsVolumeSpi volume, long begin) { } profilingEventHook.onFailure(volume, begin); } + + public ProfilingFileIoEvents getProfilingEventHook() { + return profilingEventHook; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/IncrementalBlockReportManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/IncrementalBlockReportManager.java index 73cc44b31f9cf..f55b8c2b73425 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/IncrementalBlockReportManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/IncrementalBlockReportManager.java @@ -37,8 +37,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Maps; 
+import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * Manage Incremental Block Reports (IBRs). diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplica.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplica.java index cb53a64f7defe..b711e1a8f2115 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplica.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplica.java @@ -42,7 +42,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class is used for all replicas which are on local storage media diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplicaInPipeline.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplicaInPipeline.java index 99d2fc8e04ea8..24b6bd550e7b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplicaInPipeline.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/LocalReplicaInPipeline.java @@ -174,6 +174,10 @@ public void releaseAllBytesReserved() { getVolume().releaseLockedMemory(bytesReserved); bytesReserved = 0; } + @Override + public void releaseReplicaInfoBytesReserved() { + bytesReserved = 0; + } @Override public void setLastChecksumAndDataLen(long dataLength, byte[] checksum) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java index 2da3b1e8f9844..c22401b645f14 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode; +import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -40,8 +41,8 @@ class ProfilingFileIoEvents { static final Logger LOG = LoggerFactory.getLogger(ProfilingFileIoEvents.class); - private final boolean isEnabled; - private final int sampleRangeMax; + private volatile boolean isEnabled; + private volatile int sampleRangeMax; public ProfilingFileIoEvents(@Nullable Configuration conf) { if (conf != null) { @@ -49,15 +50,7 @@ public ProfilingFileIoEvents(@Nullable Configuration conf) { DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, DFSConfigKeys .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT); - isEnabled = Util.isDiskStatsEnabled(fileIOSamplingPercentage); - if (fileIOSamplingPercentage > 100) { - LOG.warn(DFSConfigKeys - .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY + - " value cannot be more than 100. 
Setting value to 100"); - fileIOSamplingPercentage = 100; - } - sampleRangeMax = (int) ((double) fileIOSamplingPercentage / 100 * - Integer.MAX_VALUE); + setSampleRangeMax(fileIOSamplingPercentage); } else { isEnabled = false; sampleRangeMax = 0; @@ -80,7 +73,7 @@ public void afterMetadataOp(@Nullable FsVolumeSpi volume, if (isEnabled) { DataNodeVolumeMetrics metrics = getVolumeMetrics(volume); if (metrics != null) { - metrics.addMetadastaOperationLatency(Time.monotonicNow() - begin); + metrics.addMetadataOperationLatency(Time.monotonicNow() - begin); } } } @@ -116,6 +109,12 @@ public void afterFileIo(@Nullable FsVolumeSpi volume, case WRITE: metrics.addWriteIoLatency(latency); break; + case TRANSFER: + metrics.addTransferIoLatency(latency); + break; + case NATIVE_COPY: + metrics.addNativeCopyIoLatency(latency); + break; default: } } @@ -139,4 +138,26 @@ private DataNodeVolumeMetrics getVolumeMetrics(final FsVolumeSpi volume) { } return null; } + + public void setSampleRangeMax(int fileIOSamplingPercentage) { + isEnabled = Util.isDiskStatsEnabled(fileIOSamplingPercentage); + if (fileIOSamplingPercentage > 100) { + LOG.warn(DFSConfigKeys + .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY + + " value cannot be more than 100. 
Setting value to 100"); + fileIOSamplingPercentage = 100; + } + sampleRangeMax = (int) ((double) fileIOSamplingPercentage / 100 * + Integer.MAX_VALUE); + } + + @VisibleForTesting + public boolean getDiskStatsEnabled() { + return isEnabled; + } + + @VisibleForTesting + public int getSampleRangeMax() { + return sampleRangeMax; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProvidedReplica.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProvidedReplica.java index bd23021f5cdef..00640f62c8428 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProvidedReplica.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProvidedReplica.java @@ -23,7 +23,7 @@ import java.io.OutputStream; import java.net.URI; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.input.BoundedInputStream; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInPipeline.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInPipeline.java index 174827b5a20eb..65da42d3a205a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInPipeline.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaInPipeline.java @@ -51,6 +51,11 @@ public interface ReplicaInPipeline extends Replica { */ public void releaseAllBytesReserved(); + /** + * Release the reserved space from the ReplicaInfo. 
+ */ + void releaseReplicaInfoBytesReserved(); + /** * store the checksum for the last chunk along with the data length * @param dataLength number of bytes on disk diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java index 2946358a5cbe2..d525d9f7822ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder; @@ -111,4 +112,13 @@ public boolean equals(Object obj) { } return true; } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("block", block) + .append("storageUuid", storageUuid) + .append("storageType", storageType) + .toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java index 84fac578d9798..6c666411c9671 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java @@ -16,7 +16,7 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.daemon.Daemon; import 
org.apache.commons.daemon.DaemonContext; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java index 3df83cfbad4ae..cb8dfaf262334 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java @@ -30,7 +30,7 @@ import java.util.Iterator; import java.util.Set; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,9 +46,9 @@ import org.apache.hadoop.net.unix.DomainSocketWatcher; import org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.HashMultimap; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; /** * Manages client short-circuit memory segments on the DataNode. 
@@ -404,4 +404,9 @@ boolean accept(HashMap segments, public synchronized boolean visit(Visitor visitor) { return visitor.accept(segments, slots); } + + @VisibleForTesting + public int getShmNum() { + return segments.size(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java index 84cfb04801d88..2c666a3831747 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java @@ -19,19 +19,23 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.DataOutputStream; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.concurrent.TimeUnit; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference; @@ -289,7 +293,7 @@ public void handle(ExtendedBlock block, IOException 
e) { volume, block); return; } - LOG.warn("Reporting bad {} on {}", block, volume); + LOG.warn("Reporting bad {} on {}", block, volume, e); scanner.datanode.handleBadBlock(block, e, true); } } @@ -447,7 +451,7 @@ private long scanBlock(ExtendedBlock cblock, long bytesPerSec) { } catch (IOException e) { resultHandler.handle(block, e); } finally { - IOUtils.cleanup(null, blockSender); + IOUtils.cleanupWithLogger(null, blockSender); } metrics.incrBlockVerificationFailures(); return -1; @@ -479,6 +483,50 @@ static boolean calculateShouldScan(String storageId, long targetBytesPerSec, return shouldScan; } + /** + * Get next block and check if it's needed to scan. + * + * @return the candidate block. + */ + ExtendedBlock getNextBlockToScan() { + ExtendedBlock block; + try { + block = curBlockIter.nextBlock(); + } catch (IOException e) { + // There was an error listing the next block in the volume. This is a + // serious issue. + LOG.warn("{}: nextBlock error on {}", this, curBlockIter); + // On the next loop iteration, curBlockIter#eof will be set to true, and + // we will pick a different block iterator. + return null; + } + if (block == null) { + // The BlockIterator is at EOF. + LOG.info("{}: finished scanning block pool {}", + this, curBlockIter.getBlockPoolId()); + saveBlockIterator(curBlockIter); + return null; + } else if (conf.skipRecentAccessed) { + // Check the access time of block file to avoid scanning recently + // changed blocks, reducing disk IO. + try { + BlockLocalPathInfo blockLocalPathInfo = + volume.getDataset().getBlockLocalPathInfo(block); + BasicFileAttributes attr = Files.readAttributes( + new File(blockLocalPathInfo.getBlockPath()).toPath(), + BasicFileAttributes.class); + if (System.currentTimeMillis() - attr.lastAccessTime(). 
+ to(TimeUnit.MILLISECONDS) < conf.scanPeriodMs) { + return null; + } + } catch (IOException ioe) { + LOG.debug("Failed to get access time of block {}", + block, ioe); + } + } + return block; + } + /** * Run an iteration of the VolumeScanner loop. * @@ -503,10 +551,10 @@ private long runLoop(ExtendedBlock suspectBlock) { return 30000L; } - // Find a usable block pool to scan. if (suspectBlock != null) { block = suspectBlock; } else { + // Find a usable block pool to scan. if ((curBlockIter == null) || curBlockIter.atEnd()) { long timeout = findNextUsableBlockIter(); if (timeout > 0) { @@ -524,22 +572,9 @@ private long runLoop(ExtendedBlock suspectBlock) { } return 0L; } - try { - block = curBlockIter.nextBlock(); - } catch (IOException e) { - // There was an error listing the next block in the volume. This is a - // serious issue. - LOG.warn("{}: nextBlock error on {}", this, curBlockIter); - // On the next loop iteration, curBlockIter#eof will be set to true, and - // we will pick a different block iterator. - return 0L; - } + block = getNextBlockToScan(); if (block == null) { - // The BlockIterator is at EOF. - LOG.info("{}: finished scanning block pool {}", - this, curBlockIter.getBlockPoolId()); - saveBlockIterator(curBlockIter); - return 0; + return 0L; } } if (curBlockIter != null) { @@ -635,15 +670,17 @@ public void run() { LOG.error("{} exiting because of exception ", this, e); } LOG.info("{} exiting.", this); + VolumeScannerCBInjector.get().preSavingBlockIteratorTask(this); // Save the current position of all block iterators and close them. for (BlockIterator iter : blockIters) { saveBlockIterator(iter); - IOUtils.cleanup(null, iter); + IOUtils.cleanupWithLogger(null, iter); } } finally { + VolumeScannerCBInjector.get().terminationCallBack(this); // When the VolumeScanner exits, release the reference we were holding // on the volume. This will allow the volume to be removed later. 
- IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); } } @@ -660,6 +697,7 @@ public synchronized void shutdown() { stopping = true; notify(); this.interrupt(); + VolumeScannerCBInjector.get().shutdownCallBack(this); } @@ -729,7 +767,7 @@ public synchronized void disableBlockPoolId(String bpid) { if (iter.getBlockPoolId().equals(bpid)) { LOG.trace("{}: disabling scanning on block pool {}", this, bpid); i.remove(); - IOUtils.cleanup(null, iter); + IOUtils.cleanupWithLogger(null, iter); if (curBlockIter == iter) { curBlockIter = null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScannerCBInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScannerCBInjector.java new file mode 100644 index 0000000000000..d15d8d45d5e60 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScannerCBInjector.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.datanode; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Used for injecting call backs in {@link VolumeScanner} + * and {@link BlockScanner} tests. + * Calls into this are a no-op in production code. + */ +@VisibleForTesting +@InterfaceAudience.Private +public class VolumeScannerCBInjector { + private static VolumeScannerCBInjector instance = + new VolumeScannerCBInjector(); + + public static VolumeScannerCBInjector get() { + return instance; + } + + public static void set(VolumeScannerCBInjector injector) { + instance = injector; + } + + public void preSavingBlockIteratorTask(final VolumeScanner volumeScanner) { + } + + public void shutdownCallBack(final VolumeScanner volumeScanner) { + } + + public void terminationCallBack(final VolumeScanner volumeScanner) { + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java index 06867fbceb793..cdbba6ef14209 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java @@ -20,16 +20,15 @@ */ package org.apache.hadoop.hdfs.server.datanode.checker; -import com.google.common.annotations.Beta; -import com.google.common.annotations.GwtCompatible; -import com.google.common.base.Preconditions; -import static com.google.common.base.Preconditions.checkNotNull; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.SettableFuture; -import 
com.google.common.util.concurrent.Uninterruptibles; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import org.apache.hadoop.thirdparty.com.google.common.annotations.Beta; +import org.apache.hadoop.thirdparty.com.google.common.annotations.GwtCompatible; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.SettableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import static java.util.concurrent.atomic.AtomicReferenceFieldUpdater .newUpdater; @@ -55,7 +54,7 @@ * include instantiating a {@link SettableFuture}, submitting a task to a * {@link ListeningExecutorService}, and deriving a {@code Future} from an * existing one, typically using methods like {@link Futures#transform - * (ListenableFuture, com.google.common.base.Function) Futures.transform} + * (ListenableFuture, org.apache.hadoop.thirdparty.com.google.common.base.Function) Futures.transform} * and its overloaded versions. *

      *

      This class implements all methods in {@code ListenableFuture}. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AsyncChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AsyncChecker.java index d69845448b7aa..75b0ebea389cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AsyncChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AsyncChecker.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.checker; import java.util.Optional; -import com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java index 91582fe0558a8..997a6d972224c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java @@ -18,14 +18,14 @@ package org.apache.hadoop.hdfs.server.datanode.checker; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.MoreExecutors; -import 
com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; @@ -234,7 +234,7 @@ public void call(Set ignored1, } }), MoreExecutors.directExecutor()); } else { - IOUtils.cleanup(null, reference); + IOUtils.cleanupWithLogger(null, reference); if (numVolumes.decrementAndGet() == 0) { latch.countDown(); } @@ -311,7 +311,7 @@ public boolean checkVolume( ); return true; } else { - IOUtils.cleanup(null, volumeReference); + IOUtils.cleanupWithLogger(null, volumeReference); } return false; } @@ -354,23 +354,29 @@ private class ResultHandler } @Override - public void onSuccess(@Nonnull VolumeCheckResult result) { - switch(result) { - case HEALTHY: - case DEGRADED: - LOG.debug("Volume {} is {}.", reference.getVolume(), result); - markHealthy(); - break; - case FAILED: - LOG.warn("Volume {} detected as being unhealthy", + public void onSuccess(VolumeCheckResult result) { + if (result == null) { + LOG.error("Unexpected health check result null for volume {}", reference.getVolume()); - markFailed(); - break; - default: - LOG.error("Unexpected health check result {} for volume {}", - result, reference.getVolume()); markHealthy(); - break; + } else { + switch(result) { + case HEALTHY: + case DEGRADED: + LOG.debug("Volume {} is {}.", 
reference.getVolume(), result); + markHealthy(); + break; + case FAILED: + LOG.warn("Volume {} detected as being unhealthy", + reference.getVolume()); + markFailed(); + break; + default: + LOG.error("Unexpected health check result {} for volume {}", + result, reference.getVolume()); + markHealthy(); + break; + } } cleanup(); } @@ -398,7 +404,7 @@ private void markFailed() { } private void cleanup() { - IOUtils.cleanup(null, reference); + IOUtils.cleanupWithLogger(null, reference); invokeCallback(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java index 0332bc8633950..3d49dd5362864 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java @@ -20,9 +20,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/ThrottledAsyncChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/ThrottledAsyncChecker.java index 032379a4d12a1..f969c7ade288b 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/ThrottledAsyncChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/ThrottledAsyncChecker.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hdfs.server.datanode.checker; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.Timer; @@ -117,8 +117,8 @@ public ThrottledAsyncChecker(final Timer timer, * will receive the same Future. 
*/ @Override - public Optional> schedule(Checkable target, - K context) { + public synchronized Optional> schedule( + Checkable target, K context) { if (checksInProgress.containsKey(target)) { return Optional.empty(); } @@ -166,7 +166,7 @@ private void addResultCachingCallback( Checkable target, ListenableFuture lf) { Futures.addCallback(lf, new FutureCallback() { @Override - public void onSuccess(@Nullable V result) { + public void onSuccess(V result) { synchronized (ThrottledAsyncChecker.this) { checksInProgress.remove(target); completedChecks.put(target, new LastCheckResult<>( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java index ae7b34f773468..d014e499f912e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java @@ -20,8 +20,8 @@ */ package org.apache.hadoop.hdfs.server.datanode.checker; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; import org.apache.hadoop.hdfs.server.datanode.checker.AbstractFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java index f9063b7a8929f..c2aa77f253a1f 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.erasurecode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -106,7 +106,7 @@ public void rejectedExecution(Runnable runnable, private void initializeStripedBlkReconstructionThreadPool(int numThreads) { LOG.debug("Using striped block reconstruction; pool threads={}", numThreads); - stripedReconstructionPool = DFSUtilClient.getThreadPoolExecutor(2, + stripedReconstructionPool = DFSUtilClient.getThreadPoolExecutor(numThreads, numThreads, 60, new LinkedBlockingQueue<>(), "StripedBlockReconstruction-", false); stripedReconstructionPool.allowCoreThreadTimeOut(true); @@ -121,33 +121,31 @@ private void initializeStripedBlkReconstructionThreadPool(int numThreads) { public void processErasureCodingTasks( Collection ecTasks) { for (BlockECReconstructionInfo reconInfo : ecTasks) { - int xmitsSubmitted = 0; try { StripedReconstructionInfo stripedReconInfo = new StripedReconstructionInfo( reconInfo.getExtendedBlock(), reconInfo.getErasureCodingPolicy(), reconInfo.getLiveBlockIndices(), reconInfo.getSourceDnInfos(), reconInfo.getTargetDnInfos(), reconInfo.getTargetStorageTypes(), - reconInfo.getTargetStorageIDs()); + reconInfo.getTargetStorageIDs(), reconInfo.getExcludeReconstructedIndices()); // It may throw IllegalArgumentException from task#stripedReader // constructor. 
final StripedBlockReconstructor task = new StripedBlockReconstructor(this, stripedReconInfo); if (task.hasValidTargets()) { + stripedReconstructionPool.submit(task); // See HDFS-12044. We increase xmitsInProgress even the task is only // enqueued, so that // 1) NN will not send more tasks than what DN can execute and // 2) DN will not throw away reconstruction tasks, and instead keeps // an unbounded number of tasks in the executor's task queue. - xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1); + int xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1); getDatanode().incrementXmitsInProcess(xmitsSubmitted); - stripedReconstructionPool.submit(task); } else { LOG.warn("No missing internal block. Skip reconstruction for task:{}", reconInfo); } } catch (Throwable e) { - getDatanode().decrementXmitsInProgress(xmitsSubmitted); LOG.warn("Failed to reconstruct striped block {}", reconInfo.getExtendedBlock().getLocalBlock(), e); } @@ -170,4 +168,8 @@ public void shutDown() { stripedReconstructionPool.shutdown(); stripedReadPool.shutdown(); } + + public float getXmitWeight() { + return xmitWeight; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java index b2e64966a18b0..a196935219ec5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java @@ -17,11 +17,13 @@ */ package org.apache.hadoop.hdfs.server.datanode.erasurecode; +import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import 
org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector; import org.apache.hadoop.io.DataOutputBuffer; /** @@ -32,7 +34,7 @@ */ @InterfaceAudience.Private public abstract class StripedBlockChecksumReconstructor - extends StripedReconstructor { + extends StripedReconstructor implements Closeable { private ByteBuffer targetBuffer; private final byte[] targetIndices; @@ -55,6 +57,7 @@ protected StripedBlockChecksumReconstructor(ErasureCodingWorker worker, private void init() throws IOException { initDecoderIfNecessary(); + initDecodingValidatorIfNecessary(); getStripedReader().init(); // allocate buffer to keep the reconstructed block data targetBuffer = allocateBuffer(getBufferSize()); @@ -73,31 +76,28 @@ private void init() throws IOException { public void reconstruct() throws IOException { prepareDigester(); long maxTargetLength = getMaxTargetLength(); - try { - while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) { - long remaining = maxTargetLength - getPositionInBlock(); - final int toReconstructLen = (int) Math - .min(getStripedReader().getBufferSize(), remaining); - // step1: read from minimum source DNs required for reconstruction. 
- // The returned success list is the source DNs we do real read from - getStripedReader().readMinimumSources(toReconstructLen); - - // step2: decode to reconstruct targets - reconstructTargets(toReconstructLen); - - // step3: calculate checksum - checksumDataLen += checksumWithTargetOutput( - targetBuffer.array(), toReconstructLen); - - updatePositionInBlock(toReconstructLen); - requestedLen -= toReconstructLen; - clearBuffers(); - } - - commitDigest(); - } finally { - cleanup(); + while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) { + DataNodeFaultInjector.get().stripedBlockChecksumReconstruction(); + long remaining = maxTargetLength - getPositionInBlock(); + final int toReconstructLen = (int) Math + .min(getStripedReader().getBufferSize(), remaining); + // step1: read from minimum source DNs required for reconstruction. + // The returned success list is the source DNs we do real read from + getStripedReader().readMinimumSources(toReconstructLen); + + // step2: decode to reconstruct targets + reconstructTargets(toReconstructLen); + + // step3: calculate checksum + checksumDataLen += checksumWithTargetOutput( + getBufferArray(targetBuffer), toReconstructLen); + + updatePositionInBlock(toReconstructLen); + requestedLen -= toReconstructLen; + clearBuffers(); } + + commitDigest(); } /** @@ -140,7 +140,7 @@ private long checksumWithTargetOutput(byte[] outputData, int toReconstructLen) // case-2) length of data bytes which is less than bytesPerCRC if (requestedLen <= toReconstructLen) { int remainingLen = Math.toIntExact(requestedLen); - outputData = Arrays.copyOf(targetBuffer.array(), remainingLen); + outputData = Arrays.copyOf(outputData, remainingLen); int partialLength = remainingLen % getChecksum().getBytesPerChecksum(); @@ -193,7 +193,16 @@ private void reconstructTargets(int toReconstructLen) throws IOException { for (int i = 0; i < targetIndices.length; i++) { tarIndices[i] = targetIndices[i]; } - getDecoder().decode(inputs, tarIndices, 
outputs); + + if (isValidationEnabled()) { + markBuffers(inputs); + getDecoder().decode(inputs, tarIndices, outputs); + resetBuffers(inputs); + + getValidator().validate(inputs, tarIndices, outputs); + } else { + getDecoder().decode(inputs, tarIndices, outputs); + } } /** @@ -207,4 +216,25 @@ private void clearBuffers() { public long getChecksumDataLen() { return checksumDataLen; } + + /** + * Gets an array corresponding the buffer. + * @param buffer the input buffer. + * @return the array with content of the buffer. + */ + private static byte[] getBufferArray(ByteBuffer buffer) { + byte[] buff = new byte[buffer.remaining()]; + if (buffer.hasArray()) { + buff = buffer.array(); + } else { + buffer.slice().get(buff); + } + return buff; + } + + @Override + public void close() throws IOException { + getStripedReader().close(); + cleanup(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java index 4dc51c9916ff7..54302e3c2561d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector; import org.apache.hadoop.hdfs.util.StripedBlockUtil.BlockReadStats; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; @@ -95,6 +96,7 @@ ByteBuffer getReadBuffer() { } void freeReadBuffer() { + DataNodeFaultInjector.get().interceptFreeBlockReaderBuffer(); buffer = null; } @@ -156,7 +158,7 @@ 
private Peer newConnectedPeer(ExtendedBlock b, InetSocketAddress addr, return peer; } finally { if (!success) { - IOUtils.cleanup(null, peer); + IOUtils.cleanupWithLogger(null, peer); IOUtils.closeSocket(sock); } } @@ -179,6 +181,8 @@ public BlockReadStats call() throws Exception { } catch (IOException e) { LOG.info(e.getMessage()); throw e; + } finally { + DataNodeFaultInjector.get().interceptBlockReader(); } } }; @@ -188,6 +192,7 @@ public BlockReadStats call() throws Exception { * Perform actual reading of bytes from block. */ private BlockReadStats actualReadFromBlock() throws IOException { + DataNodeFaultInjector.get().delayBlockReader(); int len = buffer.remaining(); int n = 0; while (n < len) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java index 29c0078e95710..ecd6351b46f64 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector; import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; +import org.apache.hadoop.io.erasurecode.rawcoder.InvalidDecodingException; import org.apache.hadoop.util.Time; /** @@ -53,6 +54,8 @@ public void run() { try { initDecoderIfNecessary(); + initDecodingValidatorIfNecessary(); + getStripedReader().init(); stripedWriter.init(); @@ -67,7 +70,11 @@ public void run() { LOG.warn("Failed to reconstruct striped block: {}", getBlockGroup(), e); getDatanode().getMetrics().incrECFailedReconstructionTasks(); } finally { - 
getDatanode().decrementXmitsInProgress(getXmits()); + float xmitWeight = getErasureCodingWorker().getXmitWeight(); + // if the xmits is smaller than 1, the xmitsSubmitted should be set to 1 + // because if it set to zero, we cannot to measure the xmits submitted + int xmitsSubmitted = Math.max((int) (getXmits() * xmitWeight), 1); + getDatanode().decrementXmitsInProgress(xmitsSubmitted); final DataNodeMetrics metrics = getDatanode().getMetrics(); metrics.incrECReconstructionTasks(); metrics.incrECReconstructionBytesRead(getBytesRead()); @@ -88,6 +95,10 @@ void reconstruct() throws IOException { (int) Math.min(getStripedReader().getBufferSize(), remaining); long start = Time.monotonicNow(); + long bytesToRead = (long) toReconstructLen * getStripedReader().getMinRequiredSources(); + if (getDatanode().getEcReconstuctReadThrottler() != null) { + getDatanode().getEcReconstuctReadThrottler().throttle(bytesToRead); + } // step1: read from minimum source DNs required for reconstruction. // The returned success list is the source DNs we do real read from getStripedReader().readMinimumSources(toReconstructLen); @@ -98,6 +109,10 @@ void reconstruct() throws IOException { long decodeEnd = Time.monotonicNow(); // step3: transfer data + long bytesToWrite = (long) toReconstructLen * stripedWriter.getTargets(); + if (getDatanode().getEcReconstuctWriteThrottler() != null) { + getDatanode().getEcReconstuctWriteThrottler().throttle(bytesToWrite); + } if (stripedWriter.transferData2Targets() == 0) { String error = "Transfer failed for all targets."; throw new IOException(error); @@ -122,12 +137,38 @@ private void reconstructTargets(int toReconstructLen) throws IOException { int[] erasedIndices = stripedWriter.getRealTargetIndices(); ByteBuffer[] outputs = stripedWriter.getRealTargetBuffers(toReconstructLen); + if (isValidationEnabled()) { + markBuffers(inputs); + decode(inputs, erasedIndices, outputs); + resetBuffers(inputs); + + DataNodeFaultInjector.get().badDecoding(outputs); + long 
start = Time.monotonicNow(); + try { + getValidator().validate(inputs, erasedIndices, outputs); + long validateEnd = Time.monotonicNow(); + getDatanode().getMetrics().incrECReconstructionValidateTime( + validateEnd - start); + } catch (InvalidDecodingException e) { + long validateFailedEnd = Time.monotonicNow(); + getDatanode().getMetrics().incrECReconstructionValidateTime( + validateFailedEnd - start); + getDatanode().getMetrics().incrECInvalidReconstructionTasks(); + throw e; + } + } else { + decode(inputs, erasedIndices, outputs); + } + + stripedWriter.updateRealTargetBuffers(toReconstructLen); + } + + private void decode(ByteBuffer[] inputs, int[] erasedIndices, + ByteBuffer[] outputs) throws IOException { long start = System.nanoTime(); getDecoder().decode(inputs, erasedIndices, outputs); long end = System.nanoTime(); this.getDatanode().getMetrics().incrECDecodingTime(end - start); - - stripedWriter.updateRealTargetBuffers(toReconstructLen); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReader.java index 98edf724a8e25..a302f5e868965 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReader.java @@ -17,7 +17,8 @@ */ package org.apache.hadoop.hdfs.server.datanode.erasurecode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -328,14 +329,14 @@ int[] doReadMinimumSources(int reconstructLength, // cancel remaining reads if we read 
successfully from minimum // number of source DNs required by reconstruction. cancelReads(futures.keySet()); - futures.clear(); + clearFuturesAndService(); break; } } } catch (InterruptedException e) { LOG.info("Read data interrupted.", e); cancelReads(futures.keySet()); - futures.clear(); + clearFuturesAndService(); break; } } @@ -429,6 +430,20 @@ private static void cancelReads(Collection> futures) { } } + // remove all stale futures from readService, and clear futures. + private void clearFuturesAndService() { + while (!futures.isEmpty()) { + try { + Future future = readService.poll( + stripedReadTimeoutInMills, TimeUnit.MILLISECONDS + ); + futures.remove(future); + } catch (InterruptedException e) { + LOG.info("Clear stale futures from service is interrupted.", e); + } + } + } + void close() { if (zeroStripeBuffers != null) { for (ByteBuffer zeroStripeBuffer : zeroStripeBuffers) { @@ -438,9 +453,9 @@ void close() { zeroStripeBuffers = null; for (StripedBlockReader reader : readers) { + reader.closeBlockReader(); reconstructor.freeBuffer(reader.getReadBuffer()); reader.freeReadBuffer(); - reader.closeBlockReader(); } } @@ -493,4 +508,9 @@ CachingStrategy getCachingStrategy() { int getXmits() { return xmits; } + + public int getMinRequiredSources() { + return minRequiredSources; + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructionInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructionInfo.java index 0a3e12546dfd7..caf8dfa950446 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructionInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructionInfo.java @@ -41,26 +41,28 @@ public class StripedReconstructionInfo { private final DatanodeInfo[] targets; private 
final StorageType[] targetStorageTypes; private final String[] targetStorageIds; + private final byte[] excludeReconstructedIndices; public StripedReconstructionInfo(ExtendedBlock blockGroup, ErasureCodingPolicy ecPolicy, byte[] liveIndices, DatanodeInfo[] sources, byte[] targetIndices) { this(blockGroup, ecPolicy, liveIndices, sources, targetIndices, null, - null, null); + null, null, new byte[0]); } StripedReconstructionInfo(ExtendedBlock blockGroup, ErasureCodingPolicy ecPolicy, byte[] liveIndices, DatanodeInfo[] sources, DatanodeInfo[] targets, StorageType[] targetStorageTypes, - String[] targetStorageIds) { + String[] targetStorageIds, byte[] excludeReconstructedIndices) { this(blockGroup, ecPolicy, liveIndices, sources, null, targets, - targetStorageTypes, targetStorageIds); + targetStorageTypes, targetStorageIds, excludeReconstructedIndices); } private StripedReconstructionInfo(ExtendedBlock blockGroup, ErasureCodingPolicy ecPolicy, byte[] liveIndices, DatanodeInfo[] sources, byte[] targetIndices, DatanodeInfo[] targets, - StorageType[] targetStorageTypes, String[] targetStorageIds) { + StorageType[] targetStorageTypes, String[] targetStorageIds, + byte[] excludeReconstructedIndices) { this.blockGroup = blockGroup; this.ecPolicy = ecPolicy; @@ -70,6 +72,7 @@ private StripedReconstructionInfo(ExtendedBlock blockGroup, this.targets = targets; this.targetStorageTypes = targetStorageTypes; this.targetStorageIds = targetStorageIds; + this.excludeReconstructedIndices = excludeReconstructedIndices; } ExtendedBlock getBlockGroup() { @@ -104,19 +107,9 @@ String[] getTargetStorageIds() { return targetStorageIds; } - /** - * Return the weight of this EC reconstruction task. - * - * DN uses it to coordinate with NN to adjust the speed of scheduling the - * reconstructions tasks to this DN. - * - * @return the weight of this reconstruction task. - * @see HDFS-12044 - */ - int getWeight() { - // See HDFS-12044. 
The weight of a RS(n, k) is calculated by the network - // connections it opens. - return sources.length + targets.length; + byte[] getExcludeReconstructedIndices() { + return excludeReconstructedIndices; } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java index a1f4c7ff55e37..89ee49a3c8c43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hdfs.server.datanode.erasurecode; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.io.erasurecode.rawcoder.DecodingValidator; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -102,10 +105,14 @@ abstract class StripedReconstructor { private final Configuration conf; private final DataNode datanode; private final ErasureCodingPolicy ecPolicy; + private final ErasureCoderOptions coderOptions; private RawErasureDecoder decoder; private final ExtendedBlock blockGroup; private static final ByteBufferPool BUFFER_POOL = new ElasticByteBufferPool(); + private final boolean isValidationEnabled; + private DecodingValidator validator; + // position in striped internal block private long positionInBlock; private StripedReader stripedReader; @@ -113,6 +120,7 @@ abstract class StripedReconstructor { private final CachingStrategy cachingStrategy; private long maxTargetLength = 0L; private final BitSet liveBitSet; + private final BitSet excludeBitSet; // metrics 
private AtomicLong bytesRead = new AtomicLong(0); @@ -130,11 +138,24 @@ abstract class StripedReconstructor { for (int i = 0; i < stripedReconInfo.getLiveIndices().length; i++) { liveBitSet.set(stripedReconInfo.getLiveIndices()[i]); } + excludeBitSet = new BitSet( + ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits()); + for (int i = 0; i < stripedReconInfo.getExcludeReconstructedIndices().length; i++) { + excludeBitSet.set(stripedReconInfo.getExcludeReconstructedIndices()[i]); + } + blockGroup = stripedReconInfo.getBlockGroup(); stripedReader = new StripedReader(this, datanode, conf, stripedReconInfo); cachingStrategy = CachingStrategy.newDefaultStrategy(); positionInBlock = 0L; + + coderOptions = new ErasureCoderOptions( + ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits()); + isValidationEnabled = conf.getBoolean( + DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_VALIDATION_KEY, + DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_VALIDATION_VALUE) + && !coderOptions.allowChangeInputs(); } public void incrBytesRead(boolean local, long delta) { @@ -195,13 +216,18 @@ long getBlockLen(int i) { // Initialize decoder protected void initDecoderIfNecessary() { if (decoder == null) { - ErasureCoderOptions coderOptions = new ErasureCoderOptions( - ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits()); decoder = CodecUtil.createRawDecoder(conf, ecPolicy.getCodecName(), coderOptions); } } + // Initialize decoding validator + protected void initDecodingValidatorIfNecessary() { + if (isValidationEnabled && validator == null) { + validator = new DecodingValidator(decoder); + } + } + long getPositionInBlock() { return positionInBlock; } @@ -242,6 +268,10 @@ BitSet getLiveBitSet() { return liveBitSet; } + BitSet getExcludeBitSet(){ + return excludeBitSet; + } + long getMaxTargetLength() { return maxTargetLength; } @@ -258,6 +288,10 @@ RawErasureDecoder getDecoder() { return decoder; } + int getNumLiveBlocks(){ + return liveBitSet.cardinality(); + } + void cleanup() { if (decoder != 
null) { decoder.release(); @@ -275,4 +309,37 @@ Configuration getConf() { DataNode getDatanode() { return datanode; } + + public ErasureCodingWorker getErasureCodingWorker() { + return erasureCodingWorker; + } + + @VisibleForTesting + static ByteBufferPool getBufferPool() { + return BUFFER_POOL; + } + + boolean isValidationEnabled() { + return isValidationEnabled; + } + + DecodingValidator getValidator() { + return validator; + } + + protected static void markBuffers(ByteBuffer[] buffers) { + for (ByteBuffer buffer: buffers) { + if (buffer != null) { + buffer.mark(); + } + } + } + + protected static void resetBuffers(ByteBuffer[] buffers) { + for (ByteBuffer buffer: buffers) { + if (buffer != null) { + buffer.reset(); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedWriter.java index 762506cfdaefb..b570c666a3c4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedWriter.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.erasurecode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -82,8 +82,10 @@ class StripedWriter { assert targetStorageIds != null; writers = new StripedBlockWriter[targets.length]; - targetIndices = new short[targets.length]; + Preconditions.checkArgument( + targetIndices.length <= dataBlkNum + parityBlkNum - reconstructor.getNumLiveBlocks(), + "Reconstruction work gets too much targets."); 
Preconditions.checkArgument(targetIndices.length <= parityBlkNum, "Too much missed striped blocks."); initTargetIndices(); @@ -123,13 +125,14 @@ void init() throws IOException { private void initTargetIndices() { BitSet bitset = reconstructor.getLiveBitSet(); + BitSet excludebitset=reconstructor.getExcludeBitSet(); int m = 0; hasValidTargets = false; for (int i = 0; i < dataBlkNum + parityBlkNum; i++) { if (!bitset.get(i)) { if (reconstructor.getBlockLen(i) > 0) { - if (m < targets.length) { + if (m < targets.length && !excludebitset.get(i)) { targetIndices[m++] = (short)i; hasValidTargets = true; } @@ -296,7 +299,8 @@ boolean hasValidTargets() { */ void clearBuffers() { for (StripedBlockWriter writer : writers) { - ByteBuffer targetBuffer = writer.getTargetBuffer(); + ByteBuffer targetBuffer = + writer != null ? writer.getTargetBuffer() : null; if (targetBuffer != null) { targetBuffer.clear(); } @@ -305,7 +309,8 @@ void clearBuffers() { void close() { for (StripedBlockWriter writer : writers) { - ByteBuffer targetBuffer = writer.getTargetBuffer(); + ByteBuffer targetBuffer = + writer != null ? 
writer.getTargetBuffer() : null; if (targetBuffer != null) { reconstructor.freeBuffer(targetBuffer); writer.freeTargetBuffer(); @@ -313,7 +318,9 @@ void close() { } for (int i = 0; i < targets.length; i++) { - writers[i].close(); + if (writers[i] != null) { + writers[i].close(); + } } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/DataNodeVolumeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/DataNodeVolumeMetrics.java index 87509e5b92167..0ce57efd59549 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/DataNodeVolumeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/DataNodeVolumeMetrics.java @@ -71,6 +71,14 @@ public class DataNodeVolumeMetrics { private MutableRate writeIoRate; private MutableQuantiles[] writeIoLatencyQuantiles; + @Metric("file io transfer rate") + private MutableRate transferIoRate; + private MutableQuantiles[] transferIoLatencyQuantiles; + + @Metric("file io nativeCopy rate") + private MutableRate nativeCopyIoRate; + private MutableQuantiles[] nativeCopyIoLatencyQuantiles; + @Metric("number of file io errors") private MutableCounterLong totalFileIoErrors; @Metric("file io error rate") @@ -162,6 +170,40 @@ public double getWriteIoStdDev() { return writeIoRate.lastStat().stddev(); } + // Based on transferIoRate + public long getTransferIoSampleCount() { + return transferIoRate.lastStat().numSamples(); + } + + public double getTransferIoMean() { + return transferIoRate.lastStat().mean(); + } + + public double getTransferIoStdDev() { + return transferIoRate.lastStat().stddev(); + } + + public MutableQuantiles[] getTransferIoQuantiles() { + return transferIoLatencyQuantiles; + } + + // Based on nativeCopyIoRate + public long getNativeCopyIoSampleCount() { + return nativeCopyIoRate.lastStat().numSamples(); + 
} + + public double getNativeCopyIoMean() { + return nativeCopyIoRate.lastStat().mean(); + } + + public double getNativeCopyIoStdDev() { + return nativeCopyIoRate.lastStat().stddev(); + } + + public MutableQuantiles[] getNativeCopyIoQuantiles() { + return nativeCopyIoLatencyQuantiles; + } + public long getTotalFileIoErrors() { return totalFileIoErrors.value(); } @@ -193,11 +235,13 @@ public DataNodeVolumeMetrics(final MetricsSystem metricsSystem, syncIoLatencyQuantiles = new MutableQuantiles[len]; readIoLatencyQuantiles = new MutableQuantiles[len]; writeIoLatencyQuantiles = new MutableQuantiles[len]; + transferIoLatencyQuantiles = new MutableQuantiles[len]; + nativeCopyIoLatencyQuantiles = new MutableQuantiles[len]; for (int i = 0; i < len; i++) { int interval = intervals[i]; metadataOperationLatencyQuantiles[i] = registry.newQuantiles( "metadataOperationLatency" + interval + "s", - "Meatadata Operation Latency in ms", "ops", "latency", interval); + "Metadata Operation Latency in ms", "ops", "latency", interval); dataFileIoLatencyQuantiles[i] = registry.newQuantiles( "dataFileIoLatency" + interval + "s", "Data File Io Latency in ms", "ops", "latency", interval); @@ -213,6 +257,12 @@ public DataNodeVolumeMetrics(final MetricsSystem metricsSystem, writeIoLatencyQuantiles[i] = registry.newQuantiles( "writeIoLatency" + interval + "s", "Data write Io Latency in ms", "ops", "latency", interval); + transferIoLatencyQuantiles[i] = registry.newQuantiles( + "transferIoLatency" + interval + "s", + "Data transfer Io Latency in ms", "ops", "latency", interval); + nativeCopyIoLatencyQuantiles[i] = registry.newQuantiles( + "nativeCopyIoLatency" + interval + "s", + "Data nativeCopy Io Latency in ms", "ops", "latency", interval); } } @@ -238,7 +288,7 @@ public void unRegister() { ms.unregisterSource(name); } - public void addMetadastaOperationLatency(final long latency) { + public void addMetadataOperationLatency(final long latency) { totalMetadataOperations.incr(); 
metadataOperationRate.add(latency); for (MutableQuantiles q : metadataOperationLatencyQuantiles) { @@ -282,8 +332,22 @@ public void addWriteIoLatency(final long latency) { } } + public void addTransferIoLatency(final long latency) { + transferIoRate.add(latency); + for (MutableQuantiles q: transferIoLatencyQuantiles) { + q.add(latency); + } + } + + public void addNativeCopyIoLatency(final long latency) { + nativeCopyIoRate.add(latency); + for (MutableQuantiles q: nativeCopyIoLatencyQuantiles) { + q.add(latency); + } + } + public void addFileIoError(final long latency) { totalFileIoErrors.incr(); - metadataOperationRate.add(latency); + fileIoErrorRate.add(latency); } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java index 2e5135d841f4b..4bfb0cb870cda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java @@ -36,6 +36,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.util.AutoCloseableLock; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; @@ -657,12 +658,16 @@ ReplicaInfo moveBlockAcrossVolumes(final ExtendedBlock block, FsVolumeSpi destination) throws IOException; /** - * Acquire the lock of the data set. + * Acquire the lock of the data set. This prevents other threads from + * modifying the volume map structure inside the datanode, but other changes + * are still possible. 
For example modifying the genStamp of a block instance. */ AutoCloseableLock acquireDatasetLock(); /*** - * Acquire the read lock of the data set. + * Acquire the read lock of the data set. This prevents other threads from + * modifying the volume map structure inside the datanode, but other changes + * are still possible. For example modifying the genStamp of a block instance. * @return The AutoClosable read lock instance. */ AutoCloseableLock acquireDatasetReadLock(); @@ -675,4 +680,9 @@ ReplicaInfo moveBlockAcrossVolumes(final ExtendedBlock block, * @throws IOException */ Set deepCopyReplica(String bpid) throws IOException; + + /** + * Get the volume list. + */ + List getVolumeList(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java index be978d75e9a34..c1043aee176e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java @@ -224,27 +224,27 @@ interface BlockIterator extends Closeable { */ public static class ScanInfo implements Comparable { private final long blockId; - /** - * The block file path, relative to the volume's base directory. - * If there was no block file found, this may be null. If 'vol' - * is null, then this is the full path of the block file. + * The full path to the folder containing the block / meta files. */ - private final String blockSuffix; - + private final File basePath; /** - * The suffix of the meta file path relative to the block file. - * If blockSuffix is null, then this will be the entire path relative - * to the volume base directory, or an absolute path if vol is also - * null. 
+ * The block file name, with no path */ - private final String metaSuffix; + private final String blockFile; + /** + * Holds the meta file name, with no path, only if blockFile is null. + * If blockFile is not null, the meta file will be named identically to + * the blockFile, but with a suffix like "_1234.meta". If the blockFile + * is present, we store only the meta file suffix. + */ + private final String metaFile; private final FsVolumeSpi volume; private final FileRegion fileRegion; /** - * Get the file's length in async block scan + * Get the file's length in async block scan. */ private final long blockLength; @@ -254,35 +254,19 @@ public static class ScanInfo implements Comparable { private final static String QUOTED_FILE_SEPARATOR = Matcher.quoteReplacement(File.separator); - /** - * Get the most condensed version of the path. - * - * For example, the condensed version of /foo//bar is /foo/bar - * Unlike {@link File#getCanonicalPath()}, this will never perform I/O - * on the filesystem. - * - * @param path the path to condense - * @return the condensed path - */ - private static String getCondensedPath(String path) { - return CONDENSED_PATH_REGEX.matcher(path). - replaceAll(QUOTED_FILE_SEPARATOR); - } - /** * Get a path suffix. * - * @param f The file to get the suffix for. + * @param f The string to get the suffix for. * @param prefix The prefix we're stripping off. 
* - * @return A suffix such that prefix + suffix = path to f + * @return A suffix such that prefix + suffix = f */ - private static String getSuffix(File f, String prefix) { - String fullPath = getCondensedPath(f.getAbsolutePath()); - if (fullPath.startsWith(prefix)) { - return fullPath.substring(prefix.length()); + private static String getSuffix(String f, String prefix) { + if (f.startsWith(prefix)) { + return f.substring(prefix.length()); } - throw new RuntimeException(prefix + " is not a prefix of " + fullPath); + throw new RuntimeException(prefix + " is not a prefix of " + f); } /** @@ -290,27 +274,27 @@ private static String getSuffix(File f, String prefix) { * the block data and meta-data files. * * @param blockId the block ID - * @param blockFile the path to the block data file - * @param metaFile the path to the block meta-data file + * @param basePath The full path to the directory the block is stored in + * @param blockFile The block filename, with no path + * @param metaFile The meta filename, with no path. If blockFile is not null + * then the metaFile and blockFile should have the same + * prefix, with the meta file having a suffix like + * "_1234.meta". To save memory, if the blockFile is present + * we store only the meta file suffix in the object * @param vol the volume that contains the block */ - public ScanInfo(long blockId, File blockFile, File metaFile, - FsVolumeSpi vol) { + public ScanInfo(long blockId, File basePath, String blockFile, + String metaFile, FsVolumeSpi vol) { this.blockId = blockId; - String condensedVolPath = - (vol == null || vol.getBaseURI() == null) ? null : - getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath()); - this.blockSuffix = blockFile == null ? null : - getSuffix(blockFile, condensedVolPath); - this.blockLength = (blockFile != null) ? 
blockFile.length() : 0; - if (metaFile == null) { - this.metaSuffix = null; - } else if (blockFile == null) { - this.metaSuffix = getSuffix(metaFile, condensedVolPath); + this.basePath = basePath; + this.blockFile = blockFile; + if (blockFile != null && metaFile != null) { + this.metaFile = getSuffix(metaFile, blockFile); } else { - this.metaSuffix = getSuffix(metaFile, - condensedVolPath + blockSuffix); + this.metaFile = metaFile; } + this.blockLength = (blockFile != null) ? + new File(basePath, blockFile).length() : 0; this.volume = vol; this.fileRegion = null; } @@ -330,8 +314,9 @@ public ScanInfo(long blockId, FsVolumeSpi vol, FileRegion fileRegion, this.blockLength = length; this.volume = vol; this.fileRegion = fileRegion; - this.blockSuffix = null; - this.metaSuffix = null; + this.basePath = null; + this.blockFile = null; + this.metaFile = null; } /** @@ -340,8 +325,8 @@ public ScanInfo(long blockId, FsVolumeSpi vol, FileRegion fileRegion, * @return the block data file */ public File getBlockFile() { - return (blockSuffix == null) ? null : - new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix); + return (blockFile == null) ? null : + new File(basePath.getAbsolutePath(), blockFile); } /** @@ -360,15 +345,10 @@ public long getBlockLength() { * @return the block meta data file */ public File getMetaFile() { - if (metaSuffix == null) { + if (metaFile == null) { return null; } - String fileSuffix = metaSuffix; - if (blockSuffix != null) { - fileSuffix = blockSuffix + metaSuffix; - } - return new File(new File(volume.getBaseURI()).getAbsolutePath(), - fileSuffix); + return new File(basePath.getAbsolutePath(), fullMetaFile()); } /** @@ -411,14 +391,24 @@ public int hashCode() { } public long getGenStamp() { - return metaSuffix != null ? Block.getGenerationStamp( - getMetaFile().getName()) : - HdfsConstants.GRANDFATHER_GENERATION_STAMP; + return metaFile != null ? 
Block.getGenerationStamp(fullMetaFile()) + : HdfsConstants.GRANDFATHER_GENERATION_STAMP; } public FileRegion getFileRegion() { return fileRegion; } + + private String fullMetaFile() { + if (metaFile == null) { + return null; + } + if (blockFile == null) { + return metaFile; + } else { + return blockFile + metaFile; + } + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/ReplicaInputStreams.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/ReplicaInputStreams.java index f40315a6da013..f8bd8c03e19a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/ReplicaInputStreams.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/ReplicaInputStreams.java @@ -130,7 +130,7 @@ public void closeStreams() throws IOException { dataInFd = null; } if (volumeRef != null) { - IOUtils.cleanup(null, volumeRef); + IOUtils.cleanupWithLogger(null, volumeRef); volumeRef = null; } // throw IOException if there is any @@ -146,7 +146,7 @@ public void close() { dataInFd = null; IOUtils.closeStream(checksumIn); checksumIn = null; - IOUtils.cleanup(null, volumeRef); + IOUtils.cleanupWithLogger(null, volumeRef); volumeRef = null; } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java index fc84c4d280cde..5bfba59f94af4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java @@ -47,6 +47,7 @@ import java.util.concurrent.TimeUnit; import 
org.apache.hadoop.hdfs.server.datanode.FSCachingGetSpaceUsed; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -76,7 +77,11 @@ import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.Timer; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DU_INTERVAL_KEY; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_CLASSNAME; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_JITTER_KEY; /** * A block pool slice represents a portion of a block pool stored on a volume. @@ -85,7 +90,7 @@ * * This class is synchronized by {@link FsVolumeImpl}. */ -class BlockPoolSlice { +public class BlockPoolSlice { static final Logger LOG = LoggerFactory.getLogger(BlockPoolSlice.class); private final String bpid; @@ -111,6 +116,8 @@ class BlockPoolSlice { private final Timer timer; private final int maxDataLength; private final FileIoProvider fileIoProvider; + private final Configuration config; + private final File bpDir; private static ForkJoinPool addReplicaThreadPool = null; private static final int VOLUMES_REPLICA_ADD_THREADPOOL_SIZE = Runtime @@ -124,7 +131,7 @@ public int compare(File f1, File f2) { }; // TODO:FEDERATION scalability issue - a thread per DU is needed - private final GetSpaceUsed dfsUsage; + private volatile GetSpaceUsed dfsUsage; /** * Create a blook pool slice @@ -137,6 +144,8 @@ public int compare(File f1, File f2) { */ BlockPoolSlice(String bpid, FsVolumeImpl volume, File bpDir, Configuration conf, Timer timer) throws IOException { + this.config = conf; + this.bpDir = bpDir; this.bpid = bpid; this.volume = volume; this.fileIoProvider = volume.getFileIoProvider(); @@ -228,6 +237,39 @@ public void run() { 
SHUTDOWN_HOOK_PRIORITY); } + public void updateDfsUsageConfig(Long interval, Long jitter, Class klass) + throws IOException { + // Close the old dfsUsage if it is CachingGetSpaceUsed. + if (dfsUsage instanceof CachingGetSpaceUsed) { + ((CachingGetSpaceUsed) dfsUsage).close(); + } + if (interval != null) { + Preconditions.checkArgument(interval > 0, + FS_DU_INTERVAL_KEY + " should be larger than 0"); + config.setLong(FS_DU_INTERVAL_KEY, interval); + } + if (jitter != null) { + Preconditions.checkArgument(jitter >= 0, + FS_GETSPACEUSED_JITTER_KEY + " should be larger than or equal to 0"); + config.setLong(FS_GETSPACEUSED_JITTER_KEY, jitter); + } + if (klass != null) { + config.setClass(FS_GETSPACEUSED_CLASSNAME, klass, CachingGetSpaceUsed.class); + } + // Start new dfsUsage. + this.dfsUsage = new FSCachingGetSpaceUsed.Builder().setBpid(bpid) + .setVolume(volume) + .setPath(bpDir) + .setConf(config) + .setInitialUsed(loadDfsUsed()) + .build(); + } + + @VisibleForTesting + public GetSpaceUsed getDfsUsage() { + return dfsUsage; + } + private synchronized static void initializeAddReplicaPool(Configuration conf, FsDatasetImpl dataset) { if (addReplicaThreadPool == null) { @@ -293,9 +335,13 @@ long loadDfsUsed() { long mtime; Scanner sc; + File duCacheFile = new File(currentDir, DU_CACHE_FILE); try { - sc = new Scanner(new File(currentDir, DU_CACHE_FILE), "UTF-8"); + sc = new Scanner(duCacheFile, "UTF-8"); } catch (FileNotFoundException fnfe) { + FsDatasetImpl.LOG.warn("{} file missing in {}, will proceed with Du " + + "for space computation calculation, ", + DU_CACHE_FILE, currentDir); return -1; } @@ -304,21 +350,31 @@ long loadDfsUsed() { if (sc.hasNextLong()) { cachedDfsUsed = sc.nextLong(); } else { + FsDatasetImpl.LOG.warn("cachedDfsUsed not found in file:{}, will " + + "proceed with Du for space computation calculation, ", + duCacheFile); return -1; } // Get the recorded mtime from the file. 
if (sc.hasNextLong()) { mtime = sc.nextLong(); } else { + FsDatasetImpl.LOG.warn("mtime not found in file:{}, will proceed" + + " with Du for space computation calculation, ", duCacheFile); return -1; } + long elapsedTime = timer.now() - mtime; // Return the cached value if mtime is okay. - if (mtime > 0 && (timer.now() - mtime < cachedDfsUsedCheckTime)) { + if (mtime > 0 && (elapsedTime < cachedDfsUsedCheckTime)) { FsDatasetImpl.LOG.info("Cached dfsUsed found for " + currentDir + ": " + cachedDfsUsed); return cachedDfsUsed; } + FsDatasetImpl.LOG.warn("elapsed time:{} is greater than threshold:{}," + + " mtime:{} in file:{}, will proceed with Du for space" + + " computation calculation", + elapsedTime, cachedDfsUsedCheckTime, mtime, duCacheFile); return -1; } finally { sc.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/CacheStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/CacheStats.java index f79b7c7374c6e..476a31e2ab132 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/CacheStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/CacheStats.java @@ -22,7 +22,7 @@ import org.apache.hadoop.io.nativeio.NativeIO; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Keeps statistics for the memory cache. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java index 81213a033f00f..138037456bc95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java @@ -30,6 +30,8 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -65,13 +67,12 @@ class FsDatasetAsyncDiskService { // ThreadPool core pool size private static final int CORE_THREADS_PER_VOLUME = 1; // ThreadPool maximum pool size - private static final int MAXIMUM_THREADS_PER_VOLUME = 4; + private final int maxNumThreadsPerVolume; // ThreadPool keep-alive time for threads over core pool size private static final long THREADS_KEEP_ALIVE_SECONDS = 60; private final DataNode datanode; private final FsDatasetImpl fsdatasetImpl; - private final ThreadGroup threadGroup; private Map executors = new HashMap(); private Map> deletedBlockIds @@ -89,7 +90,12 @@ class FsDatasetAsyncDiskService { FsDatasetAsyncDiskService(DataNode datanode, FsDatasetImpl fsdatasetImpl) { this.datanode = datanode; this.fsdatasetImpl = fsdatasetImpl; - this.threadGroup = new ThreadGroup(getClass().getSimpleName()); + maxNumThreadsPerVolume = datanode.getConf().getInt( + DFSConfigKeys.DFS_DATANODE_FSDATASETASYNCDISK_MAX_THREADS_PER_VOLUME_KEY, + DFSConfigKeys.DFS_DATANODE_FSDATASETASYNCDISK_MAX_THREADS_PER_VOLUME_DEFAULT); + Preconditions.checkArgument(maxNumThreadsPerVolume 
> 0, + DFSConfigKeys.DFS_DATANODE_FSDATASETASYNCDISK_MAX_THREADS_PER_VOLUME_KEY + + " must be a positive integer."); } private void addExecutorForVolume(final FsVolumeImpl volume) { @@ -102,7 +108,7 @@ public Thread newThread(Runnable r) { synchronized (this) { thisIndex = counter++; } - Thread t = new Thread(threadGroup, r); + Thread t = new Thread(r); t.setName("Async disk worker #" + thisIndex + " for volume " + volume); return t; @@ -110,7 +116,7 @@ public Thread newThread(Runnable r) { }; ThreadPoolExecutor executor = new ThreadPoolExecutor( - CORE_THREADS_PER_VOLUME, MAXIMUM_THREADS_PER_VOLUME, + CORE_THREADS_PER_VOLUME, maxNumThreadsPerVolume, THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS, new LinkedBlockingQueue(), threadFactory); @@ -167,18 +173,26 @@ synchronized long countPendingDeletions() { * Execute the task sometime in the future, using ThreadPools. */ synchronized void execute(FsVolumeImpl volume, Runnable task) { - if (executors == null) { - throw new RuntimeException("AsyncDiskService is already shutdown"); - } - if (volume == null) { - throw new RuntimeException("A null volume does not have a executor"); - } - ThreadPoolExecutor executor = executors.get(volume.getStorageID()); - if (executor == null) { - throw new RuntimeException("Cannot find volume " + volume - + " for execution of task " + task); - } else { - executor.execute(task); + try { + if (executors == null) { + throw new RuntimeException("AsyncDiskService is already shutdown"); + } + if (volume == null) { + throw new RuntimeException("A null volume does not have a executor"); + } + ThreadPoolExecutor executor = executors.get(volume.getStorageID()); + if (executor == null) { + throw new RuntimeException("Cannot find volume " + volume + + " for execution of task " + task); + } else { + executor.execute(task); + } + } catch (RuntimeException re) { + if (task instanceof ReplicaFileDeleteTask) { + IOUtils.cleanupWithLogger(null, + ((ReplicaFileDeleteTask) task).volumeRef); + } + throw re; } 
} @@ -202,16 +216,20 @@ synchronized void shutdown() { } } - public void submitSyncFileRangeRequest(FsVolumeImpl volume, - final ReplicaOutputStreams streams, final long offset, final long nbytes, - final int flags) { - execute(volume, new Runnable() { - @Override - public void run() { + public void submitSyncFileRangeRequest(FsVolumeImpl volume, final ReplicaOutputStreams streams, + final long offset, final long nbytes, final int flags) { + execute(volume, () -> { + try { + streams.syncFileRangeIfPossible(offset, nbytes, flags); + } catch (NativeIOException e) { try { - streams.syncFileRangeIfPossible(offset, nbytes, flags); - } catch (NativeIOException e) { - LOG.warn("sync_file_range error", e); + LOG.warn("sync_file_range error. Volume: {}, Capacity: {}, Available space: {}, " + + "File range offset: {}, length: {}, flags: {}", volume, volume.getCapacity(), + volume.getAvailable(), offset, nbytes, flags, e); + } catch (IOException ioe) { + LOG.warn("sync_file_range error. Volume: {}, Capacity: {}, " + + "File range offset: {}, length: {}, flags: {}", volume, volume.getCapacity(), + offset, nbytes, flags, e); } } }); @@ -314,28 +332,31 @@ private boolean moveFiles() { @Override public void run() { - final long blockLength = replicaToDelete.getBlockDataLength(); - final long metaLength = replicaToDelete.getMetadataLength(); - boolean result; + try { + final long blockLength = replicaToDelete.getBlockDataLength(); + final long metaLength = replicaToDelete.getMetadataLength(); + boolean result; - result = (trashDirectory == null) ? deleteFiles() : moveFiles(); + result = (trashDirectory == null) ? deleteFiles() : moveFiles(); - if (!result) { - LOG.warn("Unexpected error trying to " - + (trashDirectory == null ? "delete" : "move") - + " block " + block.getBlockPoolId() + " " + block.getLocalBlock() - + " at file " + replicaToDelete.getBlockURI() + ". 
Ignored."); - } else { - if(block.getLocalBlock().getNumBytes() != BlockCommand.NO_ACK){ - datanode.notifyNamenodeDeletedBlock(block, volume.getStorageID()); + if (!result) { + LOG.warn("Unexpected error trying to " + + (trashDirectory == null ? "delete" : "move") + + " block " + block.getBlockPoolId() + " " + block.getLocalBlock() + + " at file " + replicaToDelete.getBlockURI() + ". Ignored."); + } else { + if (block.getLocalBlock().getNumBytes() != BlockCommand.NO_ACK) { + datanode.notifyNamenodeDeletedBlock(block, volume.getStorageID()); + } + volume.onBlockFileDeletion(block.getBlockPoolId(), blockLength); + volume.onMetaFileDeletion(block.getBlockPoolId(), metaLength); + LOG.info("Deleted " + block.getBlockPoolId() + " " + + block.getLocalBlock() + " URI " + replicaToDelete.getBlockURI()); } - volume.onBlockFileDeletion(block.getBlockPoolId(), blockLength); - volume.onMetaFileDeletion(block.getBlockPoolId(), metaLength); - LOG.info("Deleted " + block.getBlockPoolId() + " " - + block.getLocalBlock() + " URI " + replicaToDelete.getBlockURI()); + updateDeletedBlockId(block); + } finally { + IOUtils.cleanupWithLogger(null, this.volumeRef); } - updateDeletedBlockId(block); - IOUtils.cleanup(null, volumeRef); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java index 732dc2180eabf..facace28604a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java @@ -23,8 +23,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_POLLING_MS; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_POLLING_MS_DEFAULT; 
-import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -42,7 +42,7 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.time.DurationFormatUtils; @@ -120,7 +120,7 @@ public boolean shouldAdvertise() { private final HashMap mappableBlockMap = new HashMap(); - private final AtomicLong numBlocksCached = new AtomicLong(0); + private final LongAdder numBlocksCached = new LongAdder(); private final FsDatasetImpl dataset; @@ -143,11 +143,11 @@ public boolean shouldAdvertise() { /** * Number of cache commands that could not be completed successfully */ - final AtomicLong numBlocksFailedToCache = new AtomicLong(0); + final LongAdder numBlocksFailedToCache = new LongAdder(); /** * Number of uncache commands that could not be completed successfully */ - final AtomicLong numBlocksFailedToUncache = new AtomicLong(0); + final LongAdder numBlocksFailedToUncache = new LongAdder(); public FsDatasetCache(FsDatasetImpl dataset) throws IOException { this.dataset = dataset; @@ -204,7 +204,7 @@ public void initCache(String bpid) throws IOException { for (Map.Entry entry : entrySet) { mappableBlockMap.put(entry.getKey(), new Value(keyToMappableBlock.get(entry.getKey()), State.CACHED)); - numBlocksCached.addAndGet(1); + numBlocksCached.increment(); dataset.datanode.getMetrics().incrBlocksCached(1); } } @@ -278,7 +278,7 @@ synchronized void cacheBlock(long blockId, String bpid, LOG.debug("Block with id {}, pool {} already exists in the " + 
"FsDatasetCache with state {}", blockId, bpid, prevValue.state ); - numBlocksFailedToCache.incrementAndGet(); + numBlocksFailedToCache.increment(); return; } mappableBlockMap.put(key, new Value(null, State.CACHING)); @@ -301,7 +301,7 @@ synchronized void uncacheBlock(String bpid, long blockId) { LOG.debug("Block with id {}, pool {} does not need to be uncached, " + "because it is not currently in the mappableBlockMap.", blockId, bpid); - numBlocksFailedToUncache.incrementAndGet(); + numBlocksFailedToUncache.increment(); return; } switch (prevValue.state) { @@ -331,7 +331,7 @@ synchronized void uncacheBlock(String bpid, long blockId) { default: LOG.debug("Block with id {}, pool {} does not need to be uncached, " + "because it is in state {}.", blockId, bpid, prevValue.state); - numBlocksFailedToUncache.incrementAndGet(); + numBlocksFailedToUncache.increment(); break; } } @@ -469,7 +469,7 @@ public void run() { dataset.datanode. getShortCircuitRegistry().processBlockMlockEvent(key); } - numBlocksCached.addAndGet(1); + numBlocksCached.increment(); dataset.datanode.getMetrics().incrBlocksCached(1); success = true; } finally { @@ -482,7 +482,7 @@ public void run() { LOG.debug("Caching of {} was aborted. We are now caching only {} " + "bytes in total.", key, cacheLoader.getCacheUsed()); IOUtils.closeQuietly(mappableBlock); - numBlocksFailedToCache.incrementAndGet(); + numBlocksFailedToCache.increment(); synchronized (FsDatasetCache.this) { mappableBlockMap.remove(key); @@ -561,7 +561,7 @@ public void run() { } long newUsedBytes = cacheLoader. release(key, value.mappableBlock.getLength()); - numBlocksCached.addAndGet(-1); + numBlocksCached.decrement(); dataset.datanode.getMetrics().incrBlocksUncached(1); if (revocationTimeMs != 0) { LOG.debug("Uncaching of {} completed. 
usedBytes = {}", @@ -607,15 +607,15 @@ public long getCacheCapacity() { } public long getNumBlocksFailedToCache() { - return numBlocksFailedToCache.get(); + return numBlocksFailedToCache.longValue(); } public long getNumBlocksFailedToUncache() { - return numBlocksFailedToUncache.get(); + return numBlocksFailedToUncache.longValue(); } public long getNumBlocksCached() { - return numBlocksCached.get(); + return numBlocksCached.longValue(); } public synchronized boolean isCached(String bpid, long blockId) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 39152fccb8f26..054c2c347d438 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -48,7 +48,7 @@ import javax.management.ObjectName; import javax.management.StandardMBean; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; @@ -120,9 +120,9 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -182,7 +182,7 @@ public StorageReport[] getStorageReports(String bpid) @Override public 
FsVolumeImpl getVolume(final ExtendedBlock b) { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { final ReplicaInfo r = volumeMap.get(b.getBlockPoolId(), b.getLocalBlock()); return r != null ? (FsVolumeImpl) r.getVolume() : null; @@ -192,7 +192,7 @@ public FsVolumeImpl getVolume(final ExtendedBlock b) { @Override // FsDatasetSpi public Block getStoredBlock(String bpid, long blkid) throws IOException { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { ReplicaInfo r = volumeMap.get(bpid, blkid); if (r == null) { return null; @@ -205,7 +205,7 @@ public Block getStoredBlock(String bpid, long blkid) public Set deepCopyReplica(String bpid) throws IOException { Set replicas = null; - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { replicas = new HashSet<>(volumeMap.replicas(bpid) == null ? Collections. EMPTY_SET : volumeMap.replicas(bpid)); } @@ -299,7 +299,20 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b) DFSConfigKeys.DFS_DATANODE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT, TimeUnit.MILLISECONDS)); this.datasetWriteLock = new AutoCloseableLock(datasetRWLock.writeLock()); - this.datasetReadLock = new AutoCloseableLock(datasetRWLock.readLock()); + boolean enableRL = conf.getBoolean( + DFSConfigKeys.DFS_DATANODE_LOCK_READ_WRITE_ENABLED_KEY, + DFSConfigKeys.DFS_DATANODE_LOCK_READ_WRITE_ENABLED_DEFAULT); + // The read lock can be disabled by the above config key. If it is disabled + // then we simply make the both the read and write lock variables hold + // the write lock. All accesses to the lock are via these variables, so that + // effectively disables the read lock. 
+ if (enableRL) { + LOG.info("The datanode lock is a read write lock"); + this.datasetReadLock = new AutoCloseableLock(datasetRWLock.readLock()); + } else { + LOG.info("The datanode lock is an exclusive write lock"); + this.datasetReadLock = this.datasetWriteLock; + } this.datasetWriteLockCondition = datasetWriteLock.newCondition(); // The number of volumes required for operation is the total number @@ -339,7 +352,7 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b) } storageMap = new ConcurrentHashMap(); - volumeMap = new ReplicaMap(datasetRWLock); + volumeMap = new ReplicaMap(datasetReadLock, datasetWriteLock); ramDiskReplicaTracker = RamDiskReplicaTracker.getInstance(conf, this); @SuppressWarnings("unchecked") @@ -349,7 +362,7 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b) RoundRobinVolumeChoosingPolicy.class, VolumeChoosingPolicy.class), conf); volumes = new FsVolumeList(volumeFailureInfos, datanode.getBlockScanner(), - blockChooserImpl); + blockChooserImpl, datanode.getDiskMetrics()); asyncDiskService = new FsDatasetAsyncDiskService(datanode, this); asyncLazyPersistService = new RamDiskAsyncLazyPersistService(datanode, conf); deletingBlock = new HashMap>(); @@ -472,7 +485,8 @@ private void addVolume(Storage.StorageDirectory sd) throws IOException { .setConf(this.conf) .build(); FsVolumeReference ref = fsVolume.obtainReference(); - ReplicaMap tempVolumeMap = new ReplicaMap(datasetRWLock); + ReplicaMap tempVolumeMap = + new ReplicaMap(datasetReadLock, datasetWriteLock); fsVolume.getVolumeMap(tempVolumeMap, ramDiskReplicaTracker); activateVolume(tempVolumeMap, sd, storageLocation.getStorageType(), ref); @@ -575,7 +589,8 @@ public void removeVolumes( // Unlike updating the volumeMap in addVolume(), this operation does // not scan disks. 
for (String bpid : volumeMap.getBlockPoolList()) { - List blocks = new ArrayList<>(); + List blocks = blkToInvalidate + .computeIfAbsent(bpid, (k) -> new ArrayList<>()); for (Iterator it = volumeMap.replicas(bpid).iterator(); it.hasNext();) { ReplicaInfo block = it.next(); @@ -588,9 +603,7 @@ public void removeVolumes( it.remove(); } } - blkToInvalidate.put(bpid, blocks); } - storageToRemove.add(sd.getStorageUuid()); storageLocationsToRemove.remove(sdLocation); } @@ -808,7 +821,7 @@ public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset) throws IOException { ReplicaInfo info; - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { info = volumeMap.get(b.getBlockPoolId(), b.getLocalBlock()); } @@ -896,7 +909,7 @@ ReplicaInfo getReplicaInfo(String bpid, long blkid) @Override // FsDatasetSpi public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkOffset, long metaOffset) throws IOException { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { ReplicaInfo info = getReplicaInfo(b); FsVolumeReference ref = info.getVolume().obtainReference(); try { @@ -906,11 +919,11 @@ public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, return new ReplicaInputStreams( blockInStream, metaInStream, ref, datanode.getFileIoProvider()); } catch (IOException e) { - IOUtils.cleanup(null, blockInStream); + IOUtils.cleanupWithLogger(null, blockInStream); throw e; } } catch (IOException e) { - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); throw e; } } @@ -1021,7 +1034,7 @@ public ReplicaInfo moveBlockAcrossStorage(ExtendedBlock block, } FsVolumeReference volumeRef = null; - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { volumeRef = volumes.getNextVolume(targetStorageType, targetStorageId, block.getNumBytes()); } @@ -1135,7 
+1148,7 @@ public ReplicaInfo moveBlockAcrossVolumes(ExtendedBlock block, FsVolumeSpi FsVolumeReference volumeRef = null; - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { volumeRef = destination.obtainReference(); } @@ -1253,7 +1266,7 @@ public ReplicaHandler append(ExtendedBlock b, replica = append(b.getBlockPoolId(), replicaInfo, newGS, b.getNumBytes()); } catch (IOException e) { - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); throw e; } return new ReplicaHandler(replica, ref); @@ -1385,7 +1398,7 @@ public ReplicaHandler recoverAppend( replica = (ReplicaInPipeline) replicaInfo; } } catch (IOException e) { - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); throw e; } return new ReplicaHandler(replica, ref); @@ -1479,7 +1492,7 @@ public ReplicaHandler createRbw( + " for block " + b.getBlockId()); } } catch (IOException e) { - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); throw e; } @@ -1571,7 +1584,7 @@ private ReplicaHandler recoverRbwImpl(ReplicaInPipeline rbw, // bump the replica's generation stamp to newGS rbw.getReplicaInfo().bumpReplicaGS(newGS); } catch (IOException e) { - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); throw e; } return new ReplicaHandler(rbw, ref); @@ -1717,7 +1730,7 @@ public ReplicaHandler createTemporary(StorageType storageType, try { newReplicaInfo = v.createTemporary(b); } catch (IOException e) { - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); throw e; } @@ -1813,6 +1826,9 @@ private ReplicaInfo finalizeReplica(String bpid, ReplicaInfo replicaInfo) newReplicaInfo = v.addFinalizedBlock( bpid, replicaInfo, replicaInfo, replicaInfo.getBytesReserved()); + if (replicaInfo instanceof ReplicaInPipeline) { + ((ReplicaInPipeline) replicaInfo).releaseReplicaInfoBytesReserved(); + } if (v.isTransientStorage()) { releaseLockedMemory( 
replicaInfo.getOriginalBytesReserved() @@ -1889,7 +1905,7 @@ public Map getBlockReports(String bpid) { new HashMap(); List curVolumes = null; - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { curVolumes = volumes.getVolumes(); for (FsVolumeSpi v : curVolumes) { builders.put(v.getStorageID(), BlockListAsLongs.builder(maxDataLength)); @@ -1902,28 +1918,32 @@ public Map getBlockReports(String bpid) { continue; } String volStorageID = b.getVolume().getStorageID(); - if (!builders.containsKey(volStorageID)) { - if (!missingVolumesReported.contains(volStorageID)) { - LOG.warn("Storage volume: " + volStorageID + " missing for the" - + " replica block: " + b + ". Probably being removed!"); - missingVolumesReported.add(volStorageID); - } - continue; - } switch(b.getState()) { case FINALIZED: case RBW: case RWR: - builders.get(volStorageID).add(b); break; case RUR: - ReplicaInfo orig = b.getOriginalReplica(); - builders.get(volStorageID).add(orig); + // use the original replica. + b = b.getOriginalReplica(); break; case TEMPORARY: - break; + continue; default: assert false : "Illegal ReplicaInfo state."; + continue; + } + BlockListAsLongs.Builder storageBuilder = builders.get(volStorageID); + // a storage in the process of failing will not be in the volumes list + // but will be in the replica map. + if (storageBuilder != null) { + storageBuilder.add(b); + } else { + if (!missingVolumesReported.contains(volStorageID)) { + LOG.warn("Storage volume: " + volStorageID + " missing for the" + + " replica block: " + b + ". 
Probably being removed!"); + missingVolumesReported.add(volStorageID); + } } } } @@ -1948,7 +1968,7 @@ public Map getBlockReports(String bpid) { */ @Override public List getFinalizedBlocks(String bpid) { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { final List finalized = new ArrayList( volumeMap.size(bpid)); for (ReplicaInfo b : volumeMap.replicas(bpid)) { @@ -2041,9 +2061,7 @@ private boolean isValid(final ExtendedBlock b, final ReplicaState state) { ReplicaInfo validateBlockFile(String bpid, long blockId) { //Should we check for metadata file too? final ReplicaInfo r; - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { - r = volumeMap.get(bpid, blockId); - } + r = volumeMap.get(bpid, blockId); if (r != null) { if (r.blockDataExists()) { return r; @@ -2052,9 +2070,7 @@ ReplicaInfo validateBlockFile(String bpid, long blockId) { datanode.checkDiskErrorAsync(r.getVolume()); } - if (LOG.isDebugEnabled()) { - LOG.debug("blockId=" + blockId + ", replica=" + r); - } + LOG.debug("blockId={}, replica={}", blockId, r); return null; } @@ -2124,15 +2140,12 @@ private void invalidate(String bpid, Block[] invalidBlks, boolean async) continue; } } catch(IllegalArgumentException e) { - LOG.warn("Parent directory check failed; replica " + info - + " is not backed by a local file"); + LOG.warn("Parent directory check failed; replica {} is " + + "not backed by a local file", info); } removing = volumeMap.remove(bpid, invalidBlks[i]); addDeletingBlock(bpid, removing.getBlockId()); - if (LOG.isDebugEnabled()) { - LOG.debug("Block file " + removing.getBlockURI() - + " is to be deleted"); - } + LOG.debug("Block file {} is to be deleted", removing.getBlockURI()); if (removing instanceof ReplicaInPipeline) { ((ReplicaInPipeline) removing).releaseAllBytesReserved(); } @@ -2173,8 +2186,8 @@ private void invalidate(String bpid, Block[] invalidBlks, boolean async) 
dataStorage.getTrashDirectoryForReplica(bpid, removing)); } } catch (ClosedChannelException e) { - LOG.warn("Volume " + v + " is closed, ignore the deletion task for " + - "block " + invalidBlks[i]); + LOG.warn("Volume {} is closed, ignore the deletion task for " + + "block: {}", v, invalidBlks[i]); } } if (!errors.isEmpty()) { @@ -2253,7 +2266,7 @@ private void cacheBlock(String bpid, long blockId) { success = true; } finally { if (!success) { - cacheManager.numBlocksFailedToCache.incrementAndGet(); + cacheManager.numBlocksFailedToCache.increment(); } } blockFileName = info.getBlockURI().toString(); @@ -2286,7 +2299,7 @@ public boolean isCached(String bpid, long blockId) { @Override // FsDatasetSpi public boolean contains(final ExtendedBlock block) { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { final long blockId = block.getLocalBlock().getBlockId(); final String bpid = block.getBlockPoolId(); final ReplicaInfo r = volumeMap.get(bpid, blockId); @@ -2343,6 +2356,7 @@ public void shutdown() { if (mbeanName != null) { MBeans.unregister(mbeanName); + mbeanName = null; } if (asyncDiskService != null) { @@ -2607,7 +2621,7 @@ public ReplicaInfo getReplica(String bpid, long blockId) { @Override public String getReplicaString(String bpid, long blockId) { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { final Replica r = volumeMap.get(bpid, blockId); return r == null ? 
"null" : r.toString(); } @@ -2827,7 +2841,7 @@ private ReplicaInfo updateReplicaUnderRecovery( @Override // FsDatasetSpi public long getReplicaVisibleLength(final ExtendedBlock block) throws IOException { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { final Replica replica = getReplicaInfo(block.getBlockPoolId(), block.getBlockId()); if (replica.getGenerationStamp() < block.getGenerationStamp()) { @@ -2977,18 +2991,20 @@ public void deleteBlockPool(String bpid, boolean force) @Override // FsDatasetSpi public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock block) throws IOException { - try (AutoCloseableLock lock = datasetWriteLock.acquire()) { + try (AutoCloseableLock lock = datasetReadLock.acquire()) { final Replica replica = volumeMap.get(block.getBlockPoolId(), block.getBlockId()); if (replica == null) { throw new ReplicaNotFoundException(block); } - if (replica.getGenerationStamp() < block.getGenerationStamp()) { - throw new IOException( - "Replica generation stamp < block generation stamp, block=" - + block + ", replica=" + replica); - } else if (replica.getGenerationStamp() > block.getGenerationStamp()) { - block.setGenerationStamp(replica.getGenerationStamp()); + synchronized(replica) { + if (replica.getGenerationStamp() < block.getGenerationStamp()) { + throw new IOException( + "Replica generation stamp < block generation stamp, block=" + + block + ", replica=" + replica); + } else if (replica.getGenerationStamp() > block.getGenerationStamp()) { + block.setGenerationStamp(replica.getGenerationStamp()); + } } } @@ -3418,5 +3434,10 @@ void stopAllDataxceiverThreads(FsVolumeImpl volume) { } } } + + @Override + public List getVolumeList() { + return volumes.getVolumes(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetUtil.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetUtil.java index fbd02c76820cd..621c2735a267c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetUtil.java @@ -34,7 +34,8 @@ import java.nio.file.Paths; import java.util.Arrays; -import com.google.common.base.Preconditions; +import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.Block; @@ -45,7 +46,6 @@ import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.shaded.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; /** Utility methods. 
*/ @InterfaceAudience.Private @@ -117,7 +117,7 @@ public static FileDescriptor openAndSeek(File file, long offset) } return raf.getFD(); } catch(IOException ioe) { - IOUtils.cleanup(null, raf); + IOUtils.cleanupWithLogger(null, raf); throw ioe; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index aa0cc5685fd81..c59a77184a974 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -86,10 +86,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * The underlying volume used to store replica. @@ -133,6 +133,7 @@ public class FsVolumeImpl implements FsVolumeSpi { protected volatile long configuredCapacity; private final FileIoProvider fileIoProvider; private final DataNodeVolumeMetrics metrics; + private URI baseURI; /** * Per-volume worker pool that processes new blocks to cache. 
@@ -182,6 +183,7 @@ public class FsVolumeImpl implements FsVolumeSpi { File parent = currentDir.getParentFile(); cacheExecutor = initializeCacheExecutor(parent); this.metrics = DataNodeVolumeMetrics.create(conf, parent.getPath()); + this.baseURI = new File(currentDir.getParent()).toURI(); } else { cacheExecutor = null; this.metrics = null; @@ -302,7 +304,7 @@ private void checkReference() { } @VisibleForTesting - int getReferenceCount() { + public int getReferenceCount() { return this.reference.getReferenceCount(); } @@ -491,6 +493,10 @@ long getRecentReserved() { return recentReserved; } + public Map getBlockPoolSlices() { + return bpSlices; + } + long getReserved(){ return reserved != null ? reserved.getReserved() : 0; } @@ -506,7 +512,7 @@ BlockPoolSlice getBlockPoolSlice(String bpid) throws IOException { @Override public URI getBaseURI() { - return new File(currentDir.getParent()).toURI(); + return baseURI; } @Override @@ -1390,7 +1396,7 @@ private void compileReport(File bpFinalizedDir, File dir, long blockId = Block.getBlockId(file.getName()); verifyFileLocation(file, bpFinalizedDir, blockId); - report.add(new ScanInfo(blockId, null, file, this)); + report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this)); } continue; } @@ -1413,7 +1419,8 @@ private void compileReport(File bpFinalizedDir, File dir, } } verifyFileLocation(blockFile, bpFinalizedDir, blockId); - report.add(new ScanInfo(blockId, blockFile, metaFile, this)); + report.add(new ScanInfo(blockId, dir, blockFile.getName(), + metaFile == null ? 
null : metaFile.getName(), this)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImplBuilder.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImplBuilder.java index 50ab97b87a7fc..5cdd45c70fcf2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImplBuilder.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImplBuilder.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DF; import org.apache.hadoop.fs.StorageType; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java index 049654b567dd5..bfb400c049b73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java @@ -32,6 +32,7 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Condition; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -41,6 +42,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy; import org.apache.hadoop.hdfs.server.datanode.BlockScanner; import org.apache.hadoop.hdfs.server.datanode.StorageLocation; +import 
org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeDiskMetrics; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.AutoCloseableLock; @@ -62,13 +64,16 @@ class FsVolumeList { private final VolumeChoosingPolicy blockChooser; private final BlockScanner blockScanner; + private final DataNodeDiskMetrics diskMetrics; + FsVolumeList(List initialVolumeFailureInfos, BlockScanner blockScanner, - VolumeChoosingPolicy blockChooser) { + VolumeChoosingPolicy blockChooser, DataNodeDiskMetrics dataNodeDiskMetrics) { this.blockChooser = blockChooser; this.blockScanner = blockScanner; this.checkDirsLock = new AutoCloseableLock(); this.checkDirsLockCondition = checkDirsLock.newCondition(); + this.diskMetrics = dataNodeDiskMetrics; for (VolumeFailureInfo volumeFailureInfo: initialVolumeFailureInfos) { volumeFailureInfos.put(volumeFailureInfo.getFailedStorageLocation(), volumeFailureInfo); @@ -84,6 +89,15 @@ List getVolumes() { private FsVolumeReference chooseVolume(List list, long blockSize, String storageId) throws IOException { + + // Exclude slow disks when choosing volume. + if (diskMetrics != null) { + List slowDisksToExclude = diskMetrics.getSlowDisksToExclude(); + list = list.stream() + .filter(volume -> !slowDisksToExclude.contains(volume.getBaseURI().getPath())) + .collect(Collectors.toList()); + } + while (true) { FsVolumeImpl volume = blockChooser.chooseVolume(list, blockSize, storageId); @@ -296,7 +310,7 @@ void addVolume(FsVolumeReference ref) { } else { // If the volume is not put into a volume scanner, it does not need to // hold the reference. - IOUtils.cleanup(null, ref); + IOUtils.cleanupWithLogger(null, ref); } // If the volume is used to replace a failed volume, it needs to reset the // volume failure info for this volume. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlockLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlockLoader.java index 02bea81f70d20..96d88345e6b9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlockLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlockLoader.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.io.IOUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/NativePmemMappableBlockLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/NativePmemMappableBlockLoader.java index 55e5decd0832c..ec024cda9ab02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/NativePmemMappableBlockLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/NativePmemMappableBlockLoader.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.io.IOUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/PmemVolumeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/PmemVolumeManager.java index 51b768198b7a2..a85c577745af4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/PmemVolumeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/PmemVolumeManager.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.filefilter.TrueFileFilter; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java index b4d2c640cd804..7e077181707af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java @@ -65,7 +65,7 @@ import org.codehaus.jackson.map.ObjectReader; import org.codehaus.jackson.map.ObjectWriter; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class is used to create provided volumes. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java index a77faf2cec8bc..0d42ae99e358e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; +import org.apache.hadoop.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -153,16 +154,24 @@ synchronized boolean queryVolume(FsVolumeImpl volume) { * Execute the task sometime in the future, using ThreadPools. */ synchronized void execute(String storageId, Runnable task) { - if (executors == null) { - throw new RuntimeException( - "AsyncLazyPersistService is already shutdown"); - } - ThreadPoolExecutor executor = executors.get(storageId); - if (executor == null) { - throw new RuntimeException("Cannot find root storage volume with id " + - storageId + " for execution of task " + task); - } else { - executor.execute(task); + try { + if (executors == null) { + throw new RuntimeException( + "AsyncLazyPersistService is already shutdown"); + } + ThreadPoolExecutor executor = executors.get(storageId); + if (executor == null) { + throw new RuntimeException("Cannot find root storage volume with id " + + storageId + " for execution of task " + task); + } else { + executor.execute(task); + } + } catch (RuntimeException re) { + if (task instanceof ReplicaLazyPersistTask) { + IOUtils.cleanupWithLogger(null, + ((ReplicaLazyPersistTask) task).targetVolume); + } + throw re; } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaLruTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaLruTracker.java index b940736ccfd28..aebedaab0ef8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaLruTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaLruTracker.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import com.google.common.collect.TreeMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.TreeMultimap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.Time; @@ -35,7 +35,7 @@ @InterfaceStability.Unstable public class RamDiskReplicaLruTracker extends RamDiskReplicaTracker { - private class RamDiskReplicaLru extends RamDiskReplica { + private static class RamDiskReplicaLru extends RamDiskReplica { long lastUsedTime; private RamDiskReplicaLru(String bpid, long blockId, @@ -88,7 +88,7 @@ synchronized void addReplica(final String bpid, final long blockId, } RamDiskReplicaLru ramDiskReplicaLru = new RamDiskReplicaLru(bpid, blockId, transientVolume, - lockedBytesReserved); + lockedBytesReserved); map.put(blockId, ramDiskReplicaLru); replicasNotPersisted.add(ramDiskReplicaLru); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java index 07e520117f617..f7b12ff179941 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java index df14f2aad01e2..c1d103ed50dba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ReplicaMap.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; import java.util.Collection; -import java.util.Comparator; import java.util.HashMap; import java.util.Map; import java.util.concurrent.locks.ReadWriteLock; @@ -26,45 +25,37 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo; -import org.apache.hadoop.hdfs.util.FoldedTreeSet; +import org.apache.hadoop.util.LightWeightResizableGSet; import org.apache.hadoop.util.AutoCloseableLock; /** * Maintains the replica map. */ class ReplicaMap { - private final ReadWriteLock rwLock; // Lock object to synchronize this instance. private final AutoCloseableLock readLock; private final AutoCloseableLock writeLock; - // Map of block pool Id to a set of ReplicaInfo. 
- private final Map> map = new HashMap<>(); + // Map of block pool Id to another map of block Id to ReplicaInfo. + private final Map> map = + new HashMap<>(); - // Special comparator used to compare Long to Block ID in the TreeSet. - private static final Comparator LONG_AND_BLOCK_COMPARATOR - = new Comparator() { - - @Override - public int compare(Object o1, Object o2) { - long lookup = (long) o1; - long stored = ((Block) o2).getBlockId(); - return lookup > stored ? 1 : lookup < stored ? -1 : 0; - } - }; - - ReplicaMap(ReadWriteLock lock) { - if (lock == null) { + ReplicaMap(AutoCloseableLock readLock, AutoCloseableLock writeLock) { + if (readLock == null || writeLock == null) { throw new HadoopIllegalArgumentException( "Lock to synchronize on cannot be null"); } - this.rwLock = lock; - this.readLock = new AutoCloseableLock(rwLock.readLock()); - this.writeLock = new AutoCloseableLock(rwLock.writeLock()); + this.readLock = readLock; + this.writeLock = writeLock; + } + + ReplicaMap(ReadWriteLock lock) { + this(new AutoCloseableLock(lock.readLock()), + new AutoCloseableLock(lock.writeLock())); } String[] getBlockPoolList() { - try (AutoCloseableLock l = writeLock.acquire()) { + try (AutoCloseableLock l = readLock.acquire()) { return map.keySet().toArray(new String[map.keySet().size()]); } } @@ -109,12 +100,9 @@ ReplicaInfo get(String bpid, Block block) { */ ReplicaInfo get(String bpid, long blockId) { checkBlockPool(bpid); - try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - if (set == null) { - return null; - } - return set.get(blockId, LONG_AND_BLOCK_COMPARATOR); + try (AutoCloseableLock l = readLock.acquire()) { + LightWeightResizableGSet m = map.get(bpid); + return m != null ? 
m.get(new Block(blockId)) : null; } } @@ -130,13 +118,13 @@ ReplicaInfo add(String bpid, ReplicaInfo replicaInfo) { checkBlockPool(bpid); checkBlock(replicaInfo); try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - if (set == null) { + LightWeightResizableGSet m = map.get(bpid); + if (m == null) { // Add an entry for block pool if it does not exist already - set = new FoldedTreeSet<>(); - map.put(bpid, set); + m = new LightWeightResizableGSet(); + map.put(bpid, m); } - return set.addOrReplace(replicaInfo); + return m.put(replicaInfo); } } @@ -148,18 +136,17 @@ ReplicaInfo addAndGet(String bpid, ReplicaInfo replicaInfo) { checkBlockPool(bpid); checkBlock(replicaInfo); try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - if (set == null) { + LightWeightResizableGSet m = map.get(bpid); + if (m == null) { // Add an entry for block pool if it does not exist already - set = new FoldedTreeSet<>(); - map.put(bpid, set); + m = new LightWeightResizableGSet(); + map.put(bpid, m); } - ReplicaInfo oldReplicaInfo = set.get(replicaInfo.getBlockId(), - LONG_AND_BLOCK_COMPARATOR); + ReplicaInfo oldReplicaInfo = m.get(replicaInfo); if (oldReplicaInfo != null) { return oldReplicaInfo; } else { - set.addOrReplace(replicaInfo); + m.put(replicaInfo); } return replicaInfo; } @@ -198,13 +185,12 @@ ReplicaInfo remove(String bpid, Block block) { checkBlockPool(bpid); checkBlock(block); try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - if (set != null) { - ReplicaInfo replicaInfo = - set.get(block.getBlockId(), LONG_AND_BLOCK_COMPARATOR); + LightWeightResizableGSet m = map.get(bpid); + if (m != null) { + ReplicaInfo replicaInfo = m.get(block); if (replicaInfo != null && block.getGenerationStamp() == replicaInfo.getGenerationStamp()) { - return set.removeAndGet(replicaInfo); + return m.remove(block); } } } @@ -221,9 +207,9 @@ ReplicaInfo remove(String bpid, Block block) { 
ReplicaInfo remove(String bpid, long blockId) { checkBlockPool(bpid); try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - if (set != null) { - return set.removeAndGet(blockId, LONG_AND_BLOCK_COMPARATOR); + LightWeightResizableGSet m = map.get(bpid); + if (m != null) { + return m.remove(new Block(blockId)); } } return null; @@ -235,9 +221,9 @@ ReplicaInfo remove(String bpid, long blockId) { * @return the number of replicas in the map */ int size(String bpid) { - try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - return set != null ? set.size() : 0; + try (AutoCloseableLock l = readLock.acquire()) { + LightWeightResizableGSet m = map.get(bpid); + return m != null ? m.size() : 0; } } @@ -252,17 +238,19 @@ int size(String bpid) { * @return a collection of the replicas belonging to the block pool */ Collection replicas(String bpid) { - return map.get(bpid); + LightWeightResizableGSet m = null; + m = map.get(bpid); + return m != null ? m.values() : null; } void initBlockPool(String bpid) { checkBlockPool(bpid); try (AutoCloseableLock l = writeLock.acquire()) { - FoldedTreeSet set = map.get(bpid); - if (set == null) { + LightWeightResizableGSet m = map.get(bpid); + if (m == null) { // Add an entry for block pool if it does not exist already - set = new FoldedTreeSet<>(); - map.put(bpid, set); + m = new LightWeightResizableGSet(); + map.put(bpid, m); } } } @@ -281,4 +269,14 @@ void cleanUpBlockPool(String bpid) { AutoCloseableLock getLock() { return writeLock; } + + /** + * Get the lock object used for synchronizing the ReplicasMap for read only + * operations. 
+ * @return The read lock object + */ + AutoCloseableLock getReadLock() { + return readLock; + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java index a8a6c85762d62..7fd8d031c0da7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java @@ -17,9 +17,11 @@ */ package org.apache.hadoop.hdfs.server.datanode.metrics; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.server.datanode.DataNode; @@ -28,13 +30,21 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports.DiskOp; import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY; +import 
static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY; /** * This class detects and maintains DataNode disk outliers and their @@ -48,8 +58,6 @@ public class DataNodeDiskMetrics { DataNodeDiskMetrics.class); private DataNode dn; - private final long MIN_OUTLIER_DETECTION_DISKS = 5; - private final long SLOW_DISK_LOW_THRESHOLD_MS = 20; private final long detectionInterval; private volatile boolean shouldRun; private OutlierDetector slowDiskDetector; @@ -61,11 +69,38 @@ public class DataNodeDiskMetrics { // code, status should not be overridden by daemon thread. private boolean overrideStatus = true; - public DataNodeDiskMetrics(DataNode dn, long diskOutlierDetectionIntervalMs) { + /** + * Minimum number of disks to run outlier detection. + */ + private volatile long minOutlierDetectionDisks; + /** + * Threshold in milliseconds below which a disk is definitely not slow. + */ + private volatile long lowThresholdMs; + /** + * The number of slow disks that needs to be excluded. + */ + private int maxSlowDisksToExclude; + /** + * List of slow disks that need to be excluded. 
+ */ + private List slowDisksToExclude = new ArrayList<>(); + + public DataNodeDiskMetrics(DataNode dn, long diskOutlierDetectionIntervalMs, + Configuration conf) { this.dn = dn; this.detectionInterval = diskOutlierDetectionIntervalMs; - slowDiskDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_DISKS, - SLOW_DISK_LOW_THRESHOLD_MS); + minOutlierDetectionDisks = + conf.getLong(DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, + DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT); + lowThresholdMs = + conf.getLong(DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, + DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT); + maxSlowDisksToExclude = + conf.getInt(DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY, + DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT); + slowDiskDetector = + new OutlierDetector(minOutlierDetectionDisks, lowThresholdMs); shouldRun = true; startDiskOutlierDetectionThread(); } @@ -111,6 +146,21 @@ public void run() { detectAndUpdateDiskOutliers(metadataOpStats, readIoStats, writeIoStats); + + // Sort the slow disks by latency and extract the top n by maxSlowDisksToExclude. + if (maxSlowDisksToExclude > 0) { + ArrayList diskLatencies = new ArrayList<>(); + for (Map.Entry> diskStats : + diskOutliersStats.entrySet()) { + diskLatencies.add(new DiskLatency(diskStats.getKey(), diskStats.getValue())); + } + + Collections.sort(diskLatencies, (o1, o2) + -> Double.compare(o2.getMaxLatency(), o1.getMaxLatency())); + + slowDisksToExclude = diskLatencies.stream().limit(maxSlowDisksToExclude) + .map(DiskLatency::getSlowDisk).collect(Collectors.toList()); + } } try { @@ -155,6 +205,35 @@ private void detectAndUpdateDiskOutliers(Map metadataOpStats, } } + /** + * This structure is a wrapper over disk latencies. 
+ */ + public static class DiskLatency { + final private String slowDisk; + final private Map latencyMap; + + public DiskLatency( + String slowDiskID, + Map latencyMap) { + this.slowDisk = slowDiskID; + this.latencyMap = latencyMap; + } + + double getMaxLatency() { + double maxLatency = 0; + for (double latency : latencyMap.values()) { + if (latency > maxLatency) { + maxLatency = latency; + } + } + return maxLatency; + } + + public String getSlowDisk() { + return slowDisk; + } + } + private void addDiskStat(Map> diskStats, String disk, DiskOp diskOp, double latency) { if (!diskStats.containsKey(disk)) { @@ -190,4 +269,35 @@ public void addSlowDiskForTesting(String slowDiskPath, diskOutliersStats.put(slowDiskPath, latencies); } } + + public List getSlowDisksToExclude() { + return slowDisksToExclude; + } + + public void setLowThresholdMs(long thresholdMs) { + Preconditions.checkArgument(thresholdMs > 0, + DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY + " should be larger than 0"); + lowThresholdMs = thresholdMs; + this.slowDiskDetector.setLowThresholdMs(thresholdMs); + } + + public long getLowThresholdMs() { + return lowThresholdMs; + } + + public void setMinOutlierDetectionDisks(long minDisks) { + Preconditions.checkArgument(minDisks > 0, + DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY + " should be larger than 0"); + minOutlierDetectionDisks = minDisks; + this.slowDiskDetector.setMinNumResources(minDisks); + } + + public long getMinOutlierDetectionDisks() { + return minOutlierDetectionDisks; + } + + @VisibleForTesting + public OutlierDetector getSlowDiskDetector() { + return this.slowDiskDetector; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index cc802375f9f28..d7eee80720194 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -61,6 +61,8 @@ public class DataNodeMetrics { @Metric MutableCounterLong bytesRead; @Metric("Milliseconds spent reading") MutableCounterLong totalReadTime; + @Metric private MutableRate readTransferRate; + final private MutableQuantiles[] readTransferRateQuantiles; @Metric MutableCounterLong blocksWritten; @Metric MutableCounterLong blocksRead; @Metric MutableCounterLong blocksReplicated; @@ -109,6 +111,12 @@ public class DataNodeMetrics { @Metric("Count of active dataNode xceivers") private MutableGaugeInt dataNodeActiveXceiversCount; + @Metric("Count of active DataNode packetResponder") + private MutableGaugeInt dataNodePacketResponderCount; + + @Metric("Count of active DataNode block recovery worker") + private MutableGaugeInt dataNodeBlockRecoveryWorkerCount; + @Metric MutableRate readBlockOp; @Metric MutableRate writeBlockOp; @Metric MutableRate blockChecksumOp; @@ -146,6 +154,8 @@ public class DataNodeMetrics { MutableCounterLong ecReconstructionTasks; @Metric("Count of erasure coding failed reconstruction tasks") MutableCounterLong ecFailedReconstructionTasks; + @Metric("Count of erasure coding invalidated reconstruction tasks") + private MutableCounterLong ecInvalidReconstructionTasks; @Metric("Nanoseconds spent by decoding tasks") MutableCounterLong ecDecodingTimeNanos; @Metric("Bytes read by erasure coding worker") @@ -160,6 +170,8 @@ public class DataNodeMetrics { private MutableCounterLong ecReconstructionDecodingTimeMillis; @Metric("Milliseconds spent on write by erasure coding worker") private MutableCounterLong ecReconstructionWriteTimeMillis; + @Metric("Milliseconds spent on validating by erasure coding worker") + private MutableCounterLong ecReconstructionValidateTimeMillis; @Metric("Sum of all BPServiceActors 
command queue length") private MutableCounterLong sumOfActorCommandQueueLength; @Metric("Num of processed commands of all BPServiceActors") @@ -191,6 +203,7 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals, sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len]; ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len]; ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len]; + readTransferRateQuantiles = new MutableQuantiles[len]; for (int i = 0; i < len; i++) { int interval = intervals[i]; @@ -219,6 +232,10 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals, "ramDiskBlocksLazyPersistWindows" + interval + "s", "Time between the RamDisk block write and disk persist in ms", "ops", "latency", interval); + readTransferRateQuantiles[i] = registry.newQuantiles( + "readTransferRate" + interval + "s", + "Rate at which bytes are read from datanode calculated in bytes per second", + "ops", "rate", interval); } } @@ -280,6 +297,13 @@ public void addIncrementalBlockReport(long latency, } } + public void addReadTransferRate(long readTransferRate) { + this.readTransferRate.add(readTransferRate); + for (MutableQuantiles q : readTransferRateQuantiles) { + q.add(readTransferRate); + } + } + public void addCacheReport(long latency) { cacheReports.add(latency); } @@ -513,6 +537,14 @@ public void incrECFailedReconstructionTasks() { ecFailedReconstructionTasks.incr(); } + public void incrECInvalidReconstructionTasks() { + ecInvalidReconstructionTasks.incr(); + } + + public long getECInvalidReconstructionTasks() { + return ecInvalidReconstructionTasks.value(); + } + public void incrDataNodeActiveXceiversCount() { dataNodeActiveXceiversCount.incr(); } @@ -525,6 +557,42 @@ public void setDataNodeActiveXceiversCount(int value) { dataNodeActiveXceiversCount.set(value); } + public int getDataNodeActiveXceiverCount() { + return dataNodeActiveXceiversCount.value(); + } + + public void 
incrDataNodePacketResponderCount() { + dataNodePacketResponderCount.incr(); + } + + public void decrDataNodePacketResponderCount() { + dataNodePacketResponderCount.decr(); + } + + public void setDataNodePacketResponderCount(int value) { + dataNodePacketResponderCount.set(value); + } + + public int getDataNodePacketResponderCount() { + return dataNodePacketResponderCount.value(); + } + + public void incrDataNodeBlockRecoveryWorkerCount() { + dataNodeBlockRecoveryWorkerCount.incr(); + } + + public void decrDataNodeBlockRecoveryWorkerCount() { + dataNodeBlockRecoveryWorkerCount.decr(); + } + + public void setDataNodeBlockRecoveryWorkerCount(int value) { + dataNodeBlockRecoveryWorkerCount.set(value); + } + + public int getDataNodeBlockRecoveryWorkerCount() { + return dataNodeBlockRecoveryWorkerCount.value(); + } + public void incrECDecodingTime(long decodingTimeNanos) { ecDecodingTimeNanos.incr(decodingTimeNanos); } @@ -553,6 +621,10 @@ public void incrECReconstructionDecodingTime(long millis) { ecReconstructionDecodingTimeMillis.incr(millis); } + public void incrECReconstructionValidateTime(long millis) { + ecReconstructionValidateTimeMillis.incr(millis); + } + public DataNodeUsageReport getDNUsageReport(long timeSinceLastReport) { return dnUsageReportUtil.getUsageReport(bytesWritten.value(), bytesRead .value(), totalWriteTime.value(), totalReadTime.value(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java index 3c70a23ac5b3d..a77c3ba0643a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java @@ -21,17 +21,24 @@ import org.apache.hadoop.classification.InterfaceAudience; 
import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; import org.apache.hadoop.metrics2.MetricsJsonBuilder; import org.apache.hadoop.metrics2.lib.MutableRollingAverages; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Map; import java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; /** * This class maintains DataNode peer metrics (e.g. numOps, AvgTime, etc.) for @@ -48,11 +55,8 @@ public class DataNodePeerMetrics { private final String name; - /** - * Threshold in milliseconds below which a DataNode is definitely not slow. - */ - private static final long LOW_THRESHOLD_MS = 5; - private static final long MIN_OUTLIER_DETECTION_NODES = 10; + // Strictly to be used by test code only. Source code is not supposed to use this. + private Map testOutlier = null; private final OutlierDetector slowNodeDetector; @@ -61,15 +65,29 @@ public class DataNodePeerMetrics { * for outlier detection. If the number of samples is below this then * outlier detection is skipped. 
*/ - private final long minOutlierDetectionSamples; + private volatile long minOutlierDetectionSamples; + /** + * Threshold in milliseconds below which a DataNode is definitely not slow. + */ + private volatile long lowThresholdMs; + /** + * Minimum number of nodes to run outlier detection. + */ + private volatile long minOutlierDetectionNodes; public DataNodePeerMetrics(final String name, Configuration conf) { this.name = name; minOutlierDetectionSamples = conf.getLong( DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY, DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT); - this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES, - LOW_THRESHOLD_MS); + lowThresholdMs = + conf.getLong(DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT); + minOutlierDetectionNodes = + conf.getLong(DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT); + this.slowNodeDetector = + new OutlierDetector(minOutlierDetectionNodes, lowThresholdMs); sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time"); } @@ -77,7 +95,7 @@ public String name() { return name; } - long getMinOutlierDetectionSamples() { + public long getMinOutlierDetectionSamples() { return minOutlierDetectionSamples; } @@ -126,18 +144,66 @@ public void collectThreadLocalStates() { * Retrieve the set of dataNodes that look significantly slower * than their peers. */ - public Map getOutliers() { - // This maps the metric name to the aggregate latency. - // The metric name is the datanode ID. - final Map stats = - sendPacketDownstreamRollingAverages.getStats( - minOutlierDetectionSamples); - LOG.trace("DataNodePeerMetrics: Got stats: {}", stats); + public Map getOutliers() { + // outlier must be null for source code. + if (testOutlier == null) { + // This maps the metric name to the aggregate latency. + // The metric name is the datanode ID. 
+ final Map stats = + sendPacketDownstreamRollingAverages.getStats(minOutlierDetectionSamples); + LOG.trace("DataNodePeerMetrics: Got stats: {}", stats); + return slowNodeDetector.getOutlierMetrics(stats); + } else { + // this happens only for test code. + return testOutlier; + } + } - return slowNodeDetector.getOutliers(stats); + /** + * Strictly to be used by test code only. Source code is not supposed to use this. This method + * directly sets outlier mapping so that aggregate latency metrics are not calculated for tests. + * + * @param outlier outlier directly set by tests. + */ + public void setTestOutliers(Map outlier) { + this.testOutlier = outlier; } public MutableRollingAverages getSendPacketDownstreamRollingAverages() { return sendPacketDownstreamRollingAverages; } + + public void setMinOutlierDetectionNodes(long minNodes) { + Preconditions.checkArgument(minNodes > 0, + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY + " should be larger than 0"); + minOutlierDetectionNodes = minNodes; + this.slowNodeDetector.setMinNumResources(minNodes); + } + + public long getMinOutlierDetectionNodes() { + return minOutlierDetectionNodes; + } + + public void setLowThresholdMs(long thresholdMs) { + Preconditions.checkArgument(thresholdMs > 0, + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY + " should be larger than 0"); + lowThresholdMs = thresholdMs; + this.slowNodeDetector.setLowThresholdMs(thresholdMs); + } + + public long getLowThresholdMs() { + return lowThresholdMs; + } + + public void setMinOutlierDetectionSamples(long minSamples) { + Preconditions.checkArgument(minSamples > 0, + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY + + " should be larger than 0"); + minOutlierDetectionSamples = minSamples; + } + + @VisibleForTesting + public OutlierDetector getSlowNodeDetector() { + return this.slowNodeDetector; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java index 401fc8bd712d3..e816f52c919fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java @@ -18,10 +18,12 @@ package org.apache.hadoop.hdfs.server.datanode.metrics; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,7 +62,7 @@ public class OutlierDetector { /** * Minimum number of resources to run outlier detection. */ - private final long minNumResources; + private volatile long minNumResources; /** * The multiplier is from Leys, C. et al. @@ -70,7 +72,7 @@ public class OutlierDetector { /** * Threshold in milliseconds below which a node/ disk is definitely not slow. */ - private final long lowThresholdMs; + private volatile long lowThresholdMs; /** * Deviation multiplier. A sample is considered to be an outlier if it @@ -108,6 +110,26 @@ public OutlierDetector(long minNumResources, long lowThresholdMs) { * @return */ public Map getOutliers(Map stats) { + final Map slowResources = new HashMap<>(); + Map slowResourceMetrics = getOutlierMetrics(stats); + slowResourceMetrics.forEach( + (node, outlierMetrics) -> slowResources.put(node, outlierMetrics.getActualLatency())); + return slowResources; + } + + /** + * Return a set of nodes whose latency is much higher than + * their counterparts. 
The input is a map of (resource {@literal ->} aggregate + * latency) entries. + * + * The aggregate may be an arithmetic mean or a percentile e.g. + * 90th percentile. Percentiles are a better choice than median + * since latency is usually not a normal distribution. + * + * @param stats map of aggregate latency entries. + * @return map of outlier nodes to outlier metrics. + */ + public Map getOutlierMetrics(Map stats) { if (stats.size() < minNumResources) { LOG.debug("Skipping statistical outlier detection as we don't have " + "latency data for enough resources. Have {}, need at least {}", @@ -124,19 +146,20 @@ public Map getOutliers(Map stats) { upperLimitLatency = Math.max( upperLimitLatency, median + (DEVIATION_MULTIPLIER * mad)); - final Map slowResources = new HashMap<>(); + final Map slowResources = new HashMap<>(); - LOG.trace("getOutliers: List={}, MedianLatency={}, " + - "MedianAbsoluteDeviation={}, upperLimitLatency={}", - sorted, median, mad, upperLimitLatency); + LOG.trace("getOutliers: List={}, MedianLatency={}, " + + "MedianAbsoluteDeviation={}, upperLimitLatency={}", sorted, median, mad, + upperLimitLatency); // Find resources whose latency exceeds the threshold. 
for (Map.Entry entry : stats.entrySet()) { if (entry.getValue() > upperLimitLatency) { - slowResources.put(entry.getKey(), entry.getValue()); + OutlierMetrics outlierMetrics = + new OutlierMetrics(median, mad, upperLimitLatency, entry.getValue()); + slowResources.put(entry.getKey(), outlierMetrics); } } - return slowResources; } @@ -180,4 +203,20 @@ public static Double computeMedian(List sortedValues) { } return median; } + + public void setMinNumResources(long minNodes) { + minNumResources = minNodes; + } + + public long getMinOutlierDetectionNodes() { + return minNumResources; + } + + public void setLowThresholdMs(long thresholdMs) { + lowThresholdMs = thresholdMs; + } + + public long getLowThresholdMs() { + return lowThresholdMs; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java index e4a0c209762be..46656c178a9c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java @@ -64,7 +64,6 @@ import java.security.GeneralSecurityException; import java.util.Enumeration; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ADMIN; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_DEFAULT; @@ -77,8 +76,6 @@ */ public class DatanodeHttpServer implements Closeable { static final Logger LOG = LoggerFactory.getLogger(DatanodeHttpServer.class); - private static final ConcurrentHashMap, Object> HANDLER_STATE - = new ConcurrentHashMap, Object>() {}; // HttpServer threads are only used for the web UI and basic servlets, so // set them to the minimum possible private static final int HTTP_SELECTOR_THREADS 
= 1; @@ -281,11 +278,10 @@ private ChannelHandler[] getFilterHandlers(Configuration configuration) { try { Method initializeState = classes[i].getDeclaredMethod("initializeState", Configuration.class); - Constructor constructor = + Constructor constructor = classes[i].getDeclaredConstructor(initializeState.getReturnType()); handlers[i] = (ChannelHandler) constructor.newInstance( - HANDLER_STATE.getOrDefault(classes[i], - initializeState.invoke(null, configuration))); + initializeState.invoke(null, configuration)); } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException | InstantiationException | IllegalArgumentException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/HostRestrictingAuthorizationFilterHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/HostRestrictingAuthorizationFilterHandler.java index 584e12bdc9b02..798def0c716b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/HostRestrictingAuthorizationFilterHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/HostRestrictingAuthorizationFilterHandler.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.web; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelHandler.Sharable; import io.netty.channel.ChannelHandlerContext; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java index 366f47f29631d..293102c9d8560 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java @@ -23,9 +23,9 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; import java.io.ByteArrayInputStream; import java.io.DataInputStream; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java index b56b0d09ac28b..02ec25c13c874 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.web.webhdfs; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import com.sun.jersey.api.ParamException; import com.sun.jersey.api.container.ContainerException; import io.netty.buffer.Unpooled; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java index ff68a7ee7e4d0..d0c71f6be1606 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.web.webhdfs; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import io.netty.buffer.Unpooled; import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelHandlerContext; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/CancelCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/CancelCommand.java index 007272eda9e70..f478dff4af9e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/CancelCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/CancelCommand.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.command; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.codec.digest.DigestUtils; @@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.tools.DiskBalancerCLI; import java.io.IOException; +import java.nio.charset.StandardCharsets; /** * Cancels a running plan. 
@@ -76,7 +77,7 @@ public void execute(CommandLine cmd) throws Exception { "Invalid plan file specified."); String planData = null; try (FSDataInputStream plan = open(planFile)) { - planData = IOUtils.toString(plan); + planData = IOUtils.toString(plan, StandardCharsets.UTF_8); } cancelPlan(planData); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java index 4f8e373d9c2ce..6845c572ef6b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java @@ -20,9 +20,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ExecuteCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ExecuteCommand.java index c7cb089c5b72d..5b5dc2ad5b437 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ExecuteCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ExecuteCommand.java @@ -19,7 +19,7 @@ package 
org.apache.hadoop.hdfs.server.diskbalancer.command; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.codec.digest.DigestUtils; @@ -32,7 +32,7 @@ import org.apache.hadoop.hdfs.tools.DiskBalancerCLI; import java.io.IOException; - +import java.nio.charset.StandardCharsets; /** * executes a given plan. @@ -69,7 +69,7 @@ public void execute(CommandLine cmd) throws Exception { String planData = null; try (FSDataInputStream plan = open(planFile)) { - planData = IOUtils.toString(plan); + planData = IOUtils.toString(plan, StandardCharsets.UTF_8); } boolean skipDateCheck = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/HelpCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/HelpCommand.java index f7c84e16f7b81..e36628edf0eb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/HelpCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/HelpCommand.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.command; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java index ebcbb4c2fa9f9..e9f9f33e71535 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.diskbalancer.command; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Throwables; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/QueryCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/QueryCommand.java index a8adcbd5621bb..520e80f3974c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/QueryCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/QueryCommand.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.command; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java index 4f75aff1a518c..ad5a3c2090edf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java @@ -33,8 +33,8 @@ import org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerVolumeSet; import org.apache.hadoop.hdfs.tools.DiskBalancerCLI; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Executes the report command. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/DBNameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/DBNameNodeConnector.java index 2d8ba8a0a0724..b7bb3f02dce9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/DBNameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/DBNameNodeConnector.java @@ -17,7 +17,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.connectors; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java index 838511ba82bf8..268c055a354ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java @@ -19,7 +19,7 @@ import 
com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java index 1307983c80927..0e405ff7bd3c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerDataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerDataNode.java index 6cf244be9442d..fce858aaca01b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerDataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerDataNode.java @@ -17,7 +17,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.datamodel; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import 
java.util.HashMap; import java.util.Map; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolumeSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolumeSet.java index 5a4eb6d68554d..bcce012ff84b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolumeSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolumeSet.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/GreedyPlanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/GreedyPlanner.java index 3f9734537a52b..0ed56afb39a5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/GreedyPlanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/GreedyPlanner.java @@ -17,7 +17,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.planner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.server.diskbalancer.datamodel diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java index 44039eaa0f36e..72df5abe6bcaa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.io.IOException; import java.util.LinkedList; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java index 06c7cc5a5acf7..cae6b68793580 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java @@ -17,10 +17,10 @@ */ package org.apache.hadoop.hdfs.server.mover; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.commons.cli.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -134,9 +134,17 @@ private List getTargetStorages(StorageType t) { final int maxNoMoveInterval = conf.getInt( DFSConfigKeys.DFS_MOVER_MAX_NO_MOVE_INTERVAL_KEY, DFSConfigKeys.DFS_MOVER_MAX_NO_MOVE_INTERVAL_DEFAULT); - this.retryMaxAttempts = conf.getInt( + final int maxAttempts = conf.getInt( 
DFSConfigKeys.DFS_MOVER_RETRY_MAX_ATTEMPTS_KEY, DFSConfigKeys.DFS_MOVER_RETRY_MAX_ATTEMPTS_DEFAULT); + if (maxAttempts >= 0) { + this.retryMaxAttempts = maxAttempts; + } else { + LOG.warn(DFSConfigKeys.DFS_MOVER_RETRY_MAX_ATTEMPTS_KEY + " is " + + "configured with a negative value, using default value of " + + DFSConfigKeys.DFS_MOVER_RETRY_MAX_ATTEMPTS_DEFAULT); + this.retryMaxAttempts = DFSConfigKeys.DFS_MOVER_RETRY_MAX_ATTEMPTS_DEFAULT; + } this.retryCount = retryCount; this.dispatcher = new Dispatcher(nnc, Collections. emptySet(), Collections. emptySet(), movedWinWidth, moverThreads, 0, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java index e9e3e598fc12a..a6e7d00f848d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java @@ -26,8 +26,6 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hdfs.util.LongBitFormat; -import com.google.common.collect.ImmutableList; - /** * Class to pack an AclEntry into an integer.
      * An ACL entry is represented by a 32-bit integer in Big Endian format.
      diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java index 6d546f81c4f99..2dfe50742dac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java @@ -24,8 +24,8 @@ import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.hdfs.util.ReferenceCountMap.ReferenceCounter; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; /** * Feature that represents the ACLs of the inode. @@ -83,17 +83,17 @@ public int hashCode() { } @Override - public int getRefCount() { + public synchronized int getRefCount() { return refCount; } @Override - public int incrementAndGetRefCount() { + public synchronized int incrementAndGetRefCount() { return ++refCount; } @Override - public int decrementAndGetRefCount() { + public synchronized int decrementAndGetRefCount() { return (refCount > 0) ? 
--refCount : 0; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java index fa268c72a1eb2..806cdc6d6940b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java @@ -20,9 +20,9 @@ import java.util.Collections; import java.util.List; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.permission.AclEntry; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclTransformation.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclTransformation.java index 4402e263a2108..031929ce67c79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclTransformation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclTransformation.java @@ -28,10 +28,10 @@ import java.util.Iterator; import java.util.List; -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Ordering; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Ordering; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.permission.AclEntry; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java index 614eb63d055a7..49dcb8c0a3757 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java @@ -17,14 +17,13 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import java.net.InetAddress; -import java.security.Principal; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; +import java.net.InetAddress; + /** * Interface defining an audit logger. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java index 9f5f29e371432..bbe607670f71a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java @@ -30,8 +30,8 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageState; import org.apache.hadoop.util.StringUtils; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Extension of FSImage for the backup node. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 2fb1933c421cc..dab227fcc763c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -51,7 +51,7 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.BlockingService; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index 33bb74caabe4f..68ab12c9eb4e3 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -43,6 +43,7 @@ import java.util.TreeMap; import java.util.concurrent.locks.ReentrantLock; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.apache.commons.io.IOUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -89,10 +90,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; /** * The Cache Manager handles caching on DataNodes. 
@@ -935,6 +936,11 @@ public void setCachedLocations(LocatedBlocks locations) { } } + @SuppressFBWarnings( + value="EC_UNRELATED_TYPES", + justification="HDFS-15255 Asked Wei-Chiu and Pifta to review this" + + " warning and we all agree the code is OK and the warning is not " + + "needed") private void setCachedLocations(LocatedBlock block) { CachedBlock cachedBlock = new CachedBlock(block.getBlock().getBlockId(), @@ -1071,6 +1077,10 @@ public PersistState saveState() throws IOException { if (p.getLimit() != null) b.setLimit(p.getLimit()); + if (p.getMaxRelativeExpiryMs() != null) { + b.setMaxRelativeExpiry(p.getMaxRelativeExpiryMs()); + } + pools.add(b.build()); } @@ -1136,6 +1146,10 @@ public void loadState(PersistState s) throws IOException { if (p.hasLimit()) info.setLimit(p.getLimit()); + if (p.hasMaxRelativeExpiry()) { + info.setMaxRelativeExpiryMs(p.getMaxRelativeExpiry()); + } + addCachePool(info); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java index 20b1d25434a1b..dda4789b74e49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java @@ -32,7 +32,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.IntrusiveCollection; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A CachePool describes a set of cache resources being managed by the NameNode. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java index 186bc3d727639..4df170d771601 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java @@ -24,7 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import java.util.concurrent.TimeUnit; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java index 18cc7afbe9d5c..5c753070a459e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; -import com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; /** * A unique signature intended to identify checkpoint transactions. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index ab07efa81c77a..0557580404a7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -28,7 +28,7 @@ import java.net.URL; import java.util.List; -import com.google.common.math.LongMath; +import org.apache.hadoop.thirdparty.com.google.common.math.LongMath; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -43,7 +43,7 @@ import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.util.Daemon; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * The Checkpointer is responsible for supporting periodic checkpoints diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java index 0263f2a347faa..7a5963a6c57cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.permission.FsAction; diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DirectoryWithQuotaFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DirectoryWithQuotaFeature.java index 9597c6aeeb366..cddefd35afd13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DirectoryWithQuotaFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DirectoryWithQuotaFeature.java @@ -176,6 +176,11 @@ void setSpaceConsumed(QuotaCounts c) { usage.setTypeSpaces(c.getTypeSpaces()); } + /** @return the namespace and storagespace and typespace allowed. */ + public QuotaCounts getSpaceAllowed() { + return new QuotaCounts.Builder().quotaCount(quota).build(); + } + /** @return the namespace and storagespace and typespace consumed. */ public QuotaCounts getSpaceConsumed() { return new QuotaCounts.Builder().quotaCount(usage).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java index 81d285a03626b..d17fd06bc882c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java @@ -21,7 +21,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 26c5e9049f74d..8c3c0fd933a43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -45,9 +45,9 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticationException; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Throwables; /** * An implementation of the abstract class {@link EditLogInputStream}, which diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index 4dbe2720ddaa0..7fe84307df125 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hdfs.protocol.LayoutFlags; import org.apache.hadoop.io.IOUtils; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * An implementation of the abstract class {@link EditLogOutputStream}, which @@ -88,7 +88,12 @@ public EditLogFileOutputStream(Configuration conf, File name, int size) } else { rp = new RandomAccessFile(name, 
"rw"); } - fp = new FileOutputStream(rp.getFD()); // open for append + try { + fp = new FileOutputStream(rp.getFD()); // open for append + } catch (IOException e) { + IOUtils.closeStream(rp); + throw e; + } fc = rp.getChannel(); fc.position(fc.size()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java index 27733cf404162..6f43d737443d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java @@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; /** * A generic abstract class to support journaling of edits logs into @@ -42,6 +43,16 @@ public EditLogOutputStream() throws IOException { numSync = totalTimeSync = 0; } + /** + * Get the last txId journalled in the stream. + * The txId is recorded when FSEditLogOp is written to the stream. + * The default implementation is dummy. + * JournalSet tracks the txId uniformly for all underlying streams. + */ + public long getLastJournalledTxId() { + return HdfsServerConstants.INVALID_TXID; + }; + /** * Write edits log operation to the stream. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java index be68f6d609008..ccc233efcbafe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java @@ -32,7 +32,7 @@ import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.IOUtils; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A double-buffer for edits. New edits are written into the first buffer @@ -76,7 +76,7 @@ public void close() throws IOException { + " bytes still to be flushed and cannot be closed."); } - IOUtils.cleanup(null, bufCurrent, bufReady); + IOUtils.cleanupWithLogger(null, bufCurrent, bufReady); bufCurrent = bufReady = null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java index 938eacd7fd648..33954a2cc3993 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Used to inject certain faults for testing. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java index 48e405ba35432..2f5fde8e94015 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java @@ -29,10 +29,10 @@ import java.util.NavigableMap; import java.util.TreeMap; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java index d9f7e9afdc70d..a653ff459c22d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java @@ -17,8 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; -import 
com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -304,6 +305,12 @@ public synchronized ErasureCodingPolicy addPolicy( + policy.getCodecName() + " is not supported"); } + int blocksInGroup = policy.getNumDataUnits() + policy.getNumParityUnits(); + if (blocksInGroup > HdfsServerConstants.MAX_BLOCKS_IN_GROUP) { + throw new HadoopIllegalArgumentException("Number of data and parity blocks in an EC group " + + blocksInGroup + " should not exceed maximum " + HdfsServerConstants.MAX_BLOCKS_IN_GROUP); + } + if (policy.getCellSize() > maxCellSize) { throw new HadoopIllegalArgumentException("Cell size " + policy.getCellSize() + " should not exceed maximum " + @@ -343,6 +350,7 @@ public synchronized ErasureCodingPolicy addPolicy( policiesByName.values().toArray(new ErasureCodingPolicyInfo[0]); allPersistedPolicies.put(policy.getId(), new ErasureCodingPolicyInfo(policy)); + LOG.info("Added erasure coding policy " + policy); return policy; } @@ -414,7 +422,7 @@ public synchronized boolean disablePolicy(String name) { enabledPolicies = enabledPoliciesByName.values().toArray(new ErasureCodingPolicy[0]); info.setState(ErasureCodingPolicyState.DISABLED); - LOG.info("Disable the erasure coding policy " + name); + LOG.info("Disabled the erasure coding policy " + name); allPersistedPolicies.put(info.getPolicy().getId(), createPolicyInfo(info.getPolicy(), ErasureCodingPolicyState.DISABLED)); @@ -448,7 +456,7 @@ public synchronized boolean enablePolicy(String name) { enabledPoliciesByName.values().toArray(new ErasureCodingPolicy[0]); allPersistedPolicies.put(ecPolicy.getId(), createPolicyInfo(info.getPolicy(), 
ErasureCodingPolicyState.ENABLED)); - LOG.info("Enable the erasure coding policy " + name); + LOG.info("Enabled the erasure coding policy " + name); return true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAclOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAclOp.java index 31dc51a3c24b1..6ced588e96882 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAclOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAclOp.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.permission.AclEntry; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAppendOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAppendOp.java index 919e853f4590f..7e90d4bafa8bb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAppendOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAppendOp.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion.Feature; import org.apache.hadoop.ipc.RetriableException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Helper class to perform append operation. 
@@ -209,7 +209,7 @@ static LocatedBlock prepareFileForAppend(final FSNamesystem fsn, BlockInfo lastBlock = file.getLastBlock(); if (lastBlock != null) { ExtendedBlock blk = new ExtendedBlock(fsn.getBlockPoolId(), lastBlock); - ret = new LocatedBlock(blk, new DatanodeInfo[0]); + ret = new LocatedBlock(blk, DatanodeInfo.EMPTY_ARRAY); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java index 8e9606dcf61fe..5c347655fc8ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java @@ -38,7 +38,7 @@ import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.security.AccessControlException; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.io.FileNotFoundException; import java.io.IOException; @@ -56,15 +56,18 @@ static FileStatus setPermission( throw new InvalidPathException(src); } INodesInPath iip; + boolean changed; fsd.writeLock(); try { iip = fsd.resolvePath(pc, src, DirOp.WRITE); fsd.checkOwner(pc, iip); - unprotectedSetPermission(fsd, iip, permission); + changed = unprotectedSetPermission(fsd, iip, permission); } finally { fsd.writeUnlock(); } - fsd.getEditLog().logSetPermissions(iip.getPath(), permission); + if (changed) { + fsd.getEditLog().logSetPermissions(iip.getPath(), permission); + } return fsd.getAuditFileInfo(iip); } @@ -75,6 +78,7 @@ static FileStatus setOwner( throw new InvalidPathException(src); } INodesInPath iip; + boolean changed; fsd.writeLock(); try { iip = fsd.resolvePath(pc, src, DirOp.WRITE); @@ -89,11 +93,13 @@ static FileStatus setOwner( "User " + pc.getUser() + " does not belong to " + group); } } - unprotectedSetOwner(fsd, 
iip, username, group); + changed = unprotectedSetOwner(fsd, iip, username, group); } finally { fsd.writeUnlock(); } - fsd.getEditLog().logSetOwner(iip.getPath(), username, group); + if (changed) { + fsd.getEditLog().logSetOwner(iip.getPath(), username, group); + } return fsd.getAuditFileInfo(iip); } @@ -257,28 +263,32 @@ static void setQuota(FSDirectory fsd, FSPermissionChecker pc, String src, } } - static void unprotectedSetPermission( + static boolean unprotectedSetPermission( FSDirectory fsd, INodesInPath iip, FsPermission permissions) throws FileNotFoundException, UnresolvedLinkException, QuotaExceededException, SnapshotAccessControlException { assert fsd.hasWriteLock(); final INode inode = FSDirectory.resolveLastINode(iip); int snapshotId = iip.getLatestSnapshotId(); + long oldPerm = inode.getPermissionLong(); inode.setPermission(permissions, snapshotId); + return oldPerm != inode.getPermissionLong(); } - static void unprotectedSetOwner( + static boolean unprotectedSetOwner( FSDirectory fsd, INodesInPath iip, String username, String groupname) throws FileNotFoundException, UnresolvedLinkException, QuotaExceededException, SnapshotAccessControlException { assert fsd.hasWriteLock(); final INode inode = FSDirectory.resolveLastINode(iip); + long oldPerm = inode.getPermissionLong(); if (username != null) { inode.setUser(username, iip.getLatestSnapshotId()); } if (groupname != null) { inode.setGroup(groupname, iip.getLatestSnapshotId()); } + return oldPerm != inode.getPermissionLong(); } static boolean setTimes( @@ -398,7 +408,7 @@ static BlockInfo[] unprotectedSetReplication( bm.setReplication(oldBR, targetReplication, b); } - if (oldBR != -1) { + if (oldBR != -1 && FSDirectory.LOG.isDebugEnabled()) { if (oldBR > targetReplication) { FSDirectory.LOG.debug("Decreasing replication from {} to {} for {}", oldBR, targetReplication, iip.getPath()); @@ -424,6 +434,8 @@ static void unprotectedSetStoragePolicy(FSDirectory fsd, BlockManager bm, } final int snapshotId = 
iip.getLatestSnapshotId(); if (inode.isFile()) { + FSDirectory.LOG.debug("DIR* FSDirAAr.unprotectedSetStoragePolicy for " + + "File."); if (policyId != HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED) { BlockStoragePolicy newPolicy = bm.getStoragePolicy(policyId); if (newPolicy.isCopyOnCreateFile()) { @@ -442,6 +454,8 @@ static void unprotectedSetStoragePolicy(FSDirectory fsd, BlockManager bm, } inode.asFile().setStoragePolicyID(policyId, snapshotId); } else if (inode.isDirectory()) { + FSDirectory.LOG.debug("DIR* FSDirAAr.unprotectedSetStoragePolicy for " + + "Directory."); setDirStoragePolicy(fsd, iip, policyId); } else { throw new FileNotFoundException(iip.getPath() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java index b92c4140b4a05..04ae358c67afb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.fs.permission.FsAction; @@ -52,9 +52,9 @@ static FileStatus concat(FSDirectory fsd, FSPermissionChecker pc, String target, String[] srcs, boolean logRetryCache) throws IOException { validatePath(target, srcs); assert srcs != null; - if (FSDirectory.LOG.isDebugEnabled()) { - FSDirectory.LOG.debug("concat {} to {}", Arrays.toString(srcs), target); - } + NameNode.stateChangeLog.debug("DIR* NameSystem.concat: {} to {}", + Arrays.toString(srcs), target); + final INodesInPath targetIIP = fsd.resolvePath(pc, target, DirOp.WRITE); // write permission for the 
target if (fsd.isPermissionEnabled()) { @@ -66,11 +66,6 @@ static FileStatus concat(FSDirectory fsd, FSPermissionChecker pc, // check the srcs INodeFile[] srcFiles = verifySrcFiles(fsd, srcs, targetIIP, pc); - if(NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " + - Arrays.toString(srcs) + " to " + target); - } - long timestamp = now(); fsd.writeLock(); try { @@ -150,7 +145,7 @@ private static INodeFile[] verifySrcFiles(FSDirectory fsd, String[] srcs, + " is referred by some other reference in some snapshot."); } // source file cannot be the same with the target file - if (srcINode == targetINode) { + if (srcINode.equals(targetINode)) { throw new HadoopIllegalArgumentException("concat: the src file " + src + " is the same with the target file " + targetIIP.getPath()); } @@ -234,10 +229,8 @@ private static void verifyQuota(FSDirectory fsd, INodesInPath targetIIP, static void unprotectedConcat(FSDirectory fsd, INodesInPath targetIIP, INodeFile[] srcList, long timestamp) throws IOException { assert fsd.hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* FSNamesystem.concat to " - + targetIIP.getPath()); - } + NameNode.stateChangeLog.debug("DIR* NameSystem.concat to {}", + targetIIP.getPath()); final INodeFile trgInode = targetIIP.getLastINode().asFile(); QuotaCounts deltas = computeQuotaDeltas(fsd, trgInode, srcList); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirDeleteOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirDeleteOp.java index 47f25f6f1fb66..2dfb90ee67282 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirDeleteOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirDeleteOp.java @@ -61,10 +61,10 @@ static long delete(FSDirectory fsd, INodesInPath 
iip, removedUCFiles); if (unprotectedDelete(fsd, iip, context, mtime)) { filesRemoved = context.quotaDelta().getNsDelta(); + fsn.removeSnapshottableDirs(snapshottableDirs); } fsd.updateReplicationFactor(context.collectedBlocks() .toUpdateReplicationInfo()); - fsn.removeSnapshottableDirs(snapshottableDirs); fsd.updateCount(iip, context.quotaDelta(), false); } } finally { @@ -144,9 +144,9 @@ static void deleteForEditLog(FSDirectory fsd, INodesInPath iip, long mtime) new ReclaimContext(fsd.getBlockStoragePolicySuite(), collectedBlocks, removedINodes, removedUCFiles), mtime); - fsn.removeSnapshottableDirs(snapshottableDirs); if (filesRemoved) { + fsn.removeSnapshottableDirs(snapshottableDirs); fsn.removeLeasesAndINodes(removedUCFiles, removedINodes, false); fsn.getBlockManager().removeBlocksAndUpdateSafemodeTotal(collectedBlocks); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java index 6c7b1fae50dec..516d59415401c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java @@ -52,8 +52,8 @@ import org.apache.hadoop.hdfs.server.namenode.ReencryptionUpdater.FileEdekInfo; import org.apache.hadoop.security.SecurityUtil; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.util.Time; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java index 011c72ea49c6f..11981b27183d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.fs.FileStatus; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java index 95e889888bb7f..da324fb46738a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import org.apache.hadoop.fs.permission.FsCreateModes; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.ParentNotDirectoryException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java index 602f9962942be..ee0bf8a5fb165 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.InvalidPathException; @@ -80,8 +80,12 @@ private static void verifyQuotaForRename(FSDirectory fsd, INodesInPath src, // Assume dstParent existence check done by callers. INode dstParent = dst.getINode(-2); // Use the destination parent's storage policy for quota delta verify. + final boolean isSrcSetSp = src.getLastINode().isSetStoragePolicy(); + final byte storagePolicyID = isSrcSetSp ? + src.getLastINode().getLocalStoragePolicyID() : + dstParent.getStoragePolicyID(); final QuotaCounts delta = src.getLastINode() - .computeQuotaUsage(bsps, dstParent.getStoragePolicyID(), false, + .computeQuotaUsage(bsps, storagePolicyID, false, Snapshot.CURRENT_STATE_ID); // Reduce the required quota by dst that is being removed @@ -262,6 +266,11 @@ static RenameResult renameTo(FSDirectory fsd, FSPermissionChecker pc, throws IOException { final INodesInPath srcIIP = fsd.resolvePath(pc, src, DirOp.WRITE_LINK); final INodesInPath dstIIP = fsd.resolvePath(pc, dst, DirOp.CREATE_LINK); + + if(fsd.isNonEmptyDirectory(srcIIP)) { + DFSUtil.checkProtectedDescendants(fsd, srcIIP); + } + if (fsd.isPermissionEnabled()) { boolean renameToTrash = false; if (null != options && @@ -650,7 +659,12 @@ private static class RenameOperation { // snapshot is taken on the dst tree, changes will be recorded in the // latest snapshot of the src tree. 
if (isSrcInSnapshot) { - srcChild.recordModification(srcLatestSnapshotId); + if (srcChild.isFile()) { + INodeFile file = srcChild.asFile(); + file.recordModification(srcLatestSnapshotId, true); + } else { + srcChild.recordModification(srcLatestSnapshotId); + } } // check srcChild for reference diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java index 3f873d7eea5ff..4057bbd211c0a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java @@ -33,7 +33,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp; import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfyManager; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Helper class to perform storage policy satisfier related operations. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java index 7f07dd1feda1b..dfacc491eae53 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java index bf55d30591074..22b0e175018d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirTruncateOp.java @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; @@ -38,7 +39,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.RecoverLeaseOp; import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Helper class to perform truncate operation. @@ -111,6 +112,10 @@ static TruncateResult truncate(final FSNamesystem fsn, final String srcArg, + truncatedBlock.getNumBytes(); if (newLength == truncateLength) { return new TruncateResult(false, fsd.getAuditFileInfo(iip)); + } else { + throw new AlreadyBeingCreatedException( + RecoverLeaseOp.TRUNCATE_FILE.getExceptionMessage(src, + clientName, clientMachine, src + " is being truncated.")); } } } @@ -262,7 +267,11 @@ static Block prepareFileForTruncate(FSNamesystem fsn, INodesInPath iip, uc.setTruncateBlock(new BlockInfoContiguous(oldBlock, oldBlock.getReplication())); uc.getTruncateBlock().setNumBytes(oldBlock.getNumBytes() - lastBlockDelta); - uc.getTruncateBlock().setGenerationStamp(newBlock.getGenerationStamp()); + final long newGenerationStamp = newBlock.getGenerationStamp(); + uc.getTruncateBlock().setGenerationStamp(newGenerationStamp); + // Update global generation stamp in Standby NameNode + blockManager.getBlockIdManager().setGenerationStampIfGreater( + newGenerationStamp); truncatedBlockUC = oldBlock; NameNode.stateChangeLog.debug("BLOCK* prepareFileForTruncate: " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java index 99a80b537c208..0d9c6aeeb9c45 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.HadoopIllegalArgumentException; import 
org.apache.hadoop.fs.XAttrSetFlag; import org.apache.hadoop.hdfs.AddBlockFlag; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java index ff82610f545bb..7f1691091f8d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.fs.FileStatus; @@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.XAttrHelper; +import org.apache.hadoop.hdfs.protocol.XAttrNotFoundException; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ReencryptionInfoProto; import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; @@ -114,8 +115,7 @@ static List getXAttrs(FSDirectory fsd, FSPermissionChecker pc, return filteredAll; } if (filteredAll == null || filteredAll.isEmpty()) { - throw new IOException( - "At least one of the attributes provided was not found."); + throw new XAttrNotFoundException(); } List toGet = Lists.newArrayListWithCapacity(xAttrs.size()); for (XAttr xAttr : xAttrs) { @@ -129,8 +129,7 @@ static List getXAttrs(FSDirectory fsd, 
FSPermissionChecker pc, } } if (!foundIt) { - throw new IOException( - "At least one of the attributes provided was not found."); + throw new XAttrNotFoundException(); } } return toGet; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index c06b59f625a79..fc17eaebf7f19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -20,9 +20,9 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -69,6 +69,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; @@ -88,6 +89,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROTECTED_SUBDIRECTORIES_ENABLE; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROTECTED_SUBDIRECTORIES_ENABLE_DEFAULT; import static 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY; @@ -168,6 +171,7 @@ private static INodeDirectory createRoot(FSNamesystem namesystem) { // // Each entry in this set must be a normalized path. private volatile SortedSet protectedDirectories; + private final boolean isProtectedSubDirectoriesEnable; private final boolean isPermissionEnabled; private final boolean isPermissionContentSummarySubAccess; @@ -203,8 +207,47 @@ private static INodeDirectory createRoot(FSNamesystem namesystem) { // will be bypassed private HashSet usersToBypassExtAttrProvider = null; - public void setINodeAttributeProvider(INodeAttributeProvider provider) { + // If external inode attribute provider is configured, use the new + // authorizeWithContext() API or not. + private boolean useAuthorizationWithContextAPI = false; + + public void setINodeAttributeProvider( + @Nullable INodeAttributeProvider provider) { attributeProvider = provider; + + if (attributeProvider == null) { + // attributeProvider is set to null during NN shutdown. + return; + } + + // if the runtime external authorization provider doesn't support + // checkPermissionWithContext(), fall back to the old API + // checkPermission(). + // This check is done only once during NameNode initialization to reduce + // runtime overhead. + Class[] cArg = new Class[1]; + cArg[0] = INodeAttributeProvider.AuthorizationContext.class; + + INodeAttributeProvider.AccessControlEnforcer enforcer = + attributeProvider.getExternalAccessControlEnforcer(null); + + // If external enforcer is null, we use the default enforcer, which + // supports the new API. 
+ if (enforcer == null) { + useAuthorizationWithContextAPI = true; + return; + } + + try { + Class clazz = enforcer.getClass(); + clazz.getDeclaredMethod("checkPermissionWithContext", cArg); + useAuthorizationWithContextAPI = true; + LOG.info("Use the new authorization provider API"); + } catch (NoSuchMethodException e) { + useAuthorizationWithContextAPI = false; + LOG.info("Fallback to the old authorization provider API because " + + "the expected method is not found."); + } } /** @@ -341,6 +384,9 @@ public enum DirOp { DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_DEFAULT); this.protectedDirectories = parseProtectedDirectories(conf); + this.isProtectedSubDirectoriesEnable = conf.getBoolean( + DFS_PROTECTED_SUBDIRECTORIES_ENABLE, + DFS_PROTECTED_SUBDIRECTORIES_ENABLE_DEFAULT); Preconditions.checkArgument(this.inodeXAttrsLimit >= 0, "Cannot set a negative limit on the number of xattrs per inode (%s).", @@ -501,6 +547,10 @@ public SortedSet getProtectedDirectories() { return protectedDirectories; } + public boolean isProtectedSubDirectoriesEnable() { + return isProtectedSubDirectoriesEnable; + } + /** * Set directories that cannot be removed unless empty, even by an * administrator. @@ -684,6 +734,26 @@ public INodesInPath resolvePath(FSPermissionChecker pc, String src, return iip; } + /** + * This method should only be used from internal paths and not those provided + * directly by a user. It resolves a given path into an INodesInPath in a + * similar way to resolvePath(...), only traversal and permissions are not + * checked. + * @param src The path to resolve. + * @return if the path indicates an inode, return path after replacing up to + * {@code } with the corresponding path of the inode, else + * the path in {@code src} as is. If the path refers to a path in + * the "raw" directory, return the non-raw pathname. 
+ * @throws FileNotFoundException + */ + public INodesInPath unprotectedResolvePath(String src) + throws FileNotFoundException { + byte[][] components = INode.getPathComponents(src); + boolean isRaw = isReservedRawName(components); + components = resolveComponents(components, this); + return INodesInPath.resolve(rootDir, components, isRaw); + } + INodesInPath resolvePath(FSPermissionChecker pc, String src, long fileId) throws UnresolvedLinkException, FileNotFoundException, AccessControlException, ParentNotDirectoryException { @@ -1139,7 +1209,8 @@ static void verifyQuota(INodesInPath iip, int pos, QuotaCounts deltas, // check existing components in the path for(int i = (pos > iip.length() ? iip.length(): pos) - 1; i >= 0; i--) { - if (commonAncestor == iip.getINode(i)) { + if (commonAncestor == iip.getINode(i) + && !commonAncestor.isInLatestSnapshot(iip.getLatestSnapshotId())) { // Stop checking for quota when common ancestor is reached return; } @@ -1292,9 +1363,13 @@ public INodesInPath addLastINode(INodesInPath existing, INode inode, // always verify inode name verifyINodeName(inode.getLocalNameBytes()); + final boolean isSrcSetSp = inode.isSetStoragePolicy(); + final byte storagePolicyID = isSrcSetSp ? 
+ inode.getLocalStoragePolicyID() : + parent.getStoragePolicyID(); final QuotaCounts counts = inode .computeQuotaUsage(getBlockStoragePolicySuite(), - parent.getStoragePolicyID(), false, Snapshot.CURRENT_STATE_ID); + storagePolicyID, false, Snapshot.CURRENT_STATE_ID); updateCount(existing, pos, counts, checkQuota); boolean isRename = (inode.getParent() != null); @@ -1784,7 +1859,8 @@ FSPermissionChecker getPermissionChecker() FSPermissionChecker getPermissionChecker(String fsOwner, String superGroup, UserGroupInformation ugi) throws AccessControlException { return new FSPermissionChecker( - fsOwner, superGroup, ugi, getUserFilteredAttributeProvider(ugi)); + fsOwner, superGroup, ugi, getUserFilteredAttributeProvider(ugi), + useAuthorizationWithContextAPI); } void checkOwner(FSPermissionChecker pc, INodesInPath iip) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index be8c684f01508..c3e31bcba692c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -27,7 +27,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -109,9 +109,9 @@ import org.apache.hadoop.ipc.Server; import org.apache.hadoop.security.token.delegation.DelegationKey; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -182,7 +182,7 @@ private enum State { // these are statistics counters. private long numTransactions; // number of transactions - private final AtomicLong numTransactionsBatchedInSync = new AtomicLong(); + private final LongAdder numTransactionsBatchedInSync = new LongAdder(); private long totalTimeTransactions; // total time for all transactions private NameNodeMetrics metrics; @@ -217,7 +217,10 @@ private static class TransactionId { private static final ThreadLocal myTransactionId = new ThreadLocal() { @Override protected synchronized TransactionId initialValue() { - return new TransactionId(Long.MAX_VALUE); + // If an RPC call did not generate any transactions, + // logSync() should exit without syncing + // Therefore the initial value of myTransactionId should be 0 + return new TransactionId(0L); } }; @@ -462,6 +465,7 @@ assert isOpenForWrite() : // wait if an automatic sync is scheduled waitIfAutoSyncScheduled(); + beginTransaction(op); // check if it is time to schedule an automatic sync needsSync = doEditTransaction(op); if (needsSync) { @@ -476,9 +480,11 @@ assert isOpenForWrite() : } synchronized boolean doEditTransaction(final FSEditLogOp op) { - long start = beginTransaction(); - op.setTransactionId(txid); + LOG.debug("doEditTx() op={} txid={}", op, txid); + assert op.hasTransactionId() : + "Transaction id is not set for " + op + " EditLog.txId=" + txid; + long start = monotonicNow(); try { editLogStream.write(op); } catch (IOException ex) { @@ -522,7 +528,7 @@ private boolean shouldForceSync() { return editLogStream.shouldForceSync(); } - private long beginTransaction() { + protected void beginTransaction(final FSEditLogOp op) { assert Thread.holdsLock(this); // get a new transactionId txid++; @@ -532,7 +538,9 @@ private long beginTransaction() { // 
TransactionId id = myTransactionId.get(); id.txid = txid; - return monotonicNow(); + if(op != null) { + op.setTransactionId(txid); + } } private void endTransaction(long start) { @@ -649,7 +657,7 @@ public void logSync() { } protected void logSync(long mytxid) { - long syncStart = 0; + long lastJournalledTxId = HdfsServerConstants.INVALID_TXID; boolean sync = false; long editsBatchedInSync = 0; try { @@ -676,8 +684,16 @@ protected void logSync(long mytxid) { // now, this thread will do the sync. track if other edits were // included in the sync - ie. batched. if this is the only edit // synced then the batched count is 0 - editsBatchedInSync = txid - synctxid - 1; - syncStart = txid; + lastJournalledTxId = editLogStream.getLastJournalledTxId(); + LOG.debug("logSync(tx) synctxid={} lastJournalledTxId={} mytxid={}", + synctxid, lastJournalledTxId, mytxid); + assert lastJournalledTxId <= txid : "lastJournalledTxId exceeds txid"; + // The stream has already been flushed, or there are no active streams + // We still try to flush up to mytxid + if(lastJournalledTxId <= synctxid) { + lastJournalledTxId = mytxid; + } + editsBatchedInSync = lastJournalledTxId - synctxid - 1; isSyncRunning = true; sync = true; @@ -730,14 +746,14 @@ protected void logSync(long mytxid) { if (metrics != null) { // Metrics non-null only when used inside name node metrics.addSync(elapsed); metrics.incrTransactionsBatchedInSync(editsBatchedInSync); - numTransactionsBatchedInSync.addAndGet(editsBatchedInSync); + numTransactionsBatchedInSync.add(editsBatchedInSync); } } finally { // Prevent RuntimeException from blocking other log edit sync synchronized (this) { if (sync) { - synctxid = syncStart; + synctxid = lastJournalledTxId; for (JournalManager jm : journalSet.getJournalManagers()) { /** * {@link FileJournalManager#lastReadableTxId} is only meaningful @@ -745,7 +761,7 @@ protected void logSync(long mytxid) { * other types of {@link JournalManager}. 
*/ if (jm instanceof FileJournalManager) { - ((FileJournalManager)jm).setLastReadableTxId(syncStart); + ((FileJournalManager)jm).setLastReadableTxId(synctxid); } } isSyncRunning = false; @@ -770,7 +786,7 @@ private void printStatistics(boolean force) { .append(" Total time for transactions(ms): ") .append(totalTimeTransactions) .append(" Number of transactions batched in Syncs: ") - .append(numTransactionsBatchedInSync.get()) + .append(numTransactionsBatchedInSync.longValue()) .append(" Number of syncs: ") .append(editLogStream.getNumSync()) .append(" SyncTimes(ms): ") @@ -1402,7 +1418,7 @@ private void startLogSegment(final long segmentTxId, int layoutVersion) numTransactions = 0; totalTimeTransactions = 0; - numTransactionsBatchedInSync.set(0L); + numTransactionsBatchedInSync.reset(); // TODO no need to link this back to storage anymore! // See HDFS-2174. @@ -1496,11 +1512,12 @@ public synchronized void purgeLogsOlderThan(final long minTxIdToKeep) { if (!isOpenForWrite()) { return; } - - assert curSegmentTxId == HdfsServerConstants.INVALID_TXID || // on format this is no-op - minTxIdToKeep <= curSegmentTxId : - "cannot purge logs older than txid " + minTxIdToKeep + - " when current segment starts at " + curSegmentTxId; + + Preconditions.checkArgument( + curSegmentTxId == HdfsServerConstants.INVALID_TXID || // on format this is no-op + minTxIdToKeep <= curSegmentTxId, + "cannot purge logs older than txid " + minTxIdToKeep + + " when current segment starts at " + curSegmentTxId); if (minTxIdToKeep == 0) { return; } @@ -1616,7 +1633,8 @@ public synchronized void journal(long firstTxId, int numTxns, byte[] data) { * store yet. 
*/ synchronized void logEdit(final int length, final byte[] data) { - long start = beginTransaction(); + beginTransaction(null); + long start = monotonicNow(); try { editLogStream.writeRaw(data, 0, length); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java index 2b47398f40c98..c50f527a39141 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java @@ -28,13 +28,15 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.util.ExitUtil; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; class FSEditLogAsync extends FSEditLog implements Runnable { static final Logger LOG = LoggerFactory.getLogger(FSEditLog.class); @@ -53,6 +55,8 @@ class FSEditLogAsync extends FSEditLog implements Runnable { // of the edit log buffer - ie. a sync will eventually be forced. private final Deque syncWaitQ = new ArrayDeque(); + private long lastFull = 0; + FSEditLogAsync(Configuration conf, NNStorage storage, List editsDirs) { super(conf, storage, editsDirs); // op instances cannot be shared due to queuing for background thread. 
@@ -115,9 +119,14 @@ public void close() { @Override void logEdit(final FSEditLogOp op) { + assert isOpenForWrite(); + Edit edit = getEditInstance(op); THREAD_EDIT.set(edit); - enqueueEdit(edit); + synchronized(this) { + enqueueEdit(edit); + beginTransaction(op); + } } @Override @@ -188,6 +197,11 @@ private void enqueueEdit(Edit edit) { if (!editPendingQ.offer(edit)) { Preconditions.checkState( isSyncThreadAlive(), "sync thread is not alive"); + long now = Time.monotonicNow(); + if (now - lastFull > 4000) { + lastFull = now; + LOG.info("Edit pending queue is full"); + } if (Thread.holdsLock(this)) { // if queue is full, synchronized caller must immediately relinquish // the monitor before re-offering to avoid deadlock with sync thread @@ -225,15 +239,18 @@ private Edit dequeueEdit() throws InterruptedException { public void run() { try { while (true) { + NameNodeMetrics metrics = NameNode.getNameNodeMetrics(); boolean doSync; Edit edit = dequeueEdit(); if (edit != null) { // sync if requested by edit log. doSync = edit.logEdit(); syncWaitQ.add(edit); + metrics.setPendingEditsCount(editPendingQ.size() + 1); } else { // sync when editq runs dry, but have edits pending a sync. 
doSync = !syncWaitQ.isEmpty(); + metrics.setPendingEditsCount(0); } if (doSync) { // normally edit log exceptions cause the NN to terminate, but tests diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 294296d2d36d5..03d403345a2d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -116,9 +116,9 @@ import org.apache.hadoop.util.ChunkedArrayList; import org.apache.hadoop.util.Timer; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import static org.apache.hadoop.log.LogThrottlingHelper.LogAction; @@ -132,7 +132,8 @@ public class FSEditLogLoader { /** Limit logging about edit loading to every 5 seconds max. 
*/ @VisibleForTesting static final long LOAD_EDIT_LOG_INTERVAL_MS = 5000; - private final LogThrottlingHelper loadEditsLogHelper = + @VisibleForTesting + static final LogThrottlingHelper LOAD_EDITS_LOG_HELPER = new LogThrottlingHelper(LOAD_EDIT_LOG_INTERVAL_MS); private final FSNamesystem fsNamesys; @@ -173,7 +174,7 @@ long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, fsNamesys.writeLock(); try { long startTime = timer.monotonicNow(); - LogAction preLogAction = loadEditsLogHelper.record("pre", startTime); + LogAction preLogAction = LOAD_EDITS_LOG_HELPER.record("pre", startTime); if (preLogAction.shouldLog()) { FSImage.LOG.info("Start loading edits file " + edits.getName() + " maxTxnsToRead = " + maxTxnsToRead + @@ -182,7 +183,7 @@ long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, long numEdits = loadEditRecords(edits, false, expectedStartingTxId, maxTxnsToRead, startOpt, recovery); long endTime = timer.monotonicNow(); - LogAction postLogAction = loadEditsLogHelper.record("post", endTime, + LogAction postLogAction = LOAD_EDITS_LOG_HELPER.record("post", endTime, numEdits, edits.length(), endTime - startTime); if (postLogAction.shouldLog()) { FSImage.LOG.info("Loaded {} edits file(s) (the last named {}) of " + @@ -798,7 +799,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, final String snapshotRoot = renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot, logVersion); - INodesInPath iip = fsDir.getINodesInPath(snapshotRoot, DirOp.WRITE); + INodesInPath iip = fsDir.unprotectedResolvePath(snapshotRoot); String path = fsNamesys.getSnapshotManager().createSnapshot( fsDir.getFSNamesystem().getLeaseManager(), iip, snapshotRoot, createSnapshotOp.snapshotName, @@ -816,7 +817,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, final String snapshotRoot = renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot, logVersion); - INodesInPath iip = fsDir.getINodesInPath(snapshotRoot, DirOp.WRITE); + 
INodesInPath iip = fsDir.unprotectedResolvePath(snapshotRoot); fsNamesys.getSnapshotManager().deleteSnapshot(iip, deleteSnapshotOp.snapshotName, new INode.ReclaimContext(fsNamesys.dir.getBlockStoragePolicySuite(), @@ -838,7 +839,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, final String snapshotRoot = renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot, logVersion); - INodesInPath iip = fsDir.getINodesInPath(snapshotRoot, DirOp.WRITE); + INodesInPath iip = fsDir.unprotectedResolvePath(snapshotRoot); fsNamesys.getSnapshotManager().renameSnapshot(iip, snapshotRoot, renameSnapshotOp.snapshotOldName, renameSnapshotOp.snapshotNewName, renameSnapshotOp.mtime); @@ -912,6 +913,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, fsNamesys.getFSImage().updateStorageVersion(); fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, NameNodeFile.IMAGE); + fsNamesys.setNeedRollbackFsImage(false); break; } case OP_ADD_CACHE_DIRECTIVE: { @@ -1150,8 +1152,12 @@ private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op, oldBlock.setNumBytes(newBlock.getNumBytes()); boolean changeMade = oldBlock.getGenerationStamp() != newBlock.getGenerationStamp(); - oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); - + final long newGenerationStamp = newBlock.getGenerationStamp(); + oldBlock.setGenerationStamp(newGenerationStamp); + // Update global generation stamp in Standby NameNode + fsNamesys.getBlockManager().getBlockIdManager(). 
+ setGenerationStampIfGreater(newGenerationStamp); + if (!oldBlock.isComplete() && (!isLastBlock || op.shouldCompleteLastBlock())) { changeMade = true; @@ -1234,7 +1240,7 @@ private void incrOpCount(FSEditLogOpCodes opCode, holder = new Holder(1); opCounts.put(opCode, holder); } else { - holder.held++; + holder.held = holder.held + 1; } counter.increment(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 963628f9ac4e3..03ea2360e9b1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -137,11 +137,11 @@ import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * Helper classes for reading the ops from an InputStream. @@ -412,6 +412,17 @@ private static List readXAttrsFromEditLog(DataInputStream in, return PBHelperClient.convertXAttrs(proto.getXAttrsList()); } + private static Block[] deepCopy(Block[] blocks) { + if (blocks == null || blocks.length == 0) { + return blocks; + } + Block[] copy = new Block[blocks.length]; + for (int i = 0; i < blocks.length; ++i) { + copy[i] = blocks[i] == null ? 
null : new Block(blocks[i]); + } + return copy; + } + @SuppressWarnings("unchecked") static abstract class AddCloseOp extends FSEditLogOp @@ -500,7 +511,7 @@ T setBlocks(Block[] blocks) { throw new RuntimeException("Can't have more than " + MAX_BLOCKS + " in an AddCloseOp."); } - this.blocks = blocks; + this.blocks = FSEditLogOp.deepCopy(blocks); return (T)this; } @@ -978,7 +989,7 @@ public String getPath() { } AddBlockOp setPenultimateBlock(Block pBlock) { - this.penultimateBlock = pBlock; + this.penultimateBlock = pBlock == null ? null : new Block(pBlock); return this; } @@ -987,7 +998,7 @@ Block getPenultimateBlock() { } AddBlockOp setLastBlock(Block lastBlock) { - this.lastBlock = lastBlock; + this.lastBlock = lastBlock == null ? null : new Block(lastBlock); return this; } @@ -1090,7 +1101,7 @@ public String getPath() { } UpdateBlocksOp setBlocks(Block[] blocks) { - this.blocks = blocks; + this.blocks = FSEditLogOp.deepCopy(blocks); return this; } @@ -2881,7 +2892,8 @@ TruncateOp setTimestamp(long timestamp) { } TruncateOp setTruncateBlock(Block truncateBlock) { - this.truncateBlock = truncateBlock; + this.truncateBlock = truncateBlock == null ? 
+ null : new Block(truncateBlock); return this; } @@ -3477,17 +3489,30 @@ CreateSnapshotOp setSnapshotMTime(long mTime) { void readFields(DataInputStream in, int logVersion) throws IOException { snapshotRoot = FSImageSerialization.readString(in); snapshotName = FSImageSerialization.readString(in); - mtime = FSImageSerialization.readLong(in); - + if (NameNodeLayoutVersion + .supports(NameNodeLayoutVersion.Feature.SNAPSHOT_MODIFICATION_TIME, + logVersion)) { + mtime = FSImageSerialization.readLong(in); + } // read RPC ids if necessary readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { + throw new IOException("Unsupported without logversion"); + } + + @Override + public void writeFields(DataOutputStream out, int logVersion) + throws IOException { FSImageSerialization.writeString(snapshotRoot, out); FSImageSerialization.writeString(snapshotName, out); - FSImageSerialization.writeLong(mtime, out); + if (NameNodeLayoutVersion + .supports(NameNodeLayoutVersion.Feature.SNAPSHOT_MODIFICATION_TIME, + logVersion)) { + FSImageSerialization.writeLong(mtime, out); + } writeRpcIds(rpcClientId, rpcCallId, out); } @@ -3569,17 +3594,30 @@ DeleteSnapshotOp setSnapshotMTime(long mTime) { void readFields(DataInputStream in, int logVersion) throws IOException { snapshotRoot = FSImageSerialization.readString(in); snapshotName = FSImageSerialization.readString(in); - mtime = FSImageSerialization.readLong(in); - + if (NameNodeLayoutVersion + .supports(NameNodeLayoutVersion.Feature.SNAPSHOT_MODIFICATION_TIME, + logVersion)) { + mtime = FSImageSerialization.readLong(in); + } // read RPC ids if necessary readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { + throw new IOException("Unsupported without logversion"); + } + + @Override + public void writeFields(DataOutputStream out, int logVersion) + throws IOException { FSImageSerialization.writeString(snapshotRoot, out); 
FSImageSerialization.writeString(snapshotName, out); - FSImageSerialization.writeLong(mtime, out); + if (NameNodeLayoutVersion + .supports(NameNodeLayoutVersion.Feature.SNAPSHOT_MODIFICATION_TIME, + logVersion)) { + FSImageSerialization.writeLong(mtime, out); + } writeRpcIds(rpcClientId, rpcCallId, out); } @@ -3670,19 +3708,31 @@ void readFields(DataInputStream in, int logVersion) throws IOException { snapshotRoot = FSImageSerialization.readString(in); snapshotOldName = FSImageSerialization.readString(in); snapshotNewName = FSImageSerialization.readString(in); - mtime = FSImageSerialization.readLong(in); - + if (NameNodeLayoutVersion + .supports(NameNodeLayoutVersion.Feature.SNAPSHOT_MODIFICATION_TIME, + logVersion)) { + mtime = FSImageSerialization.readLong(in); + } // read RPC ids if necessary readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { + throw new IOException("Unsupported without logversion"); + } + + @Override + public void writeFields(DataOutputStream out, int logVersion) + throws IOException { FSImageSerialization.writeString(snapshotRoot, out); FSImageSerialization.writeString(snapshotOldName, out); FSImageSerialization.writeString(snapshotNewName, out); - FSImageSerialization.writeLong(mtime, out); - + if (NameNodeLayoutVersion + .supports(NameNodeLayoutVersion.Feature.SNAPSHOT_MODIFICATION_TIME, + logVersion)) { + FSImageSerialization.writeLong(mtime, out); + } writeRpcIds(rpcClientId, rpcCallId, out); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 9b95f14bddc28..86b4150777edc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -75,9 +75,9 @@ import 
org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Time; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * FSImage handles checkpointing and logging of the namespace edits. @@ -172,6 +172,7 @@ protected FSImage(Configuration conf, this.editLog = FSEditLog.newInstance(conf, storage, editsDirs); archivalManager = new NNStorageRetentionManager(conf, storage, editLog); + FSImageFormatProtobuf.initParallelLoad(conf); } void format(FSNamesystem fsn, String clusterId, boolean force) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index da067d15b6db5..478cec55d0dd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -75,8 +75,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Contains inner classes for reading or writing the on-disk format for @@ -242,6 +242,7 @@ public void load(File file, boolean requireSameLayoutVersion) * the layout version. 
*/ public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) { + return new LoaderDelegator(conf, fsn); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 6212e65e01db6..26df995e552e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -23,13 +23,14 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.List; -import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,8 +72,8 @@ import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.hdfs.util.ReadOnlyList; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hadoop.thirdparty.protobuf.ByteString; @InterfaceAudience.Private @@ -204,15 +205,20 @@ public static void updateBlocksMap(INodeFile file, BlockManager bm) { private final FSDirectory dir; private final FSNamesystem fsn; private final FSImageFormatProtobuf.Loader parent; - private ReentrantLock cacheNameMapLock; - private ReentrantLock blockMapLock; + + // Update blocks map by single thread asynchronously + 
private ExecutorService blocksMapUpdateExecutor; + // update name cache by single thread asynchronously. + private ExecutorService nameCacheUpdateExecutor; Loader(FSNamesystem fsn, final FSImageFormatProtobuf.Loader parent) { this.fsn = fsn; this.dir = fsn.dir; this.parent = parent; - cacheNameMapLock = new ReentrantLock(true); - blockMapLock = new ReentrantLock(true); + // Note: these executors must be SingleThreadExecutor, as they + // are used to modify structures which are not thread safe. + blocksMapUpdateExecutor = Executors.newSingleThreadExecutor(); + nameCacheUpdateExecutor = Executors.newSingleThreadExecutor(); } void loadINodeDirectorySectionInParallel(ExecutorService service, @@ -221,8 +227,7 @@ void loadINodeDirectorySectionInParallel(ExecutorService service, LOG.info("Loading the INodeDirectory section in parallel with {} sub-" + "sections", sections.size()); CountDownLatch latch = new CountDownLatch(sections.size()); - final CopyOnWriteArrayList exceptions = - new CopyOnWriteArrayList<>(); + final List exceptions = Collections.synchronizedList(new ArrayList<>()); for (FileSummary.Section s : sections) { service.submit(() -> { InputStream ins = null; @@ -231,8 +236,7 @@ void loadINodeDirectorySectionInParallel(ExecutorService service, compressionCodec); loadINodeDirectorySection(ins); } catch (Exception e) { - LOG.error("An exception occurred loading INodeDirectories in " + - "parallel", e); + LOG.error("An exception occurred loading INodeDirectories in parallel", e); exceptions.add(new IOException(e)); } finally { latch.countDown(); @@ -263,7 +267,6 @@ void loadINodeDirectorySectionInParallel(ExecutorService service, void loadINodeDirectorySection(InputStream in) throws IOException { final List refList = parent.getLoaderContext() .getRefList(); - ArrayList inodeList = new ArrayList<>(); while (true) { INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry .parseDelimitedFrom(in); @@ -274,15 +277,7 @@ void 
loadINodeDirectorySection(InputStream in) throws IOException { INodeDirectory p = dir.getInode(e.getParent()).asDirectory(); for (long id : e.getChildrenList()) { INode child = dir.getInode(id); - if (addToParent(p, child)) { - if (child.isFile()) { - inodeList.add(child); - } - if (inodeList.size() >= DIRECTORY_ENTRY_BATCH_SIZE) { - addToCacheAndBlockMap(inodeList); - inodeList.clear(); - } - } else { + if (!addToParent(p, child)) { LOG.warn("Failed to add the inode {} to the directory {}", child.getId(), p.getId()); } @@ -290,40 +285,79 @@ void loadINodeDirectorySection(InputStream in) throws IOException { for (int refId : e.getRefChildrenList()) { INodeReference ref = refList.get(refId); - if (addToParent(p, ref)) { - if (ref.isFile()) { - inodeList.add(ref); - } - if (inodeList.size() >= DIRECTORY_ENTRY_BATCH_SIZE) { - addToCacheAndBlockMap(inodeList); - inodeList.clear(); - } - } else { + if (!addToParent(p, ref)) { LOG.warn("Failed to add the inode reference {} to the directory {}", ref.getId(), p.getId()); } } } - addToCacheAndBlockMap(inodeList); } - private void addToCacheAndBlockMap(ArrayList inodeList) { - try { - cacheNameMapLock.lock(); - for (INode i : inodeList) { - dir.cacheName(i); - } - } finally { - cacheNameMapLock.unlock(); + private void fillUpInodeList(ArrayList inodeList, INode inode) { + if (inode.isFile()) { + inodeList.add(inode); } + if (inodeList.size() >= DIRECTORY_ENTRY_BATCH_SIZE) { + addToCacheAndBlockMap(inodeList); + inodeList.clear(); + } + } - try { - blockMapLock.lock(); - for (INode i : inodeList) { - updateBlocksMap(i.asFile(), fsn.getBlockManager()); + private void addToCacheAndBlockMap(final ArrayList inodeList) { + final ArrayList inodes = new ArrayList<>(inodeList); + nameCacheUpdateExecutor.submit( + new Runnable() { + @Override + public void run() { + addToCacheInternal(inodes); + } + }); + blocksMapUpdateExecutor.submit( + new Runnable() { + @Override + public void run() { + updateBlockMapInternal(inodes); + } + }); + 
} + + // update name cache with non-thread safe + private void addToCacheInternal(ArrayList inodeList) { + for (INode i : inodeList) { + dir.cacheName(i); + } + } + + // update blocks map with non-thread safe + private void updateBlockMapInternal(ArrayList inodeList) { + for (INode i : inodeList) { + updateBlocksMap(i.asFile(), fsn.getBlockManager()); + } + } + + void waitBlocksMapAndNameCacheUpdateFinished() throws IOException { + long start = System.currentTimeMillis(); + waitExecutorTerminated(blocksMapUpdateExecutor); + waitExecutorTerminated(nameCacheUpdateExecutor); + LOG.info("Completed update blocks map and name cache, total waiting " + + "duration {}ms.", (System.currentTimeMillis() - start)); + } + + private void waitExecutorTerminated(ExecutorService executorService) + throws IOException { + executorService.shutdown(); + long start = System.currentTimeMillis(); + while (!executorService.isTerminated()) { + try { + executorService.awaitTermination(1, TimeUnit.SECONDS); + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting to executor service terminated duration {}ms.", + (System.currentTimeMillis() - start)); + } + } catch (InterruptedException e) { + LOG.error("Interrupted waiting for executor terminated.", e); + throw new IOException(e); } - } finally { - blockMapLock.unlock(); } } @@ -340,6 +374,7 @@ private int loadINodesInSection(InputStream in, Counter counter) // As the input stream is a LimitInputStream, the reading will stop when // EOF is encountered at the end of the stream. 
int cntr = 0; + ArrayList inodeList = new ArrayList<>(); while (true) { INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); if (p == null) { @@ -354,12 +389,16 @@ private int loadINodesInSection(InputStream in, Counter counter) synchronized(this) { dir.addToInodeMap(n); } + fillUpInodeList(inodeList, n); } cntr++; if (counter != null) { counter.increment(); } } + if (inodeList.size() > 0){ + addToCacheAndBlockMap(inodeList); + } return cntr; } @@ -383,8 +422,7 @@ void loadINodeSectionInParallel(ExecutorService service, long expectedInodes = 0; CountDownLatch latch = new CountDownLatch(sections.size()); AtomicInteger totalLoaded = new AtomicInteger(0); - final CopyOnWriteArrayList exceptions = - new CopyOnWriteArrayList<>(); + final List exceptions = Collections.synchronizedList(new ArrayList<>()); for (int i=0; i < sections.size(); i++) { FileSummary.Section s = sections.get(i); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java index be21d1f80f9e9..183449f574ec9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -76,8 +76,8 @@ import org.apache.hadoop.util.LimitInputStream; import org.apache.hadoop.util.Time; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.thirdparty.protobuf.CodedOutputStream; /** @@ -88,6 +88,8 @@ public final class FSImageFormatProtobuf { private static final Logger LOG = LoggerFactory .getLogger(FSImageFormatProtobuf.class); + private static volatile boolean 
enableParallelLoad = false; + public static final class LoaderContext { private SerialNumberManager.StringTable stringTable; private final ArrayList refList = Lists.newArrayList(); @@ -269,14 +271,20 @@ public InputStream getInputStreamForSection(FileSummary.Section section, String compressionCodec) throws IOException { FileInputStream fin = new FileInputStream(filename); - FileChannel channel = fin.getChannel(); - channel.position(section.getOffset()); - InputStream in = new BufferedInputStream(new LimitInputStream(fin, - section.getLength())); + try { - in = FSImageUtil.wrapInputStreamForCompression(conf, - compressionCodec, in); - return in; + FileChannel channel = fin.getChannel(); + channel.position(section.getOffset()); + InputStream in = new BufferedInputStream(new LimitInputStream(fin, + section.getLength())); + + in = FSImageUtil.wrapInputStreamForCompression(conf, + compressionCodec, in); + return in; + } catch (IOException e) { + fin.close(); + throw e; + } } /** @@ -447,6 +455,7 @@ public int compare(FileSummary.Section s1, FileSummary.Section s2) { } else { inodeLoader.loadINodeDirectorySection(in); } + inodeLoader.waitBlocksMapAndNameCacheUpdateFinished(); break; case FILES_UNDERCONSTRUCTION: inodeLoader.loadFilesUnderConstructionSection(in); @@ -535,10 +544,9 @@ private void loadSecretManagerSection(InputStream in, StartupProgress prog, Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, currentStep); for (int i = 0; i < numTokens; ++i) { tokens.add(SecretManagerSection.PersistToken.parseDelimitedFrom(in)); - counter.increment(); } - fsn.loadSecretManagerState(s, keys, tokens); + fsn.loadSecretManagerState(s, keys, tokens, counter); } private void loadCacheManagerSection(InputStream in, StartupProgress prog, @@ -575,9 +583,7 @@ private void loadErasureCodingSection(InputStream in) } private static boolean enableParallelSaveAndLoad(Configuration conf) { - boolean loadInParallel = - conf.getBoolean(DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY, - 
DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_DEFAULT); + boolean loadInParallel = enableParallelLoad; boolean compressionEnabled = conf.getBoolean( DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, DFSConfigKeys.DFS_IMAGE_COMPRESS_DEFAULT); @@ -593,6 +599,20 @@ private static boolean enableParallelSaveAndLoad(Configuration conf) { return loadInParallel; } + public static void initParallelLoad(Configuration conf) { + enableParallelLoad = + conf.getBoolean(DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY, + DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_DEFAULT); + } + + public static void refreshParallelSaveAndLoad(boolean enable) { + enableParallelLoad = enable; + } + + public static boolean getEnableParallelLoad() { + return enableParallelLoad; + } + public static final class Saver { public static final int CHECK_CANCEL_INTERVAL = 4096; private boolean writeSubSections = false; @@ -633,10 +653,6 @@ public int getInodesPerSubSection() { return inodesPerSubSection; } - public boolean shouldWriteSubSections() { - return writeSubSections; - } - /** * Commit the length and offset of a fsimage section to the summary index, * including the sub section, which will be committed before the section is diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index 706b2bda06299..3d75cebf729d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -53,7 +53,7 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Static utility functions for serializing various pieces of data in the correct diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java index b04007513c3fd..9f71f69902c86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java @@ -35,8 +35,8 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; class FSImageTransactionalStorageInspector extends FSImageStorageInspector { public static final Logger LOG = LoggerFactory.getLogger( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 99ad6f2eb079f..9855b434e9c49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -24,12 +24,16 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_MAX_SIZE_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_MAX_SIZE_KEY; +import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SIGNATURE_MAX_SIZE_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SIGNATURE_MAX_SIZE_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_PERMISSIONS_SUPERUSER_ONLY_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_PERMISSIONS_SUPERUSER_ONLY_KEY; @@ -99,6 +103,7 @@ import org.apache.commons.text.CaseUtils; import org.apache.hadoop.hdfs.protocol.ECTopologyVerifierResult; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*; import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE; @@ -107,7 +112,7 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.thirdparty.protobuf.ByteString; import 
org.apache.hadoop.hdfs.protocol.BatchedDirectoryListing; import org.apache.hadoop.hdfs.protocol.HdfsPartialListing; @@ -336,12 +341,12 @@ import org.apache.log4j.AsyncAppender; import org.eclipse.jetty.util.ajax.JSON; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.LoggerFactory; /** @@ -383,6 +388,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Metric final MutableRatesWithAggregation detailedLockHoldTimeMetrics = registry.newRatesWithAggregation("detailedLockHoldTimeMetrics"); + private final String contextFieldSeparator; + boolean isAuditEnabled() { return (!isDefaultAuditLogger || auditLog.isInfoEnabled()) && !auditLoggers.isEmpty(); @@ -397,7 +404,7 @@ private void logAuditEvent(boolean succeeded, String cmd, String src, String dst, FileStatus stat) throws IOException { if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(succeeded, Server.getRemoteUser(), Server.getRemoteIp(), - cmd, src, dst, stat); + cmd, src, dst, stat); } } @@ -428,6 +435,9 @@ private void logAuditEvent(boolean succeeded, for (AuditLogger logger : auditLoggers) { if (logger instanceof HdfsAuditLogger) { HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger; + if (auditLogWithRemotePort) { + 
appendClientPortToCallerContextIfAbsent(); + } hdfsLogger.logAuditEvent(succeeded, ugiStr, addr, cmd, src, dst, status, CallerContext.getCurrent(), ugi, dtSecretManager); } else { @@ -436,6 +446,24 @@ private void logAuditEvent(boolean succeeded, } } + private void appendClientPortToCallerContextIfAbsent() { + final CallerContext ctx = CallerContext.getCurrent(); + if (isClientPortInfoAbsent(ctx)) { + String origContext = ctx == null ? null : ctx.getContext(); + byte[] origSignature = ctx == null ? null : ctx.getSignature(); + CallerContext.setCurrent( + new CallerContext.Builder(origContext, contextFieldSeparator) + .append(CallerContext.CLIENT_PORT_STR, String.valueOf(Server.getRemotePort())) + .setSignature(origSignature) + .build()); + } + } + + private boolean isClientPortInfoAbsent(CallerContext ctx){ + return ctx == null || ctx.getContext() == null + || !ctx.getContext().contains(CallerContext.CLIENT_PORT_STR); + } + /** * Logger for audit events, noting successful FSNamesystem operations. Emits * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated @@ -486,6 +514,7 @@ private void logAuditEvent(boolean succeeded, // underlying logger is disabled, and avoid some unnecessary work. private final boolean isDefaultAuditLogger; private final List auditLoggers; + private final boolean auditLogWithRemotePort; /** The namespace tree. 
*/ FSDirectory dir; @@ -815,8 +844,14 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { LOG.info("Enabling async auditlog"); - enableAsyncAuditLog(); - } + enableAsyncAuditLog(conf); + } + auditLogWithRemotePort = + conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_KEY, + DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_DEFAULT); + this.contextFieldSeparator = + conf.get(HADOOP_CALLER_CONTEXT_SEPARATOR_KEY, + HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT); fsLock = new FSNamesystemLock(conf, detailedLockHoldTimeMetrics); cond = fsLock.newWriteLockCondition(); cpLock = new ReentrantLock(); @@ -959,6 +994,10 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { this.leaseRecheckIntervalMs = conf.getLong( DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY, DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT); + Preconditions.checkArgument( + leaseRecheckIntervalMs > 0, + DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY + + " must be greater than zero"); this.maxLockHoldToReleaseLeaseMs = conf.getLong( DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY, DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT); @@ -1427,8 +1466,11 @@ void stopActiveServices() { LOG.info("Stopping services started for active state"); writeLock(); try { - if (blockManager != null && blockManager.getSPSManager() != null) { - blockManager.getSPSManager().stop(); + if (blockManager != null) { + blockManager.stopReconstructionInitializer(); + if (blockManager.getSPSManager() != null) { + blockManager.getSPSManager().stop(); + } } stopSecretManager(); leaseManager.stopMonitor(); @@ -1904,6 +1946,7 @@ private void metaSave(PrintWriter out) { */ BatchedListEntries listOpenFiles(long prevId, EnumSet openFilesTypes, String path) throws IOException { + INode.checkAbsolutePath(path); final String operationName = "listOpenFiles"; checkSuperuserPrivilege(); 
checkOperation(OperationCategory.READ); @@ -1942,7 +1985,12 @@ public BatchedListEntries getFilesBlockingDecom(long prevId, LightWeightHashSet openFileIds = new LightWeightHashSet<>(); for (DatanodeDescriptor dataNode : blockManager.getDatanodeManager().getDatanodes()) { - for (long ucFileId : dataNode.getLeavingServiceStatus().getOpenFiles()) { + // Sort open files + LightWeightHashSet dnOpenFiles = + dataNode.getLeavingServiceStatus().getOpenFiles(); + Long[] dnOpenFileIds = new Long[dnOpenFiles.size()]; + Arrays.sort(dnOpenFiles.toArray(dnOpenFileIds)); + for (Long ucFileId : dnOpenFileIds) { INode ucFile = getFSDirectory().getInode(ucFileId); if (ucFile == null || ucFileId <= prevId || openFileIds.contains(ucFileId)) { @@ -1952,8 +2000,14 @@ public BatchedListEntries getFilesBlockingDecom(long prevId, continue; } Preconditions.checkState(ucFile instanceof INodeFile); - openFileIds.add(ucFileId); + INodeFile inodeFile = ucFile.asFile(); + if (!inodeFile.isUnderConstruction()) { + LOG.warn("The file {} is not under construction but has lease.", + inodeFile.getFullPathName()); + continue; + } + openFileIds.add(ucFileId); String fullPathName = inodeFile.getFullPathName(); if (org.apache.commons.lang3.StringUtils.isEmpty(path) @@ -2085,14 +2139,8 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, } } } - } else if (haEnabled && haContext != null && - haContext.getState().getServiceState() == OBSERVER) { - for (LocatedBlock b : res.blocks.getLocatedBlocks()) { - if (b.getLocations() == null || b.getLocations().length == 0) { - throw new ObserverRetryOnActiveException("Zero blocklocations " - + "for " + srcArg); - } - } + } else if (isObserver()) { + checkBlockLocationsWhenObserver(res.blocks, srcArg); } } finally { readUnlock(operationName); @@ -2258,8 +2306,8 @@ boolean truncate(String src, long newLength, String clientName, } getEditLog().logSync(); if (!toRemoveBlocks.getToDeleteList().isEmpty()) { - removeBlocks(toRemoveBlocks); - 
toRemoveBlocks.clear(); + blockManager.addBLocksToMarkedDeleteQueue( + toRemoveBlocks.getToDeleteList()); } logAuditEvent(true, operationName, src, null, r.getFileStatus()); } catch (AccessControlException e) { @@ -2601,7 +2649,9 @@ private HdfsFileStatus startFileInt(String src, .append(", createFlag=").append(flag) .append(", blockSize=").append(blockSize) .append(", supportedVersions=") - .append(Arrays.toString(supportedVersions)); + .append(Arrays.toString(supportedVersions)) + .append(", ecPolicyName=").append(ecPolicyName) + .append(", storagePolicy=").append(storagePolicy); NameNode.stateChangeLog.debug(builder.toString()); } if (!DFSUtil.isValidName(src) || @@ -2696,8 +2746,8 @@ private HdfsFileStatus startFileInt(String src, if (!skipSync) { getEditLog().logSync(); if (toRemoveBlocks != null) { - removeBlocks(toRemoveBlocks); - toRemoveBlocks.clear(); + blockManager.addBLocksToMarkedDeleteQueue( + toRemoveBlocks.getToDeleteList()); } } } @@ -2758,7 +2808,7 @@ enum RecoverLeaseOp { TRUNCATE_FILE, RECOVER_LEASE; - private String getExceptionMessage(String src, String holder, + public String getExceptionMessage(String src, String holder, String clientMachine, String reason) { return "Failed to " + this + " " + src + " for " + holder + " on " + clientMachine + " because " + reason; @@ -2918,12 +2968,12 @@ LocatedBlock getAdditionalBlock( LocatedBlock[] onRetryBlock = new LocatedBlock[1]; FSDirWriteFileOp.ValidateAddBlockResult r; - checkOperation(OperationCategory.READ); + checkOperation(OperationCategory.WRITE); final FSPermissionChecker pc = getPermissionChecker(); FSPermissionChecker.setOperationType(operationName); readLock(); try { - checkOperation(OperationCategory.READ); + checkOperation(OperationCategory.WRITE); r = FSDirWriteFileOp.validateAddBlock(this, pc, src, fileId, clientName, previous, onRetryBlock); } finally { @@ -2969,12 +3019,15 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, final byte storagePolicyID; final List chosen; 
final BlockType blockType; - checkOperation(OperationCategory.READ); + checkOperation(OperationCategory.WRITE); final FSPermissionChecker pc = getPermissionChecker(); FSPermissionChecker.setOperationType(null); readLock(); try { - checkOperation(OperationCategory.READ); + // Changing this operation category to WRITE instead of making getAdditionalDatanode as a + // read method is aim to let Active NameNode to handle this RPC, because Active NameNode + // contains a more complete DN selection context than Observer NameNode. + checkOperation(OperationCategory.WRITE); //check safe mode checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk); final INodesInPath iip = dir.resolvePath(pc, src, fileId); @@ -3215,8 +3268,8 @@ void renameTo(final String src, final String dst, BlocksMapUpdateInfo collectedBlocks = res.collectedBlocks; if (!collectedBlocks.getToDeleteList().isEmpty()) { - removeBlocks(collectedBlocks); - collectedBlocks.clear(); + blockManager.addBLocksToMarkedDeleteQueue( + collectedBlocks.getToDeleteList()); } logAuditEvent(true, operationName + " (options=" + @@ -3253,10 +3306,11 @@ boolean delete(String src, boolean recursive, boolean logRetryCache) throw e; } getEditLog().logSync(); + logAuditEvent(ret, operationName, src); if (toRemovedBlocks != null) { - removeBlocks(toRemovedBlocks); // Incremental deletion of blocks + blockManager.addBLocksToMarkedDeleteQueue( + toRemovedBlocks.getToDeleteList()); } - logAuditEvent(true, operationName, src); return ret; } @@ -3265,30 +3319,6 @@ FSPermissionChecker getPermissionChecker() return dir.getPermissionChecker(); } - /** - * From the given list, incrementally remove the blocks from blockManager - * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to - * ensure that other waiters on the lock can get in. 
See HDFS-2938 - * - * @param blocks - * An instance of {@link BlocksMapUpdateInfo} which contains a list - * of blocks that need to be removed from blocksMap - */ - void removeBlocks(BlocksMapUpdateInfo blocks) { - List toDeleteList = blocks.getToDeleteList(); - Iterator iter = toDeleteList.iterator(); - while (iter.hasNext()) { - writeLock(); - try { - for (int i = 0; i < blockDeletionIncrement && iter.hasNext(); i++) { - blockManager.removeBlock(iter.next()); - } - } finally { - writeUnlock("removeBlocks"); - } - } - } - /** * Remove leases and inodes related to a given path * @param removedUCFiles INodes whose leases need to be released @@ -3358,6 +3388,10 @@ HdfsFileStatus getFileInfo(final String src, boolean resolveLink, logAuditEvent(false, operationName, src); throw e; } + if (needLocation && isObserver() && stat instanceof HdfsLocatedFileStatus) { + LocatedBlocks lbs = ((HdfsLocatedFileStatus) stat).getLocatedBlocks(); + checkBlockLocationsWhenObserver(lbs, src); + } logAuditEvent(true, operationName, src); return stat; } @@ -3649,10 +3683,13 @@ boolean internalReleaseLease(Lease lease, String src, INodesInPath iip, // if there are no valid replicas on data-nodes. String message = "DIR* NameSystem.internalReleaseLease: " + "Failed to release lease for file " + src + - ". Committed blocks are waiting to be minimally replicated." + - " Try again later."; + ". 
Committed blocks are waiting to be minimally replicated."; NameNode.stateChangeLog.warn(message); - throw new AlreadyBeingCreatedException(message); + if (!penultimateBlockMinStorage) { + throw new AlreadyBeingCreatedException(message); + } + // Intentionally fall through to UNDER_RECOVERY so BLOCK_RECOVERY is + // attempted case UNDER_CONSTRUCTION: case UNDER_RECOVERY: BlockUnderConstructionFeature uc = @@ -4015,14 +4052,10 @@ void closeFileCommitBlocks(String src, INodeFile pendingFile, */ void renewLease(String holder) throws IOException { checkOperation(OperationCategory.WRITE); - readLock(); - try { - checkOperation(OperationCategory.WRITE); - checkNameNodeSafeMode("Cannot renew lease for " + holder); - leaseManager.renewLease(holder); - } finally { - readUnlock("renewLease"); - } + checkNameNodeSafeMode("Cannot renew lease for " + holder); + // fsn is not mutated so lock is not required. the leaseManger is also + // thread-safe. + leaseManager.renewLease(holder); } /** @@ -4057,6 +4090,14 @@ DirectoryListing getListing(String src, byte[] startAfter, logAuditEvent(false, operationName, src); throw e; } + if (dl != null && needLocation && isObserver()) { + for (HdfsFileStatus fs : dl.getPartialListing()) { + if (fs instanceof HdfsLocatedFileStatus) { + LocatedBlocks lbs = ((HdfsLocatedFileStatus) fs).getLocatedBlocks(); + checkBlockLocationsWhenObserver(lbs, fs.toString()); + } + } + } logAuditEvent(true, operationName, src); return dl; } @@ -4321,9 +4362,8 @@ void handleLifeline(DatanodeRegistration nodeReg, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int xmitsInProgress, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { - int maxTransfer = blockManager.getMaxReplicationStreams() - xmitsInProgress; blockManager.getDatanodeManager().handleLifeline(nodeReg, reports, - getBlockPoolId(), cacheCapacity, cacheUsed, xceiverCount, maxTransfer, + cacheCapacity, cacheUsed, xceiverCount, failedVolumes, 
volumeFailureSummary); } @@ -4489,7 +4529,8 @@ private void clearCorruptLazyPersistFiles() INodesInPath.fromINode((INodeFile) bc), false); changed |= toRemoveBlocks != null; if (toRemoveBlocks != null) { - removeBlocks(toRemoveBlocks); // Incremental deletion of blocks + blockManager.addBLocksToMarkedDeleteQueue( + toRemoveBlocks.getToDeleteList()); } } } finally { @@ -4558,7 +4599,8 @@ public long getMissingReplOneBlocksCount() { return blockManager.getMissingReplOneBlocksCount(); } - @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"}) + @Metric(value = {"ExpiredHeartbeats", "Number of expired heartbeats"}, + type = Metric.Type.COUNTER) public int getExpiredHeartbeats() { return datanodeStatistics.getExpiredHeartbeats(); } @@ -4755,6 +4797,20 @@ public int getFsLockQueueLength() { return fsLock.getQueueLength(); } + @Metric(value = {"ReadLockLongHoldCount", "The number of time " + + "the read lock has been held for longer than the threshold"}, + type = Metric.Type.COUNTER) + public long getNumOfReadLockLongHold() { + return fsLock.getNumOfReadLockLongHold(); + } + + @Metric(value = {"WriteLockLongHoldCount", "The number of time " + + "the write lock has been held for longer than the threshold"}, + type = Metric.Type.COUNTER) + public long getNumOfWriteLockLongHold() { + return fsLock.getNumOfWriteLockLongHold(); + } + int getNumberOfDatanodes(DatanodeReportType type) { readLock(); try { @@ -4765,6 +4821,32 @@ int getNumberOfDatanodes(DatanodeReportType type) { } } + DatanodeInfo[] slowDataNodesReport() throws IOException { + String operationName = "slowDataNodesReport"; + DatanodeInfo[] datanodeInfos; + checkOperation(OperationCategory.UNCHECKED); + readLock(); + try { + checkOperation(OperationCategory.UNCHECKED); + final DatanodeManager dm = getBlockManager().getDatanodeManager(); + final List results = dm.getAllSlowDataNodes(); + datanodeInfos = getDatanodeInfoFromDescriptors(results); + } finally { + readUnlock(operationName); + } + 
logAuditEvent(true, operationName, null); + return datanodeInfos; + } + + private DatanodeInfo[] getDatanodeInfoFromDescriptors(List results) { + DatanodeInfo[] datanodeInfos = new DatanodeInfo[results.size()]; + for (int i = 0; i < datanodeInfos.length; i++) { + datanodeInfos[i] = new DatanodeInfoBuilder().setFrom(results.get(i)).build(); + datanodeInfos[i].setNumBlocks(results.get(i).numBlocks()); + } + return datanodeInfos; + } + DatanodeInfo[] datanodeReport(final DatanodeReportType type) throws IOException { String operationName = "datanodeReport"; @@ -4776,12 +4858,7 @@ DatanodeInfo[] datanodeReport(final DatanodeReportType type) checkOperation(OperationCategory.UNCHECKED); final DatanodeManager dm = getBlockManager().getDatanodeManager(); final List results = dm.getDatanodeListForReport(type); - arr = new DatanodeInfo[results.size()]; - for (int i=0; i live = new ArrayList(); + getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); + int liveInService = live.size(); + for (DatanodeDescriptor node : live) { + liveInService -= node.isInMaintenance() ? 1 : 0; + } + return liveInService; + } + @Override // FSNamesystemMBean @Metric({"VolumeFailuresTotal", "Total number of volume failures across all Datanodes"}) @@ -6153,8 +6243,10 @@ void loadSecretManagerStateCompat(DataInput in) throws IOException { void loadSecretManagerState(SecretManagerSection s, List keys, - List tokens) throws IOException { - dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens)); + List tokens, + StartupProgress.Counter counter) throws IOException { + dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens), + counter); } /** @@ -6239,13 +6331,19 @@ boolean isExternalInvocation() { private static UserGroupInformation getRemoteUser() throws IOException { return NameNode.getRemoteUser(); } - + /** - * Log fsck event in the audit log + * Log fsck event in the audit log. 
+ * + * @param succeeded Whether authorization succeeded. + * @param src Path of affected source file. + * @param remoteAddress Remote address of the request. + * @throws IOException if {@link #getRemoteUser()} fails. */ - void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { + void logFsckEvent(boolean succeeded, String src, InetAddress remoteAddress) + throws IOException { if (isAuditEnabled()) { - logAuditEvent(true, getRemoteUser(), + logAuditEvent(succeeded, getRemoteUser(), remoteAddress, "fsck", src, null, null); } @@ -6370,6 +6468,7 @@ public String getLiveNodes() { .put("infoAddr", node.getInfoAddr()) .put("infoSecureAddr", node.getInfoSecureAddr()) .put("xferaddr", node.getXferAddr()) + .put("location", node.getNetworkLocation()) .put("lastContact", getLastContact(node)) .put("usedSpace", getDfsUsed(node)) .put("adminState", node.getAdminState().toString()) @@ -6419,6 +6518,7 @@ public String getDeadNodes() { .put("decommissioned", node.isDecommissioned()) .put("adminState", node.getAdminState().toString()) .put("xferaddr", node.getXferAddr()) + .put("location", node.getNetworkLocation()) .build(); info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo); } @@ -6440,6 +6540,7 @@ public String getDecomNodes() { Map innerinfo = ImmutableMap . builder() .put("xferaddr", node.getXferAddr()) + .put("location", node.getNetworkLocation()) .put("underReplicatedBlocks", node.getLeavingServiceStatus().getUnderReplicatedBlocks()) .put("decommissionOnlyReplicas", @@ -6467,6 +6568,7 @@ public String getEnteringMaintenanceNodes() { Map attrMap = ImmutableMap . 
builder() .put("xferaddr", node.getXferAddr()) + .put("location", node.getNetworkLocation()) .put("underReplicatedBlocks", node.getLeavingServiceStatus().getUnderReplicatedBlocks()) .put("maintenanceOnlyReplicas", @@ -7111,7 +7213,8 @@ void deleteSnapshot(String snapshotRoot, String snapshotName, // Breaking the pattern as removing blocks have to happen outside of the // global lock if (blocksToBeDeleted != null) { - removeBlocks(blocksToBeDeleted); + blockManager.addBLocksToMarkedDeleteQueue( + blocksToBeDeleted.getToDeleteList()); } logAuditEvent(true, operationName, rootPath, null, null); } @@ -8058,7 +8161,6 @@ boolean disableErasureCodingPolicy(String ecPolicyName, checkOperation(OperationCategory.WRITE); checkErasureCodingSupported(operationName); boolean success = false; - LOG.info("Disable the erasure coding policy " + ecPolicyName); try { writeLock(); try { @@ -8141,7 +8243,7 @@ public ECTopologyVerifierResult getECTopologyResultForPolicies( getBlockManager().getDatanodeManager().getNumOfDataNodes(); int numOfRacks = getBlockManager().getDatanodeManager().getNetworkTopology() - .getNumOfRacks(); + .getNumOfNonEmptyRacks(); result = ECTopologyVerifier .getECTopologyVerifierResult(numOfRacks, numOfDataNodes, policies); } @@ -8404,9 +8506,10 @@ public void logAuditEvent(boolean succeeded, String userName, src = escapeJava(src); dst = escapeJava(dst); sb.setLength(0); + String ipAddr = addr != null ? 
"/" + addr.getHostAddress() : "null"; sb.append("allowed=").append(succeeded).append("\t") .append("ugi=").append(userName).append("\t") - .append("ip=").append(addr).append("\t") + .append("ip=").append(ipAddr).append("\t") .append("cmd=").append(cmd).append("\t") .append("src=").append(src).append("\t") .append("dst=").append(dst).append("\t"); @@ -8440,18 +8543,18 @@ public void logAuditEvent(boolean succeeded, String userName, callerContext != null && callerContext.isContextValid()) { sb.append("\t").append("callerContext="); - if (callerContext.getContext().length() > callerContextMaxLen) { - sb.append(callerContext.getContext().substring(0, - callerContextMaxLen)); + String context = escapeJava(callerContext.getContext()); + if (context.length() > callerContextMaxLen) { + sb.append(context, 0, callerContextMaxLen); } else { - sb.append(callerContext.getContext()); + sb.append(context); } if (callerContext.getSignature() != null && callerContext.getSignature().length > 0 && callerContext.getSignature().length <= callerSignatureMaxLen) { sb.append(":") - .append(new String(callerContext.getSignature(), - CallerContext.SIGNATURE_ENCODING)); + .append(escapeJava(new String(callerContext.getSignature(), + CallerContext.SIGNATURE_ENCODING))); } } logAuditMessage(sb.toString()); @@ -8472,7 +8575,7 @@ public void logAuditMessage(String message) { } } - private static void enableAsyncAuditLog() { + private static void enableAsyncAuditLog(Configuration conf) { if (!(auditLog instanceof Log4JLogger)) { LOG.warn("Log4j is required to enable async auditlog"); return; @@ -8483,6 +8586,14 @@ private static void enableAsyncAuditLog() { // failsafe against trying to async it more than once if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { AsyncAppender asyncAppender = new AsyncAppender(); + asyncAppender.setBlocking(conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY, + 
DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT + )); + asyncAppender.setBufferSize(conf.getInt( + DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY, + DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT + )); // change logger to have an async appender containing all the // previously configured appenders for (Appender appender : appenders) { @@ -8576,7 +8687,7 @@ private ECTopologyVerifierResult getEcTopologyVerifierResultForEnabledPolicies() int numOfDataNodes = getBlockManager().getDatanodeManager().getNumOfDataNodes(); int numOfRacks = getBlockManager().getDatanodeManager().getNetworkTopology() - .getNumOfRacks(); + .getNumOfNonEmptyRacks(); ErasureCodingPolicy[] enabledEcPolicies = getErasureCodingPolicyManager().getCopyOfEnabledPolicies(); return ECTopologyVerifier @@ -8633,5 +8744,23 @@ public void checkErasureCodingSupported(String operationName) throw new UnsupportedActionException(operationName + " not supported."); } } -} + private boolean isObserver() { + return haEnabled && haContext != null && haContext.getState().getServiceState() == OBSERVER; + } + + private void checkBlockLocationsWhenObserver(LocatedBlocks blocks, String src) + throws ObserverRetryOnActiveException { + if (blocks == null) { + return; + } + List locatedBlockList = blocks.getLocatedBlocks(); + if (locatedBlockList != null) { + for (LocatedBlock b : locatedBlockList) { + if (b.getLocations() == null || b.getLocations().length == 0) { + throw new ObserverRetryOnActiveException("Zero blocklocations for " + src); + } + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java index b87117c2318e7..f556219b324d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java @@ -22,10 +22,11 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantReadWriteLock; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.log.LogThrottlingHelper; @@ -108,6 +109,16 @@ public Long initialValue() { private final AtomicReference longestReadLockHeldInfo = new AtomicReference<>(new LockHeldInfo(0, 0, null)); private LockHeldInfo longestWriteLockHeldInfo = new LockHeldInfo(0, 0, null); + /** + * The number of time the read lock + * has been held longer than the threshold. + */ + private final LongAdder numReadLockLongHold = new LongAdder(); + /** + * The number of time the write lock + * has been held for longer than the threshold. 
+ */ + private final LongAdder numWriteLockLongHold = new LongAdder(); @VisibleForTesting static final String OP_NAME_OTHER = "OTHER"; @@ -176,6 +187,7 @@ public void readUnlock(String opName) { final long readLockIntervalMs = TimeUnit.NANOSECONDS.toMillis(readLockIntervalNanos); if (needReport && readLockIntervalMs >= this.readLockReportingThresholdMs) { + numReadLockLongHold.increment(); LockHeldInfo localLockHeldInfo; do { localLockHeldInfo = longestReadLockHeldInfo.get(); @@ -253,6 +265,7 @@ public void writeUnlock(String opName, boolean suppressWriteLockReport) { LogAction logAction = LogThrottlingHelper.DO_NOT_LOG; if (needReport && writeLockIntervalMs >= this.writeLockReportingThresholdMs) { + numWriteLockLongHold.increment(); if (longestWriteLockHeldInfo.getIntervalMs() < writeLockIntervalMs) { longestWriteLockHeldInfo = new LockHeldInfo(currentTimeMs, writeLockIntervalMs, @@ -302,6 +315,28 @@ public Condition newWriteLockCondition() { return coarseLock.writeLock().newCondition(); } + /** + * Returns the number of time the read lock + * has been held longer than the threshold. + * + * @return long - Number of time the read lock + * has been held longer than the threshold + */ + public long getNumOfReadLockLongHold() { + return numReadLockLongHold.longValue(); + } + + /** + * Returns the number of time the write lock + * has been held longer than the threshold. + * + * @return long - Number of time the write lock + * has been held longer than the threshold. + */ + public long getNumOfWriteLockLongHold() { + return numWriteLockLongHold.longValue(); + } + /** * Returns the QueueLength of waiting threads. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java index c4fd6a6d3a49a..5d136cb7504c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java @@ -23,7 +23,7 @@ import java.util.List; import java.util.Stack; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.ipc.CallerContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,11 +89,16 @@ private String toAccessControlString(INodeAttributes inodeAttrib, private static ThreadLocal operationType = new ThreadLocal<>(); - protected FSPermissionChecker(String fsOwner, String supergroup, UserGroupInformation callerUgi, INodeAttributeProvider attributeProvider) { - boolean useNewAuthorizationWithContextAPI; + this(fsOwner, supergroup, callerUgi, attributeProvider, false); + } + + protected FSPermissionChecker(String fsOwner, String supergroup, + UserGroupInformation callerUgi, + INodeAttributeProvider attributeProvider, + boolean useAuthorizationWithContextAPI) { this.fsOwner = fsOwner; this.supergroup = supergroup; this.callerUgi = callerUgi; @@ -102,36 +107,15 @@ protected FSPermissionChecker(String fsOwner, String supergroup, isSuper = user.equals(fsOwner) || groups.contains(supergroup); this.attributeProvider = attributeProvider; - // If the AccessControlEnforcer supports context enrichment, call - // the new API. Otherwise choose the old API. 
- Class[] cArg = new Class[1]; - cArg[0] = INodeAttributeProvider.AuthorizationContext.class; - - AccessControlEnforcer ace; if (attributeProvider == null) { // If attribute provider is null, use FSPermissionChecker default // implementation to authorize, which supports authorization with context. - useNewAuthorizationWithContextAPI = true; - LOG.info("Default authorization provider supports the new authorization" + + authorizeWithContext = true; + LOG.debug("Default authorization provider supports the new authorization" + " provider API"); } else { - ace = attributeProvider.getExternalAccessControlEnforcer(this); - // if the runtime external authorization provider doesn't support - // checkPermissionWithContext(), fall back to the old API - // checkPermission(). - try { - Class clazz = ace.getClass(); - clazz.getDeclaredMethod("checkPermissionWithContext", cArg); - useNewAuthorizationWithContextAPI = true; - LOG.info("Use the new authorization provider API"); - } catch (NoSuchMethodException e) { - useNewAuthorizationWithContextAPI = false; - LOG.info("Fallback to the old authorization provider API because " + - "the expected method is not found."); - } + authorizeWithContext = useAuthorizationWithContextAPI; } - - authorizeWithContext = useNewAuthorizationWithContextAPI; } public static void setOperationType(String opType) { @@ -232,31 +216,41 @@ void checkPermission(INodesInPath inodesInPath, boolean doCheckOwner, AccessControlEnforcer enforcer = getAccessControlEnforcer(); String opType = operationType.get(); - if (this.authorizeWithContext && opType != null) { - INodeAttributeProvider.AuthorizationContext.Builder builder = - new INodeAttributeProvider.AuthorizationContext.Builder(); - builder.fsOwner(fsOwner). - supergroup(supergroup). - callerUgi(callerUgi). - inodeAttrs(inodeAttrs). - inodes(inodes). - pathByNameArr(components). - snapshotId(snapshotId). - path(path). - ancestorIndex(ancestorIndex). - doCheckOwner(doCheckOwner). 
- ancestorAccess(ancestorAccess). - parentAccess(parentAccess). - access(access). - subAccess(subAccess). - ignoreEmptyDir(ignoreEmptyDir). - operationName(opType). - callerContext(CallerContext.getCurrent()); - enforcer.checkPermissionWithContext(builder.build()); - } else { - enforcer.checkPermission(fsOwner, supergroup, callerUgi, inodeAttrs, - inodes, components, snapshotId, path, ancestorIndex, doCheckOwner, - ancestorAccess, parentAccess, access, subAccess, ignoreEmptyDir); + try { + if (this.authorizeWithContext && opType != null) { + INodeAttributeProvider.AuthorizationContext.Builder builder = + new INodeAttributeProvider.AuthorizationContext.Builder(); + builder.fsOwner(fsOwner). + supergroup(supergroup). + callerUgi(callerUgi). + inodeAttrs(inodeAttrs). + inodes(inodes). + pathByNameArr(components). + snapshotId(snapshotId). + path(path). + ancestorIndex(ancestorIndex). + doCheckOwner(doCheckOwner). + ancestorAccess(ancestorAccess). + parentAccess(parentAccess). + access(access). + subAccess(subAccess). + ignoreEmptyDir(ignoreEmptyDir). + operationName(opType). 
+ callerContext(CallerContext.getCurrent()); + enforcer.checkPermissionWithContext(builder.build()); + } else { + enforcer.checkPermission(fsOwner, supergroup, callerUgi, inodeAttrs, + inodes, components, snapshotId, path, ancestorIndex, doCheckOwner, + ancestorAccess, parentAccess, access, subAccess, ignoreEmptyDir); + } + } catch (AccessControlException ace) { + Class exceptionClass = ace.getClass(); + if (exceptionClass.equals(AccessControlException.class) + || exceptionClass.equals(TraverseAccessControlException.class)) { + throw ace; + } + // Only form a new ACE for subclasses which come from external enforcers + throw new AccessControlException(ace); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java index 2acbda4005b57..a90dc27a54fa8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java @@ -34,7 +34,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * FSTreeTraverser traverse directory recursively and process files diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 78394ab1520e5..9b6f82f088450 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -45,11 +45,11 @@ import 
org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.io.nativeio.NativeIO; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; /** * Journal manager for the common case of edits files being written diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsckServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsckServlet.java index 5fae9cd48901b..059b6531242e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsckServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsckServlet.java @@ -48,28 +48,32 @@ public void doGet(HttpServletRequest request, HttpServletResponse response @SuppressWarnings("unchecked") final Map pmap = request.getParameterMap(); final PrintWriter out = response.getWriter(); - final InetAddress remoteAddress = + final InetAddress remoteAddress = InetAddress.getByName(request.getRemoteAddr()); - final ServletContext context = getServletContext(); + final ServletContext context = getServletContext(); final Configuration conf = NameNodeHttpServer.getConfFromContext(context); final UserGroupInformation ugi = getUGI(request, conf); try { - ugi.doAs(new PrivilegedExceptionAction() { - @Override - public Object run() throws Exception { - NameNode nn = 
NameNodeHttpServer.getNameNodeFromContext(context); - - final FSNamesystem namesystem = nn.getNamesystem(); - final BlockManager bm = namesystem.getBlockManager(); - final int totalDatanodes = - namesystem.getNumberOfDatanodes(DatanodeReportType.LIVE); - new NamenodeFsck(conf, nn, - bm.getDatanodeManager().getNetworkTopology(), pmap, out, - totalDatanodes, remoteAddress).fsck(); - - return null; + ugi.doAs((PrivilegedExceptionAction) () -> { + NameNode nn = NameNodeHttpServer.getNameNodeFromContext(context); + + final FSNamesystem namesystem = nn.getNamesystem(); + final BlockManager bm = namesystem.getBlockManager(); + final int totalDatanodes = + namesystem.getNumberOfDatanodes(DatanodeReportType.LIVE); + NamenodeFsck fsck = new NamenodeFsck(conf, nn, + bm.getDatanodeManager().getNetworkTopology(), pmap, out, + totalDatanodes, remoteAddress); + String auditSource = fsck.getAuditSource(); + boolean success = false; + try { + fsck.fsck(); + success = true; + } finally { + namesystem.logFsckEvent(success, auditSource, remoteAddress); } + return null; }); } catch (InterruptedException e) { response.sendError(400, e.getMessage()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/HdfsAuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/HdfsAuditLogger.java index 0a355d0ec3e77..5c0a34ce76582 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/HdfsAuditLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/HdfsAuditLogger.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import java.net.InetAddress; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; @@ -26,6 +24,8 @@ import org.apache.hadoop.ipc.CallerContext; import 
org.apache.hadoop.security.UserGroupInformation; +import java.net.InetAddress; + /** * Extension of {@link AuditLogger}. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java index 6b29b33f3fd0c..8e417fe43aa5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java @@ -17,27 +17,21 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import java.io.PrintStream; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; import org.apache.hadoop.hdfs.server.blockmanagement.BlockUnderConstructionFeature; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference; import 
org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount; import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName; @@ -46,9 +40,14 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.ChunkedArrayList; import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.List; +import java.util.Map; /** * We keep an in-memory representation of the file/block hierarchy. @@ -225,6 +224,27 @@ public INodeAttributes getSnapshotINode(final int snapshotId) { return this; } + /** Is this inode in the current state? */ + public boolean isInCurrentState() { + if (isRoot()) { + return true; + } + final INodeDirectory parentDir = getParent(); + if (parentDir == null) { + return false; // this inode is only referenced in snapshots + } + if (!parentDir.isInCurrentState()) { + return false; + } + final INode child = parentDir.getChild(getLocalNameBytes(), + Snapshot.CURRENT_STATE_ID); + if (this == child) { + return true; + } + return child != null && child.isReference() && + this.equals(child.asReference().getReferredINode()); + } + /** Is this inode in the latest snapshot? */ public final boolean isInLatestSnapshot(final int latestSnapshotId) { if (latestSnapshotId == Snapshot.CURRENT_STATE_ID || @@ -234,6 +254,8 @@ public final boolean isInLatestSnapshot(final int latestSnapshotId) { // if parent is a reference node, parent must be a renamed node. We can // stop the check at the reference node. if (parent != null && parent.isReference()) { + // TODO: Is it a bug to return true? + // Some ancestor nodes may not be in the latest snapshot. 
return true; } final INodeDirectory parentDir = getParent(); @@ -318,6 +340,16 @@ public boolean isFile() { return false; } + /** + * Check if this inode itself has a storage policy set. + */ + public boolean isSetStoragePolicy() { + if (isSymlink()) { + return false; + } + return getLocalStoragePolicyID() != HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED; + } + /** Cast this inode to an {@link INodeFile}. */ public INodeFile asFile() { throw new IllegalStateException("Current inode is not a file: " @@ -806,7 +838,7 @@ static boolean isValidAbsolutePath(final String path){ return path != null && path.startsWith(Path.SEPARATOR); } - private static void checkAbsolutePath(final String path) { + static void checkAbsolutePath(final String path) { if (!isValidAbsolutePath(path)) { throw new AssertionError("Absolute path required, but got '" + path + "'"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeAttributeProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeAttributeProvider.java index 63c5b46b2fbf0..e83c962a4a845 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeAttributeProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeAttributeProvider.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java index 
3070957e3deeb..657db8b7974a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java @@ -40,8 +40,8 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.util.ReadOnlyList; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.security.AccessControlException; import static org.apache.hadoop.hdfs.protocol.HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED; @@ -544,7 +544,7 @@ public boolean removeChild(final INode child) { } final INode removed = children.remove(i); - Preconditions.checkState(removed == child); + Preconditions.checkState(removed.equals(child)); return true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java index 240aa15ee5fb5..5e5c4b4b81fb7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.util.EnumCounters; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The attributes of an inode. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index 67c86b375780d..beb71e3a3098d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -57,8 +57,8 @@ import org.apache.hadoop.util.StringUtils; import static org.apache.hadoop.io.erasurecode.ErasureCodeConstants.REPLICATION_POLICY_ID; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** I-node for closed file. */ @InterfaceAudience.Private diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java index bc273d28d7f99..f35949fdcdbed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java @@ -26,7 +26,7 @@ import org.apache.hadoop.util.GSet; import org.apache.hadoop.util.LightWeightGSet; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Storing all the {@link INode}s and maintaining the mapping between INode ID diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java index 8de0ed6d5de22..ce37f0afa2dbc 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java @@ -29,7 +29,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.security.AccessControlException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java index b7d2f2c1e5abc..cd3f842d2641b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hdfs.util.LongBitFormat; import org.apache.hadoop.util.LightWeightGSet.LinkedElement; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * {@link INode} with additional fields including id, name, permission, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java index f072220677733..c2cdd48d4952a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java @@ -27,7 +27,7 @@ import 
org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID; import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.ID_INTEGER_COMPARATOR; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java index 91f24dd1137f2..54825d8a8a261 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.security.SecurityUtil; +import org.eclipse.jetty.server.Response; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -63,8 +64,8 @@ import org.apache.hadoop.util.ServletUtil; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * This class is used in Namesystem's jetty to retrieve/upload a file @@ -99,12 +100,38 @@ public class ImageServlet extends HttpServlet { "recent.image.check.enabled"; public static final boolean RECENT_IMAGE_CHECK_ENABLED_DEFAULT = true; + /* + * Specify a relaxation for the time delta check, the relaxation is to account + * for the scenario that there are chances that minor time 
difference (e.g. + * due to image upload delay, or minor machine clock skew) can cause ANN to + * reject a fsImage too aggressively. + */ + private static double recentImageCheckTimePrecision = 0.75; + + @VisibleForTesting + static void setRecentImageCheckTimePrecision(double ratio) { + recentImageCheckTimePrecision = ratio; + } + + private FSImage getAndValidateFSImage(ServletContext context, + final HttpServletResponse response) + throws IOException { + final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context); + if (nnImage == null) { + String errorMsg = "NameNode initialization not yet complete. " + + "FSImage has not been set in the NameNode."; + sendError(response, HttpServletResponse.SC_FORBIDDEN, errorMsg); + throw new IOException(errorMsg); + } + return nnImage; + } + @Override public void doGet(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException { try { final ServletContext context = getServletContext(); - final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context); + final FSImage nnImage = getAndValidateFSImage(context, response); final GetImageParams parsedParams = new GetImageParams(request, response); final Configuration conf = (Configuration) context .getAttribute(JspHelper.CURRENT_CONF); @@ -192,7 +219,7 @@ private void serveFile(File file) throws IOException { } catch (Throwable t) { String errMsg = "GetImage failed. 
" + StringUtils.stringifyException(t); - response.sendError(HttpServletResponse.SC_GONE, errMsg); + sendError(response, HttpServletResponse.SC_GONE, errMsg); throw new IOException(errMsg); } finally { response.getOutputStream().close(); @@ -208,7 +235,7 @@ private void validateRequest(ServletContext context, Configuration conf, conf)) { String errorMsg = "Only Namenode, Secondary Namenode, and administrators may access " + "this servlet"; - response.sendError(HttpServletResponse.SC_FORBIDDEN, errorMsg); + sendError(response, HttpServletResponse.SC_FORBIDDEN, errorMsg); LOG.warn("Received non-NN/SNN/administrator request for image or edits from " + request.getUserPrincipal().getName() + " at " @@ -221,7 +248,7 @@ private void validateRequest(ServletContext context, Configuration conf, && !myStorageInfoString.equals(theirStorageInfoString)) { String errorMsg = "This namenode has storage info " + myStorageInfoString + " but the secondary expected " + theirStorageInfoString; - response.sendError(HttpServletResponse.SC_FORBIDDEN, errorMsg); + sendError(response, HttpServletResponse.SC_FORBIDDEN, errorMsg); LOG.warn("Received an invalid request file transfer request " + "from a secondary with storage info " + theirStorageInfoString); throw new IOException(errorMsg); @@ -511,7 +538,7 @@ protected void doPut(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException { try { ServletContext context = getServletContext(); - final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context); + final FSImage nnImage = getAndValidateFSImage(context, response); final Configuration conf = (Configuration) getServletContext() .getAttribute(JspHelper.CURRENT_CONF); final PutImageParams parsedParams = new PutImageParams(request, response, @@ -552,7 +579,7 @@ public Void run() throws Exception { // we need a different response type here so the client can differentiate this // from the failure to upload due to (1) security, or (2) 
other checkpoints already // present - response.sendError(HttpServletResponse.SC_EXPECTATION_FAILED, + sendError(response, HttpServletResponse.SC_EXPECTATION_FAILED, "Nameode "+request.getLocalAddr()+" is currently not in a state which can " + "accept uploads of new fsimages. State: "+state); return null; @@ -567,7 +594,7 @@ public Void run() throws Exception { // if the node is attempting to upload an older transaction, we ignore it SortedSet larger = currentlyDownloadingCheckpoints.tailSet(imageRequest); if (larger.size() > 0) { - response.sendError(HttpServletResponse.SC_CONFLICT, + sendError(response, HttpServletResponse.SC_CONFLICT, "Another checkpointer is already in the process of uploading a" + " checkpoint made up to transaction ID " + larger.last()); return null; @@ -575,7 +602,7 @@ public Void run() throws Exception { //make sure no one else has started uploading one if (!currentlyDownloadingCheckpoints.add(imageRequest)) { - response.sendError(HttpServletResponse.SC_CONFLICT, + sendError(response, HttpServletResponse.SC_CONFLICT, "Either current namenode is checkpointing or another" + " checkpointer is already in the process of " + "uploading a checkpoint made at transaction ID " @@ -592,6 +619,9 @@ public Void run() throws Exception { long checkpointPeriod = conf.getTimeDuration(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT, TimeUnit.SECONDS); + checkpointPeriod = Math.round( + checkpointPeriod * recentImageCheckTimePrecision); + long checkpointTxnCount = conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); @@ -612,21 +642,24 @@ public Void run() throws Exception { // a new fsImage // 1. most recent image's txid is too far behind // 2. last checkpoint time was too old - response.sendError(HttpServletResponse.SC_CONFLICT, - "Most recent checkpoint is neither too far behind in " - + "txid, nor too old. 
New txnid cnt is " - + (txid - lastCheckpointTxid) - + ", expecting at least " + checkpointTxnCount - + " unless too long since last upload."); + String message = "Rejecting a fsimage due to small time delta " + + "and txnid delta. Time since previous checkpoint is " + + timeDelta + " expecting at least " + checkpointPeriod + + " txnid delta since previous checkpoint is " + + (txid - lastCheckpointTxid) + " expecting at least " + + checkpointTxnCount; + LOG.info(message); + sendError(response, HttpServletResponse.SC_CONFLICT, message); return null; } try { if (nnImage.getStorage().findImageFile(nnf, txid) != null) { - response.sendError(HttpServletResponse.SC_CONFLICT, - "Either current namenode has checkpointed or " - + "another checkpointer already uploaded an " - + "checkpoint for txid " + txid); + String message = "Either current namenode has checkpointed or " + + "another checkpointer already uploaded an " + + "checkpoint for txid " + txid; + LOG.info(message); + sendError(response, HttpServletResponse.SC_CONFLICT, message); return null; } @@ -663,11 +696,20 @@ public Void run() throws Exception { }); } catch (Throwable t) { String errMsg = "PutImage failed. 
" + StringUtils.stringifyException(t); - response.sendError(HttpServletResponse.SC_GONE, errMsg); + sendError(response, HttpServletResponse.SC_GONE, errMsg); throw new IOException(errMsg); } } + private void sendError(HttpServletResponse response, int code, String message) + throws IOException { + if (response instanceof Response) { + ((Response)response).setStatusWithReason(code, message); + } + + response.sendError(code, message); + } + /* * Params required to handle put image request */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java index 8a54c8a775945..ba40da4d843b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.inotify.Event; import org.apache.hadoop.hdfs.inotify.EventBatch; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index a5df9f53b9d97..7dbade676fdb4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.INVALID_TXID; 
import static org.apache.hadoop.util.ExitUtil.terminate; import java.io.IOException; @@ -24,8 +25,10 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.PriorityQueue; import java.util.SortedSet; import java.util.concurrent.CopyOnWriteArrayList; @@ -38,14 +41,10 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableListMultimap; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimaps; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; /** * Manages a collection of Journals. None of the methods are synchronized, it is @@ -188,9 +187,11 @@ public boolean isShared() { final int minimumRedundantJournals; private boolean closed; - + private long lastJournalledTxId; + JournalSet(int minimumRedundantResources) { this.minimumRedundantJournals = minimumRedundantResources; + lastJournalledTxId = INVALID_TXID; } @Override @@ -440,6 +441,16 @@ private class JournalSetOutputStream extends EditLogOutputStream { super(); } + /** + * Get the last txId journalled in the stream. + * The txId is recorded when FSEditLogOp is written to the journal. + * JournalSet tracks the txId uniformly for all underlying streams. 
+ */ + @Override + public long getLastJournalledTxId() { + return lastJournalledTxId; + } + @Override public void write(final FSEditLogOp op) throws IOException { @@ -451,6 +462,10 @@ public void apply(JournalAndStream jas) throws IOException { } } }, "write op"); + + assert lastJournalledTxId < op.txid : "TxId order violation for op=" + + op + ", lastJournalledTxId=" + lastJournalledTxId; + lastJournalledTxId = op.txid; } @Override @@ -634,7 +649,7 @@ public void apply(JournalAndStream jas) throws IOException { */ public synchronized RemoteEditLogManifest getEditLogManifest(long fromTxId) { // Collect RemoteEditLogs available from each FileJournalManager - List allLogs = Lists.newArrayList(); + List allLogs = new ArrayList<>(); for (JournalAndStream j : journals) { if (j.getManager() instanceof FileJournalManager) { FileJournalManager fjm = (FileJournalManager)j.getManager(); @@ -645,15 +660,17 @@ public synchronized RemoteEditLogManifest getEditLogManifest(long fromTxId) { } } } - // Group logs by their starting txid - ImmutableListMultimap logsByStartTxId = - Multimaps.index(allLogs, RemoteEditLog.GET_START_TXID); + final Map> logsByStartTxId = new HashMap<>(); + allLogs.forEach(input -> { + long key = RemoteEditLog.GET_START_TXID.apply(input); + logsByStartTxId.computeIfAbsent(key, k-> new ArrayList<>()).add(input); + }); long curStartTxId = fromTxId; - - List logs = Lists.newArrayList(); + List logs = new ArrayList<>(); while (true) { - ImmutableList logGroup = logsByStartTxId.get(curStartTxId); + List logGroup = + logsByStartTxId.getOrDefault(curStartTxId, Collections.emptyList()); if (logGroup.isEmpty()) { // we have a gap in logs - for example because we recovered some old // storage directory with ancient logs. 
Clear out any logs we've diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index c3624ce303385..15c940aec618f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -23,21 +23,19 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.NavigableSet; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -52,8 +50,8 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.util.Daemon; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,21 +90,11 @@ public class LeaseManager { private long lastHolderUpdateTime; private String internalLeaseHolder; + // // Used for handling lock-leases // Mapping: leaseHolder -> Lease - private final SortedMap leases = new TreeMap<>(); - // Set of: Lease - 
private final NavigableSet sortedLeases = new TreeSet<>( - new Comparator() { - @Override - public int compare(Lease o1, Lease o2) { - if (o1.getLastUpdate() != o2.getLastUpdate()) { - return Long.signum(o1.getLastUpdate() - o2.getLastUpdate()); - } else { - return o1.holder.compareTo(o2.holder); - } - } - }); + // + private final HashMap leases = new HashMap<>(); // INodeID -> Lease private final TreeMap leasesById = new TreeMap<>(); @@ -312,8 +300,13 @@ public BatchedListEntries getUnderConstructionFiles( Iterator inodeIdIterator = inodeIds.iterator(); while (inodeIdIterator.hasNext()) { Long inodeId = inodeIdIterator.next(); - final INodeFile inodeFile = - fsnamesystem.getFSDirectory().getInode(inodeId).asFile(); + INode ucFile = fsnamesystem.getFSDirectory().getInode(inodeId); + if (ucFile == null) { + //probably got deleted + continue; + } + + final INodeFile inodeFile = ucFile.asFile(); if (!inodeFile.isUnderConstruction()) { LOG.warn("The file {} is not under construction but has lease.", inodeFile.getFullPathName()); @@ -344,7 +337,7 @@ public BatchedListEntries getUnderConstructionFiles( /** @return the number of leases currently in the system */ @VisibleForTesting public synchronized int countLease() { - return sortedLeases.size(); + return leases.size(); } /** @return the number of paths contained in all leases */ @@ -360,7 +353,6 @@ synchronized Lease addLease(String holder, long inodeId) { if (lease == null) { lease = new Lease(holder); leases.put(holder, lease); - sortedLeases.add(lease); } else { renewLease(lease); } @@ -386,9 +378,8 @@ private synchronized void removeLease(Lease lease, long inodeId) { } if (!lease.hasFiles()) { - leases.remove(lease.holder); - if (!sortedLeases.remove(lease)) { - LOG.error("{} not found in sortedLeases", lease); + if (leases.remove(lease.holder) == null) { + LOG.error("{} not found", lease); } } } @@ -407,7 +398,6 @@ synchronized void removeLease(String holder, INodeFile src) { } synchronized void removeAllLeases() 
{ - sortedLeases.clear(); leasesById.clear(); leases.clear(); } @@ -430,11 +420,10 @@ synchronized Lease reassignLease(Lease lease, INodeFile src, synchronized void renewLease(String holder) { renewLease(getLease(holder)); } + synchronized void renewLease(Lease lease) { if (lease != null) { - sortedLeases.remove(lease); lease.renew(); - sortedLeases.add(lease); } } @@ -458,10 +447,10 @@ class Lease { private final String holder; private long lastUpdate; private final HashSet files = new HashSet<>(); - + /** Only LeaseManager object can create a lease */ - private Lease(String holder) { - this.holder = holder; + private Lease(String h) { + this.holder = h; renew(); } /** Only LeaseManager object can renew a lease */ @@ -474,6 +463,10 @@ public boolean expiredHardLimit() { return monotonicNow() - lastUpdate > hardLimit; } + public boolean expiredHardLimit(long now) { + return now - lastUpdate > hardLimit; + } + /** @return true if the Soft Limit Timer has expired */ public boolean expiredSoftLimit() { return monotonicNow() - lastUpdate > softLimit; @@ -496,7 +489,7 @@ public String toString() { public int hashCode() { return holder.hashCode(); } - + private Collection getFiles() { return Collections.unmodifiableCollection(files); } @@ -515,6 +508,17 @@ public void setLeasePeriod(long softLimit, long hardLimit) { this.softLimit = softLimit; this.hardLimit = hardLimit; } + + private synchronized Collection getExpiredCandidateLeases() { + final long now = Time.monotonicNow(); + Collection expired = new HashSet<>(); + for (Lease lease : leases.values()) { + if (lease.expiredHardLimit(now)) { + expired.add(lease); + } + } + return expired; + } /****************************************************** * Monitor checks for leases that have expired, @@ -529,10 +533,19 @@ public void run() { for(; shouldRunMonitor && fsnamesystem.isRunning(); ) { boolean needSync = false; try { + // sleep now to avoid infinite loop if an exception was thrown. 
+ Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs()); + + // pre-filter the leases w/o the fsn lock. + Collection candidates = getExpiredCandidateLeases(); + if (candidates.isEmpty()) { + continue; + } + fsnamesystem.writeLockInterruptibly(); try { if (!fsnamesystem.isInSafeMode()) { - needSync = checkLeases(); + needSync = checkLeases(candidates); } } finally { fsnamesystem.writeUnlock("leaseManager"); @@ -541,8 +554,6 @@ public void run() { fsnamesystem.getEditLog().logSync(); } } - - Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs()); } catch(InterruptedException ie) { LOG.debug("{} is interrupted", name, ie); } catch(Throwable e) { @@ -557,17 +568,22 @@ public void run() { */ @VisibleForTesting synchronized boolean checkLeases() { + return checkLeases(getExpiredCandidateLeases()); + } + + private synchronized boolean checkLeases(Collection leasesToCheck) { boolean needSync = false; assert fsnamesystem.hasWriteLock(); long start = monotonicNow(); - - while(!sortedLeases.isEmpty() && - sortedLeases.first().expiredHardLimit() - && !isMaxLockHoldToReleaseLease(start)) { - Lease leaseToCheck = sortedLeases.first(); + for (Lease leaseToCheck : leasesToCheck) { + if (isMaxLockHoldToReleaseLease(start)) { + break; + } + if (!leaseToCheck.expiredHardLimit(Time.monotonicNow())) { + continue; + } LOG.info("{} has expired hard limit", leaseToCheck); - final List removing = new ArrayList<>(); // need to create a copy of the oldest lease files, because // internalReleaseLease() removes files corresponding to empty files, @@ -629,7 +645,6 @@ synchronized boolean checkLeases() { removeLease(leaseToCheck, id); } } - return needSync; } @@ -644,7 +659,6 @@ private boolean isMaxLockHoldToReleaseLease(long start) { public synchronized String toString() { return getClass().getSimpleName() + "= {" + "\n leases=" + leases - + "\n sortedLeases=" + sortedLeases + "\n leasesById=" + leasesById + "\n}"; } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index 98ae44ede937d..db08ac200b670 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -57,9 +57,9 @@ import org.apache.hadoop.util.Time; import org.eclipse.jetty.util.ajax.JSON; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * NNStorage is responsible for management of the StorageDirectories used by diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java index aaaaa7210c13b..22be54e5576d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java @@ -37,10 +37,10 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; import org.apache.hadoop.hdfs.util.MD5FileUtils; -import com.google.common.base.Preconditions; -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; /** * The NNStorageRetentionManager is responsible for inspecting the storage diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java index 9cca97ab74b32..8086b60637dae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.common.StorageInfo; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; public abstract class NNUpgradeUtil { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 74757e563a64d..62a35c201ba2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -17,11 +17,11 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import java.util.Set; import org.apache.commons.logging.Log; @@ -87,9 +87,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol; -import org.apache.hadoop.tracing.TraceAdminProtocol; import org.apache.hadoop.tracing.TraceUtils; -import org.apache.hadoop.tracing.TracerConfigurationManager; import org.apache.hadoop.util.ExitUtil.ExitException; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.JvmPauseMonitor; @@ -98,7 +96,9 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.util.GcTimeMonitor; import org.apache.hadoop.util.GcTimeMonitor.Builder; -import org.apache.htrace.core.Tracer; +import org.apache.hadoop.tracing.Tracer; +import org.apache.hadoop.util.Timer; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,8 +122,13 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_DEFAULT; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_DEFAULT; @@ -186,6 +191,16 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_ENABLE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_PLACEMENT_EC_CLASSNAME_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK_DEFAULT; import static org.apache.hadoop.util.ExitUtil.terminate; import static org.apache.hadoop.util.ToolRunner.confirmPrompt; @@ -326,7 +341,15 @@ public enum OperationCategory { DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_KEY, 
DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION, DFS_BLOCK_REPLICATOR_CLASSNAME_KEY, - DFS_BLOCK_PLACEMENT_EC_CLASSNAME_KEY)); + DFS_BLOCK_PLACEMENT_EC_CLASSNAME_KEY, + DFS_IMAGE_PARALLEL_LOAD_KEY, + DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY, + DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY, + DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY, + DFS_BLOCK_INVALIDATE_LIMIT_KEY, + DFS_DATANODE_PEER_STATS_ENABLED_KEY, + DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT, + DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK)); private static final String USAGE = "Usage: hdfs namenode [" + StartupOption.BACKUP.getName() + "] | \n\t[" @@ -371,8 +394,6 @@ public long getProtocolVersion(String protocol, return RefreshCallQueueProtocol.versionID; } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ return GetUserMappingsProtocol.versionID; - } else if (protocol.equals(TraceAdminProtocol.class.getName())){ - return TraceAdminProtocol.versionID; } else { throw new IOException("Unknown protocol to name node: " + protocol); } @@ -384,6 +405,7 @@ public long getProtocolVersion(String protocol, */ @Deprecated public static final int DEFAULT_PORT = DFS_NAMENODE_RPC_PORT_DEFAULT; + public static final String FS_HDFS_IMPL_KEY = "fs.hdfs.impl"; public static final Logger LOG = LoggerFactory.getLogger(NameNode.class.getName()); public static final Logger stateChangeLog = @@ -426,7 +448,6 @@ public long getProtocolVersion(String protocol, private GcTimeMonitor gcTimeMonitor; private ObjectName nameNodeStatusBeanName; protected final Tracer tracer; - protected final TracerConfigurationManager tracerConfigurationManager; ScheduledThreadPoolExecutor metricsLoggerTimer; /** @@ -993,8 +1014,6 @@ protected NameNode(Configuration conf, NamenodeRole role) this.tracer = new Tracer.Builder("NameNode"). conf(TraceUtils.wrapHadoopConf(NAMENODE_HTRACE_PREFIX, conf)). 
build(); - this.tracerConfigurationManager = - new TracerConfigurationManager(NAMENODE_HTRACE_PREFIX, conf); this.role = role; String nsId = getNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); @@ -1245,8 +1264,9 @@ private static boolean format(Configuration conf, boolean force, LOG.info("Formatting using clusterid: {}", clusterId); FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat); + FSNamesystem fsn = null; try { - FSNamesystem fsn = new FSNamesystem(conf, fsImage); + fsn = new FSNamesystem(conf, fsImage); fsImage.getEditLog().initJournalsForWrite(); // Abort NameNode format if reformat is disabled and if @@ -1271,8 +1291,14 @@ private static boolean format(Configuration conf, boolean force, fsImage.format(fsn, clusterId, force); } catch (IOException ioe) { LOG.warn("Encountered exception during format", ioe); - fsImage.close(); throw ioe; + } finally { + if (fsImage != null) { + fsImage.close(); + } + if (fsn != null) { + fsn.close(); + } } return false; } @@ -1828,7 +1854,7 @@ public static void main(String argv[]) throws Exception { } } - synchronized void monitorHealth() + synchronized void monitorHealth() throws HealthCheckFailedException, AccessControlException { namesystem.checkSuperuserPrivilege(); if (!haEnabled) { @@ -1852,7 +1878,7 @@ synchronized void monitorHealth() } } - synchronized void transitionToActive() + synchronized void transitionToActive() throws ServiceFailedException, AccessControlException { namesystem.checkSuperuserPrivilege(); if (!haEnabled) { @@ -2187,6 +2213,19 @@ protected String reconfigurePropertyImpl(String property, String newVal) .equals(DFS_BLOCK_PLACEMENT_EC_CLASSNAME_KEY)) { reconfBlockPlacementPolicy(); return newVal; + } else if (property.equals(DFS_IMAGE_PARALLEL_LOAD_KEY)) { + return reconfigureParallelLoad(newVal); + } else if (property.equals(DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY) || (property.equals( + 
DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY)) || (property.equals( + DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY)) || (property.equals( + DFS_DATANODE_PEER_STATS_ENABLED_KEY))) { + return reconfigureSlowNodesParameters(datanodeManager, property, newVal); + } else if (property.equals(DFS_BLOCK_INVALIDATE_LIMIT_KEY)) { + return reconfigureBlockInvalidateLimit(datanodeManager, property, newVal); + } else if (property.equals(DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT) || + (property.equals(DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK))) { + return reconfigureDecommissionBackoffMonitorParameters(datanodeManager, property, + newVal); } else { throw new ReconfigurationException(property, newVal, getConf().get( property)); @@ -2219,7 +2258,7 @@ private String reconfReplicationParameters(final String newVal, newSetting = bm.getBlocksReplWorkMultiplier(); } else { throw new IllegalArgumentException("Unexpected property " + - property + "in reconfReplicationParameters"); + property + " in reconfReplicationParameters"); } LOG.info("RECONFIGURE* changed {} to {}", property, newSetting); return String.valueOf(newSetting); @@ -2362,6 +2401,125 @@ String reconfigureSPSModeEvent(String newVal, String property) return newVal; } + String reconfigureParallelLoad(String newVal) { + boolean enableParallelLoad; + if (newVal == null) { + enableParallelLoad = DFS_IMAGE_PARALLEL_LOAD_DEFAULT; + } else { + enableParallelLoad = Boolean.parseBoolean(newVal); + } + FSImageFormatProtobuf.refreshParallelSaveAndLoad(enableParallelLoad); + return Boolean.toString(enableParallelLoad); + } + + String reconfigureSlowNodesParameters(final DatanodeManager datanodeManager, + final String property, final String newVal) throws ReconfigurationException { + BlockManager bm = namesystem.getBlockManager(); + namesystem.writeLock(); + String result; + try { + switch (property) { + case DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY: { + boolean enable = 
(newVal == null ? + DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_DEFAULT : + Boolean.parseBoolean(newVal)); + result = Boolean.toString(enable); + datanodeManager.setAvoidSlowDataNodesForReadEnabled(enable); + break; + } + case DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY: { + boolean enable = (newVal == null ? + DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT : + Boolean.parseBoolean(newVal)); + result = Boolean.toString(enable); + bm.setExcludeSlowNodesEnabled(enable); + break; + } + case DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY: { + int maxSlowpeerCollectNodes = (newVal == null ? + DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_DEFAULT : + Integer.parseInt(newVal)); + result = Integer.toString(maxSlowpeerCollectNodes); + datanodeManager.setMaxSlowpeerCollectNodes(maxSlowpeerCollectNodes); + break; + } + case DFS_DATANODE_PEER_STATS_ENABLED_KEY: { + Timer timer = new Timer(); + if (newVal != null && !newVal.equalsIgnoreCase("true") && !newVal.equalsIgnoreCase( + "false")) { + throw new IllegalArgumentException(newVal + " is not boolean value"); + } + final boolean peerStatsEnabled = newVal == null ? 
+ DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT : + Boolean.parseBoolean(newVal); + result = Boolean.toString(peerStatsEnabled); + datanodeManager.initSlowPeerTracker(getConf(), timer, peerStatsEnabled); + break; + } + default: { + throw new IllegalArgumentException( + "Unexpected property " + property + " in reconfigureSlowNodesParameters"); + } + } + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get( + property), e); + } finally { + namesystem.writeUnlock(); + } + } + + private String reconfigureBlockInvalidateLimit(final DatanodeManager datanodeManager, + final String property, final String newVal) throws ReconfigurationException { + namesystem.writeLock(); + try { + if (newVal == null) { + datanodeManager.setBlockInvalidateLimit(DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT); + } else { + datanodeManager.setBlockInvalidateLimit(Integer.parseInt(newVal)); + } + final String updatedBlockInvalidateLimit = + String.valueOf(datanodeManager.getBlockInvalidateLimit()); + LOG.info("RECONFIGURE* changed blockInvalidateLimit to {}", updatedBlockInvalidateLimit); + return updatedBlockInvalidateLimit; + } catch (NumberFormatException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } finally { + namesystem.writeUnlock(); + } + } + + private String reconfigureDecommissionBackoffMonitorParameters( + final DatanodeManager datanodeManager, final String property, final String newVal) + throws ReconfigurationException { + String newSetting = null; + try { + if (property.equals(DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT)) { + int pendingRepLimit = (newVal == null ? 
+ DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT_DEFAULT : + Integer.parseInt(newVal)); + datanodeManager.getDatanodeAdminManager().refreshPendingRepLimit(pendingRepLimit, + DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT); + newSetting = String.valueOf(datanodeManager.getDatanodeAdminManager().getPendingRepLimit()); + } else if (property.equals( + DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK)) { + int blocksPerLock = (newVal == null ? + DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK_DEFAULT : + Integer.parseInt(newVal)); + datanodeManager.getDatanodeAdminManager().refreshBlocksPerLock(blocksPerLock, + DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK); + newSetting = String.valueOf(datanodeManager.getDatanodeAdminManager().getBlocksPerLock()); + } + LOG.info("RECONFIGURE* changed reconfigureDecommissionBackoffMonitorParameters {} to {}", + property, newSetting); + return newSetting; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + @Override // ReconfigurableBase protected Configuration getNewConf() { return new HdfsConfiguration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java index 1a3b578510b8f..c05398a31cec3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java @@ -27,7 +27,7 @@ import javax.servlet.ServletContext; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol; @@ -166,7 +166,7 @@ void start() throws IOException { httpServer.setAttribute(NAMENODE_ATTRIBUTE_KEY, nn); httpServer.setAttribute(JspHelper.CURRENT_CONF, conf); - setupServlets(httpServer, conf); + setupServlets(httpServer); httpServer.start(); int connIdx = 0; @@ -243,7 +243,7 @@ void setAliasMap(InMemoryAliasMap aliasMap) { httpServer.setAttribute(ALIASMAP_ATTRIBUTE_KEY, aliasMap); } - private static void setupServlets(HttpServer2 httpServer, Configuration conf) { + private static void setupServlets(HttpServer2 httpServer) { httpServer.addInternalServlet("startupProgress", StartupProgressServlet.PATH_SPEC, StartupProgressServlet.class); httpServer.addInternalServlet("fsck", "/fsck", FsckServlet.class, @@ -253,6 +253,8 @@ private static void setupServlets(HttpServer2 httpServer, Configuration conf) { httpServer.addInternalServlet(IsNameNodeActiveServlet.SERVLET_NAME, IsNameNodeActiveServlet.PATH_SPEC, IsNameNodeActiveServlet.class); + httpServer.addInternalServlet(NetworkTopologyServlet.SERVLET_NAME, + NetworkTopologyServlet.PATH_SPEC, NetworkTopologyServlet.class); } static FSImage getFsImageFromContext(ServletContext context) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java index 297ca74c5e111..bcb3714268aa2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java @@ -89,7 +89,8 @@ public enum Feature implements LayoutFeature { APPEND_NEW_BLOCK(-62, -61, "Support appending to new block"), QUOTA_BY_STORAGE_TYPE(-63, -61, "Support quota for specific storage types"), ERASURE_CODING(-64, -61, "Support erasure 
coding"), - EXPANDED_STRING_TABLE(-65, -61, "Support expanded string table in fsimage"); + EXPANDED_STRING_TABLE(-65, -61, "Support expanded string table in fsimage"), + SNAPSHOT_MODIFICATION_TIME(-66, -61, "Support modification time for snapshot"); private final FeatureInfo info; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java index d0245d80351c2..4cac0feffdfa1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java @@ -24,7 +24,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.Map; - +import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -33,9 +33,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.common.Util; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Collections2; -import com.google.common.base.Predicate; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * @@ -116,18 +114,15 @@ public NameNodeResourceChecker(Configuration conf) throws IOException { Collection extraCheckedVolumes = Util.stringCollectionAsURIs(conf .getTrimmedStringCollection(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_KEY)); - - Collection localEditDirs = Collections2.filter( - FSNamesystem.getNamespaceEditsDirs(conf), - new Predicate() { - @Override - public boolean apply(URI input) { - if (input.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) { - return true; - } - return false; - } - }); + + Collection localEditDirs = + 
FSNamesystem.getNamespaceEditsDirs(conf).stream().filter( + input -> { + if (input.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) { + return true; + } + return false; + }).collect(Collectors.toList()); // Add all the local edits dirs, marking some as required if they are // configured as such. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index fcbd457d7a5e8..badbc6477632b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_IP_PROXY_USERS; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_HANDLER_COUNT_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_HANDLER_RATIO_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_HANDLER_RATIO_KEY; @@ -45,7 +46,9 @@ import java.util.Map; import java.util.Set; -import com.google.common.collect.Lists; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; @@ -127,6 +130,7 @@ import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; import 
org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; +import org.apache.hadoop.hdfs.protocol.XAttrNotFoundException; import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; @@ -169,6 +173,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; @@ -184,7 +189,7 @@ import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RetriableException; import org.apache.hadoop.ipc.RetryCache; @@ -218,16 +223,13 @@ import org.apache.hadoop.tools.proto.GetUserMappingsProtocolProtos.GetUserMappingsProtocolService; import org.apache.hadoop.tools.protocolPB.GetUserMappingsProtocolPB; import org.apache.hadoop.tools.protocolPB.GetUserMappingsProtocolServerSideTranslatorPB; -import org.apache.hadoop.tracing.SpanReceiverInfo; -import org.apache.hadoop.tracing.TraceAdminPB.TraceAdminService; -import org.apache.hadoop.tracing.TraceAdminProtocolPB; -import org.apache.hadoop.tracing.TraceAdminProtocolServerSideTranslatorPB; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.VersionUtil; import org.slf4j.Logger; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import 
org.apache.hadoop.thirdparty.protobuf.BlockingService; +import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import javax.annotation.Nonnull; @@ -269,6 +271,9 @@ public class NameNodeRpcServer implements NamenodeProtocols { private final String defaultECPolicyName; + // Users who can override the client ip + private final String[] ipProxyUsers; + public NameNodeRpcServer(Configuration conf, NameNode nn) throws IOException { this.nn = nn; @@ -279,9 +284,10 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) int handlerCount = conf.getInt(DFS_NAMENODE_HANDLER_COUNT_KEY, DFS_NAMENODE_HANDLER_COUNT_DEFAULT); + ipProxyUsers = conf.getStrings(DFS_NAMENODE_IP_PROXY_USERS); RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); ClientNamenodeProtocolServerSideTranslatorPB clientProtocolServerTranslator = @@ -341,11 +347,6 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) BlockingService reconfigurationPbService = ReconfigurationProtocolService .newReflectiveBlockingService(reconfigurationProtocolXlator); - TraceAdminProtocolServerSideTranslatorPB traceAdminXlator = - new TraceAdminProtocolServerSideTranslatorPB(this); - BlockingService traceAdminService = TraceAdminService - .newReflectiveBlockingService(traceAdminXlator); - InetSocketAddress serviceRpcAddr = nn.getServiceRpcServerAddress(conf); if (serviceRpcAddr != null) { String bindHost = nn.getServiceRpcServerBindHost(conf); @@ -389,8 +390,6 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) genericRefreshService, serviceRpcServer); DFSUtil.addPBProtocol(conf, GetUserMappingsProtocolPB.class, getUserMappingService, serviceRpcServer); - DFSUtil.addPBProtocol(conf, TraceAdminProtocolPB.class, - traceAdminService, serviceRpcServer); // Update the address with the correct port InetSocketAddress listenAddr = serviceRpcServer.getListenerAddress(); @@ -405,7 +404,7 @@ public NameNodeRpcServer(Configuration conf, 
NameNode nn) InetSocketAddress lifelineRpcAddr = nn.getLifelineRpcServerAddress(conf); if (lifelineRpcAddr != null) { RPC.setProtocolEngine(conf, HAServiceProtocolPB.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); String bindHost = nn.getLifelineRpcServerBindHost(conf); if (bindHost == null) { bindHost = lifelineRpcAddr.getHostName(); @@ -493,8 +492,6 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) genericRefreshService, clientRpcServer); DFSUtil.addPBProtocol(conf, GetUserMappingsProtocolPB.class, getUserMappingService, clientRpcServer); - DFSUtil.addPBProtocol(conf, TraceAdminProtocolPB.class, - traceAdminService, clientRpcServer); // set service-level authorization security policy if (serviceAuthEnabled = @@ -542,7 +539,9 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) QuotaByStorageTypeExceededException.class, AclException.class, FSLimitException.PathComponentTooLongException.class, - FSLimitException.MaxDirectoryItemsExceededException.class); + FSLimitException.MaxDirectoryItemsExceededException.class, + DisallowedDatanodeException.class, + XAttrNotFoundException.class); clientRpcServer.addSuppressedLoggingExceptions(StandbyException.class, UnresolvedPathException.class); @@ -662,6 +661,7 @@ public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size, long } checkNNStartup(); namesystem.checkSuperuserPrivilege(); + namesystem.checkNameNodeSafeMode("Cannot execute getBlocks"); return namesystem.getBlocks(datanode, size, minBlockSize); } @@ -863,6 +863,7 @@ public boolean setReplication(String src, short replication) public void unsetStoragePolicy(String src) throws IOException { checkNNStartup(); + stateChangeLog.debug("*DIR* NameNode.unsetStoragePolicy for path: {}", src); namesystem.unsetStoragePolicy(src); } @@ -870,12 +871,15 @@ public void unsetStoragePolicy(String src) public void setStoragePolicy(String src, String policyName) throws IOException { checkNNStartup(); + stateChangeLog.debug("*DIR* 
NameNode.setStoragePolicy for path: {}, " + + "policyName: {}", src, policyName); namesystem.setStoragePolicy(src, policyName); } @Override public BlockStoragePolicy getStoragePolicy(String path) throws IOException { checkNNStartup(); + stateChangeLog.debug("*DIR* NameNode.getStoragePolicy for path: {}", path); return namesystem.getStoragePolicy(path); } @@ -1054,6 +1058,8 @@ public boolean rename(String src, String dst) throws IOException { @Override // ClientProtocol public void concat(String trg, String[] src) throws IOException { checkNNStartup(); + stateChangeLog.debug("*DIR* NameNode.concat: src path {} to" + + " target path {}", Arrays.toString(src), trg); namesystem.checkOperation(OperationCategory.WRITE); CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && cacheEntry.isSuccess()) { @@ -1494,6 +1500,12 @@ public void satisfyStoragePolicy(String src) throws IOException { } } + @Override + public DatanodeInfo[] getSlowDatanodeReport() throws IOException { + checkNNStartup(); + return namesystem.slowDataNodesReport(); + } + @Override // ClientProtocol public void setQuota(String path, long namespaceQuota, long storagespaceQuota, StorageType type) @@ -1624,6 +1636,8 @@ public DatanodeCommand blockReport(final DatanodeRegistration nodeReg, bm.processReport(nodeReg, reports[index].getStorage(), blocks, context)); } + } else { + throw new InvalidBlockReportLeaseException(context.getReportId(), context.getLeaseId()); } } catch (UnregisteredNodeException une) { LOG.debug("Datanode {} is attempting to report but not register yet.", @@ -1892,7 +1906,29 @@ private void verifySoftwareVersion(DatanodeRegistration dnReg) } } - private static String getClientMachine() { + private String getClientMachine() { + if (ipProxyUsers != null) { + // Get the real user (or effective if it isn't a proxy user) + UserGroupInformation user = + UserGroupInformation.getRealUserOrSelf(Server.getRemoteUser()); + if (user != null && + 
ArrayUtils.contains(ipProxyUsers, user.getShortUserName())) { + CallerContext context = CallerContext.getCurrent(); + if (context != null && context.isContextValid()) { + String cc = context.getContext(); + // if the rpc has a caller context of "clientIp:1.2.3.4,CLI", + // return "1.2.3.4" as the client machine. + String key = CallerContext.CLIENT_IP_STR + + CallerContext.Builder.KEY_VALUE_SEPARATOR; + int posn = cc.indexOf(key); + if (posn != -1) { + posn += key.length(); + int end = cc.indexOf(",", posn); + return end == -1 ? cc.substring(posn) : cc.substring(posn, end); + } + } + } + } String clientMachine = Server.getRemoteAddress(); if (clientMachine == null) { //not a RPC client clientMachine = ""; @@ -2259,10 +2295,10 @@ public void setErasureCodingPolicy(String src, String ecPolicyName) try { if (ecPolicyName == null) { ecPolicyName = defaultECPolicyName; - LOG.trace("No policy name is specified, " + + LOG.debug("No policy name is specified, " + "set the default policy name instead"); } - LOG.trace("Set erasure coding policy " + ecPolicyName + " on " + src); + LOG.debug("Set erasure coding policy {} on {}", ecPolicyName, src); namesystem.setErasureCodingPolicy(src, ecPolicyName, cacheEntry != null); success = true; } finally { @@ -2464,27 +2500,6 @@ private EventBatchList getEventBatchList(long syncTxid, long txid, return new EventBatchList(batches, firstSeenTxid, maxSeenTxid, syncTxid); } - @Override // TraceAdminProtocol - public SpanReceiverInfo[] listSpanReceivers() throws IOException { - checkNNStartup(); - namesystem.checkSuperuserPrivilege(); - return nn.tracerConfigurationManager.listSpanReceivers(); - } - - @Override // TraceAdminProtocol - public long addSpanReceiver(SpanReceiverInfo info) throws IOException { - checkNNStartup(); - namesystem.checkSuperuserPrivilege(); - return nn.tracerConfigurationManager.addSpanReceiver(info); - } - - @Override // TraceAdminProtocol - public void removeSpanReceiver(long id) throws IOException { - 
checkNNStartup(); - namesystem.checkSuperuserPrivilege(); - nn.tracerConfigurationManager.removeSpanReceiver(id); - } - @Override // ClientProtocol public ErasureCodingPolicyInfo[] getErasureCodingPolicies() throws IOException { @@ -2513,6 +2528,7 @@ public void unsetErasureCodingPolicy(String src) throws IOException { } boolean success = false; try { + LOG.debug("Unset erasure coding policy on {}", src); namesystem.unsetErasureCodingPolicy(src, cacheEntry != null); success = true; } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeUtils.java index ec1d5104922ec..85254cd840942 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeUtils.java @@ -20,7 +20,7 @@ import javax.annotation.Nullable; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtilClient; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index b36c11a671549..a70bd44fc50c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -87,9 +87,9 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.util.Time; -import org.apache.htrace.core.Tracer; 
+import org.apache.hadoop.tracing.Tracer; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This class provides rudimentary checking of DFS volumes for errors and @@ -155,6 +155,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { private boolean showMaintenanceState = false; private long staleInterval; private Tracer tracer; + private String auditSource; /** * True if we encountered an internal error during FSCK, such as not being @@ -186,7 +187,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { String path = "/"; - private String blockIds = null; + private String[] blockIds = null; // We return back N files that are corrupt; the list of files returned is // ordered by block id; to allow continuation support, pass in the last block @@ -262,11 +263,17 @@ else if (key.equals("replicadetails")) { } else if (key.equals("includeSnapshots")) { this.snapshottableDirs = new ArrayList(); } else if (key.equals("blockId")) { - this.blockIds = pmap.get("blockId")[0]; + this.blockIds = pmap.get("blockId")[0].split(" "); } else if (key.equals("replicate")) { this.doReplicate = true; } } + this.auditSource = (blockIds != null) + ? "blocksIds=" + Arrays.asList(blockIds) : path; + } + + public String getAuditSource() { + return auditSource; } /** @@ -368,18 +375,18 @@ private void printDatanodeReplicaStatus(Block block, /** * Check files on DFS, starting from the indicated path. 
*/ - public void fsck() { + public void fsck() throws AccessControlException { final long startTime = Time.monotonicNow(); try { if(blockIds != null) { - String[] blocks = blockIds.split(" "); + namenode.getNamesystem().checkSuperuserPrivilege(); StringBuilder sb = new StringBuilder(); sb.append("FSCK started by " + UserGroupInformation.getCurrentUser() + " from " + remoteAddress + " at " + new Date()); out.println(sb); sb.append(" for blockIds: \n"); - for (String blk: blocks) { + for (String blk: blockIds) { if(blk == null || !blk.contains(Block.BLOCK_FILE_PREFIX)) { out.println("Incorrect blockId format: " + blk); continue; @@ -389,7 +396,6 @@ public void fsck() { sb.append(blk + "\n"); } LOG.info("{}", sb.toString()); - namenode.getNamesystem().logFsckEvent("/", remoteAddress); out.flush(); return; } @@ -398,7 +404,6 @@ public void fsck() { + " from " + remoteAddress + " for path " + path + " at " + new Date(); LOG.info(msg); out.println(msg); - namenode.getNamesystem().logFsckEvent(path, remoteAddress); if (snapshottableDirs != null) { SnapshottableDirectoryStatus[] snapshotDirs = @@ -568,10 +573,11 @@ private LocatedBlocks getBlockLocations(String path, HdfsFileStatus file) final FSNamesystem fsn = namenode.getNamesystem(); final String operationName = "fsckGetBlockLocations"; FSPermissionChecker.setOperationType(operationName); + FSPermissionChecker pc = fsn.getPermissionChecker(); fsn.readLock(); try { blocks = FSDirStatAndListingOp.getBlockLocations( - fsn.getFSDirectory(), fsn.getPermissionChecker(), + fsn.getFSDirectory(), pc, path, 0, fileLen, false) .blocks; } catch (FileNotFoundException fnfe) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java new file mode 100644 index 0000000000000..c07d596d696da --- /dev/null +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java @@ -0,0 +1,188 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerator; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.net.Node; +import org.apache.hadoop.net.NodeBase; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.util.StringUtils; + +import javax.servlet.ServletContext; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.HttpHeaders; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeSet; + +/** + * A servlet to print out the network topology. 
+ */ +@InterfaceAudience.Private +public class NetworkTopologyServlet extends DfsServlet { + + public static final String SERVLET_NAME = "topology"; + public static final String PATH_SPEC = "/topology"; + + protected static final String FORMAT_JSON = "json"; + protected static final String FORMAT_TEXT = "text"; + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws IOException { + final ServletContext context = getServletContext(); + + String format = parseAcceptHeader(request); + if (FORMAT_TEXT.equals(format)) { + response.setContentType("text/plain; charset=UTF-8"); + } else if (FORMAT_JSON.equals(format)) { + response.setContentType("application/json; charset=UTF-8"); + } + + NameNode nn = NameNodeHttpServer.getNameNodeFromContext(context); + BlockManager bm = nn.getNamesystem().getBlockManager(); + List leaves = bm.getDatanodeManager().getNetworkTopology() + .getLeaves(NodeBase.ROOT); + + try (PrintStream out = new PrintStream( + response.getOutputStream(), false, "UTF-8")) { + printTopology(out, leaves, format); + } catch (Throwable t) { + String errMsg = "Print network topology failed. " + + StringUtils.stringifyException(t); + response.sendError(HttpServletResponse.SC_GONE, errMsg); + throw new IOException(errMsg); + } finally { + response.getOutputStream().close(); + } + } + + /** + * Display each rack and the nodes assigned to that rack, as determined + * by the NameNode, in a hierarchical manner. The nodes and racks are + * sorted alphabetically. 
+ * + * @param stream print stream + * @param leaves leaves nodes under base scope + * @param format the response format + */ + protected void printTopology(PrintStream stream, List leaves, + String format) throws BadFormatException, IOException { + if (leaves.isEmpty()) { + stream.print("No DataNodes"); + return; + } + + // Build a map of rack -> nodes + Map> tree = new HashMap<>(); + for(Node dni : leaves) { + String location = dni.getNetworkLocation(); + String name = dni.getName(); + + tree.putIfAbsent(location, new TreeSet<>()); + tree.get(location).add(name); + } + + // Sort the racks (and nodes) alphabetically, display in order + ArrayList racks = new ArrayList<>(tree.keySet()); + Collections.sort(racks); + + if (FORMAT_JSON.equals(format)) { + printJsonFormat(stream, tree, racks); + } else if (FORMAT_TEXT.equals(format)) { + printTextFormat(stream, tree, racks); + } else { + throw new BadFormatException("Bad format: " + format); + } + } + + protected void printJsonFormat(PrintStream stream, Map> tree, ArrayList racks) throws IOException { + JsonFactory dumpFactory = new JsonFactory(); + JsonGenerator dumpGenerator = dumpFactory.createGenerator(stream); + dumpGenerator.writeStartArray(); + + for(String r : racks) { + dumpGenerator.writeStartObject(); + dumpGenerator.writeFieldName(r); + TreeSet nodes = tree.get(r); + dumpGenerator.writeStartArray(); + + for(String n : nodes) { + dumpGenerator.writeStartObject(); + dumpGenerator.writeStringField("ip", n); + String hostname = NetUtils.getHostNameOfIP(n); + if(hostname != null) { + dumpGenerator.writeStringField("hostname", hostname); + } + dumpGenerator.writeEndObject(); + } + dumpGenerator.writeEndArray(); + dumpGenerator.writeEndObject(); + } + dumpGenerator.writeEndArray(); + dumpGenerator.flush(); + + if (!dumpGenerator.isClosed()) { + dumpGenerator.close(); + } + } + + protected void printTextFormat(PrintStream stream, Map> tree, ArrayList racks) { + for(String r : racks) { + stream.println("Rack: " + r); 
+ TreeSet nodes = tree.get(r); + + for(String n : nodes) { + stream.print(" " + n); + String hostname = NetUtils.getHostNameOfIP(n); + if(hostname != null) { + stream.print(" (" + hostname + ")"); + } + stream.println(); + } + stream.println(); + } + } + + @VisibleForTesting + protected static String parseAcceptHeader(HttpServletRequest request) { + String format = request.getHeader(HttpHeaders.ACCEPT); + return format != null && format.contains(FORMAT_JSON) ? + FORMAT_JSON : FORMAT_TEXT; + } + + public static class BadFormatException extends Exception { + private static final long serialVersionUID = 1L; + + public BadFormatException(String msg) { + super(msg); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaByStorageTypeEntry.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaByStorageTypeEntry.java index 934c3169fc014..fd98ce9ef6146 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaByStorageTypeEntry.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaByStorageTypeEntry.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.util.StringUtils; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaCounts.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaCounts.java index bcb21929c8b9f..00e848f05a768 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaCounts.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/QuotaCounts.java @@ -18,12 +18,11 @@ package 
org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.util.ConstEnumCounters; import org.apache.hadoop.hdfs.util.EnumCounters; -import org.apache.hadoop.hdfs.util.ConstEnumCounters.ConstEnumException; import java.util.function.Consumer; @@ -57,14 +56,10 @@ public class QuotaCounts { */ static > EnumCounters modify(EnumCounters counter, Consumer> action) { - try { - action.accept(counter); - } catch (ConstEnumException cee) { - // We don't call clone here because ConstEnumCounters.clone() will return - // an object of class ConstEnumCounters. We want EnumCounters. + if (counter instanceof ConstEnumCounters) { counter = counter.deepCopyEnumCounter(); - action.accept(counter); } + action.accept(counter); return counter; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java index edda691fbbeac..c35a582d1846c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java @@ -26,8 +26,8 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.io.IOUtils; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Longs; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Longs; import org.apache.hadoop.log.LogThrottlingHelper; import org.apache.hadoop.log.LogThrottlingHelper.LogAction; @@ 
-47,7 +47,7 @@ class RedundantEditLogInputStream extends EditLogInputStream { /** Limit logging about fast forwarding the stream to every 5 seconds max. */ private static final long FAST_FORWARD_LOGGING_INTERVAL_MS = 5000; - private final LogThrottlingHelper fastForwardLoggingHelper = + private static final LogThrottlingHelper FAST_FORWARD_LOGGING_HELPER = new LogThrottlingHelper(FAST_FORWARD_LOGGING_INTERVAL_MS); /** @@ -170,6 +170,7 @@ protected FSEditLogOp nextValidOp() { } return nextOp(); } catch (IOException e) { + LOG.warn("encountered an exception", e); return null; } } @@ -181,7 +182,7 @@ protected FSEditLogOp nextOp() throws IOException { case SKIP_UNTIL: try { if (prevTxId != HdfsServerConstants.INVALID_TXID) { - LogAction logAction = fastForwardLoggingHelper.record(); + LogAction logAction = FAST_FORWARD_LOGGING_HELPER.record(); if (logAction.shouldLog()) { LOG.info("Fast-forwarding stream '" + streams[curIdx].getName() + "' to transaction ID " + (prevTxId + 1) + @@ -228,7 +229,8 @@ protected FSEditLogOp nextOp() throws IOException { "streams are shorter than the current one! The best " + "remaining edit log ends at transaction " + newLast + ", but we thought we could read up to transaction " + - oldLast + ". If you continue, metadata will be lost forever!"); + oldLast + ". 
If you continue, metadata will be lost forever!", + prevException); } LOG.error("Got error reading edit log input stream " + streams[curIdx].getName() + "; failing over to edit log " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java index fd9cbd752750e..b1c5928f8575d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -835,7 +835,7 @@ protected void readUnlock() { } } - private class ZoneTraverseInfo extends TraverseInfo { + private static class ZoneTraverseInfo extends TraverseInfo { private String ezKeyVerName; ZoneTraverseInfo(String ezKeyVerName) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java index 15cfa9278f6b9..f2d09b0627f3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java index 823385a980667..35a7cd2f643cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.util.Canceler; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Context for an ongoing SaveNamespace operation. 
This class diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 1423b308012c0..29cfe4280287b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -29,7 +29,7 @@ import java.security.PrivilegedExceptionAction; import java.util.*; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; @@ -77,8 +77,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.VersionInfo; import javax.management.ObjectName; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SerialNumberMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SerialNumberMap.java index ee6b8eb80ce04..d9a41428b5584 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SerialNumberMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SerialNumberMap.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.server.namenode; import java.util.HashSet; -import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; diff 
--git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java index 82cdcfe69d9be..7ba6d839bea1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java @@ -57,8 +57,8 @@ import org.apache.hadoop.util.Time; import org.apache.http.client.utils.URIBuilder; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.eclipse.jetty.io.EofException; import static org.apache.hadoop.hdfs.server.common.Util.IO_FILE_BUFFER_SIZE; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFeature.java index a7f08780a63ec..11263bb9a01e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFeature.java @@ -18,13 +18,14 @@ package org.apache.hadoop.hdfs.server.namenode; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.hdfs.XAttrHelper; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; /** * Feature for extended attributes. 
@@ -84,6 +85,22 @@ public List getXAttrs() { } } + @Override + public boolean equals(Object o) { + if (o == null) { + return false; + } + if (getClass() != o.getClass()) { + return false; + } + return getXAttrs().equals(((XAttrFeature) o).getXAttrs()); + } + + @Override + public int hashCode() { + return Arrays.hashCode(getXAttrs().toArray()); + } + /** * Get XAttr by name with prefix. * @param prefixedName xAttr name with prefix diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java index 5b5992c7a5894..af1025ab457ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java @@ -25,8 +25,8 @@ import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.hdfs.XAttrHelper; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.apache.hadoop.hdfs.util.LongBitFormat; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java index b6f4f64ca517c..92e5ef1a0b86d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java @@ -25,8 +25,8 @@ import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.security.AccessControlException; -import com.google.common.collect.Lists; -import 
com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java index 0babaaf0c1bc5..3f273cb5e75e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java @@ -70,8 +70,8 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Tool which allows the standby node's storage directories to be bootstrapped diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index d27947b995238..b82fb5b0e41d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -34,8 +34,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import com.google.common.collect.Iterators; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterators; +import 
org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -58,8 +58,8 @@ import static org.apache.hadoop.util.Time.monotonicNow; import static org.apache.hadoop.util.ExitUtil.terminate; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.Time; @@ -298,13 +298,23 @@ public void catchupDuringFailover() throws IOException { SecurityUtil.doAsLoginUser(new PrivilegedExceptionAction() { @Override public Void run() throws Exception { - try { - // It is already under the full name system lock and the checkpointer - // thread is already stopped. No need to acqure any other lock. - doTailEdits(); - } catch (InterruptedException e) { - throw new IOException(e); - } + long editsTailed = 0; + // Fully tail the journal to the end + do { + long startTime = Time.monotonicNow(); + try { + NameNode.getNameNodeMetrics().addEditLogTailInterval( + startTime - lastLoadTimeMs); + // It is already under the name system lock and the checkpointer + // thread is already stopped. No need to acquire any other lock. 
+ editsTailed = doTailEdits(); + } catch (InterruptedException e) { + throw new IOException(e); + } finally { + NameNode.getNameNodeMetrics().addEditLogTailTime( + Time.monotonicNow() - startTime); + } + } while(editsTailed > 0); return null; } }); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/RemoteNameNodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/RemoteNameNodeInfo.java index 9a51190b17607..3db43f01288d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/RemoteNameNodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/RemoteNameNodeInfo.java @@ -24,13 +24,12 @@ import java.util.Collections; import java.util.List; -import com.google.common.base.Objects; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Information about a single remote NameNode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index daa836ac69a5f..1a86f8e82f7dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -19,7 +19,7 @@ import static org.apache.hadoop.util.Time.monotonicNow; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.io.IOException; import java.net.URI; import 
java.net.URL; @@ -48,9 +48,9 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java index 7dbddc2d3ae5d..7e5f108167ccc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java @@ -150,6 +150,12 @@ public interface FSNamesystemMBean { */ public int getNumDecomDeadDataNodes(); + /** + * @return Number of in-service data nodes, where NumInServiceDataNodes = + * NumLiveDataNodes - NumDecomLiveDataNodes - NumInMaintenanceLiveDataNodes + */ + int getNumInServiceLiveDataNodes(); + /** * Number of failed data volumes across all live data nodes. 
* @return number of failed data volumes across all live data nodes diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java index c15cdbdd48e4e..5e318277c8754 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java @@ -85,6 +85,12 @@ public class NameNodeMetrics { MutableGaugeInt blockOpsQueued; @Metric("Number of blockReports and blockReceivedAndDeleted batch processed") MutableCounterLong blockOpsBatched; + @Metric("Number of pending edits") + MutableGaugeInt pendingEditsCount; + @Metric("Number of delete blocks Queued") + MutableGaugeInt deleteBlocksQueued; + @Metric("Number of pending deletion blocks") + MutableGaugeInt pendingDeleteBlocksCount; @Metric("Number of file system operations") public long totalFileOps(){ @@ -332,10 +338,26 @@ public void setBlockOpsQueued(int size) { blockOpsQueued.set(size); } + public void setDeleteBlocksQueued(int size) { + deleteBlocksQueued.set(size); + } + + public void incrPendingDeleteBlocksCount(int size) { + pendingDeleteBlocksCount.incr(size); + } + + public void decrPendingDeleteBlocksCount() { + pendingDeleteBlocksCount.decr(); + } + public void addBlockOpsBatched(int count) { blockOpsBatched.incr(count); } + public void setPendingEditsCount(int size) { + pendingEditsCount.set(size); + } + public void addTransaction(long latency) { transactions.add(latency); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java index 2e8620b649593..df052f171afa8 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The difference of an inode between in two snapshots. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListBySkipList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListBySkipList.java index 705b8d6937b52..dedc1e49d341f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListBySkipList.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListBySkipList.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.snapshot; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.snapshot. 
DirectoryWithSnapshotFeature.DirectoryDiff; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java index b38d8bfe8ce06..234f7b9f85469 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java @@ -47,9 +47,9 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; /** * A directory with this feature is a snapshottable directory, where snapshots @@ -173,7 +173,8 @@ void addSnapshot(Snapshot snapshot) { * Add a snapshot. * @param snapshotRoot Root of the snapshot. * @param name Name of the snapshot. - * @param mtime The snapshot creation time set by Time.now(). 
+ * @param leaseManager + * @param captureOpenFiles * @throws SnapshotException Throw SnapshotException when there is a snapshot * with the same name already exists or snapshot quota exceeds */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java index 4e756c7268ccf..b9b446707a115 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.snapshot; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; import org.apache.hadoop.hdfs.server.namenode.*; @@ -739,19 +739,22 @@ public void cleanDirectory(INode.ReclaimContext reclaimContext, // were created before "prior" will be covered by the later // cleanSubtreeRecursively call. if (priorCreated != null) { - if (currentINode.isLastReference() && - currentINode.getDiffs().getLastSnapshotId() == prior) { - // If this is the last reference of the directory inode and it - // can not be accessed in any of the subsequent snapshots i.e, - // this is the latest snapshot diff and if this is the last - // reference, the created list can be - // destroyed. 
- priorDiff.getChildrenDiff().destroyCreatedList( - reclaimContext, currentINode); - } else { - // we only check the node originally in prior's created list - for (INode cNode : priorDiff.diff.getCreatedUnmodifiable()) { - if (priorCreated.containsKey(cNode)) { + // The nodes in priorCreated must be destroyed if + // (1) this is the last reference, and + // (2) prior is the last snapshot, and + // (3) currentINode is not in the current state. + final boolean destroy = currentINode.isLastReference() + && currentINode.getDiffs().getLastSnapshotId() == prior + && !currentINode.isInCurrentState(); + // we only check the node originally in prior's created list + for (INode cNode : new ArrayList<>(priorDiff. + diff.getCreatedUnmodifiable())) { + if (priorCreated.containsKey(cNode)) { + if (destroy) { + cNode.destroyAndCollectBlocks(reclaimContext); + currentINode.removeChild(cNode); + priorDiff.diff.removeCreated(cNode); + } else { cNode.cleanSubtree(reclaimContext, snapshot, NO_SNAPSHOT_ID); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java index 5f5cd41166a7e..8d05af284b4e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -35,7 +35,7 @@ import java.util.List; import java.util.Map; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.fs.StorageType; @@ -82,7 +82,7 @@ import org.apache.hadoop.hdfs.server.namenode.XAttrFeature; import 
org.apache.hadoop.hdfs.util.EnumCounters; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.thirdparty.protobuf.ByteString; @InterfaceAudience.Private diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java index 7fa7ce74160ca..5263ef357bf36 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java @@ -17,9 +17,10 @@ */ package org.apache.hadoop.hdfs.server.namenode.snapshot; -import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; @@ -156,13 +157,20 @@ public void updateQuotaAndCollectBlocks(INode.ReclaimContext reclaimContext, QuotaCounts oldCounts; if (removed.snapshotINode != null) { oldCounts = new QuotaCounts.Builder().build(); - List allBlocks = new ArrayList(); + // collect all distinct blocks + Set allBlocks = new HashSet(); if (file.getBlocks() != null) { allBlocks.addAll(Arrays.asList(file.getBlocks())); } if (removed.getBlocks() != null) { allBlocks.addAll(Arrays.asList(removed.getBlocks())); } + for (FileDiff diff : diffs) { + BlockInfo[] diffBlocks = diff.getBlocks(); + if (diffBlocks != null) { + allBlocks.addAll(Arrays.asList(diffBlocks)); + } + } for (BlockInfo b: allBlocks) { short replication = b.getReplication(); long blockSize = b.isComplete() ? 
b.getNumBytes() : file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java index 515f164bd8aac..512c90b494159 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java @@ -24,23 +24,24 @@ import java.util.Arrays; import java.util.Comparator; import java.util.Date; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.AclFeature; +import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes; import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext; +import org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature; import org.apache.hadoop.hdfs.server.namenode.FSImageFormat; import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.QuotaCounts; import org.apache.hadoop.hdfs.server.namenode.XAttrFeature; import org.apache.hadoop.hdfs.util.ReadOnlyList; -import com.google.common.base.Predicate; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import org.apache.hadoop.security.AccessControlException; /** Snapshot of a sub-tree in the namesystem. */ @@ -148,21 +149,26 @@ static Snapshot read(DataInput in, FSImageFormat.Loader loader) /** The root directory of the snapshot. 
*/ static public class Root extends INodeDirectory { Root(INodeDirectory other) { - // Always preserve ACL, XAttr. - super(other, false, Lists.newArrayList( - Iterables.filter(Arrays.asList(other.getFeatures()), new Predicate() { - - @Override - public boolean apply(Feature input) { - if (AclFeature.class.isInstance(input) - || XAttrFeature.class.isInstance(input)) { - return true; + // Always preserve ACL, XAttr and Quota. + super(other, false, + Arrays.stream(other.getFeatures()).filter(feature -> + feature instanceof AclFeature + || feature instanceof XAttrFeature + || feature instanceof DirectoryWithQuotaFeature + ).map(feature -> { + if (feature instanceof DirectoryWithQuotaFeature) { + // Return copy if feature is quota because a ref could be updated + final QuotaCounts quota = + ((DirectoryWithQuotaFeature) feature).getSpaceAllowed(); + return new DirectoryWithQuotaFeature.Builder() + .nameSpaceQuota(quota.getNameSpace()) + .storageSpaceQuota(quota.getStorageSpace()) + .typeQuotas(quota.getTypeSpaces()) + .build(); + } else { + return feature; } - return false; - } - - })) - .toArray(new Feature[0])); + }).toArray(Feature[]::new)); } @Override @@ -182,6 +188,18 @@ public ContentSummaryComputationContext computeContentSummary( return computeDirectoryContentSummary(summary, snapshotId); } + @Override + public boolean metadataEquals(INodeDirectoryAttributes other) { + return other != null && getQuotaCounts().equals(other.getQuotaCounts()) + && getPermissionLong() == other.getPermissionLong() + // Acl feature maintains a reference counted map, thereby + // every snapshot copy should point to the same Acl object unless + // there is no change in acl values. + // Reference equals is hence intentional here. 
+ && getAclFeature() == other.getAclFeature() + && Objects.equals(getXAttrFeature(), other.getXAttrFeature()); + } + @Override public String getFullPathName() { return getSnapshotPath(getParent().getFullPathName(), getLocalName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java index ab6f4158c8417..58dd2cf0a3f04 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java @@ -33,8 +33,8 @@ import org.apache.hadoop.hdfs.server.namenode.INodeReference; import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.ChildrenDiff; -import com.google.common.base.Preconditions; -import com.google.common.primitives.SignedBytes; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.primitives.SignedBytes; import org.apache.hadoop.util.ChunkedArrayList; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java index a7960700e6811..4b03c4f0d53a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java @@ -29,7 +29,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeReference; import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.ChildrenDiff; -import 
com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.util.ChunkedArrayList; /** @@ -116,7 +116,7 @@ boolean addDirDiff(long dirId, byte[][] parent, ChildrenDiff diff) { if (lastIndex == -1 || lastIndex >= clist.size()) { final List dlist = diff.getDeletedUnmodifiable(); - int size = dlist.size(); + int size = clist.size(); ListIterator iterator = lastIndex != -1 ? dlist.listIterator(lastIndex - size): dlist.listIterator(); while (iterator.hasNext()) { @@ -130,6 +130,10 @@ boolean addDirDiff(long dirId, byte[][] parent, ChildrenDiff diff) { deletedList.add(e); } else { setLastPath(parent); + // the offset will be set to created list + iterator index in the + // deleted list so that it points to the exact entry in the deleted + // list post checking the created list in the next iteration of rpc + // call setLastIndex(size + iterator.nextIndex()); return false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java index d60a03822e57d..b43c45854bbfd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff; import org.apache.hadoop.hdfs.util.ReadOnlyList; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * A helper class defining static methods for reading/writing snapshot related diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java index 30b98b8e86421..e85da654ce0d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java @@ -35,7 +35,7 @@ import javax.management.ObjectName; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; @@ -57,7 +57,7 @@ import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.metrics2.util.MBeans; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java index df4f0dddb49bb..ec583c710d533 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java @@ -43,7 +43,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A monitor class for checking whether block storage movements attempt diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java index b3f8de97b4f26..3715163a40c07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.sps; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.HashMap; import java.util.LinkedList; @@ -29,7 +30,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A Class to track the block collection IDs (Inode's ID) for which physical @@ -227,15 +228,18 @@ public synchronized void clearQueuesWithNotification() { * ID's to process for satisfy the policy. 
*/ private class SPSPathIdProcessor implements Runnable { + private static final int MAX_RETRY_COUNT = 3; @Override public void run() { LOG.info("Starting SPSPathIdProcessor!."); Long startINode = null; + int retryCount = 0; while (ctxt.isRunning()) { try { if (!ctxt.isInSafeMode()) { if (startINode == null) { + retryCount = 0; startINode = ctxt.getNextSPSPath(); } // else same id will be retried if (startINode == null) { @@ -248,7 +252,12 @@ public void run() { pendingWorkForDirectory.get(startINode); if (dirPendingWorkInfo != null && dirPendingWorkInfo.isDirWorkDone()) { - ctxt.removeSPSHint(startINode); + try { + ctxt.removeSPSHint(startINode); + } catch (FileNotFoundException e) { + // ignore if the file doesn't already exist + startINode = null; + } pendingWorkForDirectory.remove(startINode); } } @@ -268,6 +277,11 @@ public void run() { LOG.info("Interrupted while waiting in SPSPathIdProcessor", t); break; } + retryCount++; + if (retryCount >= MAX_RETRY_COUNT) { + LOG.warn("Skipping this inode {} due to too many retries.", startINode); + startINode = null; + } } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java index 4c04b466789be..77675479bd205 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java @@ -57,8 +57,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Setting storagePolicy on a 
file after the file write will only update the new diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java index 14cf05d4200d2..394ab12d443de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java @@ -31,7 +31,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * This manages satisfy storage policy invoked path ids and expose methods to diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/PhaseTracking.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/PhaseTracking.java index 3f1d9030297d9..b01a4c2845f7e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/PhaseTracking.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/PhaseTracking.java @@ -20,6 +20,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.classification.InterfaceAudience; /** @@ -43,4 +44,15 @@ public PhaseTracking clone() { } return clone; } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("file", file) + .append("size", size) + .append("steps", steps) + .append("beginTime", beginTime) + .append("endTime", endTime) + .toString(); + } } diff 
--git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StartupProgress.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StartupProgress.java index 6249a84e7f93f..0ca338b34b16d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StartupProgress.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StartupProgress.java @@ -24,6 +24,9 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.classification.InterfaceAudience; /** @@ -48,6 +51,9 @@ */ @InterfaceAudience.Private public class StartupProgress { + + private static final Logger LOG = LoggerFactory.getLogger(StartupProgress.class); + // package-private for access by StartupProgressView final Map phases = new ConcurrentHashMap(); @@ -81,6 +87,7 @@ public void beginPhase(Phase phase) { if (!isComplete()) { phases.get(phase).beginTime = monotonicNow(); } + LOG.debug("Beginning of the phase: {}", phase); } /** @@ -94,6 +101,7 @@ public void beginStep(Phase phase, Step step) { if (!isComplete(phase)) { lazyInitStep(phase, step).beginTime = monotonicNow(); } + LOG.debug("Beginning of the step. Phase: {}, Step: {}", phase, step); } /** @@ -105,6 +113,7 @@ public void endPhase(Phase phase) { if (!isComplete()) { phases.get(phase).endTime = monotonicNow(); } + LOG.debug("End of the phase: {}", phase); } /** @@ -118,6 +127,7 @@ public void endStep(Phase phase, Step step) { if (!isComplete(phase)) { lazyInitStep(phase, step).endTime = monotonicNow(); } + LOG.debug("End of the step. 
Phase: {}, Step: {}", phase, step); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/Step.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/Step.java index 0baf99d994eb0..5dee13d2a5e06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/Step.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/Step.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.builder.CompareToBuilder; import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.classification.InterfaceAudience; /** @@ -139,4 +140,14 @@ public int hashCode() { return new HashCodeBuilder().append(file).append(size).append(type) .toHashCode(); } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("file", file) + .append("sequenceNumber", sequenceNumber) + .append("size", size) + .append("type", type) + .toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepTracking.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepTracking.java index bc224ec567099..799b4d0b09fc6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepTracking.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepTracking.java @@ -18,6 +18,7 @@ import java.util.concurrent.atomic.AtomicLong; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.classification.InterfaceAudience; /** @@ -36,4 +37,14 @@ public StepTracking clone() { 
clone.total = total; return clone; } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("count", count) + .append("total", total) + .append("beginTime", beginTime) + .append("endTime", endTime) + .toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/package-info.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/package-info.java index e6a8b97fb3faa..d7d7b3d754d63 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/package-info.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/package-info.java @@ -40,4 +40,3 @@ package org.apache.hadoop.hdfs.server.namenode.startupprogress; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hdfs.server.namenode.NameNode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopAuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopAuditLogger.java index feba0e4f427e1..93eea6068c2fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopAuditLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopAuditLogger.java @@ -19,7 +19,7 @@ import java.net.InetAddress; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopConf.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopConf.java index ba820323b20b1..e78e41957d107 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/TopConf.java @@ -19,11 +19,11 @@ import java.util.concurrent.TimeUnit; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * This class is a common place for NNTop configuration. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/metrics/TopMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/metrics/TopMetrics.java index 72ec9f23446e8..3f6bb13228d55 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/metrics/TopMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/metrics/TopMetrics.java @@ -17,13 +17,12 @@ */ package org.apache.hadoop.hdfs.server.namenode.top.metrics; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.server.namenode.top.TopConf; import org.apache.hadoop.hdfs.server.namenode.top.window.RollingWindowManager; import 
org.apache.hadoop.hdfs.server.namenode.top.window.RollingWindowManager.Op; import org.apache.hadoop.hdfs.server.namenode.top.window.RollingWindowManager.User; @@ -144,8 +143,6 @@ public void report(long currTime, String userName, String cmd) { for (RollingWindowManager rollingWindowManager : rollingWindowManagers .values()) { rollingWindowManager.recordMetric(currTime, cmd, userName, 1); - rollingWindowManager.recordMetric(currTime, - TopConf.ALL_CMDS, userName, 1); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindow.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindow.java index f927106c344ad..9cf64f42a0afc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindow.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindow.java @@ -113,8 +113,8 @@ private int computeBucketIndex(long time) { * as well as atomic fields. */ private class Bucket { - AtomicLong value = new AtomicLong(0); - AtomicLong updateTime = new AtomicLong(0); + private AtomicLong value = new AtomicLong(0); + private AtomicLong updateTime = new AtomicLong(-1); // -1 = never updated. 
/** * Check whether the last time that the bucket was updated is no longer @@ -125,7 +125,7 @@ private class Bucket { */ boolean isStaleNow(long time) { long utime = updateTime.get(); - return time - utime >= windowLenMs; + return (utime == -1) || (time - utime >= windowLenMs); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindowManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindowManager.java index 095294e811890..4e9807399395c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindowManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/window/RollingWindowManager.java @@ -17,20 +17,22 @@ */ package org.apache.hadoop.hdfs.server.namenode.top.window; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Stack; import java.util.concurrent.ConcurrentHashMap; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.hdfs.server.namenode.top.TopConf; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.metrics2.util.Metrics2Util.NameValuePair; -import org.apache.hadoop.metrics2.util.Metrics2Util.TopN; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,11 +68,15 @@ public static class TopWindow { public TopWindow(int windowMillis) { this.windowMillis = windowMillis; - this.top = Lists.newArrayList(); + this.top = new LinkedList<>(); } public void 
addOp(Op op) { - top.add(op); + if (op.getOpType().equals(TopConf.ALL_CMDS)) { + top.add(0, op); + } else { + top.add(op); + } } public int getWindowLenMs() { @@ -86,41 +92,59 @@ public List getOps() { * Represents an operation within a TopWindow. It contains a ranked * set of the top users for the operation. */ - public static class Op { + public static class Op implements Comparable { private final String opType; - private final List topUsers; + private final List users; private final long totalCount; + private final int limit; - public Op(String opType, long totalCount) { + public Op(String opType, UserCounts users, int limit) { this.opType = opType; - this.topUsers = Lists.newArrayList(); - this.totalCount = totalCount; - } - - public void addUser(User u) { - topUsers.add(u); + this.users = new ArrayList<>(users); + this.users.sort(Collections.reverseOrder()); + this.totalCount = users.getTotal(); + this.limit = limit; } public String getOpType() { return opType; } + public List getAllUsers() { + return users; + } + public List getTopUsers() { - return topUsers; + return (users.size() > limit) ? users.subList(0, limit) : users; } public long getTotalCount() { return totalCount; } + + @Override + public int compareTo(Op other) { + return Long.signum(totalCount - other.totalCount); + } + + @Override + public boolean equals(Object o) { + return (o instanceof Op) && totalCount == ((Op)o).totalCount; + } + + @Override + public int hashCode() { + return opType.hashCode(); + } } /** * Represents a user who called an Op within a TopWindow. Specifies the * user and the number of times the user called the operation. 
*/ - public static class User { + public static class User implements Comparable { private final String user; - private final long count; + private long count; public User(String user, long count) { this.user = user; @@ -134,6 +158,56 @@ public String getUser() { public long getCount() { return count; } + + public void add(long delta) { + count += delta; + } + + @Override + public int compareTo(User other) { + return Long.signum(count - other.count); + } + + @Override + public boolean equals(Object o) { + return (o instanceof User) && user.equals(((User)o).user); + } + + @Override + public int hashCode() { + return user.hashCode(); + } + } + + private static class UserCounts extends ArrayList { + private long total = 0; + + UserCounts(int capacity) { + super(capacity); + } + + @Override + public boolean add(User user) { + long count = user.getCount(); + int i = indexOf(user); + if (i == -1) { + super.add(new User(user.getUser(), count)); + } else { + get(i).add(count); + } + total += count; + return true; + } + + @Override + public boolean addAll(Collection users) { + users.forEach(user -> add(user)); + return true; + } + + public long getTotal() { + return total; + } } /** @@ -142,7 +216,7 @@ public long getCount() { * operated on that metric. */ public ConcurrentHashMap metricMap = - new ConcurrentHashMap(); + new ConcurrentHashMap<>(); public RollingWindowManager(Configuration conf, int reportingPeriodMs) { @@ -184,35 +258,33 @@ public void recordMetric(long time, String command, * * @param time the current time * @return a TopWindow describing the top users for each metric in the - * window. + * window. 
*/ public TopWindow snapshot(long time) { TopWindow window = new TopWindow(windowLenMs); Set metricNames = metricMap.keySet(); LOG.debug("iterating in reported metrics, size={} values={}", metricNames.size(), metricNames); + UserCounts totalCounts = new UserCounts(metricMap.size()); for (Map.Entry entry : metricMap.entrySet()) { String metricName = entry.getKey(); RollingWindowMap rollingWindows = entry.getValue(); - TopN topN = getTopUsersForMetric(time, metricName, rollingWindows); - final int size = topN.size(); - if (size == 0) { - continue; - } - Op op = new Op(metricName, topN.getTotal()); - window.addOp(op); - // Reverse the users from the TopUsers using a stack, - // since we'd like them sorted in descending rather than ascending order - Stack reverse = new Stack(); - for (int i = 0; i < size; i++) { - reverse.push(topN.poll()); - } - for (int i = 0; i < size; i++) { - NameValuePair userEntry = reverse.pop(); - User user = new User(userEntry.getName(), userEntry.getValue()); - op.addUser(user); + UserCounts topN = getTopUsersForMetric(time, metricName, rollingWindows); + if (!topN.isEmpty()) { + window.addOp(new Op(metricName, topN, topUsersCnt)); + totalCounts.addAll(topN); } } + // synthesize the overall total op count with the top users for every op. + Set topUsers = new HashSet<>(); + for (Op op : window.getOps()) { + topUsers.addAll(op.getTopUsers()); + } + // intersect totals with the top users. 
+ totalCounts.retainAll(topUsers); + // allowed to exceed the per-op topUsersCnt to capture total ops for + // any user + window.addOp(new Op(TopConf.ALL_CMDS, totalCounts, Integer.MAX_VALUE)); return window; } @@ -223,9 +295,9 @@ public TopWindow snapshot(long time) { * @param metricName Name of metric * @return */ - private TopN getTopUsersForMetric(long time, String metricName, + private UserCounts getTopUsersForMetric(long time, String metricName, RollingWindowMap rollingWindows) { - TopN topN = new TopN(topUsersCnt); + UserCounts topN = new UserCounts(topUsersCnt); Iterator> iterator = rollingWindows.entrySet().iterator(); while (iterator.hasNext()) { @@ -242,7 +314,7 @@ private TopN getTopUsersForMetric(long time, String metricName, } LOG.debug("offer window of metric: {} userName: {} sum: {}", metricName, userName, windowSum); - topN.offer(new NameValuePair(userName, windowSum)); + topN.add(new User(userName, windowSum)); } LOG.debug("topN users size for command {} is: {}", metricName, topN.size()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 2423a037c8fd0..9ae5af23bdd10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -113,9 +113,9 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import com.sun.jersey.spi.container.ResourceFilters; /** Web-hdfs NameNode implementation. */ @@ -131,6 +131,7 @@ public class NamenodeWebHdfsMethods { private String scheme; private Principal userPrincipal; private String remoteAddr; + private int remotePort; private @Context ServletContext context; private @Context HttpServletResponse response; @@ -145,6 +146,7 @@ public NamenodeWebHdfsMethods(@Context HttpServletRequest request) { // get the remote address, if coming in via a trusted proxy server then // the address with be that of the proxied client remoteAddr = JspHelper.getRemoteAddr(request); + remotePort = JspHelper.getRemotePort(request); supportEZ = Boolean.valueOf(request.getHeader(WebHdfsFileSystem.EZ_HEADER)); } @@ -223,6 +225,10 @@ public String getHostAddress() { return getRemoteAddr(); } @Override + public int getRemotePort() { + return getRemotePortFromJSPHelper(); + } + @Override public InetAddress getHostInetAddress() { try { return InetAddress.getByName(getHostAddress()); @@ -253,6 +259,10 @@ protected String getRemoteAddr() { return remoteAddr; } + protected int getRemotePortFromJSPHelper() { + return remotePort; + } + protected void queueExternalCall(ExternalCall call) throws IOException, InterruptedException { final NameNode namenode = (NameNode)context.getAttribute("name.node"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockECReconstructionCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockECReconstructionCommand.java index 6e9c55be2c24b..3b1e2d608465a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockECReconstructionCommand.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockECReconstructionCommand.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -78,21 +78,23 @@ public static class BlockECReconstructionInfo { private String[] targetStorageIDs; private StorageType[] targetStorageTypes; private final byte[] liveBlockIndices; + private final byte[] excludeReconstructedIndices; private final ErasureCodingPolicy ecPolicy; public BlockECReconstructionInfo(ExtendedBlock block, DatanodeInfo[] sources, DatanodeStorageInfo[] targetDnStorageInfo, - byte[] liveBlockIndices, ErasureCodingPolicy ecPolicy) { + byte[] liveBlockIndices, byte[] excludeReconstructedIndices, ErasureCodingPolicy ecPolicy) { this(block, sources, DatanodeStorageInfo .toDatanodeInfos(targetDnStorageInfo), DatanodeStorageInfo .toStorageIDs(targetDnStorageInfo), DatanodeStorageInfo - .toStorageTypes(targetDnStorageInfo), liveBlockIndices, ecPolicy); + .toStorageTypes(targetDnStorageInfo), liveBlockIndices, + excludeReconstructedIndices, ecPolicy); } public BlockECReconstructionInfo(ExtendedBlock block, DatanodeInfo[] sources, DatanodeInfo[] targets, String[] targetStorageIDs, StorageType[] targetStorageTypes, - byte[] liveBlockIndices, ErasureCodingPolicy ecPolicy) { + byte[] liveBlockIndices, byte[] excludeReconstructedIndices, ErasureCodingPolicy ecPolicy) { this.block = block; this.sources = sources; this.targets = targets; @@ -100,6 +102,7 @@ public BlockECReconstructionInfo(ExtendedBlock block, this.targetStorageTypes = targetStorageTypes; this.liveBlockIndices = liveBlockIndices == null ? 
new byte[]{} : liveBlockIndices; + this.excludeReconstructedIndices = excludeReconstructedIndices; this.ecPolicy = ecPolicy; } @@ -127,6 +130,10 @@ public byte[] getLiveBlockIndices() { return liveBlockIndices; } + public byte[] getExcludeReconstructedIndices() { + return excludeReconstructedIndices; + } + public ErasureCodingPolicy getErasureCodingPolicy() { return ecPolicy; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java index 926cce91be9a3..b2a11f0a171fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; /** * BlockRecoveryCommand is an instruction to a data-node to recover diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockReportContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockReportContext.java index 94749e2d5bd4f..5bcd719b70499 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockReportContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockReportContext.java @@ -52,16 +52,12 @@ public class BlockReportContext { */ private final long leaseId; - private final boolean sorted; - public BlockReportContext(int totalRpcs, int curRpc, - long reportId, long leaseId, - boolean sorted) { + long reportId, long leaseId) { this.totalRpcs = totalRpcs; this.curRpc = 
curRpc; this.reportId = reportId; this.leaseId = leaseId; - this.sorted = sorted; } public int getTotalRpcs() { @@ -79,8 +75,4 @@ public long getReportId() { public long getLeaseId() { return leaseId; } - - public boolean isSorted() { - return sorted; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java index b6eddb61579da..f60d748dc9f56 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.StorageType; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java index 5680ef3162ed0..24cd7aa1155fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java @@ -140,6 +140,7 @@ public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, * Each finalized block is represented as 3 longs. Each under- * construction replica is represented as 4 longs. * This is done instead of Block[] to reduce memory used by block reports. 
+ * @param reports report of blocks per storage * @param context Context information for this block report. * * @return - the next command for DN to process. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeRegistration.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeRegistration.java index 711973838bc06..d7c2466e2eeee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeRegistration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeRegistration.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.StorageInfo; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * DatanodeRegistration class contains all information the name-node needs diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java new file mode 100644 index 0000000000000..8428b805f74fb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.protocol; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * This exception is thrown when a datanode sends a full block report but it is + * rejected by the Namenode due to an invalid lease (expired or otherwise). + * + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class InvalidBlockReportLeaseException extends IOException { + /** for java.io.Serializable. */ + private static final long serialVersionUID = 1L; + + public InvalidBlockReportLeaseException(long blockReportID, long leaseID) { + super("Block report 0x" + Long.toHexString(blockReportID) + " was rejected as lease 0x" + + Long.toHexString(leaseID) + " is invalid"); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java index d874e8f75dc66..47c381766cae2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java @@ -27,7 +27,6 @@ import org.apache.hadoop.ipc.RefreshCallQueueProtocol; import org.apache.hadoop.ipc.GenericRefreshProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol; -import org.apache.hadoop.tracing.TraceAdminProtocol; /** The full set of 
RPC methods implemented by the Namenode. */ @InterfaceAudience.Private @@ -42,6 +41,5 @@ public interface NamenodeProtocols RefreshCallQueueProtocol, GenericRefreshProtocol, GetUserMappingsProtocol, - HAServiceProtocol, - TraceAdminProtocol { + HAServiceProtocol { } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamespaceInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamespaceInfo.java index 10650da75d14a..81a7e457891cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamespaceInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamespaceInfo.java @@ -30,8 +30,8 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.util.VersionInfo; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * NamespaceInfo is returned by the name-node in reply diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLog.java index 1d26bc4983e54..8c27f30978324 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLog.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import com.google.common.base.Function; -import com.google.common.collect.ComparisonChain; +import org.apache.hadoop.thirdparty.com.google.common.collect.ComparisonChain; +import java.util.function.Function; import 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants; public class RemoteEditLog implements Comparable { @@ -82,16 +82,13 @@ public int hashCode() { } /** - * Guava Function which applies {@link #getStartTxId()} + * Java Function which applies {@link #getStartTxId()} */ public static final Function GET_START_TXID = - new Function() { - @Override - public Long apply(RemoteEditLog log) { + log -> { if (null == log) { return HdfsServerConstants.INVALID_TXID; } return log.getStartTxId(); - } - }; + }; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLogManifest.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLogManifest.java index 8252b3bc7f702..391078f558509 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLogManifest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RemoteEditLogManifest.java @@ -20,8 +20,8 @@ import java.util.Collections; import java.util.List; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/AdminHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/AdminHelper.java index 9cb646b38f6f7..40d0e69591c9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/AdminHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/AdminHelper.java @@ -1,4 +1,5 @@ /** + * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,10 +18,12 @@ */ package org.apache.hadoop.hdfs.tools; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; @@ -43,19 +46,29 @@ public class AdminHelper { static DistributedFileSystem getDFS(Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); - if (!(fs instanceof DistributedFileSystem)) { - throw new IllegalArgumentException("FileSystem " + fs.getUri() + - " is not an HDFS file system"); - } - return (DistributedFileSystem)fs; + return checkAndGetDFS(fs, conf); } static DistributedFileSystem getDFS(URI uri, Configuration conf) throws IOException { FileSystem fs = FileSystem.get(uri, conf); + return checkAndGetDFS(fs, conf); + } + + static DistributedFileSystem checkAndGetDFS(FileSystem fs, Configuration conf) + throws IOException { + if ((fs instanceof ViewFileSystemOverloadScheme)) { + // With ViewFSOverloadScheme, the admin will pass -fs option with intended + // child fs mount path. GenericOptionsParser would have set the given -fs + // as FileSystem's defaultURI. So, we are using FileSystem.getDefaultUri + // to use the given -fs path. + fs = ((ViewFileSystemOverloadScheme) fs) + .getRawFileSystem(new Path(FileSystem.getDefaultUri(conf)), conf); + } if (!(fs instanceof DistributedFileSystem)) { throw new IllegalArgumentException("FileSystem " + fs.getUri() - + " is not an HDFS file system"); + + " is not an HDFS file system. 
The fs class is: " + + fs.getClass().getName()); } return (DistributedFileSystem) fs; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java index 9e7a3cb9c753b..a22d34bf007e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java @@ -44,7 +44,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import org.apache.hadoop.util.ToolRunner; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 04960e3c3e2ce..640af66e4b7f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -35,10 +35,12 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.TreeSet; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -87,7 +89,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.io.MultipleIOException; -import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufRpcEngine2; import org.apache.hadoop.ipc.RPC; import 
org.apache.hadoop.ipc.RefreshCallQueueProtocol; import org.apache.hadoop.ipc.RefreshResponse; @@ -103,7 +105,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * This class provides some DFS administrative access shell commands. @@ -130,11 +132,7 @@ public DFSAdminCommand(Configuration conf) { @Override public void run(PathData pathData) throws IOException { FileSystem fs = pathData.fs; - if (!(fs instanceof DistributedFileSystem)) { - throw new IllegalArgumentException("FileSystem " + fs.getUri() - + " is not an HDFS file system"); - } - this.dfs = (DistributedFileSystem) fs; + this.dfs = AdminHelper.checkAndGetDFS(fs, getConf()); run(pathData.path); } } @@ -433,7 +431,7 @@ static int run(DistributedFileSystem dfs, String[] argv, int idx) throws IOExcep */ private static final String commonUsageSummary = "\t[-report [-live] [-dead] [-decommissioning] " + - "[-enteringmaintenance] [-inmaintenance]]\n" + + "[-enteringmaintenance] [-inmaintenance] [-slownodes]]\n" + "\t[-safemode ]\n" + "\t[-saveNamespace [-beforeShutdown]]\n" + "\t[-rollEdits]\n" + @@ -451,8 +449,7 @@ static int run(DistributedFileSystem dfs, String[] argv, int idx) throws IOExcep "\t[-refreshSuperUserGroupsConfiguration]\n" + "\t[-refreshCallQueue]\n" + "\t[-refresh [arg1..argn]\n" + - "\t[-reconfig " + - "]\n" + + "\t[-reconfig ]\n" + "\t[-printTopology]\n" + "\t[-refreshNamenodes datanode_host:ipc_port]\n" + "\t[-getVolumeReport datanode_host:ipc_port]\n" + @@ -483,20 +480,20 @@ public DFSAdmin() { public DFSAdmin(Configuration conf) { super(conf); } - + protected DistributedFileSystem getDFS() throws IOException { - FileSystem fs = getFS(); - if (!(fs instanceof DistributedFileSystem)) { - throw new IllegalArgumentException("FileSystem " + fs.getUri() + - " is not an HDFS file system"); - } - return (DistributedFileSystem)fs; + return 
AdminHelper.checkAndGetDFS(getFS(), getConf()); } - + + public static final String[] DFS_REPORT_ARGS = + new String[] {"-live", "-dead", "-decommissioning", "-enteringmaintenance", + "-inmaintenance", "-slownodes"}; + /** * Gives a report on how the FileSystem is doing. * @exception IOException if the filesystem does not exist. */ + @SuppressWarnings("deprecation") public void report(String[] argv, int i) throws IOException { DistributedFileSystem dfs = getDFS(); FsStatus ds = dfs.getStatus(); @@ -583,19 +580,20 @@ public void report(String[] argv, int i) throws IOException { List args = Arrays.asList(argv); // Truncate already handled arguments before parsing report()-specific ones args = new ArrayList(args.subList(i, args.size())); - final boolean listLive = StringUtils.popOption("-live", args); - final boolean listDead = StringUtils.popOption("-dead", args); + final boolean listLive = StringUtils.popOption(DFS_REPORT_ARGS[0], args); + final boolean listDead = StringUtils.popOption(DFS_REPORT_ARGS[1], args); final boolean listDecommissioning = - StringUtils.popOption("-decommissioning", args); + StringUtils.popOption(DFS_REPORT_ARGS[2], args); final boolean listEnteringMaintenance = - StringUtils.popOption("-enteringmaintenance", args); + StringUtils.popOption(DFS_REPORT_ARGS[3], args); final boolean listInMaintenance = - StringUtils.popOption("-inmaintenance", args); - + StringUtils.popOption(DFS_REPORT_ARGS[4], args); + final boolean listSlowNodes = + StringUtils.popOption(DFS_REPORT_ARGS[5], args); // If no filter flags are found, then list all DN types boolean listAll = (!listLive && !listDead && !listDecommissioning - && !listEnteringMaintenance && !listInMaintenance); + && !listEnteringMaintenance && !listInMaintenance && !listSlowNodes); if (listAll || listLive) { printDataNodeReports(dfs, DatanodeReportType.LIVE, listLive, "Live"); @@ -619,6 +617,10 @@ public void report(String[] argv, int i) throws IOException { printDataNodeReports(dfs, 
DatanodeReportType.IN_MAINTENANCE, listInMaintenance, "In maintenance"); } + + if (listAll || listSlowNodes) { + printSlowDataNodeReports(dfs, listSlowNodes, "Slow"); + } } private static void printDataNodeReports(DistributedFileSystem dfs, @@ -636,6 +638,20 @@ private static void printDataNodeReports(DistributedFileSystem dfs, } } + private static void printSlowDataNodeReports(DistributedFileSystem dfs, boolean listNodes, + String nodeState) throws IOException { + DatanodeInfo[] nodes = dfs.getSlowDatanodeStats(); + if (nodes.length > 0 || listNodes) { + System.out.println(nodeState + " datanodes (" + nodes.length + "):\n"); + } + if (nodes.length > 0) { + for (DatanodeInfo dn : nodes) { + System.out.println(dn.getDatanodeReport()); + System.out.println(); + } + } + } + /** * Safe mode maintenance command. * Usage: hdfs dfsadmin -safemode [enter | leave | get | wait | forceExit] @@ -643,6 +659,7 @@ private static void printDataNodeReports(DistributedFileSystem dfs, * @param idx The index of the command that is being processed. * @exception IOException if the filesystem does not exist. 
*/ + @SuppressWarnings("deprecation") public void setSafeMode(String[] argv, int idx) throws IOException { if (idx != argv.length - 1) { printUsage("-safemode"); @@ -696,6 +713,7 @@ public void setSafeMode(String[] argv, int idx) throws IOException { } + @SuppressWarnings("deprecation") private boolean waitExitSafeMode(DistributedFileSystem dfs, boolean inSafeMode) throws IOException { while (inSafeMode) { @@ -1010,14 +1028,14 @@ public int listOpenFiles(String[] argv) throws IOException { private void printOpenFiles(RemoteIterator openFilesIterator) throws IOException { - System.out.println(String.format("%-20s\t%-20s\t%s", "Client Host", - "Client Name", "Open File Path")); + System.out.printf("%-20s\t%-20s\t%s%n", "Client Host", + "Client Name", "Open File Path"); while (openFilesIterator.hasNext()) { OpenFileEntry openFileEntry = openFilesIterator.next(); - System.out.println(String.format("%-20s\t%-20s\t%20s", + System.out.printf("%-20s\t%-20s\t%20s%n", openFileEntry.getClientMachine(), openFileEntry.getClientName(), - openFileEntry.getFilePath())); + openFileEntry.getFilePath()); } } @@ -1045,14 +1063,7 @@ public int setBalancerBandwidth(String[] argv, int idx) throws IOException { System.err.println("Bandwidth should be a non-negative integer"); return exitCode; } - - FileSystem fs = getFS(); - if (!(fs instanceof DistributedFileSystem)) { - System.err.println("FileSystem is " + fs.getUri()); - return exitCode; - } - - DistributedFileSystem dfs = (DistributedFileSystem) fs; + DistributedFileSystem dfs = getDFS(); try{ dfs.setBalancerBandwidth(bandwidth); System.out.println("Balancer bandwidth is set to " + bandwidth); @@ -1119,7 +1130,7 @@ private void printHelp(String cmd) { commonUsageSummary; String report ="-report [-live] [-dead] [-decommissioning] " - + "[-enteringmaintenance] [-inmaintenance]:\n" + + + "[-enteringmaintenance] [-inmaintenance] [-slownodes]:\n" + "\tReports basic filesystem information and statistics. 
\n" + "\tThe dfs usage can be different from \"du\" usage, because it\n" + "\tmeasures raw space used by replication, checksums, snapshots\n" + @@ -1195,12 +1206,14 @@ private void printHelp(String cmd) { String refreshCallQueue = "-refreshCallQueue: Reload the call queue from config\n"; - String reconfig = "-reconfig " + + String reconfig = "-reconfig " + ":\n" + "\tStarts or gets the status of a reconfiguration operation, \n" + "\tor gets a list of reconfigurable properties.\n" + - - "\tThe second parameter specifies the node type\n"; + "\tThe second parameter specifies the node type\n" + + "\tThe third parameter specifies host address. For start or status, \n" + + "\tdatanode supports livenodes as third parameter, which will start \n" + + "\tor retrieve reconfiguration on all live datanodes."; String genericRefresh = "-refresh: Arguments are " + " [arg1..argn]\n" + "\tTriggers a runtime-refresh of the resource specified by " + @@ -1587,40 +1600,45 @@ public int metaSave(String[] argv, int idx) throws IOException { * @throws IOException If an error while getting datanode report */ public int printTopology() throws IOException { - DistributedFileSystem dfs = getDFS(); - final DatanodeInfo[] report = dfs.getDataNodeStats(); - - // Build a map of rack -> nodes from the datanode report - HashMap > tree = new HashMap>(); - for(DatanodeInfo dni : report) { - String location = dni.getNetworkLocation(); - String name = dni.getName(); - - if(!tree.containsKey(location)) { - tree.put(location, new TreeSet()); - } + DistributedFileSystem dfs = getDFS(); + final DatanodeInfo[] report = dfs.getDataNodeStats(); + + // Build a map of rack -> nodes from the datanode report + Map> map = new HashMap<>(); + for(DatanodeInfo dni : report) { + String location = dni.getNetworkLocation(); + String name = dni.getName(); + String dnState = dni.getAdminState().toString(); - tree.get(location).add(name); + if(!map.containsKey(location)) { + map.put(location, new HashMap<>()); } + + Map 
node = map.get(location); + node.put(name, dnState); + } - // Sort the racks (and nodes) alphabetically, display in order - ArrayList racks = new ArrayList(tree.keySet()); - Collections.sort(racks); + // Sort the racks (and nodes) alphabetically, display in order + List racks = new ArrayList<>(map.keySet()); + Collections.sort(racks); - for(String r : racks) { - System.out.println("Rack: " + r); - TreeSet nodes = tree.get(r); - - for(String n : nodes) { - System.out.print(" " + n); - String hostname = NetUtils.getHostNameOfIP(n); - if(hostname != null) - System.out.print(" (" + hostname + ")"); - System.out.println(); + for(String r : racks) { + System.out.println("Rack: " + r); + Map nodes = map.get(r); + + for(Map.Entry entry : nodes.entrySet()) { + String n = entry.getKey(); + System.out.print(" " + n); + String hostname = NetUtils.getHostNameOfIP(n); + if(hostname != null) { + System.out.print(" (" + hostname + ")"); } - + System.out.print(" " + entry.getValue()); System.out.println(); } + + System.out.println(); + } return 0; } @@ -1840,15 +1858,15 @@ public int refreshCallQueue() throws IOException { return 0; } - public int reconfig(String[] argv, int i) throws IOException { + public int reconfig(String[] argv, int i) throws IOException, InterruptedException { String nodeType = argv[i]; String address = argv[i + 1]; String op = argv[i + 2]; if ("start".equals(op)) { - return startReconfiguration(nodeType, address, System.out, System.err); + return startReconfigurationUtil(nodeType, address, System.out, System.err); } else if ("status".equals(op)) { - return getReconfigurationStatus(nodeType, address, System.out, System.err); + return getReconfigurationStatusUtil(nodeType, address, System.out, System.err); } else if ("properties".equals(op)) { return getReconfigurableProperties(nodeType, address, System.out, System.err); @@ -1858,12 +1876,57 @@ public int reconfig(String[] argv, int i) throws IOException { } int startReconfiguration(final String nodeThpe, 
final String address) - throws IOException { - return startReconfiguration(nodeThpe, address, System.out, System.err); + throws IOException, InterruptedException { + return startReconfigurationUtil(nodeThpe, address, System.out, System.err); + } + + int startReconfigurationUtil(final String nodeType, final String address, final PrintStream out, + final PrintStream err) throws IOException, InterruptedException { + if (!"livenodes".equals(address)) { + return startReconfiguration(nodeType, address, out, err); + } + if (!"datanode".equals(nodeType)) { + err.println("Only datanode type supports reconfiguration in bulk."); + return 1; + } + ExecutorService executorService = Executors.newFixedThreadPool(5); + DistributedFileSystem dfs = getDFS(); + DatanodeInfo[] nodes = dfs.getDataNodeStats(DatanodeReportType.LIVE); + AtomicInteger successCount = new AtomicInteger(); + AtomicInteger failCount = new AtomicInteger(); + if (nodes != null) { + for (DatanodeInfo node : nodes) { + executorService.submit(() -> { + int status = startReconfiguration(nodeType, node.getIpcAddr(false), out, err); + if (status == 0) { + successCount.incrementAndGet(); + } else { + failCount.incrementAndGet(); + } + }); + } + while ((successCount.get() + failCount.get()) < nodes.length) { + Thread.sleep(1000); + } + executorService.shutdown(); + if (!executorService.awaitTermination(1, TimeUnit.MINUTES)) { + err.println("Executor service could not be terminated in 60s. 
Please wait for" + + " sometime before the system cools down."); + } + out.println("Starting of reconfiguration task successful on " + successCount.get() + + " nodes, failed on " + failCount.get() + " nodes."); + if (failCount.get() == 0) { + return 0; + } else { + return 1; + } + } + err.println("DFS datanode stats could not be retrieved."); + return 1; } int startReconfiguration(final String nodeType, final String address, - final PrintStream out, final PrintStream err) throws IOException { + final PrintStream out, final PrintStream err) { String outMsg = null; String errMsg = null; int ret = 0; @@ -1904,8 +1967,53 @@ int startReconfigurationDispatch(final String nodeType, } } - int getReconfigurationStatus(final String nodeType, final String address, - final PrintStream out, final PrintStream err) throws IOException { + int getReconfigurationStatusUtil(final String nodeType, final String address, + final PrintStream out, final PrintStream err) throws IOException, InterruptedException { + if (!"livenodes".equals(address)) { + return getReconfigurationStatus(nodeType, address, out, err); + } + if (!"datanode".equals(nodeType)) { + err.println("Only datanode type supports reconfiguration in bulk."); + return 1; + } + ExecutorService executorService = Executors.newFixedThreadPool(5); + DistributedFileSystem dfs = getDFS(); + DatanodeInfo[] nodes = dfs.getDataNodeStats(DatanodeReportType.LIVE); + AtomicInteger successCount = new AtomicInteger(); + AtomicInteger failCount = new AtomicInteger(); + if (nodes != null) { + for (DatanodeInfo node : nodes) { + executorService.submit(() -> { + int status = getReconfigurationStatus(nodeType, node.getIpcAddr(false), out, err); + if (status == 0) { + successCount.incrementAndGet(); + } else { + failCount.incrementAndGet(); + } + }); + } + while ((successCount.get() + failCount.get()) < nodes.length) { + Thread.sleep(1000); + } + executorService.shutdown(); + if (!executorService.awaitTermination(1, TimeUnit.MINUTES)) { + 
err.println("Executor service could not be terminated in 60s. Please wait for" + + " sometime before the system cools down."); + } + out.println("Retrieval of reconfiguration status successful on " + successCount.get() + + " nodes, failed on " + failCount.get() + " nodes."); + if (failCount.get() == 0) { + return 0; + } else { + return 1; + } + } + err.println("DFS datanode stats could not be retrieved."); + return 1; + } + + int getReconfigurationStatus(final String nodeType, final String address, final PrintStream out, + final PrintStream err) { String outMsg = null; String errMsg = null; ReconfigurationTaskStatus status = null; @@ -2045,7 +2153,7 @@ public int genericRefresh(String[] argv, int i) throws IOException { InetSocketAddress address = NetUtils.createSocketAddr(hostport); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine.class); + RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine2.class); GenericRefreshProtocolPB proxy = (GenericRefreshProtocolPB) RPC.getProxy(xface, RPC.getProtocolVersion(xface), address, ugi, conf, NetUtils.getDefaultSocketFactory(conf), 0); @@ -2089,7 +2197,7 @@ private static void printUsage(String cmd) { if ("-report".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" + " [-report] [-live] [-dead] [-decommissioning]" - + " [-enteringmaintenance] [-inmaintenance]"); + + " [-enteringmaintenance] [-inmaintenance] [-slownodes]"); } else if ("-safemode".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" + " [-safemode enter | leave | get | wait | forceExit]"); @@ -2148,7 +2256,7 @@ private static void printUsage(String cmd) { + " [-refreshCallQueue]"); } else if ("-reconfig".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" - + " [-reconfig " + + " [-reconfig " + "]"); } else if ("-refresh".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" @@ -2200,7 +2308,6 @@ private static void printUsage(String cmd) { /** * @param argv The parameters 
passed to this program. - * @exception Exception if the filesystem does not exist. * @return 0 on success, non zero on error. */ @Override @@ -2234,7 +2341,7 @@ public int run(String[] argv) { return exitCode; } } else if ("-report".equals(cmd)) { - if (argv.length > 6) { + if (argv.length > DFS_REPORT_ARGS.length + 1) { printUsage(cmd); return exitCode; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java index fcfb47c8c65ab..15c63732f7a69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java @@ -23,9 +23,9 @@ import java.util.Collection; import java.util.Map; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSortedMap; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSortedMap; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Options; import org.apache.hadoop.ha.FailoverController; @@ -284,6 +284,11 @@ private int failover(CommandLine cmd) HAServiceTarget fromNode = resolveTarget(args[0]); HAServiceTarget toNode = resolveTarget(args[1]); + fromNode.setTransitionTargetHAStatus( + HAServiceProtocol.HAServiceState.STANDBY); + toNode.setTransitionTargetHAStatus( + HAServiceProtocol.HAServiceState.ACTIVE); + // Check that auto-failover is consistently configured for both nodes. 
Preconditions.checkState( fromNode.isAutoFailoverEnabled() == diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java index 281d1be31eb3b..4773beb85de08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java @@ -29,7 +29,7 @@ import java.util.ArrayList; import java.util.List; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -111,21 +111,39 @@ protected byte[] targetToData(HAServiceTarget target) { @Override protected InetSocketAddress getRpcAddressToBindTo() { int zkfcPort = getZkfcPort(conf); - return new InetSocketAddress(localTarget.getAddress().getAddress(), - zkfcPort); + String zkfcBindAddr = getZkfcServerBindHost(conf); + if (zkfcBindAddr == null || zkfcBindAddr.isEmpty()) { + zkfcBindAddr = localTarget.getAddress().getAddress().getHostAddress(); + } + return new InetSocketAddress(zkfcBindAddr, zkfcPort); } - @Override protected PolicyProvider getPolicyProvider() { return new HDFSPolicyProvider(); } - + static int getZkfcPort(Configuration conf) { return conf.getInt(DFSConfigKeys.DFS_HA_ZKFC_PORT_KEY, DFSConfigKeys.DFS_HA_ZKFC_PORT_DEFAULT); } - + + /** + * Given a configuration get the bind host that could be used by ZKFC. + * We derive it from NN service rpc bind host or NN rpc bind host. 
+ * + * @param conf input configuration + * @return the bind host address found in conf + */ + private static String getZkfcServerBindHost(Configuration conf) { + String addr = conf.getTrimmed( + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY); + if (addr == null || addr.isEmpty()) { + addr = conf.getTrimmed(DFSConfigKeys.DFS_NAMENODE_RPC_BIND_HOST_KEY); + } + return addr; + } + public static DFSZKFailoverController create(Configuration conf) { Configuration localNNConf = DFSHAAdmin.addSecurityConfiguration(conf); String nsId = DFSUtil.getNamenodeNameServiceId(conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java index 8a2ef8b5920f5..33a117f314e56 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java @@ -27,6 +27,7 @@ import java.net.URLConnection; import java.net.URLEncoder; import java.security.PrivilegedExceptionAction; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -37,6 +38,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck; import org.apache.hadoop.hdfs.web.URLConnectionFactory; import org.apache.hadoop.security.UserGroupInformation; @@ -137,8 +139,17 @@ public DFSck(Configuration conf, PrintStream out) throws IOException { super(conf); this.ugi = UserGroupInformation.getCurrentUser(); this.out = out; + int connectTimeout = (int) conf.getTimeDuration( + HdfsClientConfigKeys.DFS_CLIENT_FSCK_CONNECT_TIMEOUT, + HdfsClientConfigKeys.DFS_CLIENT_FSCK_CONNECT_TIMEOUT_DEFAULT, + 
TimeUnit.MILLISECONDS); + int readTimeout = (int) conf.getTimeDuration( + HdfsClientConfigKeys.DFS_CLIENT_FSCK_READ_TIMEOUT, + HdfsClientConfigKeys.DFS_CLIENT_FSCK_READ_TIMEOUT_DEFAULT, + TimeUnit.MILLISECONDS); + this.connectionFactory = URLConnectionFactory - .newDefaultURLConnectionFactory(conf); + .newDefaultURLConnectionFactory(connectTimeout, readTimeout, conf); this.isSpnegoEnabled = UserGroupInformation.isSecurityEnabled(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java index 9a654e580cb06..32e8248adc692 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java @@ -24,16 +24,42 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Files; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; -import com.google.common.util.concurrent.Uninterruptibles; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hdfs.BlockReader; +import org.apache.hadoop.hdfs.DFSClient; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.client.impl.BlockReaderRemote; +import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import 
org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock; +import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; +import org.apache.hadoop.hdfs.util.StripedBlockUtil; +import org.apache.hadoop.io.erasurecode.CodecUtil; +import org.apache.hadoop.io.erasurecode.ErasureCoderOptions; +import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -65,17 +91,18 @@ public class DebugAdmin extends Configured implements Tool { /** * All the debug commands we can run. */ - private DebugCommand DEBUG_COMMANDS[] = { + private final DebugCommand[] DEBUG_COMMANDS = { new VerifyMetaCommand(), new ComputeMetaCommand(), new RecoverLeaseCommand(), + new VerifyECCommand(), new HelpCommand() }; /** * The base class for debug commands. */ - private abstract class DebugCommand { + private abstract static class DebugCommand { final String name; final String usageText; final String helpText; @@ -94,15 +121,15 @@ private abstract class DebugCommand { /** * The command for verifying a block metadata file and possibly block file. */ - private class VerifyMetaCommand extends DebugCommand { + private static class VerifyMetaCommand extends DebugCommand { VerifyMetaCommand() { super("verifyMeta", -"verifyMeta -meta [-block ]", -" Verify HDFS metadata and block files. 
If a block file is specified, we" + - System.lineSeparator() + -" will verify that the checksums in the metadata file match the block" + - System.lineSeparator() + -" file."); + "verifyMeta -meta [-block ]", + " Verify HDFS metadata and block files. If a block file is specified, we" + + System.lineSeparator() + + " will verify that the checksums in the metadata file match the block" + + System.lineSeparator() + + " file."); } int run(List args) throws IOException { @@ -202,7 +229,7 @@ int run(List args) throws IOException { blockFile); return 0; } finally { - IOUtils.cleanup(null, metaStream, dataStream, checksumStream); + IOUtils.cleanupWithLogger(null, metaStream, dataStream, checksumStream); } } } @@ -210,7 +237,7 @@ int run(List args) throws IOException { /** * The command for verifying a block metadata file and possibly block file. */ - private class ComputeMetaCommand extends DebugCommand { + private static class ComputeMetaCommand extends DebugCommand { ComputeMetaCommand() { super("computeMeta", "computeMeta -block -out ", @@ -287,7 +314,7 @@ int run(List args) throws IOException { + " saved metadata to meta file " + outFile); return 0; } finally { - IOUtils.cleanup(null, metaOut); + IOUtils.cleanupWithLogger(null, metaOut); } } } @@ -387,6 +414,209 @@ int run(List args) throws IOException { } } + /** + * The command for verifying the correctness of erasure coding on an erasure coded file. 
+ */ + private class VerifyECCommand extends DebugCommand { + private DFSClient client; + private int dataBlkNum; + private int parityBlkNum; + private int cellSize; + private boolean useDNHostname; + private CachingStrategy cachingStrategy; + private int stripedReadBufferSize; + private CompletionService readService; + private RawErasureEncoder encoder; + private BlockReader[] blockReaders; + + + VerifyECCommand() { + super("verifyEC", + "verifyEC -file ", + " Verify HDFS erasure coding on all block groups of the file."); + } + + int run(List args) throws IOException { + if (args.size() < 2) { + System.out.println(usageText); + System.out.println(helpText + System.lineSeparator()); + return 1; + } + String file = StringUtils.popOptionWithArgument("-file", args); + Path path = new Path(file); + DistributedFileSystem dfs = AdminHelper.getDFS(getConf()); + this.client = dfs.getClient(); + + FileStatus fileStatus; + try { + fileStatus = dfs.getFileStatus(path); + } catch (FileNotFoundException e) { + System.err.println("File " + file + " does not exist."); + return 1; + } + + if (!fileStatus.isFile()) { + System.err.println("File " + file + " is not a regular file."); + return 1; + } + if (!dfs.isFileClosed(path)) { + System.err.println("File " + file + " is not closed."); + return 1; + } + this.useDNHostname = getConf().getBoolean(DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); + this.cachingStrategy = CachingStrategy.newDefaultStrategy(); + this.stripedReadBufferSize = getConf().getInt( + DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_KEY, + DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_DEFAULT); + + LocatedBlocks locatedBlocks = client.getLocatedBlocks(file, 0, fileStatus.getLen()); + if (locatedBlocks.getErasureCodingPolicy() == null) { + System.err.println("File " + file + " is not erasure coded."); + return 1; + } + ErasureCodingPolicy ecPolicy = 
locatedBlocks.getErasureCodingPolicy(); + this.dataBlkNum = ecPolicy.getNumDataUnits(); + this.parityBlkNum = ecPolicy.getNumParityUnits(); + this.cellSize = ecPolicy.getCellSize(); + this.encoder = CodecUtil.createRawEncoder(getConf(), ecPolicy.getCodecName(), + new ErasureCoderOptions( + ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits())); + int blockNum = dataBlkNum + parityBlkNum; + this.readService = new ExecutorCompletionService<>( + DFSUtilClient.getThreadPoolExecutor(blockNum, blockNum, 60, + new LinkedBlockingQueue<>(), "read-", false)); + this.blockReaders = new BlockReader[dataBlkNum + parityBlkNum]; + + for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) { + System.out.println("Checking EC block group: blk_" + locatedBlock.getBlock().getBlockId()); + LocatedStripedBlock blockGroup = (LocatedStripedBlock) locatedBlock; + + try { + verifyBlockGroup(blockGroup); + System.out.println("Status: OK"); + } catch (Exception e) { + System.err.println("Status: ERROR, message: " + e.getMessage()); + return 1; + } finally { + closeBlockReaders(); + } + } + System.out.println("\nAll EC block group status: OK"); + return 0; + } + + private void verifyBlockGroup(LocatedStripedBlock blockGroup) throws Exception { + final LocatedBlock[] indexedBlocks = StripedBlockUtil.parseStripedBlockGroup(blockGroup, + cellSize, dataBlkNum, parityBlkNum); + + int blockNumExpected = Math.min(dataBlkNum, + (int) ((blockGroup.getBlockSize() - 1) / cellSize + 1)) + parityBlkNum; + if (blockGroup.getBlockIndices().length < blockNumExpected) { + throw new Exception("Block group is under-erasure-coded."); + } + + long maxBlockLen = 0L; + DataChecksum checksum = null; + for (int i = 0; i < dataBlkNum + parityBlkNum; i++) { + LocatedBlock block = indexedBlocks[i]; + if (block == null) { + blockReaders[i] = null; + continue; + } + if (block.getBlockSize() > maxBlockLen) { + maxBlockLen = block.getBlockSize(); + } + BlockReader blockReader = 
createBlockReader(block.getBlock(), + block.getLocations()[0], block.getBlockToken()); + if (checksum == null) { + checksum = blockReader.getDataChecksum(); + } else { + assert checksum.equals(blockReader.getDataChecksum()); + } + blockReaders[i] = blockReader; + } + assert checksum != null; + int bytesPerChecksum = checksum.getBytesPerChecksum(); + int bufferSize = stripedReadBufferSize < bytesPerChecksum ? bytesPerChecksum : + stripedReadBufferSize - stripedReadBufferSize % bytesPerChecksum; + final ByteBuffer[] buffers = new ByteBuffer[dataBlkNum + parityBlkNum]; + final ByteBuffer[] outputs = new ByteBuffer[parityBlkNum]; + for (int i = 0; i < dataBlkNum + parityBlkNum; i++) { + buffers[i] = ByteBuffer.allocate(bufferSize); + } + for (int i = 0; i < parityBlkNum; i++) { + outputs[i] = ByteBuffer.allocate(bufferSize); + } + long positionInBlock = 0L; + while (positionInBlock < maxBlockLen) { + final int toVerifyLen = (int) Math.min(bufferSize, maxBlockLen - positionInBlock); + List> futures = new ArrayList<>(dataBlkNum + parityBlkNum); + for (int i = 0; i < dataBlkNum + parityBlkNum; i++) { + final int fi = i; + futures.add(this.readService.submit(() -> { + BlockReader blockReader = blockReaders[fi]; + ByteBuffer buffer = buffers[fi]; + buffer.clear(); + buffer.limit(toVerifyLen); + int readLen = 0; + if (blockReader != null) { + int toRead = buffer.remaining(); + while (readLen < toRead) { + int nread = blockReader.read(buffer); + if (nread <= 0) { + break; + } + readLen += nread; + } + } + while (buffer.hasRemaining()) { + buffer.put((byte) 0); + } + buffer.flip(); + return readLen; + })); + } + for (int i = 0; i < dataBlkNum + parityBlkNum; i++) { + futures.get(i).get(1, TimeUnit.MINUTES); + } + ByteBuffer[] inputs = new ByteBuffer[dataBlkNum]; + System.arraycopy(buffers, 0, inputs, 0, dataBlkNum); + for (int i = 0; i < parityBlkNum; i++) { + outputs[i].clear(); + outputs[i].limit(toVerifyLen); + } + this.encoder.encode(inputs, outputs); + for (int i = 0; i < 
parityBlkNum; i++) { + if (!buffers[dataBlkNum + i].equals(outputs[i])) { + throw new Exception("EC compute result not match."); + } + } + positionInBlock += toVerifyLen; + } + } + + private BlockReader createBlockReader(ExtendedBlock block, DatanodeInfo dnInfo, + Token token) throws IOException { + InetSocketAddress dnAddress = NetUtils.createSocketAddr(dnInfo.getXferAddr(useDNHostname)); + Peer peer = client.newConnectedPeer(dnAddress, token, dnInfo); + return BlockReaderRemote.newBlockReader( + "dummy", block, token, 0, + block.getNumBytes(), true, "", peer, dnInfo, + null, cachingStrategy, -1, getConf()); + } + + private void closeBlockReaders() { + for (int i = 0; i < blockReaders.length; i++) { + if (blockReaders[i] != null) { + IOUtils.closeStream(blockReaders[i]); + blockReaders[i] = null; + } + } + } + + } + /** * The command for getting help about other commands. */ @@ -459,9 +689,9 @@ private void printUsage() { if (!command.name.equals("help")) { System.out.println(command.usageText); } - System.out.println(); - ToolRunner.printGenericCommandUsage(System.out); } + System.out.println(); + ToolRunner.printGenericCommandUsage(System.out); } public static void main(String[] argsArray) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java index 0dc4bcba4d24f..10156287be15d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java @@ -43,7 +43,7 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.GenericOptionsParser; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * Fetch a DelegationToken from 
the current Namenode and store it in the diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java index c4527e59b2884..94aff53470b72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java @@ -33,7 +33,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.net.NetUtils; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICES; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java index a6901f335d1fb..fc5f30e883001 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java @@ -41,7 +41,7 @@ import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; /** * OfflineEditsXmlLoader walks an EditsVisitor over an OEV XML file @@ -86,6 +86,10 @@ public OfflineEditsXmlLoader(OfflineEditsVisitor visitor, public void loadEdits() throws IOException { try { XMLReader xr = XMLReaderFactory.createXMLReader(); + xr.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + xr.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + 
xr.setFeature("http://xml.org/sax/features/external-general-entities", false); + xr.setFeature("http://xml.org/sax/features/external-parameter-entities", false); xr.setContentHandler(this); xr.setErrorHandler(this); xr.setDTDHandler(null); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/StatisticsEditsVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/StatisticsEditsVisitor.java index cc97ea71d28e5..411df1062771b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/StatisticsEditsVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/StatisticsEditsVisitor.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; /** * StatisticsEditsVisitor implements text version of EditsVisitor diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/XmlEditsVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/XmlEditsVisitor.java index ddf7933f0322c..9fabd1887ce48 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/XmlEditsVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/XmlEditsVisitor.java @@ -60,8 +60,8 @@ public class XmlEditsVisitor implements OfflineEditsVisitor { public XmlEditsVisitor(OutputStream out) throws IOException { this.out = out; - factory =(SAXTransformerFactory)SAXTransformerFactory.newInstance(); try { + factory = org.apache.hadoop.util.XMLUtils.newSecureSAXTransformerFactory(); TransformerHandler handler = 
factory.newTransformerHandler(); handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml"); handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "UTF-8"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java index 9b0031a3a2eaa..f14ee5f930fe0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.ChannelFutureListener; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java index 7032e2a776d34..c8a6edd021f3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java @@ -30,7 +30,7 @@ import java.util.List; import java.util.Map; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hadoop.thirdparty.protobuf.CodedInputStream; import org.apache.hadoop.thirdparty.protobuf.InvalidProtocolBufferException; @@ -43,6 +43,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import 
org.apache.hadoop.hdfs.XAttrHelper; +import org.apache.hadoop.hdfs.protocol.XAttrNotFoundException; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf; @@ -57,9 +58,9 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.LimitInputStream; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * FSImageLoader loads fsimage and provide methods to return JSON formatted @@ -452,8 +453,7 @@ String getXAttrs(String path, List names, String encoder) } if (!found) { - throw new IOException( - "At least one of the attributes provided was not found."); + throw new XAttrNotFoundException(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java index 25a7bbe40bd0a..54b183b7b6965 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java @@ -33,7 +33,7 @@ import org.apache.hadoop.util.LimitInputStream; import org.apache.hadoop.util.StringUtils; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * This is the tool for analyzing file sizes in the namespace image. 
In order to diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java index 7eccf75dc65e5..6a2049acb4b54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; -import com.google.common.base.Preconditions; +import org.apache.hadoop.util.Preconditions; import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.ACL_ENTRY_NAME_MASK; import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.ACL_ENTRY_NAME_OFFSET; import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.ACL_ENTRY_SCOPE_OFFSET; @@ -49,13 +49,14 @@ import java.util.LinkedList; import java.util.Map; -import com.google.common.io.CountingOutputStream; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.io.CountingOutputStream; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import org.apache.hadoop.thirdparty.protobuf.ByteString; import org.apache.hadoop.thirdparty.protobuf.TextFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.permission.AclEntry; @@ -147,6 +148,8 @@ private OfflineImageReconstructor(CountingOutputStream out, InputStreamReader reader) throws XMLStreamException { this.out = out; XMLInputFactory factory = XMLInputFactory.newInstance(); + factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + 
factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); this.events = factory.createXMLEventReader(reader); this.sections = new HashMap<>(); this.sections.put(NameSectionProcessor.NAME, new NameSectionProcessor()); @@ -1761,6 +1764,10 @@ private void processXml() throws Exception { XMLEvent ev = expectTag("[section header]", true); if (ev.getEventType() == XMLStreamConstants.END_ELEMENT) { if (ev.asEndElement().getName().getLocalPart().equals("fsimage")) { + if(unprocessedSections.size() == 1 && unprocessedSections.contains + (SnapshotDiffSectionProcessor.NAME)){ + break; + } throw new IOException("FSImage XML ended prematurely, without " + "including section(s) " + StringUtils.join(", ", unprocessedSections)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruptionDetector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruptionDetector.java index 7e5eb3177f95b..737e7384b9a7c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruptionDetector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruptionDetector.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.server.namenode.FsImageProto; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java index 1bd7d97f8a155..ccab7b0c6957f 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; @@ -319,10 +319,10 @@ private static class LevelDBStore implements Closeable { @Override public void close() throws IOException { if (batch != null) { - IOUtils.cleanup(null, batch); + IOUtils.cleanupWithLogger(null, batch); batch = null; } - IOUtils.cleanup(null, db); + IOUtils.cleanupWithLogger(null, db); db = null; } @@ -388,13 +388,13 @@ protected boolean removeEldestEntry(Map.Entry entry) { dirMap = new LevelDBStore(new File(dbDir, "dirMap")); } catch (IOException e) { LOG.error("Failed to open LevelDBs", e); - IOUtils.cleanup(null, this); + IOUtils.cleanupWithLogger(null, this); } } @Override public void close() throws IOException { - IOUtils.cleanup(null, dirChildMap, dirMap); + IOUtils.cleanupWithLogger(null, dirChildMap, dirMap); dirChildMap = null; dirMap = null; } @@ -515,7 +515,7 @@ public long getParentId(long id) throws IOException { @Override public void close() throws IOException { out.flush(); - IOUtils.cleanup(null, metadataMap); + IOUtils.cleanupWithLogger(null, metadataMap); } void append(StringBuffer buffer, int field) { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java index 2c4a8946e8bd2..920dfdc6dc933 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java @@ -68,8 +68,8 @@ import org.apache.hadoop.hdfs.util.XMLUtils; import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.util.LimitInputStream; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.util.VersionInfo; import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_MASK; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java index 7f837fd32d6d9..f732bd6a7f542 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java @@ -22,7 +22,7 @@ import java.nio.file.Files; import java.nio.file.Paths; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; /** * TextWriterImageProcessor mixes in the ability for ImageVisitor diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java index 767ecd809e048..29ac759576c28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import io.netty.bootstrap.ServerBootstrap; import io.netty.channel.Channel; import io.netty.channel.ChannelInitializer; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/snapshot/LsSnapshottableDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/snapshot/LsSnapshottableDir.java index 2f030b4734ce9..640065bb5c663 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/snapshot/LsSnapshottableDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/snapshot/LsSnapshottableDir.java @@ -20,7 +20,6 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/BestEffortLongFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/BestEffortLongFile.java index 2fdf5472bee03..375630db6bdc8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/BestEffortLongFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/BestEffortLongFile.java @@ -28,8 
+28,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.IOUtils; -import com.google.common.io.Files; -import com.google.common.primitives.Longs; +import org.apache.hadoop.thirdparty.com.google.common.io.Files; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Longs; /** * Class that represents a file on disk which stores a single long diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/Diff.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/Diff.java index 188537b756f5d..21a7bb58750b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/Diff.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/Diff.java @@ -17,13 +17,13 @@ */ package org.apache.hadoop.hdfs.util; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; + import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; -import com.google.common.base.Preconditions; - /** * The difference between the current state and a previous state of a list. 
* @@ -166,6 +166,17 @@ public E setCreated(int index, E element) { return old; } + public boolean removeCreated(final E element) { + if (created != null) { + final int i = search(created, element.getKey()); + if (i >= 0 && created.get(i) == element) { + created.remove(i); + return true; + } + } + return false; + } + public void clearCreated() { if (created != null) { created.clear(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java index dded84800ec33..880bf6edb2516 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.util; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.ArrayUtils; import java.util.Arrays; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumDoubles.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumDoubles.java index 126070aa016a1..fee687edf54b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumDoubles.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumDoubles.java @@ -19,7 +19,7 @@ import java.util.Arrays; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * Similar to {@link EnumCounters} except that the value type is double. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/FoldedTreeSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/FoldedTreeSet.java deleted file mode 100644 index 1c6be1d629830..0000000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/FoldedTreeSet.java +++ /dev/null @@ -1,1285 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.util; - -import org.apache.hadoop.util.Time; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.ConcurrentModificationException; -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.Objects; -import java.util.SortedSet; - -/** - * A memory efficient implementation of RBTree. Instead of having a Node for - * each entry each node contains an array holding 64 entries. - * - * Based on the Apache Harmony folded TreeMap. 
- * - * @param Entry type - */ -public class FoldedTreeSet implements SortedSet { - - private static final boolean RED = true; - private static final boolean BLACK = false; - - private final Comparator comparator; - private Node root; - private int size; - private int nodeCount; - private int modCount; - private Node cachedNode; - - /** - * Internal tree node that holds a sorted array of entries. - * - * @param type of the elements - */ - private static class Node { - - private static final int NODE_SIZE = 64; - - // Tree structure - private Node parent, left, right; - private boolean color; - private final E[] entries; - private int leftIndex = 0, rightIndex = -1; - private int size = 0; - // List for fast ordered iteration - private Node prev, next; - - @SuppressWarnings("unchecked") - public Node() { - entries = (E[]) new Object[NODE_SIZE]; - } - - public boolean isRed() { - return color == RED; - } - - public boolean isBlack() { - return color == BLACK; - } - - public Node getLeftMostNode() { - Node node = this; - while (node.left != null) { - node = node.left; - } - return node; - } - - public Node getRightMostNode() { - Node node = this; - while (node.right != null) { - node = node.right; - } - return node; - } - - public void addEntryLeft(E entry) { - assert rightIndex < entries.length; - assert !isFull(); - - if (leftIndex == 0) { - rightIndex++; - // Shift entries right/up - System.arraycopy(entries, 0, entries, 1, size); - } else { - leftIndex--; - } - size++; - entries[leftIndex] = entry; - } - - public void addEntryRight(E entry) { - assert !isFull(); - - if (rightIndex == NODE_SIZE - 1) { - assert leftIndex > 0; - // Shift entries left/down - System.arraycopy(entries, leftIndex, entries, --leftIndex, size); - } else { - rightIndex++; - } - size++; - entries[rightIndex] = entry; - } - - public void addEntryAt(E entry, int index) { - assert !isFull(); - - if (leftIndex == 0 || ((rightIndex != Node.NODE_SIZE - 1) - && (rightIndex - index <= index - 
leftIndex))) { - rightIndex++; - System.arraycopy(entries, index, - entries, index + 1, rightIndex - index); - entries[index] = entry; - } else { - int newLeftIndex = leftIndex - 1; - System.arraycopy(entries, leftIndex, - entries, newLeftIndex, index - leftIndex); - leftIndex = newLeftIndex; - entries[index - 1] = entry; - } - size++; - } - - public void addEntriesLeft(Node from) { - leftIndex -= from.size; - size += from.size; - System.arraycopy(from.entries, from.leftIndex, - entries, leftIndex, from.size); - } - - public void addEntriesRight(Node from) { - System.arraycopy(from.entries, from.leftIndex, - entries, rightIndex + 1, from.size); - size += from.size; - rightIndex += from.size; - } - - public E insertEntrySlideLeft(E entry, int index) { - E pushedEntry = entries[0]; - System.arraycopy(entries, 1, entries, 0, index - 1); - entries[index - 1] = entry; - return pushedEntry; - } - - public E insertEntrySlideRight(E entry, int index) { - E movedEntry = entries[rightIndex]; - System.arraycopy(entries, index, entries, index + 1, rightIndex - index); - entries[index] = entry; - return movedEntry; - } - - public E removeEntryLeft() { - assert !isEmpty(); - E entry = entries[leftIndex]; - entries[leftIndex] = null; - leftIndex++; - size--; - return entry; - } - - public E removeEntryRight() { - assert !isEmpty(); - E entry = entries[rightIndex]; - entries[rightIndex] = null; - rightIndex--; - size--; - return entry; - } - - public E removeEntryAt(int index) { - assert !isEmpty(); - - E entry = entries[index]; - int rightSize = rightIndex - index; - int leftSize = index - leftIndex; - if (rightSize <= leftSize) { - System.arraycopy(entries, index + 1, entries, index, rightSize); - entries[rightIndex] = null; - rightIndex--; - } else { - System.arraycopy(entries, leftIndex, entries, leftIndex + 1, leftSize); - entries[leftIndex] = null; - leftIndex++; - } - size--; - return entry; - } - - public boolean isFull() { - return size == NODE_SIZE; - } - - public 
boolean isEmpty() { - return size == 0; - } - - public void clear() { - if (leftIndex < rightIndex) { - Arrays.fill(entries, leftIndex, rightIndex + 1, null); - } - size = 0; - leftIndex = 0; - rightIndex = -1; - prev = null; - next = null; - parent = null; - left = null; - right = null; - color = BLACK; - } - } - - private static final class TreeSetIterator implements Iterator { - - private final FoldedTreeSet tree; - private int iteratorModCount; - private Node node; - private int index; - private E lastEntry; - private int lastIndex; - private Node lastNode; - - private TreeSetIterator(FoldedTreeSet tree) { - this.tree = tree; - this.iteratorModCount = tree.modCount; - if (!tree.isEmpty()) { - this.node = tree.root.getLeftMostNode(); - this.index = this.node.leftIndex; - } - } - - @Override - public boolean hasNext() { - checkForModification(); - return node != null; - } - - @Override - public E next() { - if (hasNext()) { - lastEntry = node.entries[index]; - lastIndex = index; - lastNode = node; - if (++index > node.rightIndex) { - node = node.next; - if (node != null) { - index = node.leftIndex; - } - } - return lastEntry; - } else { - throw new NoSuchElementException("Iterator exhausted"); - } - } - - @Override - public void remove() { - if (lastEntry == null) { - throw new IllegalStateException("No current element"); - } - checkForModification(); - if (lastNode.size == 1) { - // Safe to remove lastNode, the iterator is on the next node - tree.deleteNode(lastNode); - } else if (lastNode.leftIndex == lastIndex) { - // Safe to remove leftmost entry, the iterator is on the next index - lastNode.removeEntryLeft(); - } else if (lastNode.rightIndex == lastIndex) { - // Safe to remove the rightmost entry, the iterator is on the next node - lastNode.removeEntryRight(); - } else { - // Remove entry in the middle of the array - assert node == lastNode; - int oldRIndex = lastNode.rightIndex; - lastNode.removeEntryAt(lastIndex); - if (oldRIndex > lastNode.rightIndex) { - 
// Entries moved to the left in the array so index must be reset - index = lastIndex; - } - } - lastEntry = null; - iteratorModCount++; - tree.modCount++; - tree.size--; - } - - private void checkForModification() { - if (iteratorModCount != tree.modCount) { - throw new ConcurrentModificationException("Tree has been modified " - + "outside of iterator"); - } - } - } - - /** - * Create a new TreeSet that uses the natural ordering of objects. The element - * type must implement Comparable. - */ - public FoldedTreeSet() { - this(null); - } - - /** - * Create a new TreeSet that orders the elements using the supplied - * Comparator. - * - * @param comparator Comparator able to compare elements of type E - */ - public FoldedTreeSet(Comparator comparator) { - this.comparator = comparator; - } - - private Node cachedOrNewNode(E entry) { - Node node = (cachedNode != null) ? cachedNode : new Node(); - cachedNode = null; - nodeCount++; - // Since BlockIDs are always increasing for new blocks it is best to - // add values on the left side to enable quicker inserts on the right - node.addEntryLeft(entry); - return node; - } - - private void cacheAndClear(Node node) { - if (cachedNode == null) { - node.clear(); - cachedNode = node; - } - } - - @Override - public Comparator comparator() { - return comparator; - } - - @Override - public SortedSet subSet(E fromElement, E toElement) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public SortedSet headSet(E toElement) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public SortedSet tailSet(E fromElement) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public E first() { - if (!isEmpty()) { - Node node = root.getLeftMostNode(); - return node.entries[node.leftIndex]; - } - return null; - } - - @Override - public E last() { - if (!isEmpty()) { - Node node = root.getRightMostNode(); - return node.entries[node.rightIndex]; 
- } - return null; - } - - @Override - public int size() { - return size; - } - - @Override - public boolean isEmpty() { - return root == null; - } - - /** - * Lookup and return a stored object using a user provided comparator. - * - * @param obj Lookup key - * @param cmp User provided Comparator. The comparator should expect that the - * proved obj will always be the first method parameter and any - * stored object will be the second parameter. - * - * @return A matching stored object or null if non is found - */ - public E get(Object obj, Comparator cmp) { - Objects.requireNonNull(obj); - - Node node = root; - while (node != null) { - E[] entries = node.entries; - - int leftIndex = node.leftIndex; - int result = compare(obj, entries[leftIndex], cmp); - if (result < 0) { - node = node.left; - } else if (result == 0) { - return entries[leftIndex]; - } else { - int rightIndex = node.rightIndex; - if (leftIndex != rightIndex) { - result = compare(obj, entries[rightIndex], cmp); - } - if (result == 0) { - return entries[rightIndex]; - } else if (result > 0) { - node = node.right; - } else { - int low = leftIndex + 1; - int high = rightIndex - 1; - while (low <= high) { - int mid = (low + high) >>> 1; - result = compare(obj, entries[mid], cmp); - if (result > 0) { - low = mid + 1; - } else if (result < 0) { - high = mid - 1; - } else { - return entries[mid]; - } - } - return null; - } - } - } - return null; - } - - /** - * Lookup and return a stored object. - * - * @param entry Lookup entry - * - * @return A matching stored object or null if non is found - */ - public E get(E entry) { - return get(entry, comparator); - } - - @Override - @SuppressWarnings("unchecked") - public boolean contains(Object obj) { - return get((E) obj) != null; - } - - @SuppressWarnings({"unchecked", "rawtypes"}) - private static int compare(Object lookup, Object stored, Comparator cmp) { - return cmp != null - ? 
cmp.compare(lookup, stored) - : ((Comparable) lookup).compareTo(stored); - } - - @Override - public Iterator iterator() { - return new TreeSetIterator<>(this); - } - - @Override - public Object[] toArray() { - Object[] objects = new Object[size]; - if (!isEmpty()) { - int pos = 0; - for (Node node = root.getLeftMostNode(); node != null; - pos += node.size, node = node.next) { - System.arraycopy(node.entries, node.leftIndex, objects, pos, node.size); - } - } - return objects; - } - - @Override - @SuppressWarnings("unchecked") - public T[] toArray(T[] a) { - T[] r = a.length >= size ? a - : (T[]) java.lang.reflect.Array - .newInstance(a.getClass().getComponentType(), size); - if (!isEmpty()) { - Node node = root.getLeftMostNode(); - int pos = 0; - while (node != null) { - System.arraycopy(node.entries, node.leftIndex, r, pos, node.size); - pos += node.size; - node = node.next; - } - if (r.length > pos) { - r[pos] = null; - } - } else if (a.length > 0) { - a[0] = null; - } - return r; - } - - /** - * Add or replace an entry in the TreeSet. - * - * @param entry Entry to add or replace/update. - * - * @return the previous entry, or null if this set did not already contain the - * specified entry - */ - public E addOrReplace(E entry) { - return add(entry, true); - } - - @Override - public boolean add(E entry) { - return add(entry, false) == null; - } - - /** - * Internal add method to add a entry to the set. 
- * - * @param entry Entry to add - * @param replace Should the entry replace an old entry which is equal to the - * new entry - * - * @return null if entry added and didn't exist or the previous value (which - * might not have been overwritten depending on the replace parameter) - */ - private E add(E entry, boolean replace) { - Objects.requireNonNull(entry); - - // Empty tree - if (isEmpty()) { - root = cachedOrNewNode(entry); - size = 1; - modCount++; - return null; - } - - // Compare right entry first since inserts of comperatively larger entries - // is more likely to be inserted. BlockID is always increasing in HDFS. - Node node = root; - Node prevNode = null; - int result = 0; - while (node != null) { - prevNode = node; - E[] entries = node.entries; - int rightIndex = node.rightIndex; - result = compare(entry, entries[rightIndex], comparator); - if (result > 0) { - node = node.right; - } else if (result == 0) { - E prevEntry = entries[rightIndex]; - if (replace) { - entries[rightIndex] = entry; - } - return prevEntry; - } else { - int leftIndex = node.leftIndex; - if (leftIndex != rightIndex) { - result = compare(entry, entries[leftIndex], comparator); - } - if (result < 0) { - node = node.left; - } else if (result == 0) { - E prevEntry = entries[leftIndex]; - if (replace) { - entries[leftIndex] = entry; - } - return prevEntry; - } else { - // Insert in this node - int low = leftIndex + 1, high = rightIndex - 1; - while (low <= high) { - int mid = (low + high) >>> 1; - result = compare(entry, entries[mid], comparator); - if (result > 0) { - low = mid + 1; - } else if (result == 0) { - E prevEntry = entries[mid]; - if (replace) { - entries[mid] = entry; - } - return prevEntry; - } else { - high = mid - 1; - } - } - addElementInNode(node, entry, low); - return null; - } - } - } - - assert prevNode != null; - size++; - modCount++; - if (!prevNode.isFull()) { - // The previous node still has space - if (result < 0) { - prevNode.addEntryLeft(entry); - } else { - 
prevNode.addEntryRight(entry); - } - } else if (result < 0) { - // The previous node is full, add to adjencent node or a new node - if (prevNode.prev != null && !prevNode.prev.isFull()) { - prevNode.prev.addEntryRight(entry); - } else { - attachNodeLeft(prevNode, cachedOrNewNode(entry)); - } - } else if (prevNode.next != null && !prevNode.next.isFull()) { - prevNode.next.addEntryLeft(entry); - } else { - attachNodeRight(prevNode, cachedOrNewNode(entry)); - } - return null; - } - - /** - * Insert an entry last in the sorted tree. The entry must be the considered - * larger than the currently largest entry in the set when doing - * current.compareTo(entry), if entry is not the largest entry the method will - * fall back on the regular add method. - * - * @param entry entry to add - * - * @return True if added, false if already existed in the set - */ - public boolean addSortedLast(E entry) { - - if (isEmpty()) { - root = cachedOrNewNode(entry); - size = 1; - modCount++; - return true; - } else { - Node node = root.getRightMostNode(); - if (compare(node.entries[node.rightIndex], entry, comparator) < 0) { - size++; - modCount++; - if (!node.isFull()) { - node.addEntryRight(entry); - } else { - attachNodeRight(node, cachedOrNewNode(entry)); - } - return true; - } - } - - // Fallback on normal add if entry is unsorted - return add(entry); - } - - private void addElementInNode(Node node, E entry, int index) { - size++; - modCount++; - - if (!node.isFull()) { - node.addEntryAt(entry, index); - } else { - // Node is full, insert and push old entry - Node prev = node.prev; - Node next = node.next; - if (prev == null) { - // First check if we have space in the the next node - if (next != null && !next.isFull()) { - E movedEntry = node.insertEntrySlideRight(entry, index); - next.addEntryLeft(movedEntry); - } else { - // Since prev is null the left child must be null - assert node.left == null; - E movedEntry = node.insertEntrySlideLeft(entry, index); - Node newNode = 
cachedOrNewNode(movedEntry); - attachNodeLeft(node, newNode); - } - } else if (!prev.isFull()) { - // Prev has space - E movedEntry = node.insertEntrySlideLeft(entry, index); - prev.addEntryRight(movedEntry); - } else if (next == null) { - // Since next is null the right child must be null - assert node.right == null; - E movedEntry = node.insertEntrySlideRight(entry, index); - Node newNode = cachedOrNewNode(movedEntry); - attachNodeRight(node, newNode); - } else if (!next.isFull()) { - // Next has space - E movedEntry = node.insertEntrySlideRight(entry, index); - next.addEntryLeft(movedEntry); - } else { - // Both prev and next nodes exist and are full - E movedEntry = node.insertEntrySlideRight(entry, index); - Node newNode = cachedOrNewNode(movedEntry); - if (node.right == null) { - attachNodeRight(node, newNode); - } else { - // Since our right node exist, - // the left node of our next node must be empty - assert next.left == null; - attachNodeLeft(next, newNode); - } - } - } - } - - private void attachNodeLeft(Node node, Node newNode) { - newNode.parent = node; - node.left = newNode; - - newNode.next = node; - newNode.prev = node.prev; - if (newNode.prev != null) { - newNode.prev.next = newNode; - } - node.prev = newNode; - balanceInsert(newNode); - } - - private void attachNodeRight(Node node, Node newNode) { - newNode.parent = node; - node.right = newNode; - - newNode.prev = node; - newNode.next = node.next; - if (newNode.next != null) { - newNode.next.prev = newNode; - } - node.next = newNode; - balanceInsert(newNode); - } - - /** - * Balance the RB Tree after insert. 
- * - * @param node Added node - */ - private void balanceInsert(Node node) { - node.color = RED; - - while (node != root && node.parent.isRed()) { - if (node.parent == node.parent.parent.left) { - Node uncle = node.parent.parent.right; - if (uncle != null && uncle.isRed()) { - node.parent.color = BLACK; - uncle.color = BLACK; - node.parent.parent.color = RED; - node = node.parent.parent; - } else { - if (node == node.parent.right) { - node = node.parent; - rotateLeft(node); - } - node.parent.color = BLACK; - node.parent.parent.color = RED; - rotateRight(node.parent.parent); - } - } else { - Node uncle = node.parent.parent.left; - if (uncle != null && uncle.isRed()) { - node.parent.color = BLACK; - uncle.color = BLACK; - node.parent.parent.color = RED; - node = node.parent.parent; - } else { - if (node == node.parent.left) { - node = node.parent; - rotateRight(node); - } - node.parent.color = BLACK; - node.parent.parent.color = RED; - rotateLeft(node.parent.parent); - } - } - } - root.color = BLACK; - } - - private void rotateRight(Node node) { - Node pivot = node.left; - node.left = pivot.right; - if (pivot.right != null) { - pivot.right.parent = node; - } - pivot.parent = node.parent; - if (node.parent == null) { - root = pivot; - } else if (node == node.parent.right) { - node.parent.right = pivot; - } else { - node.parent.left = pivot; - } - pivot.right = node; - node.parent = pivot; - } - - private void rotateLeft(Node node) { - Node pivot = node.right; - node.right = pivot.left; - if (pivot.left != null) { - pivot.left.parent = node; - } - pivot.parent = node.parent; - if (node.parent == null) { - root = pivot; - } else if (node == node.parent.left) { - node.parent.left = pivot; - } else { - node.parent.right = pivot; - } - pivot.left = node; - node.parent = pivot; - } - - /** - * Remove object using a provided comparator, and return the removed entry. - * - * @param obj Lookup entry - * @param cmp User provided Comparator. 
The comparator should expect that the - * proved obj will always be the first method parameter and any - * stored object will be the second parameter. - * - * @return The removed entry or null if not found - */ - public E removeAndGet(Object obj, Comparator cmp) { - Objects.requireNonNull(obj); - - if (!isEmpty()) { - Node node = root; - while (node != null) { - E[] entries = node.entries; - int leftIndex = node.leftIndex; - int result = compare(obj, entries[leftIndex], cmp); - if (result < 0) { - node = node.left; - } else if (result == 0) { - return removeElementLeft(node); - } else { - int rightIndex = node.rightIndex; - if (leftIndex != rightIndex) { - result = compare(obj, entries[rightIndex], cmp); - } - if (result == 0) { - return removeElementRight(node); - } else if (result > 0) { - node = node.right; - } else { - int low = leftIndex + 1, high = rightIndex - 1; - while (low <= high) { - int mid = (low + high) >>> 1; - result = compare(obj, entries[mid], cmp); - if (result > 0) { - low = mid + 1; - } else if (result == 0) { - return removeElementAt(node, mid); - } else { - high = mid - 1; - } - } - return null; - } - } - } - } - return null; - } - - /** - * Remove object and return the removed entry. - * - * @param obj Lookup entry - * - * @return The removed entry or null if not found - */ - public E removeAndGet(Object obj) { - return removeAndGet(obj, comparator); - } - - /** - * Remove object using a provided comparator. - * - * @param obj Lookup entry - * @param cmp User provided Comparator. The comparator should expect that the - * proved obj will always be the first method parameter and any - * stored object will be the second parameter. 
- * - * @return True if found and removed, else false - */ - public boolean remove(Object obj, Comparator cmp) { - return removeAndGet(obj, cmp) != null; - } - - @Override - public boolean remove(Object obj) { - return removeAndGet(obj, comparator) != null; - } - - private E removeElementLeft(Node node) { - modCount++; - size--; - E entry = node.removeEntryLeft(); - - if (node.isEmpty()) { - deleteNode(node); - } else if (node.prev != null - && (Node.NODE_SIZE - 1 - node.prev.rightIndex) >= node.size) { - // Remaining entries fit in the prev node, move them and delete this node - node.prev.addEntriesRight(node); - deleteNode(node); - } else if (node.next != null && node.next.leftIndex >= node.size) { - // Remaining entries fit in the next node, move them and delete this node - node.next.addEntriesLeft(node); - deleteNode(node); - } else if (node.prev != null && node.prev.size < node.leftIndex) { - // Entries in prev node will fit in this node, move them and delete prev - node.addEntriesLeft(node.prev); - deleteNode(node.prev); - } - - return entry; - } - - private E removeElementRight(Node node) { - modCount++; - size--; - E entry = node.removeEntryRight(); - - if (node.isEmpty()) { - deleteNode(node); - } else if (node.prev != null - && (Node.NODE_SIZE - 1 - node.prev.rightIndex) >= node.size) { - // Remaining entries fit in the prev node, move them and delete this node - node.prev.addEntriesRight(node); - deleteNode(node); - } else if (node.next != null && node.next.leftIndex >= node.size) { - // Remaining entries fit in the next node, move them and delete this node - node.next.addEntriesLeft(node); - deleteNode(node); - } else if (node.next != null - && node.next.size < (Node.NODE_SIZE - 1 - node.rightIndex)) { - // Entries in next node will fit in this node, move them and delete next - node.addEntriesRight(node.next); - deleteNode(node.next); - } - - return entry; - } - - private E removeElementAt(Node node, int index) { - modCount++; - size--; - E entry = 
node.removeEntryAt(index); - - if (node.prev != null - && (Node.NODE_SIZE - 1 - node.prev.rightIndex) >= node.size) { - // Remaining entries fit in the prev node, move them and delete this node - node.prev.addEntriesRight(node); - deleteNode(node); - } else if (node.next != null && (node.next.leftIndex) >= node.size) { - // Remaining entries fit in the next node, move them and delete this node - node.next.addEntriesLeft(node); - deleteNode(node); - } else if (node.prev != null && node.prev.size < node.leftIndex) { - // Entries in prev node will fit in this node, move them and delete prev - node.addEntriesLeft(node.prev); - deleteNode(node.prev); - } else if (node.next != null - && node.next.size < (Node.NODE_SIZE - 1 - node.rightIndex)) { - // Entries in next node will fit in this node, move them and delete next - node.addEntriesRight(node.next); - deleteNode(node.next); - } - - return entry; - } - - /** - * Delete the node and ensure the tree is balanced. - * - * @param node node to delete - */ - private void deleteNode(final Node node) { - if (node.right == null) { - if (node.left != null) { - attachToParent(node, node.left); - } else { - attachNullToParent(node); - } - } else if (node.left == null) { - attachToParent(node, node.right); - } else { - // node.left != null && node.right != null - // node.next should replace node in tree - // node.next != null guaranteed since node.left != null - // node.next.left == null since node.next.prev is node - // node.next.right may be null or non-null - Node toMoveUp = node.next; - if (toMoveUp.right == null) { - attachNullToParent(toMoveUp); - } else { - attachToParent(toMoveUp, toMoveUp.right); - } - toMoveUp.left = node.left; - if (toMoveUp.left != null) { - toMoveUp.left.parent = toMoveUp; - } - toMoveUp.right = node.right; - if (toMoveUp.right != null) { - toMoveUp.right.parent = toMoveUp; - } - attachToParentNoBalance(node, toMoveUp); - toMoveUp.color = node.color; - } - - // Remove node from ordered list of nodes - 
if (node.prev != null) { - node.prev.next = node.next; - } - if (node.next != null) { - node.next.prev = node.prev; - } - - nodeCount--; - cacheAndClear(node); - } - - private void attachToParentNoBalance(Node toDelete, Node toConnect) { - Node parent = toDelete.parent; - toConnect.parent = parent; - if (parent == null) { - root = toConnect; - } else if (toDelete == parent.left) { - parent.left = toConnect; - } else { - parent.right = toConnect; - } - } - - private void attachToParent(Node toDelete, Node toConnect) { - attachToParentNoBalance(toDelete, toConnect); - if (toDelete.isBlack()) { - balanceDelete(toConnect); - } - } - - private void attachNullToParent(Node toDelete) { - Node parent = toDelete.parent; - if (parent == null) { - root = null; - } else { - if (toDelete == parent.left) { - parent.left = null; - } else { - parent.right = null; - } - if (toDelete.isBlack()) { - balanceDelete(parent); - } - } - } - - /** - * Balance tree after removing a node. - * - * @param node Node to balance after deleting another node - */ - private void balanceDelete(Node node) { - while (node != root && node.isBlack()) { - if (node == node.parent.left) { - Node sibling = node.parent.right; - if (sibling == null) { - node = node.parent; - continue; - } - if (sibling.isRed()) { - sibling.color = BLACK; - node.parent.color = RED; - rotateLeft(node.parent); - sibling = node.parent.right; - if (sibling == null) { - node = node.parent; - continue; - } - } - if ((sibling.left == null || !sibling.left.isRed()) - && (sibling.right == null || !sibling.right.isRed())) { - sibling.color = RED; - node = node.parent; - } else { - if (sibling.right == null || !sibling.right.isRed()) { - sibling.left.color = BLACK; - sibling.color = RED; - rotateRight(sibling); - sibling = node.parent.right; - } - sibling.color = node.parent.color; - node.parent.color = BLACK; - sibling.right.color = BLACK; - rotateLeft(node.parent); - node = root; - } - } else { - Node sibling = node.parent.left; - if 
(sibling == null) { - node = node.parent; - continue; - } - if (sibling.isRed()) { - sibling.color = BLACK; - node.parent.color = RED; - rotateRight(node.parent); - sibling = node.parent.left; - if (sibling == null) { - node = node.parent; - continue; - } - } - if ((sibling.left == null || sibling.left.isBlack()) - && (sibling.right == null || sibling.right.isBlack())) { - sibling.color = RED; - node = node.parent; - } else { - if (sibling.left == null || sibling.left.isBlack()) { - sibling.right.color = BLACK; - sibling.color = RED; - rotateLeft(sibling); - sibling = node.parent.left; - } - sibling.color = node.parent.color; - node.parent.color = BLACK; - sibling.left.color = BLACK; - rotateRight(node.parent); - node = root; - } - } - } - node.color = BLACK; - } - - @Override - public boolean containsAll(Collection c) { - for (Object entry : c) { - if (!contains(entry)) { - return false; - } - } - return true; - } - - @Override - public boolean addAll(Collection c) { - boolean modified = false; - for (E entry : c) { - if (add(entry)) { - modified = true; - } - } - return modified; - } - - @Override - public boolean retainAll(Collection c) { - boolean modified = false; - Iterator it = iterator(); - while (it.hasNext()) { - if (!c.contains(it.next())) { - it.remove(); - modified = true; - } - } - return modified; - } - - @Override - public boolean removeAll(Collection c) { - boolean modified = false; - for (Object entry : c) { - if (remove(entry)) { - modified = true; - } - } - return modified; - } - - @Override - public void clear() { - modCount++; - if (!isEmpty()) { - size = 0; - nodeCount = 0; - cacheAndClear(root); - root = null; - } - } - - /** - * Returns the current size divided by the capacity of the tree. A value - * between 0.0 and 1.0, where 1.0 means that every allocated node in the tree - * is completely full. 
- * - * An empty set will return 1.0 - * - * @return the fill ratio of the tree - */ - public double fillRatio() { - if (nodeCount > 1) { - // Count the last node as completely full since it can't be compacted - return (size + (Node.NODE_SIZE - root.getRightMostNode().size)) - / (double) (nodeCount * Node.NODE_SIZE); - } - return 1.0; - } - - /** - * Compact all the entries to use the fewest number of nodes in the tree. - * - * Having a compact tree minimize memory usage, but can cause inserts to get - * slower due to new nodes needs to be allocated as there is no space in any - * of the existing nodes anymore for entries added in the middle of the set. - * - * Useful to do to reduce memory consumption and if the tree is know to not - * change after compaction or mainly added to at either extreme. - * - * @param timeout Maximum time to spend compacting the tree set in - * milliseconds. - * - * @return true if compaction completed, false if aborted - */ - public boolean compact(long timeout) { - - if (!isEmpty()) { - long start = Time.monotonicNow(); - Node node = root.getLeftMostNode(); - while (node != null) { - if (node.prev != null && !node.prev.isFull()) { - Node prev = node.prev; - int count = Math.min(Node.NODE_SIZE - prev.size, node.size); - System.arraycopy(node.entries, node.leftIndex, - prev.entries, prev.rightIndex + 1, count); - node.leftIndex += count; - node.size -= count; - prev.rightIndex += count; - prev.size += count; - } - if (node.isEmpty()) { - Node temp = node.next; - deleteNode(node); - node = temp; - continue; - } else if (!node.isFull()) { - if (node.leftIndex != 0) { - System.arraycopy(node.entries, node.leftIndex, - node.entries, 0, node.size); - Arrays.fill(node.entries, node.size, node.rightIndex + 1, null); - node.leftIndex = 0; - node.rightIndex = node.size - 1; - } - } - node = node.next; - - if (Time.monotonicNow() - start > timeout) { - return false; - } - } - } - - return true; - } -} diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java index b541b14f64924..2bc63ec77eb29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java @@ -35,7 +35,7 @@ import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.util.StringUtils; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; /** * Static functions for dealing with files of the same format diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java index a94d7edff08b3..78834e0161a4f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java @@ -28,7 +28,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.IOUtils; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; /** * Class that represents a file on disk which persistently stores diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ReferenceCountMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ReferenceCountMap.java index 5b29c4308b8bb..6c40b29c0d812 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ReferenceCountMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/ReferenceCountMap.java @@ -17,14 +17,14 @@ */ package org.apache.hadoop.hdfs.util; -import java.util.HashMap; 
import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.ImmutableList; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; /** * Class for de-duplication of instances.
      @@ -37,7 +37,7 @@ @InterfaceStability.Evolving public class ReferenceCountMap { - private Map referenceMap = new HashMap(); + private Map referenceMap = new ConcurrentHashMap<>(); /** * Add the reference. If the instance already present, just increase the @@ -47,10 +47,9 @@ public class ReferenceCountMap { * @return Referenced instance */ public E put(E key) { - E value = referenceMap.get(key); + E value = referenceMap.putIfAbsent(key, key); if (value == null) { value = key; - referenceMap.put(key, value); } value.incrementAndGetRefCount(); return value; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index bf5568bc3e563..ceddfca2ced09 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.web; -import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileChecksum; @@ -40,7 +40,7 @@ import org.apache.hadoop.util.StringUtils; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.io.IOException; import java.util.*; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java index 2e0a17efa4b30..02b70c16be649 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java @@ -35,7 +35,7 @@ import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.security.token.SecretManager.InvalidToken; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.sun.jersey.api.ParamException; import com.sun.jersey.api.container.ContainerException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeLifelineProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeLifelineProtocol.proto index e10a8861e6153..9e436ea9b4d6a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeLifelineProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeLifelineProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto index 0e241301e0865..52d45ebcb3451 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ @@ -257,8 +257,9 @@ message BlockReportContextProto { // bypass rate-limiting. 
optional uint64 leaseId = 4 [ default = 0 ]; + // for compatibility, field number 5 should not be reused, see HDFS-13671. // True if the reported blocks are sorted by increasing block IDs - optional bool sorted = 5 [default = false]; + // optional bool sorted = 5 [default = false]; } /** @@ -406,6 +407,9 @@ message CommitBlockSynchronizationResponseProto { message SlowPeerReportProto { optional string dataNodeId = 1; optional double aggregateLatency = 2; + optional double median = 3; + optional double mad = 4; + optional double upperLimitLatency = 5; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto index 6d45a935ee4fd..ecff0e8bd8743 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto index 78607efddab30..e1488258692de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto index 47332a8817bdd..742fd82643df0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto index e73ca23e92f94..1c78423b40990 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ syntax = "proto2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto index 35c401e33e5e6..bfbfc6fd3367b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. 
- * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto index 97f5bcaf61f0b..61c5610283ac1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto index e366d1fb8d74b..b0a5a19f8ec7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto @@ -18,7 +18,7 @@ /** * These .proto interfaces are private and stable. - * Please see http://wiki.apache.org/hadoop/Compatibility + * Please see https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Compatibility.html * for what changes are allowed for a *stable* .proto interface. 
*/ syntax = "proto2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 913e47b75ff67..f8abb59daa082 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -313,6 +313,17 @@ + + dfs.namenode.redundancy.considerLoadByStorageType + false + + Decide if chooseTarget considers the target's load with respect to the + storage type. Typically to be used when datanodes contain homogenous + storage types. Irrelevent if dfs.namenode.redundancy.considerLoad is + false. + + + dfs.namenode.redundancy.considerLoad.factor 2.0 @@ -327,9 +338,23 @@ Decide if sort block locations considers the target's load or not when read. Turn off by default. + It is not possible to enable this feature along with dfs.namenode.read.considerStorageType as only one sort can be + enabled at a time. + + dfs.namenode.read.considerStorageType + false + + Decide if sort block locations considers the target's storage type or not when read. Any locations with the same + network distance are sorted in order of the storage speed, fastest first (RAM, SSD, Disk, Archive). This is + disabled by default, and the locations will be ordered randomly. + It is not possible to enable this feature along with dfs.namenode.read.considerLoad as only one sort can be + enabled at a time. + + + dfs.datanode.httpserver.filter.handlers org.apache.hadoop.hdfs.server.datanode.web.RestCsrfPreventionFilterHandler @@ -519,6 +544,17 @@ + + dfs.namenode.ip-proxy-users + + A comma separated list of user names that are allowed by the + NameNode to specify a different client IP address in the caller context. + This is used by Router-Based Federation (RBF) to provide the actual client's + IP address to the NameNode, which is critical to preserve data locality when + using RBF. 
If you are using RBF, add the user that runs the routers. + + + dfs.namenode.acls.enabled true @@ -764,7 +800,7 @@ dfs.blockreport.initialDelay - 0s + 0 Delay for first block report in seconds. Support multiple time unit suffix(case insensitive), as described in dfs.heartbeat.interval.If @@ -809,7 +845,7 @@ dfs.datanode.directoryscan.interval - 21600s + 21600 Interval in seconds for Datanode to scan data directories and reconcile the difference between blocks in memory and on the disk. Support multiple time unit suffix(case insensitive), as described @@ -848,7 +884,7 @@ dfs.heartbeat.interval - 3s + 3 Determines datanode heartbeat interval in seconds. Can use the following suffix (case insensitive): @@ -1061,7 +1097,7 @@ dfs.namenode.decommission.interval - 30s + 30 Namenode periodicity in seconds to check if decommission or maintenance is complete. Support multiple time unit suffix(case insensitive), as described in dfs.heartbeat.interval. @@ -1129,7 +1165,7 @@ dfs.namenode.redundancy.interval.seconds - 3s + 3 The periodicity in seconds with which the namenode computes low redundancy work for datanodes. Support multiple time unit suffix(case insensitive), as described in dfs.heartbeat.interval. @@ -1245,7 +1281,7 @@ dfs.namenode.checkpoint.period - 3600s + 3600 The number of seconds between two periodic checkpoints. Support multiple time unit suffix(case insensitive), as described @@ -1265,7 +1301,7 @@ dfs.namenode.checkpoint.check.period - 60s + 60 The SecondaryNameNode and CheckpointNode will poll the NameNode every 'dfs.namenode.checkpoint.check.period' seconds to query the number of uncheckpointed transactions. Support multiple time unit suffix(case insensitive), @@ -1414,13 +1450,13 @@ dfs.image.transfer.bandwidthPerSec - 0 + 52428800 Maximum bandwidth used for regular image transfers (instead of bootstrapping the standby namenode), in bytes per second. This can help keep normal namenode operations responsive during checkpointing. 
- A default value of 0 indicates that throttling is disabled. + A default value is 50mb per second. The maximum bandwidth used for bootstrapping standby namenode is configured with dfs.image.transfer-bootstrap-standby.bandwidthPerSec. Support multiple size unit suffix(case insensitive), as described @@ -1566,12 +1602,31 @@ dfs.block.scanner.volume.bytes.per.second 1048576 - If this is 0, the DataNode's block scanner will be disabled. If this + If this is configured less than or equal to zero, the DataNode's block scanner will be disabled. If this is positive, this is the number of bytes per second that the DataNode's block scanner will try to scan from each volume. + + dfs.block.scanner.skip.recent.accessed + false + + If this is true, scanner will check the access time of block file to avoid + scanning blocks accessed during recent scan peroid, reducing disk IO. + This feature will not work if the DataNode volume has noatime mount option. + + + + + dfs.block.scanner.volume.join.timeout.ms + 5000 + + The amount of time in milliseconds that the BlockScanner times out waiting + for the VolumeScanner thread to join during a shutdown call. + + + dfs.datanode.readahead.bytes 4194304 @@ -1703,7 +1758,7 @@ dfs.client.datanode-restart.timeout - 30s + 30 Expert only. The time to wait, in seconds, from reception of an datanode shutdown notification for quick restart, until declaring @@ -1773,7 +1828,7 @@ dfs.ha.log-roll.period - 120s + 120 How often, in seconds, the StandbyNode should ask the active to roll edit logs. Since the StandbyNode only reads from finalized @@ -1788,7 +1843,7 @@ dfs.ha.tail-edits.period - 60s + 60 How often, the StandbyNode and ObserverNode should check if there are new edit log entries ready to be consumed. This is the minimum period between @@ -2044,6 +2099,16 @@ + + dfs.namenode.avoid.read.slow.datanode + false + + Indicate whether or not to avoid reading from "slow" datanodes. 
+ Slow datanodes will be moved to the end of the node list returned + for reading. + + + dfs.namenode.avoid.write.stale.datanode false @@ -2276,6 +2341,31 @@ + + dfs.datanode.min.outlier.detection.nodes + 10 + + Minimum number of nodes to run outlier detection. + + + + + dfs.datanode.slowpeer.low.threshold.ms + 5 + + Threshold in milliseconds below which a DataNode is definitely not slow. + + + + + dfs.datanode.max.nodes.to.report + 5 + + Number of nodes to include in JSON report. We will return nodes with + the highest number of votes from peers. + + + dfs.datanode.outliers.report.interval 30m @@ -2289,6 +2379,36 @@ + + dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled + false + + If this is set to true, we will filter out slow nodes + when choosing targets for blocks. + + + + + dfs.namenode.max.slowpeer.collect.nodes + 5 + + How many slow nodes we will collect for filtering out + when choosing targets for blocks. + + It is ignored if dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled is false. + + + + + dfs.namenode.slowpeer.collect.interval + 30m + + Interval at which the slow peer trackers runs in the background to collect slow peers. + + It is ignored if dfs.namenode.block-placement-policy.exclude-slow-nodes.enabled is false. + + + dfs.datanode.fileio.profiling.sampling.percentage 0 @@ -2300,6 +2420,40 @@ + + dfs.datanode.min.outlier.detection.disks + 5 + + Minimum number of disks to run outlier detection. + + + + + dfs.datanode.slowdisk.low.threshold.ms + 20 + + Threshold in milliseconds below which a disk is definitely not slow. + + + + + dfs.datanode.max.disks.to.report + 5 + + Number of disks to include in JSON report per operation. We will return + disks with the highest latency. + + + + + dfs.datanode.max.slowdisks.to.exclude + 0 + + The number of slow disks that needs to be excluded. By default, this parameter is set to 0, + which disables excluding slow disk when choosing volume. 
+ + + hadoop.user.group.metrics.percentiles.intervals @@ -2784,6 +2938,16 @@ + + dfs.datanode.fsdatasetasyncdisk.max.threads.per.volume + 4 + + The maximum number of threads per volume used to process async disk + operations on the datanode. These threads consume I/O and CPU at the + same time. This will affect normal data node operations. + + + dfs.cachereport.intervalMsec 10000 @@ -2978,6 +3142,15 @@ + + dfs.client.read.use.cache.priority + false + + If true, the cached replica of the datanode is preferred + else the replica closest to client is preferred. + + + dfs.block.local-path-access.user @@ -3078,26 +3251,18 @@ - dfs.client.deadnode.detection.deadnode.queue.max - 100 - - The max queue size of probing dead node. - - - - - dfs.client.deadnode.detection.suspectnode.queue.max - 1000 + dfs.client.deadnode.detection.probe.deadnode.threads + 10 - The max queue size of probing suspect node. + The maximum number of threads to use for probing dead node. - dfs.client.deadnode.detection.probe.deadnode.threads - 10 + dfs.client.deadnode.detection.idle.sleep.ms + 10000 - The maximum number of threads to use for probing dead node. + The sleep time of DeadNodeDetector per iteration. @@ -3149,6 +3314,25 @@ + + dfs.client.refresh.read-block-locations.register-automatically + true + + Whether to auto-register all DFSInputStreams for background LocatedBlock refreshes. + If false, user must manually register using DFSClient#addLocatedBlocksRefresh(DFSInputStream) + + + + + dfs.client.refresh.read-block-locations.threads + 5 + + Number of threads to use for refreshing LocatedBlocks of registered + DFSInputStreams. If a DFSClient opens many DFSInputStreams, increasing + this may help refresh them all in a timely manner. + + + dfs.namenode.lease-recheck-interval-ms 2000 @@ -3217,6 +3401,19 @@ + + dfs.datanode.lock.read.write.enabled + true + If this is true, the FsDataset lock will be a read write lock. If + it is false, all locks will be a write lock. 
+ Enabling this should give better datanode throughput, as many read only + functions can run concurrently under the read lock, when they would + previously have required the exclusive write lock. As the feature is + experimental, this switch can be used to disable the shared read lock, and + cause all lock acquisitions to use the exclusive write lock. + + + dfs.datanode.lock-reporting-threshold-ms 300 @@ -3242,7 +3439,7 @@ dfs.datanode.block.id.layout.upgrade.threads - 12 + 6 The number of threads to use when creating hard links from current to previous blocks during upgrade of a DataNode to block ID-based block layout (see HDFS-6482 for details on the layout). @@ -3575,13 +3772,23 @@ + + dfs.datanode.ec.reconstruction.validation + false + + Decide if datanode validates that EC reconstruction tasks reconstruct + target blocks correctly. When validation fails, reconstruction tasks + will fail and be retried by namenode. + + + dfs.namenode.quota.init-threads - 4 + 12 The number of concurrent threads to be used in quota initialization. The speed of quota initialization also affects the namenode fail-over latency. - If the size of name space is big, try increasing this. + If the size of name space is big, try increasing this to 16 or higher. @@ -3625,7 +3832,7 @@ dfs.datanode.bp-ready.timeout - 20s + 20 The maximum wait time for datanode to be ready before failing the received request. Setting this to 0 fails requests right away if the @@ -3783,7 +3990,7 @@ 10000,6,60000,10 Specify a policy of multiple linear random retry for WebHDFS client, - e.g. given pairs of number of retries and sleep time (n0, t0), (n1, t1), + e.g. given pairs of sleep time and number of retries (t0, n0), (t1, n1), ..., the first n0 retries sleep t0 milliseconds on average, the following n1 retries sleep t1 milliseconds on average, and so on. @@ -4040,6 +4247,16 @@ + + dfs.checksum.ec.socket-timeout + 3000 + + Default timeout value in milliseconds for computing the checksum of striped blocks. 
+ Recommended to set the same value between client and DNs in a cluster because mismatching + may cause exhausting handler threads. + + + dfs.client.block.write.locateFollowingBlock.retries 5 @@ -4139,6 +4356,15 @@ + + dfs.client.read.uri.cache.enabled + false + + If true, dfs client will use cache when creating URI based on host:port + to reduce the frequency of URI object creation. + + + dfs.client.read.short.circuit.replica.stale.threshold.ms 1800000 @@ -4155,6 +4381,16 @@ + + dfs.client.short.circuit.num + 1 + + Number of short-circuit caches. This setting should + be in the range 1 - 5. Lower values will result in lower CPU consumption; higher + values may speed up massive parallel reading files. + + + dfs.client.read.striped.threadpool.size 18 @@ -4203,7 +4439,10 @@ dfs.client.retry.policy.spec 10000,6,60000,10 - Set to pairs of timeouts and retries for DFSClient. + Specify a policy of multiple linear random retry for the DFS client, + e.g. given pairs of sleep time and number of retries (t0, n0), (t1, n1), + ..., the first n0 retries sleep t0 milliseconds on average, + the following n1 retries sleep t1 milliseconds on average, and so on. @@ -4396,6 +4635,24 @@ + + dfs.datanode.ec.reconstruct.read.bandwidthPerSec + 0 + + Specifies the maximum amount of bandwidth that the EC reconstruction can utilize for reading. + When the bandwidth value is zero, there is no limit. + + + + + dfs.datanode.ec.reconstruct.write.bandwidthPerSec + 0 + + Specifies the maximum amount of bandwidth that the EC reconstruction can utilize for writing. + When the bandwidth value is zero, there is no limit. + + + dfs.datanode.fsdataset.factory @@ -4579,6 +4836,15 @@ + + dfs.ha.allow.stale.reads + false + + If true, a NameNode in Standby state can process read request and the result + could be stale. + + + dfs.journalnode.edits.dir /tmp/hadoop/dfs/journalnode/ @@ -4747,6 +5013,27 @@ + + dfs.namenode.audit.log.async.blocking + true + + Only used when enables asynchronous audit log. 
Sets whether audit log async + appender should wait if there is no space available in the event buffer or + immediately return. Default value is true. + + + + + dfs.namenode.audit.log.async.buffer.size + 128 + + Only used when enables asynchronous audit log. Sets the number of audit + logs allowed in the event buffer before the calling thread is blocked + (if dfs.namenode.audit.log.async.blocking is true) or until logs are + summarized and discarded. Default value is 128. + + + dfs.namenode.audit.log.token.tracking.id false @@ -4755,6 +5042,14 @@ + + dfs.namenode.audit.log.with.remote.port + false + + If true, adds a port of RPC call to callerContext for all audit log events. + + + dfs.namenode.available-space-block-placement-policy.balanced-space-preference-fraction 0.6 @@ -4766,6 +5061,18 @@ + + dfs.namenode.available-space-block-placement-policy.balanced-space-tolerance + 5 + + Only used when the dfs.block.replicator.classname is set to + org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceBlockPlacementPolicy. + Special value between 0 and 20, inclusive. if the value is set beyond the scope, + this value will be set as 5 by default, Increases tolerance of + placing blocks on Datanodes with similar disk space used. + + + dfs.namenode.available-space-block-placement-policy.balance-local-node @@ -4778,6 +5085,30 @@ + + dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy.balanced-space-preference-fraction + 0.6 + + Only used when the dfs.block.replicator.classname is set to + org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceRackFaultTolerantBlockPlacementPolicy. + Special value between 0 and 1, noninclusive. Increases chance of + placing blocks on Datanodes with less disk space used. More the value near 1 + more are the chances of choosing the datanode with less percentage of data. + Similarly as the value moves near 0, the chances of choosing datanode with + high load increases as the value reaches near 0. 
+ + + + dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy.balanced-space-tolerance + 5 + + Only used when the dfs.block.replicator.classname is set to + org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceRackFaultTolerantBlockPlacementPolicy. + Special value between 0 and 20, inclusive. if the value is set beyond the scope, + this value will be set as 5 by default, Increases tolerance of + placing blocks on Datanodes with similar disk space used. + + dfs.namenode.backup.dnrpc-address @@ -4901,7 +5232,7 @@ dfs.namenode.replication.max-streams 2 - Hard limit for the number of highest-priority replication streams. + Hard limit for the number of replication streams other than those with highest-priority. @@ -4935,32 +5266,6 @@ - - dfs.namenode.storageinfo.defragment.timeout.ms - 4 - - Timeout value in ms for the StorageInfo compaction run. - - - - - dfs.namenode.storageinfo.defragment.interval.ms - 600000 - - The thread for checking the StorageInfo for defragmentation will - run periodically. The time between runs is determined by this - property. - - - - - dfs.namenode.storageinfo.defragment.ratio - 0.75 - - The defragmentation threshold for the StorageInfo. - - - dfs.namenode.snapshot.capture.openfiles false @@ -5861,4 +6166,31 @@ directories when permissions is enabled. Default value is false; + + + dfs.protected.subdirectories.enable + false + whether to protect the subdirectories of directories which + set on fs.protected.directories. + + + + + dfs.client.fsck.connect.timeout + 60000ms + + The amount of time the fsck client will wait to connect to the namenode + before timing out. + + + + + dfs.client.fsck.read.timeout + 60000ms + + The amount of time the fsck client will wait to read from the namenode + before timing out. If the namenode does not report progress more + frequently than this time, the client will give up waiting. 
+ + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html index 8341b7b84c16f..28cba0153c14d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html @@ -71,6 +71,7 @@ +
      Cluster ID:{ClusterId}
      Started:{DNStartedTimeInMillis|date_tostring}
      Version:{Version}
      {/dn} @@ -80,9 +81,11 @@

    Namenode AddressNamenode HA State Block Pool ID Actor StateLast HeartbeatLast Heartbeat SentLast Heartbeat Response Last Block Report Last Block Report Size (Max Size)
    {NamenodeAddress}{NamenodeHaState} {BlockPoolID} {ActorState} {LastHeartbeat}s{LastHeartbeatResponseTime}s {#helper_relative_time value="{LastBlockReport}"/} {maxBlockReportSize|fmt_bytes} ({maxDataLength|fmt_bytes})
    -{#ozone.enabled} - - - - - - - - - - - - {#ozone.SCMServers} - - - - - - - - {/ozone.SCMServers} -
    SCM AddressStatusVersionMissed countLast heartbeat
    {addressString}{state}{versionNumber}{missedCount}s{lastSuccessfulHeartbeat|elapsed|fmt_time}
    - - - - - - - - - - - - - {#ozone.LocationReport} - - - - - - - - {/ozone.LocationReport} -
    IDCapacityRemainingSCM usedfailed
    {id}{capacity|fmt_bytes}{remaining|fmt_bytes}{scmUsed|fmt_bytes}{failed}
    -{/ozone.enabled} - @@ -171,7 +130,7 @@ {/dn.VolumeInfo} - + @@ -180,4 +139,4 @@ - \ No newline at end of file + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html index 582420e171007..ad5c30fe597ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html @@ -52,6 +52,7 @@
  • Metrics
  • Configuration
  • Process Thread Dump
  • +
  • Network Topology
  • @@ -340,7 +341,7 @@ {#LiveNodes} - + @@ -363,7 +364,7 @@ {#DeadNodes} - + @@ -371,6 +372,8 @@ + + {/DeadNodes}
    {state}{name} ({xferaddr}){location}/{name} ({xferaddr}) {dnWebAddress} {lastContact}s {lastBlockReport}m
    {state}{name} ({xferaddr}){location}/{name} ({xferaddr}) {#helper_relative_time value="{lastContact}"/}
    @@ -390,7 +393,7 @@ {#EnteringMaintenanceNodes}
    {name} ({xferaddr}){location}/{name} ({xferaddr}) {underReplicatedBlocks} {maintenanceOnlyReplicas} {underReplicateInOpenFiles}
    {name} ({xferaddr}){location}/{name} ({xferaddr}) {underReplicatedBlocks} {decommissionOnlyReplicas} {underReplicateInOpenFiles}
    {name} ({xferaddr}){location}/{name} ({xferaddr}) {#helper_date_tostring value="{lastVolumeFailureDate}"/} {volfails} {estimatedCapacityLostTotal|fmt_bytes}
    - - - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js new file mode 100644 index 0000000000000..260615b0e21e1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +(function () { + "use strict"; + + var data = {}; + + dust.loadSource(dust.compile($('#tmpl-jn').html(), 'jn')); + + var BEANS = [ + {"name": "jn", "url": "/jmx?qry=Hadoop:service=JournalNode,name=JournalNodeInfo"}, + {"name": "journals", "url": "/jmx?qry=Hadoop:service=JournalNode,name=Journal-*"} + + ]; + + var HELPERS = { + 'helper_date_tostring' : function (chunk, ctx, bodies, params) { + var value = dust.helpers.tap(params.value, chunk, ctx); + return chunk.write('' + moment(Number(value)).format('ddd MMM DD HH:mm:ss ZZ YYYY')); + } + }; + + load_json( + BEANS, + guard_with_startup_progress(function(d) { + for (var k in d) { + data[k] = k === 'journals' ? workaround(d[k].beans) : d[k].beans[0]; + } + render(); + }), + function (url, jqxhr, text, err) { + show_err_msg('

    Failed to retrieve data from ' + url + ', cause: ' + err + '

    '); + }); + + function guard_with_startup_progress(fn) { + return function() { + try { + fn.apply(this, arguments); + } catch (err) { + if (err instanceof TypeError) { + show_err_msg('JournalNode error: ' + err); + } + } + }; + } + + function workaround(journals) { + for (var i in journals){ + var str= journals[i]['modelerType']; + var index= str.indexOf("-"); + journals[i]['NameService']= str.substr(index + 1); + } + + return journals; + } + + function render() { + var base = dust.makeBase(HELPERS); + dust.render('jn', base.push(data), function(err, out) { + $('#tab-overview').html(out); + $('#tab-overview').addClass('active'); + }); + } + + function show_err_msg() { + $('#alert-panel-body').html("Failed to load journalnode information"); + $('#alert-panel').show(); + } +})(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/journalnode.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/journalnode.html new file mode 100644 index 0000000000000..0743e943c1dd3 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/journalnode.html @@ -0,0 +1,108 @@ + + + + + + + + JournalNode Information + + + + + +
    + +
    +
    + +
    +
    +
    + +
    +
    +
    + +
    +
    +

    Hadoop, {release-year-token}.

    +
    +
    + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html index e9c206eda0381..a3484fbcb6a88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html @@ -86,7 +86,7 @@ {/snn} - - - + parallel-tests-createdir - run + parallel-tests-createdir @@ -342,7 +385,6 @@ test - 1 ${testsThreadCount} false ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true @@ -351,7 +393,7 @@ ${test.build.data}/${surefire.forkNumber} ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} @@ -382,7 +424,7 @@ ${test.build.data}/${surefire.forkNumber} ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} @@ -408,7 +450,6 @@ verify - 1 ${testsThreadCount} false ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true @@ -425,7 +466,7 @@ - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} @@ -486,31 +527,13 @@ - maven-antrun-plugin + org.apache.hadoop + hadoop-maven-plugins - create-parallel-tests-dirs - test-compile - - - - - + parallel-tests-createdir - run + parallel-tests-createdir @@ -533,7 +556,7 @@ ${test.build.data}/${surefire.forkNumber} ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} 
${fs.azure.scale.test.huge.partitionsize} @@ -577,7 +600,7 @@ - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.timeout} @@ -593,6 +616,10 @@ **/azurebfs/ITestAbfsReadWriteAndSeek.java **/azurebfs/ITestAzureBlobFileSystemListStatus.java **/azurebfs/extensions/ITestAbfsDelegationTokens.java + **/azurebfs/ITestSmallWriteOptimization.java + **/azurebfs/ITestAbfsStreamStatistics*.java + **/azurebfs/services/ITestReadBufferManager.java + **/azurebfs/commit/*.java @@ -621,7 +648,7 @@ - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.timeout} @@ -632,6 +659,10 @@ **/azurebfs/ITestAbfsReadWriteAndSeek.java **/azurebfs/ITestAzureBlobFileSystemListStatus.java **/azurebfs/extensions/ITestAbfsDelegationTokens.java + **/azurebfs/ITestSmallWriteOptimization.java + **/azurebfs/services/ITestReadBufferManager.java + **/azurebfs/ITestAbfsStreamStatistics*.java + **/azurebfs/commit/*.java @@ -652,31 +683,13 @@ - maven-antrun-plugin + org.apache.hadoop + hadoop-maven-plugins - create-parallel-tests-dirs - test-compile - - - - - + parallel-tests-createdir - run + parallel-tests-createdir @@ -691,7 +704,6 @@ test - 1 ${testsThreadCount} false ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true @@ -700,7 +712,7 @@ ${test.build.data}/${surefire.forkNumber} ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} @@ -730,7 +742,7 @@ ${test.build.data}/${surefire.forkNumber} ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} @@ -772,7 +784,7 @@ - 
fork-${surefire.forkNumber} + fork-000${surefire.forkNumber} ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml index c50236162d803..2065746b76611 100644 --- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml +++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml @@ -46,4 +46,13 @@ files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]AzureBlobFileSystemStore.java"/> + + + + + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java index a9be8c5a75e8c..f1f8d3d5003c6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java @@ -30,7 +30,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; class AzureFileSystemThreadPoolExecutor { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java index 414d2f2ee098a..39127712f8408 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java @@ -40,6 +40,8 @@ import java.util.Iterator; import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; import java.util.Set; import 
org.apache.commons.lang3.StringUtils; @@ -64,7 +66,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.microsoft.azure.storage.CloudStorageAccount; import com.microsoft.azure.storage.OperationContext; import com.microsoft.azure.storage.RetryExponentialRetry; @@ -180,6 +182,11 @@ public class AzureNativeFileSystemStore implements NativeFileSystemStore { */ public static final String KEY_USE_LOCAL_SAS_KEY_MODE = "fs.azure.local.sas.key.mode"; + /** + * Config to control case sensitive metadata key checks/retrieval. If this + * is false, blob metadata keys will be treated case insensitive. + */ + private static final String KEY_BLOB_METADATA_KEY_CASE_SENSITIVE = "fs.azure.blob.metadata.key.case.sensitive"; private static final String PERMISSION_METADATA_KEY = "hdi_permission"; private static final String OLD_PERMISSION_METADATA_KEY = "asv_permission"; private static final String IS_FOLDER_METADATA_KEY = "hdi_isfolder"; @@ -235,6 +242,16 @@ public class AzureNativeFileSystemStore implements NativeFileSystemStore { */ public static final String KEY_ENABLE_FLAT_LISTING = "fs.azure.flatlist.enable"; + /** + * Optional config to enable a lock free pread which will bypass buffer in + * BlockBlobInputStream. + * This is not a config which can be set at cluster level. It can be used as + * an option on FutureDataInputStreamBuilder. + * @see FileSystem#openFile(org.apache.hadoop.fs.Path) + */ + public static final String FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE = + "fs.azure.block.blob.buffered.pread.disable"; + /** * The set of directories where we should apply atomic folder rename * synchronized with createNonRecursive. 
@@ -353,6 +370,8 @@ public class AzureNativeFileSystemStore implements NativeFileSystemStore { private String delegationToken; + private boolean metadataKeyCaseSensitive; + /** The error message template when container is not accessible. */ public static final String NO_ACCESS_TO_CONTAINER_MSG = "No credentials found for " + "account %s in the configuration, and its container %s is not " @@ -574,6 +593,12 @@ public void initialize(URI uri, Configuration conf, AzureFileSystemInstrumentati LOG.warn("Unable to initialize HBase root as an atomic rename directory."); } LOG.debug("Atomic rename directories: {} ", setToString(atomicRenameDirs)); + metadataKeyCaseSensitive = conf + .getBoolean(KEY_BLOB_METADATA_KEY_CASE_SENSITIVE, true); + if (!metadataKeyCaseSensitive) { + LOG.info("{} configured as false. Blob metadata will be treated case insensitive.", + KEY_BLOB_METADATA_KEY_CASE_SENSITIVE); + } } /** @@ -1577,8 +1602,8 @@ private OutputStream openOutputStream(final CloudBlobWrapper blob) * Opens a new input stream for the given blob (page or block blob) * to read its data. 
*/ - private InputStream openInputStream(CloudBlobWrapper blob) - throws StorageException, IOException { + private InputStream openInputStream(CloudBlobWrapper blob, + Optional options) throws StorageException, IOException { if (blob instanceof CloudBlockBlobWrapper) { LOG.debug("Using stream seek algorithm {}", inputStreamVersion); switch(inputStreamVersion) { @@ -1586,9 +1611,13 @@ private InputStream openInputStream(CloudBlobWrapper blob) return blob.openInputStream(getDownloadOptions(), getInstrumentedContext(isConcurrentOOBAppendAllowed())); case 2: + boolean bufferedPreadDisabled = options.map(c -> c + .getBoolean(FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE, false)) + .orElse(false); return new BlockBlobInputStream((CloudBlockBlobWrapper) blob, getDownloadOptions(), - getInstrumentedContext(isConcurrentOOBAppendAllowed())); + getInstrumentedContext(isConcurrentOOBAppendAllowed()), + bufferedPreadDisabled); default: throw new IOException("Unknown seek algorithm: " + inputStreamVersion); } @@ -1618,15 +1647,24 @@ private static void storeMetadataAttribute(CloudBlobWrapper blob, blob.setMetadata(metadata); } - private static String getMetadataAttribute(CloudBlobWrapper blob, + private String getMetadataAttribute(HashMap metadata, String... 
keyAlternatives) { - HashMap metadata = blob.getMetadata(); if (null == metadata) { return null; } for (String key : keyAlternatives) { - if (metadata.containsKey(key)) { - return metadata.get(key); + if (metadataKeyCaseSensitive) { + if (metadata.containsKey(key)) { + return metadata.get(key); + } + } else { + // See HADOOP-17643 for details on why this case insensitive metadata + // checks been added + for (Entry entry : metadata.entrySet()) { + if (key.equalsIgnoreCase(entry.getKey())) { + return entry.getValue(); + } + } } } return null; @@ -1650,7 +1688,7 @@ private static void storePermissionStatus(CloudBlobWrapper blob, } private PermissionStatus getPermissionStatus(CloudBlobWrapper blob) { - String permissionMetadataValue = getMetadataAttribute(blob, + String permissionMetadataValue = getMetadataAttribute(blob.getMetadata(), PERMISSION_METADATA_KEY, OLD_PERMISSION_METADATA_KEY); if (permissionMetadataValue != null) { return PermissionStatusJsonSerializer.fromJSONString( @@ -1698,19 +1736,32 @@ private static void storeLinkAttribute(CloudBlobWrapper blob, OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY); } - private static String getLinkAttributeValue(CloudBlobWrapper blob) + private String getLinkAttributeValue(CloudBlobWrapper blob) throws UnsupportedEncodingException { - String encodedLinkTarget = getMetadataAttribute(blob, + String encodedLinkTarget = getMetadataAttribute(blob.getMetadata(), LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY, OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY); return decodeMetadataAttribute(encodedLinkTarget); } - private static boolean retrieveFolderAttribute(CloudBlobWrapper blob) { + private boolean retrieveFolderAttribute(CloudBlobWrapper blob) { HashMap metadata = blob.getMetadata(); - return null != metadata - && (metadata.containsKey(IS_FOLDER_METADATA_KEY) || metadata - .containsKey(OLD_IS_FOLDER_METADATA_KEY)); + if (null != metadata) { + if (metadataKeyCaseSensitive) { + return 
metadata.containsKey(IS_FOLDER_METADATA_KEY) + || metadata.containsKey(OLD_IS_FOLDER_METADATA_KEY); + } else { + // See HADOOP-17643 for details on why this case insensitive metadata + // checks been added + for (String key : metadata.keySet()) { + if (key.equalsIgnoreCase(IS_FOLDER_METADATA_KEY) + || key.equalsIgnoreCase(OLD_IS_FOLDER_METADATA_KEY)) { + return true; + } + } + } + } + return false; } private static void storeVersionAttribute(CloudBlobContainerWrapper container) { @@ -1725,18 +1776,9 @@ private static void storeVersionAttribute(CloudBlobContainerWrapper container) { container.setMetadata(metadata); } - private static String retrieveVersionAttribute( - CloudBlobContainerWrapper container) { - HashMap metadata = container.getMetadata(); - if (metadata == null) { - return null; - } else if (metadata.containsKey(VERSION_METADATA_KEY)) { - return metadata.get(VERSION_METADATA_KEY); - } else if (metadata.containsKey(OLD_VERSION_METADATA_KEY)) { - return metadata.get(OLD_VERSION_METADATA_KEY); - } else { - return null; - } + private String retrieveVersionAttribute(CloudBlobContainerWrapper container) { + return getMetadataAttribute(container.getMetadata(), VERSION_METADATA_KEY, + OLD_VERSION_METADATA_KEY); } @Override @@ -2231,7 +2273,8 @@ public byte[] retrieveAttribute(String key, String attribute) throws IOException CloudBlobWrapper blob = getBlobReference(key); blob.downloadAttributes(getInstrumentedContext()); - String value = getMetadataAttribute(blob, ensureValidAttributeName(attribute)); + String value = getMetadataAttribute(blob.getMetadata(), + ensureValidAttributeName(attribute)); value = decodeMetadataAttribute(value); return value == null ? 
null : value.getBytes(METADATA_ENCODING); } catch (Exception e) { @@ -2262,6 +2305,12 @@ public InputStream retrieve(String key) throws AzureException, IOException { @Override public InputStream retrieve(String key, long startByteOffset) throws AzureException, IOException { + return retrieve(key, startByteOffset, Optional.empty()); + } + + @Override + public InputStream retrieve(String key, long startByteOffset, + Optional options) throws AzureException, IOException { try { // Check if a session exists, if not create a session with the // Azure storage server. @@ -2273,7 +2322,7 @@ public InputStream retrieve(String key, long startByteOffset) } checkContainer(ContainerAccessType.PureRead); - InputStream inputStream = openInputStream(getBlobReference(key)); + InputStream inputStream = openInputStream(getBlobReference(key), options); if (startByteOffset > 0) { // Skip bytes and ignore return value. This is okay // because if you try to skip too far you will be positioned @@ -2824,7 +2873,7 @@ public void rename(String srcKey, String dstKey, boolean acquireLease, OutputStream opStream = null; try { if (srcBlob.getProperties().getBlobType() == BlobType.PAGE_BLOB){ - ipStream = openInputStream(srcBlob); + ipStream = openInputStream(srcBlob, Optional.empty()); opStream = openOutputStream(dstBlob); byte[] buffer = new byte[PageBlobFormatHelpers.PAGE_SIZE]; int len; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java index 5f051effefb9a..5412c0544061b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java @@ -27,7 +27,6 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.Locale; import java.util.UUID; import java.util.Random; 
import java.util.concurrent.ConcurrentLinkedDeque; @@ -42,8 +41,9 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.FSExceptionMessages; @@ -551,13 +551,7 @@ public boolean hasCapability(String capability) { if (!compactionEnabled) { return false; } - switch (capability.toLowerCase(Locale.ENGLISH)) { - case StreamCapabilities.HSYNC: - case StreamCapabilities.HFLUSH: - return true; - default: - return false; - } + return StoreImplementationUtils.isProbeForSyncable(capability); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java index c37b2bec6ecd7..00e84add34cf9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java @@ -28,6 +28,7 @@ import com.microsoft.azure.storage.blob.BlobRequestOptions; import org.apache.hadoop.fs.FSExceptionMessages; +import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.fs.azure.StorageInterface.CloudBlockBlobWrapper; @@ -36,10 +37,11 @@ * random access and seek. Random access performance is improved by several * orders of magnitude. 
*/ -final class BlockBlobInputStream extends InputStream implements Seekable { +final class BlockBlobInputStream extends FSInputStream { private final CloudBlockBlobWrapper blob; private final BlobRequestOptions options; private final OperationContext opContext; + private final boolean bufferedPreadDisabled; private InputStream blobInputStream = null; private int minimumReadSizeInBytes = 0; private long streamPositionAfterLastRead = -1; @@ -64,10 +66,12 @@ final class BlockBlobInputStream extends InputStream implements Seekable { */ BlockBlobInputStream(CloudBlockBlobWrapper blob, BlobRequestOptions options, - OperationContext opContext) throws IOException { + OperationContext opContext, boolean bufferedPreadDisabled) + throws IOException { this.blob = blob; this.options = options; this.opContext = opContext; + this.bufferedPreadDisabled = bufferedPreadDisabled; this.minimumReadSizeInBytes = blob.getStreamMinimumReadSizeInBytes(); @@ -263,6 +267,39 @@ private int doNetworkRead(byte[] buffer, int offset, int len) } } + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + synchronized (this) { + checkState(); + } + if (!bufferedPreadDisabled) { + // This will do a seek + read in which the streamBuffer will get used. 
+ return super.read(position, buffer, offset, length); + } + validatePositionedReadArgs(position, buffer, offset, length); + if (length == 0) { + return 0; + } + if (position >= streamLength) { + throw new EOFException("position is beyond stream capacity"); + } + MemoryOutputStream os = new MemoryOutputStream(buffer, offset, length); + long bytesToRead = Math.min(minimumReadSizeInBytes, + Math.min(os.capacity(), streamLength - position)); + try { + blob.downloadRange(position, bytesToRead, os, options, opContext); + } catch (StorageException e) { + throw new IOException(e); + } + int bytesRead = os.size(); + if (bytesRead == 0) { + // This may happen if the blob was modified after the length was obtained. + throw new EOFException("End of stream reached unexpectedly."); + } + return bytesRead; + } + /** * Reads up to len bytes of data from the input stream into an * array of bytes. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java index 016ae745c5688..5fa0ad8ae34e8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java @@ -17,14 +17,14 @@ */ package org.apache.hadoop.fs.azure; -import com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.concurrent.TimeUnit; -import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; /** * Class that provides caching for Authorize and getSasUri calls diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java index 859a608a1e1e7..48ffa65f69590 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java @@ -18,8 +18,8 @@ package org.apache.hadoop.fs.azure; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index f5705283b512d..b7b859cb9f3be 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -33,11 +33,13 @@ import java.util.EnumSet; import java.util.TimeZone; import java.util.UUID; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.Stack; import java.util.HashMap; @@ -61,6 +63,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PositionedReadable; import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; @@ -70,6 +73,9 @@ import org.apache.hadoop.fs.azure.security.Constants; import 
org.apache.hadoop.fs.azure.security.RemoteWasbDelegationTokenManager; import org.apache.hadoop.fs.azure.security.WasbDelegationTokenManager; +import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; +import org.apache.hadoop.fs.impl.OpenFileParameters; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; @@ -78,18 +84,20 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; +import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.azure.NativeAzureFileSystemHelper.*; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.microsoft.azure.storage.StorageException; /** @@ -914,6 +922,43 @@ public synchronized int read(byte[] b, int off, int len) throws FileNotFoundExce } } + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + // SpotBugs reports bug type IS2_INCONSISTENT_SYNC here. + // This report is not valid here. + // 'this.in' is instance of BlockBlobInputStream and read(long, byte[], int, int) + // calls it's Super class method when 'fs.azure.block.blob.buffered.pread.disable' + // is configured false. 
Super class FSInputStream's implementation is having + // proper synchronization. + // When 'fs.azure.block.blob.buffered.pread.disable' is true, we want a lock free + // implementation of blob read. Here we don't use any of the InputStream's + // shared resource (buffer) and also don't change any cursor position etc. + // So its safe to go with unsynchronized way of read. + if (in instanceof PositionedReadable) { + try { + int result = ((PositionedReadable) this.in).read(position, buffer, + offset, length); + if (null != statistics && result > 0) { + statistics.incrementBytesRead(result); + } + return result; + } catch (IOException e) { + Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(e); + if (innerException instanceof StorageException) { + LOG.error("Encountered Storage Exception for read on Blob : {}" + + " Exception details: {} Error Code : {}", + key, e, ((StorageException) innerException).getErrorCode()); + if (NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { + throw new FileNotFoundException(String.format("%s is not found", key)); + } + } + throw e; + } + } + return super.read(position, buffer, offset, length); + } + @Override public synchronized void close() throws IOException { if (!closed) { @@ -1052,10 +1097,7 @@ public void hsync() throws IOException { */ @Override // StreamCapability public boolean hasCapability(String capability) { - if (out instanceof StreamCapabilities) { - return ((StreamCapabilities) out).hasCapability(capability); - } - return false; + return StoreImplementationUtils.hasCapability(out, capability); } @Override @@ -3045,6 +3087,12 @@ public boolean mkdirs(Path f, FsPermission permission, boolean noUmask) throws I @Override public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundException, IOException { + return open(f, bufferSize, Optional.empty()); + } + + private FSDataInputStream open(Path f, int bufferSize, + Optional options) + 
throws FileNotFoundException, IOException { LOG.debug("Opening file: {}", f.toString()); @@ -3079,7 +3127,7 @@ public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundExcepti InputStream inputStream; try { - inputStream = store.retrieve(key); + inputStream = store.retrieve(key, 0, options); } catch(Exception ex) { Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); @@ -3096,6 +3144,18 @@ public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundExcepti new NativeAzureFsInputStream(inputStream, key, meta.getLen()), bufferSize)); } + @Override + protected CompletableFuture openFileWithOptions(Path path, + OpenFileParameters parameters) throws IOException { + AbstractFSBuilderImpl.rejectUnknownMandatoryKeys( + parameters.getMandatoryKeys(), + FS_OPTION_OPENFILE_STANDARD_OPTIONS, + "for " + path); + return LambdaUtils.eval( + new CompletableFuture<>(), () -> + open(path, parameters.getBufferSize(), Optional.of(parameters.getOptions()))); + } + @Override public boolean rename(Path src, Path dst) throws FileNotFoundException, IOException { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java index e4ad70cedb2cf..c918518bff24a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java @@ -23,7 +23,7 @@ import java.net.HttpURLConnection; import java.util.Map; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java index 414a01115c1d8..91aad992a1f19 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java @@ -23,13 +23,14 @@ import java.io.InputStream; import java.net.URI; import java.util.Date; +import java.util.Optional; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; import org.apache.hadoop.fs.permission.PermissionStatus; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** *

    @@ -50,6 +51,9 @@ void storeEmptyFolder(String key, PermissionStatus permissionStatus) InputStream retrieve(String key, long byteRangeStart) throws IOException; + InputStream retrieve(String key, long byteRangeStart, + Optional options) throws IOException; + DataOutputStream storefile(String keyEncoded, PermissionStatus permissionStatus, String key) throws AzureException; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java index 591c2ec50dee7..1e409cd908fa8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java @@ -43,7 +43,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.microsoft.azure.storage.OperationContext; import com.microsoft.azure.storage.StorageException; import com.microsoft.azure.storage.blob.BlobRequestOptions; @@ -580,6 +580,7 @@ public synchronized void hsync() throws IOException { // Restore the interrupted status Thread.currentThread().interrupt(); } + checkStreamState(); LOG.debug("Leaving PageBlobOutputStream#hsync(). 
Total hsync duration = " + (System.currentTimeMillis() - start) + " msec."); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java index 76ced3b96da5d..33ae9b83adea9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java @@ -22,7 +22,7 @@ import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azure.security.Constants; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java index 10956f73f729a..200945f6b533e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java @@ -22,7 +22,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.azure.StorageInterface.CloudBlobWrapper; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.microsoft.azure.storage.AccessCondition; import com.microsoft.azure.storage.StorageException; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java index dcfff2fbe3784..f8aed2612a857 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java @@ -22,9 +22,13 @@ import java.io.IOException; import java.io.OutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; -import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.impl.StoreImplementationUtils; /** * Support the Syncable interface on top of a DataOutputStream. @@ -35,6 +39,8 @@ public class SyncableDataOutputStream extends DataOutputStream implements Syncable, StreamCapabilities { + private static final Logger LOG = LoggerFactory.getLogger(SyncableDataOutputStream.class); + public SyncableDataOutputStream(OutputStream out) { super(out); } @@ -51,10 +57,7 @@ public OutputStream getOutStream() { @Override public boolean hasCapability(String capability) { - if (out instanceof StreamCapabilities) { - return ((StreamCapabilities) out).hasCapability(capability); - } - return false; + return StoreImplementationUtils.hasCapability(out, capability); } @Override @@ -70,4 +73,34 @@ public void hsync() throws IOException { ((Syncable) out).hsync(); } } + + @Override + public void close() throws IOException { + IOException ioeFromFlush = null; + try { + flush(); + } catch (IOException e) { + ioeFromFlush = e; + throw e; + } finally { + try { + this.out.close(); + } catch (IOException e) { + // If there was an Exception during flush(), the Azure SDK will throw back the + // same when we call close on the same stream. 
When try and finally both throw + // Exception, Java will use Throwable#addSuppressed for one of the Exception so + // that the caller will get one exception back. When within this, if both + // Exceptions are equal, it will throw back IllegalStateException. This makes us + // to throw back a non IOE. The below special handling is to avoid this. + if (ioeFromFlush == e) { + // Do nothing.. + // The close() call gave back the same IOE which flush() gave. Just swallow it + LOG.debug("flush() and close() throwing back same Exception. Just swallowing the latter", e); + } else { + // Let Java handle 2 different Exceptions been thrown from try and finally. + throw e; + } + } + } + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java index f512489a8ae90..d819ede67c466 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java @@ -33,7 +33,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * An fsck tool implementation for WASB that does various admin/cleanup/recovery diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java index 606c3f040f8fe..e595a7972a0c3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.azure; -import com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.azure.security.Constants; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.http.Header; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java index 9c40325e217e7..251ae6c931017 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.azure.security; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.util.JsonSerialization; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfs.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfs.java index 32df94223862c..e595b2f4efac0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfs.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfs.java @@ -43,4 +43,13 @@ public class Abfs extends DelegateToFileSystem { public int getUriDefaultPort() { return -1; } + + /** + * Close the file system; the FileContext API doesn't have an explicit close. 
+ */ + @Override + protected void finalize() throws Throwable { + fsImpl.close(); + super.finalize(); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 61fe3d8d6d22f..1bf7c569da13b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -20,10 +20,9 @@ import java.io.IOException; import java.lang.reflect.Field; -import java.util.Map; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -32,6 +31,7 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.AuthConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.IntegerConfigurationValidatorAnnotation; +import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.IntegerWithOutlierConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.LongConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.StringConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.Base64StringConfigurationValidatorAnnotation; @@ -47,6 +47,7 @@ import org.apache.hadoop.fs.azurebfs.diagnostics.IntegerConfigurationBasicValidator; import 
org.apache.hadoop.fs.azurebfs.diagnostics.LongConfigurationBasicValidator; import org.apache.hadoop.fs.azurebfs.diagnostics.StringConfigurationBasicValidator; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.extensions.CustomTokenProviderAdaptee; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; @@ -57,8 +58,10 @@ import org.apache.hadoop.fs.azurebfs.oauth2.UserPasswordTokenProvider; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; import org.apache.hadoop.fs.azurebfs.services.KeyProvider; import org.apache.hadoop.fs.azurebfs.services.SimpleKeyProvider; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.util.ReflectionUtils; @@ -66,6 +69,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.*; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.*; @@ -81,18 +85,59 @@ public class AbfsConfiguration{ private final boolean isSecure; private static final Logger LOG = LoggerFactory.getLogger(AbfsConfiguration.class); + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_IS_HNS_ENABLED, + DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED) + private String isNamespaceEnabledAccount; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_WRITE_MAX_CONCURRENT_REQUESTS, + DefaultValue = -1) + private int writeMaxConcurrentRequestCount; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = 
AZURE_WRITE_MAX_REQUESTS_TO_QUEUE, + DefaultValue = -1) + private int maxWriteRequestsToQueue; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_WRITE_BUFFER_SIZE, MinValue = MIN_BUFFER_SIZE, MaxValue = MAX_BUFFER_SIZE, DefaultValue = DEFAULT_WRITE_BUFFER_SIZE) private int writeBufferSize; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION, + DefaultValue = DEFAULT_AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION) + private boolean enableSmallWriteOptimization; + + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = AZURE_READ_SMALL_FILES_COMPLETELY, + DefaultValue = DEFAULT_READ_SMALL_FILES_COMPLETELY) + private boolean readSmallFilesCompletely; + + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = AZURE_READ_OPTIMIZE_FOOTER_READ, + DefaultValue = DEFAULT_OPTIMIZE_FOOTER_READ) + private boolean optimizeFooterRead; + + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED, + DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED) + private boolean isExpectHeaderEnabled; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED, + DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED) + private boolean accountThrottlingEnabled; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_READ_BUFFER_SIZE, MinValue = MIN_BUFFER_SIZE, MaxValue = MAX_BUFFER_SIZE, DefaultValue = DEFAULT_READ_BUFFER_SIZE) private int readBufferSize; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_READ_AHEAD_RANGE, + MinValue = MIN_BUFFER_SIZE, + MaxValue = MAX_BUFFER_SIZE, + DefaultValue = DEFAULT_READ_AHEAD_RANGE) + private int readAheadRange; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_MIN_BACKOFF_INTERVAL, DefaultValue = DEFAULT_MIN_BACKOFF_INTERVAL) private int minBackoffInterval; @@ -110,6 +155,31 @@ public class AbfsConfiguration{ 
DefaultValue = DEFAULT_MAX_RETRY_ATTEMPTS) private int maxIoRetries; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_CUSTOM_TOKEN_FETCH_RETRY_COUNT, + MinValue = 0, + DefaultValue = DEFAULT_CUSTOM_TOKEN_FETCH_RETRY_COUNT) + private int customTokenFetchRetryCount; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_OAUTH_TOKEN_FETCH_RETRY_COUNT, + MinValue = 0, + DefaultValue = DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS) + private int oauthTokenFetchRetryCount; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF, + MinValue = 0, + DefaultValue = DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF_INTERVAL) + private int oauthTokenFetchRetryMinBackoff; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF, + MinValue = 0, + DefaultValue = DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL) + private int oauthTokenFetchRetryMaxBackoff; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF, + MinValue = 0, + DefaultValue = DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF) + private int oauthTokenFetchRetryDeltaBackoff; + @LongConfigurationValidatorAnnotation(ConfigurationKey = AZURE_BLOCK_SIZE_PROPERTY_NAME, MinValue = 0, MaxValue = MAX_AZURE_BLOCK_SIZE, @@ -143,6 +213,28 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_FS_AZURE_ATOMIC_RENAME_DIRECTORIES) private String azureAtomicDirs; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE, + DefaultValue = DEFAULT_FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE) + private boolean enableConditionalCreateOverwrite; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_ENABLE_MKDIR_OVERWRITE, DefaultValue = + DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE) + private boolean mkdirOverwrite; + + 
@StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_APPEND_BLOB_KEY, + DefaultValue = DEFAULT_FS_AZURE_APPEND_BLOB_DIRECTORIES) + private String azureAppendBlobDirs; + + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_INFINITE_LEASE_KEY, + DefaultValue = DEFAULT_FS_AZURE_INFINITE_LEASE_DIRECTORIES) + private String azureInfiniteLeaseDirs; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_LEASE_THREADS, + MinValue = MIN_LEASE_THREADS, + DefaultValue = DEFAULT_LEASE_THREADS) + private int numLeaseThreads; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, DefaultValue = DEFAULT_AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION) private boolean createRemoteFileSystemDuringInitialization; @@ -155,6 +247,16 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_READ_AHEAD_QUEUE_DEPTH) private int readAheadQueueDepth; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_READ_AHEAD_BLOCK_SIZE, + MinValue = MIN_BUFFER_SIZE, + MaxValue = MAX_BUFFER_SIZE, + DefaultValue = DEFAULT_READ_AHEAD_BLOCK_SIZE) + private int readAheadBlockSize; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ALWAYS_READ_BUFFER_SIZE, + DefaultValue = DEFAULT_ALWAYS_READ_BUFFER_SIZE) + private boolean alwaysReadBufferSize; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_FLUSH, DefaultValue = DEFAULT_ENABLE_FLUSH) private boolean enableFlush; @@ -167,10 +269,35 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING) private boolean enableAutoThrottling; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT, + DefaultValue = DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS) + private int accountOperationIdleTimeout; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ANALYSIS_PERIOD, + DefaultValue = 
DEFAULT_ANALYSIS_PERIOD_MS) + private int analysisPeriod; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ABFS_IO_RATE_LIMIT, + MinValue = 0, + DefaultValue = RATE_LIMIT_DEFAULT) + private int rateLimit; + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_USER_AGENT_PREFIX_KEY, - DefaultValue = "") + DefaultValue = DEFAULT_FS_AZURE_USER_AGENT_PREFIX) private String userAgentId; + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_CLUSTER_NAME, + DefaultValue = DEFAULT_VALUE_UNKNOWN) + private String clusterName; + + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_CLUSTER_TYPE, + DefaultValue = DEFAULT_VALUE_UNKNOWN) + private String clusterType; + + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_CLIENT_CORRELATIONID, + DefaultValue = EMPTY_STRING) + private String clientCorrelationId; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_DELEGATION_TOKEN, DefaultValue = DEFAULT_ENABLE_DELEGATION_TOKEN) private boolean enableDelegationToken; @@ -192,7 +319,23 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_ABFS_LATENCY_TRACK) private boolean trackLatency; - private Map storageAccountKeys; + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = FS_AZURE_ENABLE_READAHEAD, + DefaultValue = DEFAULT_ENABLE_READAHEAD) + private boolean enabledReadAhead; + + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS, + MinValue = 0, + DefaultValue = DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS) + private long sasTokenRenewPeriodForStreamsInSeconds; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR) + private boolean enableAbfsListIterator; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = 
DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE) + private boolean renameResilience; public AbfsConfiguration(final Configuration rawConfig, String accountName) throws IllegalAccessException, InvalidConfigurationValueException, IOException { @@ -201,12 +344,13 @@ public AbfsConfiguration(final Configuration rawConfig, String accountName) this.accountName = accountName; this.isSecure = getBoolean(FS_AZURE_SECURE_MODE, false); - validateStorageAccountKeys(); Field[] fields = this.getClass().getDeclaredFields(); for (Field field : fields) { field.setAccessible(true); if (field.isAnnotationPresent(IntegerConfigurationValidatorAnnotation.class)) { field.set(this, validateInt(field)); + } else if (field.isAnnotationPresent(IntegerWithOutlierConfigurationValidatorAnnotation.class)) { + field.set(this, validateIntWithOutlier(field)); } else if (field.isAnnotationPresent(LongConfigurationValidatorAnnotation.class)) { field.set(this, validateLong(field)); } else if (field.isAnnotationPresent(StringConfigurationValidatorAnnotation.class)) { @@ -219,6 +363,10 @@ public AbfsConfiguration(final Configuration rawConfig, String accountName) } } + public Trilean getIsNamespaceEnabledAccount() { + return Trilean.getTrilean(isNamespaceEnabledAccount); + } + /** * Gets the Azure Storage account name corresponding to this instance of configuration. * @return the Azure Storage account name @@ -227,6 +375,14 @@ public String getAccountName() { return accountName; } + /** + * Gets client correlation ID provided in config. + * @return Client Correlation ID config + */ + public String getClientCorrelationId() { + return clientCorrelationId; + } + /** * Appends an account name to a configuration key yielding the * account-specific form. @@ -298,31 +454,109 @@ public String getPasswordString(String key) throws IOException { } /** - * Returns the account-specific Class if it exists, then looks for an - * account-agnostic value, and finally tries the default value. 
+ * Returns a value for the key if the value exists and is not null. + * Otherwise, throws {@link ConfigurationPropertyNotFoundException} with + * key name. + * + * @param key Account-agnostic configuration key + * @return value if exists + * @throws IOException if error in fetching password or + * ConfigurationPropertyNotFoundException for missing key + */ + private String getMandatoryPasswordString(String key) throws IOException { + String value = getPasswordString(key); + if (value == null) { + throw new ConfigurationPropertyNotFoundException(key); + } + return value; + } + + /** + * Returns account-specific token provider class if it exists, else checks if + * an account-agnostic setting is present for token provider class if AuthType + * matches with authType passed. + * @param authType AuthType effective on the account * @param name Account-agnostic configuration key * @param defaultValue Class returned if none is configured * @param xface Interface shared by all possible values + * @param Interface class type * @return Highest-precedence Class object that was found */ - public Class getClass(String name, Class defaultValue, Class xface) { + public Class getTokenProviderClass(AuthType authType, + String name, + Class defaultValue, + Class xface) { + Class tokenProviderClass = getAccountSpecificClass(name, defaultValue, + xface); + + // If there is none set specific for account + // fall back to generic setting if Auth Type matches + if ((tokenProviderClass == null) + && (authType == getAccountAgnosticEnum( + FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey))) { + tokenProviderClass = getAccountAgnosticClass(name, defaultValue, xface); + } + + return (tokenProviderClass == null) + ? null + : tokenProviderClass.asSubclass(xface); + } + + /** + * Returns the account-specific class if it exists, else returns default value. 
+ * @param name Account-agnostic configuration key + * @param defaultValue Class returned if none is configured + * @param xface Interface shared by all possible values + * @param Interface class type + * @return Account specific Class object that was found + */ + public Class getAccountSpecificClass(String name, + Class defaultValue, + Class xface) { return rawConfig.getClass(accountConf(name), - rawConfig.getClass(name, defaultValue, xface), + defaultValue, xface); } /** - * Returns the account-specific password in string form if it exists, then + * Returns account-agnostic Class if it exists, else returns the default value. + * @param name Account-agnostic configuration key + * @param defaultValue Class returned if none is configured + * @param xface Interface shared by all possible values + * @param Interface class type + * @return Account-Agnostic Class object that was found + */ + public Class getAccountAgnosticClass(String name, + Class defaultValue, + Class xface) { + return rawConfig.getClass(name, defaultValue, xface); + } + + /** + * Returns the account-specific enum value if it exists, then * looks for an account-agnostic value. * @param name Account-agnostic configuration key * @param defaultValue Value returned if none is configured - * @return value in String form if one exists, else null + * @param Enum type + * @return enum value if one exists, else null */ public > T getEnum(String name, T defaultValue) { return rawConfig.getEnum(accountConf(name), rawConfig.getEnum(name, defaultValue)); } + /** + * Returns the account-agnostic enum value if it exists, else + * return default. + * @param name Account-agnostic configuration key + * @param defaultValue Value returned if none is configured + * @param Enum type + * @return enum value if one exists, else null + */ + public > T getAccountAgnosticEnum(String name, T defaultValue) { + return rawConfig.getEnum(name, defaultValue); + } + /** * Unsets parameter in the underlying Configuration object. 
* Provided only as a convenience; does not add any account logic. @@ -397,6 +631,18 @@ public int getWriteBufferSize() { return this.writeBufferSize; } + public boolean isSmallWriteOptimizationEnabled() { + return this.enableSmallWriteOptimization; + } + + public boolean readSmallFilesCompletely() { + return this.readSmallFilesCompletely; + } + + public boolean optimizeFooterRead() { + return this.optimizeFooterRead; + } + public int getReadBufferSize() { return this.readBufferSize; } @@ -417,6 +663,10 @@ public int getMaxIoRetries() { return this.maxIoRetries; } + public int getCustomTokenFetchRetryCount() { + return this.customTokenFetchRetryCount; + } + public long getAzureBlockSize() { return this.azureBlockSize; } @@ -425,6 +675,10 @@ public boolean isCheckAccessEnabled() { return this.isCheckAccessEnabled; } + public long getSasTokenRenewPeriodForStreamsInSeconds() { + return this.sasTokenRenewPeriodForStreamsInSeconds; + } + public String getAzureBlockLocationHost() { return this.azureBlockLocationHost; } @@ -449,6 +703,34 @@ public String getAzureAtomicRenameDirs() { return this.azureAtomicDirs; } + public boolean isConditionalCreateOverwriteEnabled() { + return this.enableConditionalCreateOverwrite; + } + + public boolean isEnabledMkdirOverwrite() { + return mkdirOverwrite; + } + + public String getAppendBlobDirs() { + return this.azureAppendBlobDirs; + } + + public boolean isExpectHeaderEnabled() { + return this.isExpectHeaderEnabled; + } + + public boolean accountThrottlingEnabled() { + return accountThrottlingEnabled; + } + + public String getAzureInfiniteLeaseDirs() { + return this.azureInfiniteLeaseDirs; + } + + public int getNumLeaseThreads() { + return this.numLeaseThreads; + } + public boolean getCreateRemoteFileSystemDuringInitialization() { // we do not support creating the filesystem when AuthType is SAS return this.createRemoteFileSystemDuringInitialization @@ -463,6 +745,14 @@ public int getReadAheadQueueDepth() { return 
this.readAheadQueueDepth; } + public int getReadAheadBlockSize() { + return this.readAheadBlockSize; + } + + public boolean shouldReadBufferSizeAlways() { + return this.alwaysReadBufferSize; + } + public boolean isFlushEnabled() { return this.enableFlush; } @@ -475,14 +765,42 @@ public boolean isAutoThrottlingEnabled() { return this.enableAutoThrottling; } + public int getAccountOperationIdleTimeout() { + return accountOperationIdleTimeout; + } + + public int getAnalysisPeriod() { + return analysisPeriod; + } + + public int getRateLimit() { + return rateLimit; + } + public String getCustomUserAgentPrefix() { return this.userAgentId; } + public String getClusterName() { + return this.clusterName; + } + + public String getClusterType() { + return this.clusterType; + } + public DelegatingSSLSocketFactory.SSLChannelMode getPreferredSSLFactoryOption() { return getEnum(FS_AZURE_SSL_CHANNEL_MODE_KEY, DEFAULT_FS_AZURE_SSL_CHANNEL_MODE); } + /** + * Enum config to allow user to pick format of x-ms-client-request-id header + * @return tracingContextFormat config if valid, else default ALL_ID_FORMAT + */ + public TracingHeaderFormat getTracingHeaderFormat() { + return getEnum(FS_AZURE_TRACINGHEADER_FORMAT, TracingHeaderFormat.ALL_ID_FORMAT); + } + public AuthType getAuthType(String accountName) { return getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey); } @@ -517,27 +835,37 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio if (authType == AuthType.OAuth) { try { Class tokenProviderClass = - getClass(FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME, null, - AccessTokenProvider.class); - AccessTokenProvider tokenProvider = null; + getTokenProviderClass(authType, + FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME, null, + AccessTokenProvider.class); + + AccessTokenProvider tokenProvider; if (tokenProviderClass == ClientCredsTokenProvider.class) { - String authEndpoint = 
getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT); - String clientId = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); - String clientSecret = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET); + String authEndpoint = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT); + String clientId = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); + String clientSecret = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET); tokenProvider = new ClientCredsTokenProvider(authEndpoint, clientId, clientSecret); LOG.trace("ClientCredsTokenProvider initialized"); } else if (tokenProviderClass == UserPasswordTokenProvider.class) { - String authEndpoint = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT); - String username = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_USER_NAME); - String password = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_USER_PASSWORD); + String authEndpoint = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT); + String username = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_USER_NAME); + String password = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_USER_PASSWORD); tokenProvider = new UserPasswordTokenProvider(authEndpoint, username, password); LOG.trace("UserPasswordTokenProvider initialized"); } else if (tokenProviderClass == MsiTokenProvider.class) { String authEndpoint = getTrimmedPasswordString( FS_AZURE_ACCOUNT_OAUTH_MSI_ENDPOINT, AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_ENDPOINT); - String tenantGuid = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT); - String clientId = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); + String tenantGuid = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT); + String clientId = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); String authority = getTrimmedPasswordString( FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY, AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY); @@ 
-549,8 +877,10 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio String authEndpoint = getTrimmedPasswordString( FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT, AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT); - String refreshToken = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN); - String clientId = getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); + String refreshToken = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN); + String clientId = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); tokenProvider = new RefreshTokenBasedTokenProvider(authEndpoint, clientId, refreshToken); LOG.trace("RefreshTokenBasedTokenProvider initialized"); @@ -561,14 +891,17 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio } catch(IllegalArgumentException e) { throw e; } catch (Exception e) { - throw new TokenAccessProviderException("Unable to load key provider class.", e); + throw new TokenAccessProviderException("Unable to load OAuth token provider class.", e); } } else if (authType == AuthType.Custom) { try { String configKey = FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME; - Class customTokenProviderClass = - getClass(configKey, null, CustomTokenProviderAdaptee.class); + + Class customTokenProviderClass + = getTokenProviderClass(authType, configKey, null, + CustomTokenProviderAdaptee.class); + if (customTokenProviderClass == null) { throw new IllegalArgumentException( String.format("The configuration value for \"%s\" is invalid.", configKey)); @@ -581,7 +914,7 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio LOG.trace("Initializing {}", customTokenProviderClass.getName()); azureTokenProvider.initialize(rawConfig, accountName); LOG.trace("{} init complete", customTokenProviderClass.getName()); - return new CustomTokenProviderAdapter(azureTokenProvider); + return new 
CustomTokenProviderAdapter(azureTokenProvider, getCustomTokenFetchRetryCount()); } catch(IllegalArgumentException e) { throw e; } catch (Exception e) { @@ -604,7 +937,9 @@ public SASTokenProvider getSASTokenProvider() throws AzureBlobFileSystemExceptio try { String configKey = FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; Class sasTokenProviderClass = - getClass(configKey, null, SASTokenProvider.class); + getTokenProviderClass(authType, configKey, null, + SASTokenProvider.class); + Preconditions.checkArgument(sasTokenProviderClass != null, String.format("The configuration value for \"%s\" is invalid.", configKey)); @@ -622,14 +957,17 @@ public SASTokenProvider getSASTokenProvider() throws AzureBlobFileSystemExceptio } } - void validateStorageAccountKeys() throws InvalidConfigurationValueException { - Base64StringConfigurationBasicValidator validator = new Base64StringConfigurationBasicValidator( - FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME, "", true); - this.storageAccountKeys = rawConfig.getValByRegex(FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX); + public boolean isReadAheadEnabled() { + return this.enabledReadAhead; + } - for (Map.Entry account : storageAccountKeys.entrySet()) { - validator.validate(account.getValue()); - } + @VisibleForTesting + void setReadAheadEnabled(final boolean enabledReadAhead) { + this.enabledReadAhead = enabledReadAhead; + } + + public int getReadAheadRange() { + return this.readAheadRange; } int validateInt(Field field) throws IllegalAccessException, InvalidConfigurationValueException { @@ -645,6 +983,21 @@ int validateInt(Field field) throws IllegalAccessException, InvalidConfiguration validator.ThrowIfInvalid()).validate(value); } + int validateIntWithOutlier(Field field) throws IllegalAccessException, InvalidConfigurationValueException { + IntegerWithOutlierConfigurationValidatorAnnotation validator = + field.getAnnotation(IntegerWithOutlierConfigurationValidatorAnnotation.class); + String value = get(validator.ConfigurationKey()); + + // validate + return 
new IntegerConfigurationBasicValidator( + validator.OutlierValue(), + validator.MinValue(), + validator.MaxValue(), + validator.DefaultValue(), + validator.ConfigurationKey(), + validator.ThrowIfInvalid()).validate(value); + } + long validateLong(Field field) throws IllegalAccessException, InvalidConfigurationValueException { LongConfigurationValidatorAnnotation validator = field.getAnnotation(LongConfigurationValidatorAnnotation.class); String value = rawConfig.get(validator.ConfigurationKey()); @@ -691,6 +1044,35 @@ boolean validateBoolean(Field field) throws IllegalAccessException, InvalidConfi validator.ThrowIfInvalid()).validate(value); } + public ExponentialRetryPolicy getOauthTokenFetchRetryPolicy() { + return new ExponentialRetryPolicy(oauthTokenFetchRetryCount, + oauthTokenFetchRetryMinBackoff, oauthTokenFetchRetryMaxBackoff, + oauthTokenFetchRetryDeltaBackoff); + } + + public int getWriteMaxConcurrentRequestCount() { + if (this.writeMaxConcurrentRequestCount < 1) { + return 4 * Runtime.getRuntime().availableProcessors(); + } + return this.writeMaxConcurrentRequestCount; + } + + public int getMaxWriteRequestsToQueue() { + if (this.maxWriteRequestsToQueue < 1) { + return 2 * getWriteMaxConcurrentRequestCount(); + } + return this.maxWriteRequestsToQueue; + } + + public boolean enableAbfsListIterator() { + return this.enableAbfsListIterator; + } + + public String getClientProvidedEncryptionKey() { + String accSpecEncKey = accountConf(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY); + return rawConfig.get(accSpecEncKey, null); + } + @VisibleForTesting void setReadBufferSize(int bufferSize) { this.readBufferSize = bufferSize; @@ -716,6 +1098,21 @@ void setListMaxResults(int listMaxResults) { this.listMaxResults = listMaxResults; } + @VisibleForTesting + public void setMaxIoRetries(int maxIoRetries) { + this.maxIoRetries = maxIoRetries; + } + + @VisibleForTesting + void setMaxBackoffIntervalMilliseconds(int maxBackoffInterval) { + this.maxBackoffInterval = 
maxBackoffInterval; + } + + @VisibleForTesting + void setIsNamespaceEnabledAccount(String isNamespaceEnabledAccount) { + this.isNamespaceEnabledAccount = isNamespaceEnabledAccount; + } + private String getTrimmedPasswordString(String key, String defaultValue) throws IOException { String value = getPasswordString(key); if (StringUtils.isBlank(value)) { @@ -731,4 +1128,26 @@ private String appendSlashIfNeeded(String authority) { return authority; } + @VisibleForTesting + public void setReadSmallFilesCompletely(boolean readSmallFilesCompletely) { + this.readSmallFilesCompletely = readSmallFilesCompletely; + } + + @VisibleForTesting + public void setOptimizeFooterRead(boolean optimizeFooterRead) { + this.optimizeFooterRead = optimizeFooterRead; + } + + @VisibleForTesting + public void setEnableAbfsListIterator(boolean enableAbfsListIterator) { + this.enableAbfsListIterator = enableAbfsListIterator; + } + + public boolean getRenameResilience() { + return renameResilience; + } + + void setRenameResilience(boolean actualResilience) { + renameResilience = actualResilience; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java new file mode 100644 index 0000000000000..f19b262d96a30 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.net.URI; +import java.util.Map; +import java.util.UUID; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStoreBuilder; +import org.apache.hadoop.metrics2.MetricStringBuilder; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableMetric; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; + +/** + * Instrumentation of Abfs counters. + */ +public class AbfsCountersImpl implements AbfsCounters { + + /** + * Single context for all the Abfs counters to separate them from other + * counters. + */ + private static final String CONTEXT = "AbfsContext"; + /** + * The name of a field added to metrics records that uniquely identifies a + * specific FileSystem instance. + */ + private static final String REGISTRY_ID = "AbfsID"; + /** + * The name of a field added to metrics records that indicates the hostname + * portion of the FS URL. 
+ */ + private static final String METRIC_BUCKET = "AbfsBucket"; + + private final MetricsRegistry registry = + new MetricsRegistry("abfsMetrics").setContext(CONTEXT); + + private final IOStatisticsStore ioStatisticsStore; + + private static final AbfsStatistic[] STATISTIC_LIST = { + CALL_CREATE, + CALL_OPEN, + CALL_GET_FILE_STATUS, + CALL_APPEND, + CALL_CREATE_NON_RECURSIVE, + CALL_DELETE, + CALL_EXIST, + CALL_GET_DELEGATION_TOKEN, + CALL_LIST_STATUS, + CALL_MKDIRS, + CALL_RENAME, + DIRECTORIES_CREATED, + DIRECTORIES_DELETED, + FILES_CREATED, + FILES_DELETED, + ERROR_IGNORED, + CONNECTIONS_MADE, + SEND_REQUESTS, + GET_RESPONSES, + BYTES_SENT, + BYTES_RECEIVED, + READ_THROTTLES, + WRITE_THROTTLES, + SERVER_UNAVAILABLE, + RENAME_RECOVERY, + METADATA_INCOMPLETE_RENAME_FAILURES, + RENAME_PATH_ATTEMPTS + + }; + + private static final AbfsStatistic[] DURATION_TRACKER_LIST = { + HTTP_HEAD_REQUEST, + HTTP_GET_REQUEST, + HTTP_DELETE_REQUEST, + HTTP_PUT_REQUEST, + HTTP_PATCH_REQUEST, + HTTP_POST_REQUEST + }; + + public AbfsCountersImpl(URI uri) { + UUID fileSystemInstanceId = UUID.randomUUID(); + registry.tag(REGISTRY_ID, + "A unique identifier for the instance", + fileSystemInstanceId.toString()); + registry.tag(METRIC_BUCKET, "Hostname from the FS URL", uri.getHost()); + + IOStatisticsStoreBuilder ioStatisticsStoreBuilder = iostatisticsStore(); + // Declaring the counters. + for (AbfsStatistic stats : STATISTIC_LIST) { + ioStatisticsStoreBuilder.withCounters(stats.getStatName()); + createCounter(stats); + } + // Declaring the DurationTrackers. + for (AbfsStatistic durationStats : DURATION_TRACKER_LIST) { + ioStatisticsStoreBuilder.withDurationTracking(durationStats.getStatName()); + } + ioStatisticsStore = ioStatisticsStoreBuilder.build(); + } + + /** + * Look up a Metric from registered set. + * + * @param name name of metric. + * @return the metric or null. 
+ */ + private MutableMetric lookupMetric(String name) { + return getRegistry().get(name); + } + + /** + * Look up counter by name. + * + * @param name name of counter. + * @return counter if found, else null. + */ + private MutableCounterLong lookupCounter(String name) { + MutableMetric metric = lookupMetric(name); + if (metric == null) { + return null; + } + if (!(metric instanceof MutableCounterLong)) { + throw new IllegalStateException("Metric " + name + + " is not a MutableCounterLong: " + metric); + } + return (MutableCounterLong) metric; + } + + /** + * Create a counter in the registry. + * + * @param stats AbfsStatistic whose counter needs to be made. + * @return counter or null. + */ + private MutableCounterLong createCounter(AbfsStatistic stats) { + return registry.newCounter(stats.getStatName(), + stats.getStatDescription(), 0L); + } + + /** + * {@inheritDoc} + * + * Increment a statistic with some value. + * + * @param statistic AbfsStatistic need to be incremented. + * @param value long value to be incremented by. + */ + @Override + public void incrementCounter(AbfsStatistic statistic, long value) { + ioStatisticsStore.incrementCounter(statistic.getStatName(), value); + MutableCounterLong counter = lookupCounter(statistic.getStatName()); + if (counter != null) { + counter.incr(value); + } + } + + /** + * Getter for MetricRegistry. + * + * @return MetricRegistry or null. + */ + private MetricsRegistry getRegistry() { + return registry; + } + + /** + * {@inheritDoc} + * + * Method to aggregate all the counters in the MetricRegistry and form a + * string with prefix, separator and suffix. + * + * @param prefix string that would be before metric. + * @param separator string that would be between metric name and value. + * @param suffix string that would be after metric value. + * @param all gets all the values even if unchanged. + * @return a String with all the metrics and their values. 
+ */ + @Override + public String formString(String prefix, String separator, String suffix, + boolean all) { + + MetricStringBuilder metricStringBuilder = new MetricStringBuilder(null, + prefix, separator, suffix); + registry.snapshot(metricStringBuilder, all); + return metricStringBuilder.toString(); + } + + /** + * {@inheritDoc} + * + * Map of all the counters for testing. + * + * @return a map of the IOStatistics counters. + */ + @VisibleForTesting + @Override + public Map toMap() { + return ioStatisticsStore.counters(); + } + + /** + * Returning the instance of IOStatisticsStore used to collect the metrics + * in AbfsCounters. + * + * @return instance of IOStatistics. + */ + @Override + public IOStatistics getIOStatistics() { + return ioStatisticsStore; + } + + /** + * Tracks the duration of a statistic. + * + * @param key name of the statistic. + * @return DurationTracker for that statistic. + */ + @Override + public DurationTracker trackDuration(String key) { + return ioStatisticsStore.trackDuration(key); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsStatistic.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsStatistic.java new file mode 100644 index 0000000000000..3a77e82ffb4fb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsStatistic.java @@ -0,0 +1,189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.fs.StorageStatistics.CommonStatisticNames; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; + +/** + * Statistic which are collected in Abfs. + * Available as metrics in {@link AbfsCountersImpl}. + */ +public enum AbfsStatistic { + + CALL_CREATE(CommonStatisticNames.OP_CREATE, + "Calls of create()."), + CALL_OPEN(CommonStatisticNames.OP_OPEN, + "Calls of open()."), + CALL_GET_FILE_STATUS(CommonStatisticNames.OP_GET_FILE_STATUS, + "Calls of getFileStatus()."), + CALL_APPEND(CommonStatisticNames.OP_APPEND, + "Calls of append()."), + CALL_CREATE_NON_RECURSIVE(CommonStatisticNames.OP_CREATE_NON_RECURSIVE, + "Calls of createNonRecursive()."), + CALL_DELETE(CommonStatisticNames.OP_DELETE, + "Calls of delete()."), + CALL_EXIST(CommonStatisticNames.OP_EXISTS, + "Calls of exist()."), + CALL_GET_DELEGATION_TOKEN(CommonStatisticNames.OP_GET_DELEGATION_TOKEN, + "Calls of getDelegationToken()."), + CALL_LIST_STATUS(CommonStatisticNames.OP_LIST_STATUS, + "Calls of listStatus()."), + CALL_MKDIRS(CommonStatisticNames.OP_MKDIRS, + "Calls of mkdirs()."), + CALL_RENAME(CommonStatisticNames.OP_RENAME, + "Calls of rename()."), + DIRECTORIES_CREATED("directories_created", + "Total number of directories created through the object store."), + DIRECTORIES_DELETED("directories_deleted", + "Total number of directories deleted through the object store."), + 
FILES_CREATED("files_created", + "Total number of files created through the object store."), + FILES_DELETED("files_deleted", + "Total number of files deleted from the object store."), + ERROR_IGNORED("error_ignored", + "Errors caught and ignored."), + + //Network statistics. + CONNECTIONS_MADE("connections_made", + "Total number of times a connection was made with the data store."), + SEND_REQUESTS("send_requests", + "Total number of times http requests were sent to the data store."), + GET_RESPONSES("get_responses", + "Total number of times a response was received."), + BYTES_SENT("bytes_sent", + "Total bytes uploaded."), + BYTES_RECEIVED("bytes_received", + "Total bytes received."), + READ_THROTTLES("read_throttles", + "Total number of times a read operation is throttled."), + WRITE_THROTTLES("write_throttles", + "Total number of times a write operation is throttled."), + SERVER_UNAVAILABLE("server_unavailable", + "Total number of times HTTP 503 status code is received in response."), + + // HTTP Duration Trackers + HTTP_HEAD_REQUEST(StoreStatisticNames.ACTION_HTTP_HEAD_REQUEST, + "Time taken to complete a HEAD request", + AbfsHttpConstants.HTTP_METHOD_HEAD), + HTTP_GET_REQUEST(StoreStatisticNames.ACTION_HTTP_GET_REQUEST, + "Time taken to complete a GET request", + AbfsHttpConstants.HTTP_METHOD_GET), + HTTP_DELETE_REQUEST(StoreStatisticNames.ACTION_HTTP_DELETE_REQUEST, + "Time taken to complete a DELETE request", + AbfsHttpConstants.HTTP_METHOD_DELETE), + HTTP_PUT_REQUEST(StoreStatisticNames.ACTION_HTTP_PUT_REQUEST, + "Time taken to complete a PUT request", + AbfsHttpConstants.HTTP_METHOD_PUT), + HTTP_PATCH_REQUEST(StoreStatisticNames.ACTION_HTTP_PATCH_REQUEST, + "Time taken to complete a PATCH request", + AbfsHttpConstants.HTTP_METHOD_PATCH), + HTTP_POST_REQUEST(StoreStatisticNames.ACTION_HTTP_POST_REQUEST, + "Time taken to complete a POST request", + AbfsHttpConstants.HTTP_METHOD_POST), + + // Rename recovery + RENAME_RECOVERY("rename_recovery", + "Number of 
times Rename recoveries happened"), + METADATA_INCOMPLETE_RENAME_FAILURES("metadata_incomplete_rename_failures", + "Number of times rename operation failed due to metadata being " + + "incomplete"), + RENAME_PATH_ATTEMPTS("rename_path_attempts", + "Number of times we attempt to rename a path internally"); + + private String statName; + private String statDescription; + + //For http call stats only. + private String httpCall; + private static final Map HTTP_CALL_TO_NAME_MAP = new HashMap<>(); + + static { + for (AbfsStatistic statistic : values()) { + if (statistic.getHttpCall() != null) { + HTTP_CALL_TO_NAME_MAP.put(statistic.getHttpCall(), statistic.getStatName()); + } + } + } + + /** + * Constructor of AbfsStatistic to set statistic name and description. + * + * @param statName Name of the statistic. + * @param statDescription Description of the statistic. + */ + AbfsStatistic(String statName, String statDescription) { + this.statName = statName; + this.statDescription = statDescription; + } + + /** + * Constructor for AbfsStatistic for HTTP durationTrackers. + * + * @param statName Name of the statistic. + * @param statDescription Description of the statistic. + * @param httpCall HTTP call associated with the stat name. + */ + AbfsStatistic(String statName, String statDescription, String httpCall) { + this.statName = statName; + this.statDescription = statDescription; + this.httpCall = httpCall; + } + + /** + * Getter for statistic name. + * + * @return Name of statistic. + */ + public String getStatName() { + return statName; + } + + /** + * Getter for statistic description. + * + * @return Description of statistic. + */ + public String getStatDescription() { + return statDescription; + } + + /** + * Getter for http call for HTTP duration trackers. + * + * @return http call of a statistic. + */ + public String getHttpCall() { + return httpCall; + } + + /** + * Get the statistic name using the http call name. 
+ * + * @param httpCall The HTTP call used to get the statistic name. + * @return Statistic name. + */ + public static String getStatNameFromHttpCall(String httpCall) { + return HTTP_CALL_TO_NAME_MAP.get(httpCall); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfss.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfss.java index c33265ce32406..ba20bbb5d7668 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfss.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/Abfss.java @@ -43,4 +43,13 @@ public class Abfss extends DelegateToFileSystem { public int getUriDefaultPort() { return -1; } + + /** + * Close the file system; the FileContext API doesn't have an explicit close. + */ + @Override + protected void finalize() throws Throwable { + fsImpl.close(); + super.finalize(); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 8eda2f3730400..5fb2c6e1700a8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -26,24 +26,37 @@ import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; +import java.nio.file.AccessDeniedException; +import java.time.Duration; import java.util.Hashtable; import java.util.List; import java.util.ArrayList; import java.util.EnumSet; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; 
-import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import javax.annotation.Nullable; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.azurebfs.commit.ResilientCommitByRename; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsClientThrottlingIntercept; +import org.apache.hadoop.fs.azurebfs.services.AbfsListStatusRemoteIterator; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -54,12 +67,15 @@ import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.XAttrSetFlag; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations; import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.FileSystemOperationUnhandledException; @@ -68,36 +84,82 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; import 
org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; +import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; +import org.apache.hadoop.fs.azurebfs.services.AbfsLocatedFileStatus; +import org.apache.hadoop.fs.azurebfs.utils.Listener; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; +import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.store.DataBlocks; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.RateLimiting; +import org.apache.hadoop.util.RateLimitingFactory; +import org.apache.hadoop.util.functional.RemoteIterators; +import org.apache.hadoop.util.DurationInfo; +import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_DEFAULT; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS; +import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DATA_BLOCKS_BUFFER_DEFAULT; +import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; +import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator; +import static org.apache.hadoop.util.functional.RemoteIterators.mappingRemoteIterator; /** * A {@link org.apache.hadoop.fs.FileSystem} for reading and writing files stored on Windows Azure */ @InterfaceStability.Evolving -public class AzureBlobFileSystem extends FileSystem { +public class AzureBlobFileSystem extends FileSystem + implements IOStatisticsSource { public static final Logger LOG = LoggerFactory.getLogger(AzureBlobFileSystem.class); private URI uri; private Path workingDir; private AzureBlobFileSystemStore abfsStore; private boolean isClosed; + private final String fileSystemId = UUID.randomUUID().toString(); private boolean delegationTokenEnabled = false; private AbfsDelegationTokenManager delegationTokenManager; + private AbfsCounters abfsCounters; + private String clientCorrelationId; + private TracingHeaderFormat tracingHeaderFormat; + private Listener listener; + + /** Name of blockFactory to be used by AbfsOutputStream. */ + private String blockOutputBuffer; + /** BlockFactory instance to be used. */ + private DataBlocks.BlockFactory blockFactory; + /** Maximum Active blocks per OutputStream. */ + private int blockOutputActiveBlocks; + + /** Rate limiting for operations which use it to throttle their IO. 
*/ + private RateLimiting rateLimiting; @Override public void initialize(URI uri, Configuration configuration) throws IOException { + configuration = ProviderUtils.excludeIncompatibleCredentialProviders( + configuration, AzureBlobFileSystem.class); uri = ensureAuthority(uri, configuration); super.initialize(uri, configuration); setConf(configuration); @@ -105,17 +167,49 @@ public void initialize(URI uri, Configuration configuration) LOG.debug("Initializing AzureBlobFileSystem for {}", uri); this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); - this.abfsStore = new AzureBlobFileSystemStore(uri, this.isSecureScheme(), configuration); + abfsCounters = new AbfsCountersImpl(uri); + // name of the blockFactory to be used. + this.blockOutputBuffer = configuration.getTrimmed(DATA_BLOCKS_BUFFER, + DATA_BLOCKS_BUFFER_DEFAULT); + // blockFactory used for this FS instance. + this.blockFactory = + DataBlocks.createFactory(FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR, + configuration, blockOutputBuffer); + this.blockOutputActiveBlocks = + configuration.getInt(FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS, + BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT); + if (blockOutputActiveBlocks < 1) { + blockOutputActiveBlocks = 1; + } + + // AzureBlobFileSystemStore with params in builder. 
+ AzureBlobFileSystemStore.AzureBlobFileSystemStoreBuilder + systemStoreBuilder = + new AzureBlobFileSystemStore.AzureBlobFileSystemStoreBuilder() + .withUri(uri) + .withSecureScheme(this.isSecureScheme()) + .withConfiguration(configuration) + .withAbfsCounters(abfsCounters) + .withBlockFactory(blockFactory) + .withBlockOutputActiveBlocks(blockOutputActiveBlocks) + .build(); + + this.abfsStore = new AzureBlobFileSystemStore(systemStoreBuilder); LOG.trace("AzureBlobFileSystemStore init complete"); - final AbfsConfiguration abfsConfiguration = abfsStore.getAbfsConfiguration(); - + final AbfsConfiguration abfsConfiguration = abfsStore + .getAbfsConfiguration(); + clientCorrelationId = TracingContext.validateClientCorrelationID( + abfsConfiguration.getClientCorrelationId()); + tracingHeaderFormat = abfsConfiguration.getTracingHeaderFormat(); this.setWorkingDirectory(this.getHomeDirectory()); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, listener); if (abfsConfiguration.getCreateRemoteFileSystemDuringInitialization()) { - if (this.tryGetFileStatus(new Path(AbfsHttpConstants.ROOT_PATH)) == null) { + if (this.tryGetFileStatus(new Path(AbfsHttpConstants.ROOT_PATH), tracingContext) == null) { try { - this.createFileSystem(); + this.createFileSystem(tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(null, ex, AzureServiceErrorCode.FILE_SYSTEM_ALREADY_EXISTS); } @@ -134,8 +228,7 @@ public void initialize(URI uri, Configuration configuration) } } - AbfsClientThrottlingIntercept.initializeSingleton(abfsConfiguration.isAutoThrottlingEnabled()); - + rateLimiting = RateLimitingFactory.create(abfsConfiguration.getRateLimit()); LOG.debug("Initializing AzureBlobFileSystem for {} complete", uri); } @@ -146,6 +239,7 @@ public String toString() { sb.append("uri=").append(uri); sb.append(", user='").append(abfsStore.getUser()).append('\''); sb.append(", 
primaryUserGroup='").append(abfsStore.getPrimaryGroup()).append('\''); + sb.append("[" + CAPABILITY_SAFE_READAHEAD + "]"); sb.append('}'); return sb.toString(); } @@ -159,39 +253,84 @@ public URI getUri() { return this.uri; } + public void registerListener(Listener listener1) { + listener = listener1; + } + @Override public FSDataInputStream open(final Path path, final int bufferSize) throws IOException { LOG.debug("AzureBlobFileSystem.open path: {} bufferSize: {}", path, bufferSize); + // bufferSize is unused. + return open(path, Optional.empty()); + } + private FSDataInputStream open(final Path path, + final Optional parameters) throws IOException { + statIncrement(CALL_OPEN); Path qualifiedPath = makeQualified(path); try { - InputStream inputStream = abfsStore.openFileForRead(qualifiedPath, statistics); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.OPEN, tracingHeaderFormat, listener); + InputStream inputStream = abfsStore + .openFileForRead(qualifiedPath, parameters, statistics, tracingContext); return new FSDataInputStream(inputStream); - } catch(AzureBlobFileSystemException ex) { + } catch (AzureBlobFileSystemException ex) { checkException(path, ex); return null; } } + /** + * Takes config and other options through + * {@link org.apache.hadoop.fs.impl.OpenFileParameters}. 
Ensure that + * FileStatus entered is up-to-date, as it will be used to create the + * InputStream (with info such as contentLength, eTag) + * @param path The location of file to be opened + * @param parameters OpenFileParameters instance; can hold FileStatus, + * Configuration, bufferSize and mandatoryKeys + */ + @Override + protected CompletableFuture openFileWithOptions( + final Path path, final OpenFileParameters parameters) throws IOException { + LOG.debug("AzureBlobFileSystem.openFileWithOptions path: {}", path); + AbstractFSBuilderImpl.rejectUnknownMandatoryKeys( + parameters.getMandatoryKeys(), + FS_OPTION_OPENFILE_STANDARD_OPTIONS, + "for " + path); + return LambdaUtils.eval( + new CompletableFuture<>(), () -> + open(path, Optional.of(parameters))); + } + @Override - public FSDataOutputStream create(final Path f, final FsPermission permission, final boolean overwrite, final int bufferSize, - final short replication, final long blockSize, final Progressable progress) throws IOException { + public FSDataOutputStream create(final Path f, + final FsPermission permission, + final boolean overwrite, + final int bufferSize, + final short replication, + final long blockSize, + final Progressable progress) throws IOException { LOG.debug("AzureBlobFileSystem.create path: {} permission: {} overwrite: {} bufferSize: {}", f, permission, overwrite, blockSize); + statIncrement(CALL_CREATE); trailingPeriodCheck(f); Path qualifiedPath = makeQualified(f); try { - OutputStream outputStream = abfsStore.createFile(qualifiedPath, overwrite, - permission == null ? FsPermission.getFileDefault() : permission, FsPermission.getUMask(getConf())); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE, overwrite, tracingHeaderFormat, listener); + OutputStream outputStream = abfsStore.createFile(qualifiedPath, statistics, overwrite, + permission == null ? 
FsPermission.getFileDefault() : permission, + FsPermission.getUMask(getConf()), tracingContext); + statIncrement(FILES_CREATED); return new FSDataOutputStream(outputStream, statistics); - } catch(AzureBlobFileSystemException ex) { + } catch (AzureBlobFileSystemException ex) { checkException(f, ex); return null; } @@ -203,8 +342,12 @@ public FSDataOutputStream createNonRecursive(final Path f, final FsPermission pe final boolean overwrite, final int bufferSize, final short replication, final long blockSize, final Progressable progress) throws IOException { + statIncrement(CALL_CREATE_NON_RECURSIVE); final Path parent = f.getParent(); - final FileStatus parentFileStatus = tryGetFileStatus(parent); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_NON_RECURSIVE, tracingHeaderFormat, + listener); + final FileStatus parentFileStatus = tryGetFileStatus(parent, tracingContext); if (parentFileStatus == null) { throw new FileNotFoundException("Cannot create file " @@ -216,8 +359,12 @@ public FSDataOutputStream createNonRecursive(final Path f, final FsPermission pe @Override @SuppressWarnings("deprecation") - public FSDataOutputStream createNonRecursive(final Path f, final FsPermission permission, - final EnumSet flags, final int bufferSize, final short replication, final long blockSize, + public FSDataOutputStream createNonRecursive(final Path f, + final FsPermission permission, + final EnumSet flags, + final int bufferSize, + final short replication, + final long blockSize, final Progressable progress) throws IOException { // Check if file should be appended or overwritten. 
Assume that the file @@ -241,26 +388,31 @@ public FSDataOutputStream createNonRecursive(final Path f, } @Override - public FSDataOutputStream append(final Path f, final int bufferSize, final Progressable progress) throws IOException { + public FSDataOutputStream append(final Path f, final int bufferSize, final Progressable progress) + throws IOException { LOG.debug( "AzureBlobFileSystem.append path: {} bufferSize: {}", f.toString(), bufferSize); - + statIncrement(CALL_APPEND); Path qualifiedPath = makeQualified(f); try { - OutputStream outputStream = abfsStore.openFileForWrite(qualifiedPath, false); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.APPEND, tracingHeaderFormat, + listener); + OutputStream outputStream = abfsStore + .openFileForWrite(qualifiedPath, statistics, false, tracingContext); return new FSDataOutputStream(outputStream, statistics); - } catch(AzureBlobFileSystemException ex) { + } catch (AzureBlobFileSystemException ex) { checkException(f, ex); return null; } } public boolean rename(final Path src, final Path dst) throws IOException { - LOG.debug( - "AzureBlobFileSystem.rename src: {} dst: {}", src.toString(), dst.toString()); + LOG.debug("AzureBlobFileSystem.rename src: {} dst: {}", src, dst); + statIncrement(CALL_RENAME); trailingPeriodCheck(dst); @@ -271,9 +423,12 @@ public boolean rename(final Path src, final Path dst) throws IOException { Path qualifiedSrcPath = makeQualified(src); Path qualifiedDstPath = makeQualified(dst); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.RENAME, true, tracingHeaderFormat, + listener); // rename under same folder; - if(makeQualified(parentFolder).equals(qualifiedDstPath)) { - return tryGetFileStatus(qualifiedSrcPath) != null; + if (makeQualified(parentFolder).equals(qualifiedDstPath)) { + return tryGetFileStatus(qualifiedSrcPath, tracingContext) != null; } FileStatus dstFileStatus = null; @@ 
-282,7 +437,7 @@ public boolean rename(final Path src, final Path dst) throws IOException { // - if it doesn't exist, return false // - if it is file, return true // - if it is dir, return false. - dstFileStatus = tryGetFileStatus(qualifiedDstPath); + dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext); if (dstFileStatus == null) { return false; } @@ -290,8 +445,8 @@ public boolean rename(final Path src, final Path dst) throws IOException { } // Non-HNS account need to check dst status on driver side. - if (!abfsStore.getIsNamespaceEnabled() && dstFileStatus == null) { - dstFileStatus = tryGetFileStatus(qualifiedDstPath); + if (!getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) { + dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext); } try { @@ -307,28 +462,104 @@ public boolean rename(final Path src, final Path dst) throws IOException { qualifiedDstPath = makeQualified(adjustedDst); - abfsStore.rename(qualifiedSrcPath, qualifiedDstPath); + abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext, null); return true; - } catch(AzureBlobFileSystemException ex) { + } catch (AzureBlobFileSystemException ex) { + LOG.debug("Rename operation failed. 
", ex); checkException( - src, - ex, - AzureServiceErrorCode.PATH_ALREADY_EXISTS, - AzureServiceErrorCode.INVALID_RENAME_SOURCE_PATH, - AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND, - AzureServiceErrorCode.INVALID_SOURCE_OR_DESTINATION_RESOURCE_TYPE, - AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND, - AzureServiceErrorCode.INTERNAL_OPERATION_ABORT); + src, + ex, + AzureServiceErrorCode.PATH_ALREADY_EXISTS, + AzureServiceErrorCode.INVALID_RENAME_SOURCE_PATH, + AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND, + AzureServiceErrorCode.INVALID_SOURCE_OR_DESTINATION_RESOURCE_TYPE, + AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND, + AzureServiceErrorCode.INTERNAL_OPERATION_ABORT); return false; } } + /** + * Private method to create resilient commit support. + * @return a new instance + * @param path destination path + * @throws IOException problem probing store capabilities + * @throws UnsupportedOperationException if the store lacks this support + */ + @InterfaceAudience.Private + public ResilientCommitByRename createResilientCommitSupport(final Path path) + throws IOException { + + if (!hasPathCapability(path, + CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME)) { + throw new UnsupportedOperationException( + "Resilient commit support not available for " + path); + } + return new ResilientCommitByRenameImpl(); + } + + /** + * Resilient commit support. + * Provided as a nested class to avoid contaminating the + * FS instance with too many private methods which end up + * being used widely (as has happened to the S3A FS) + */ + public class ResilientCommitByRenameImpl implements ResilientCommitByRename { + + /** + * Perform the rename. + * This will be rate limited, as well as able to recover + * from rename errors if the etag was passed in. + * @param source path to source file + * @param dest destination of rename. + * @param sourceEtag etag of source file. 
may be null or empty + * @return the outcome of the operation + * @throws IOException any rename failure which was not recovered from. + */ + public Pair commitSingleFileByRename( + final Path source, + final Path dest, + @Nullable final String sourceEtag) throws IOException { + + LOG.debug("renameFileWithEtag source: {} dest: {} etag {}", source, dest, sourceEtag); + statIncrement(CALL_RENAME); + + trailingPeriodCheck(dest); + Path qualifiedSrcPath = makeQualified(source); + Path qualifiedDstPath = makeQualified(dest); + + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.RENAME, true, tracingHeaderFormat, + listener); + + if (qualifiedSrcPath.equals(qualifiedDstPath)) { + // rename to itself is forbidden + throw new PathIOException(qualifiedSrcPath.toString(), "cannot rename object onto self"); + } + + // acquire one IO permit + final Duration waitTime = rateLimiting.acquire(1); + + try { + final boolean recovered = abfsStore.rename(qualifiedSrcPath, + qualifiedDstPath, tracingContext, sourceEtag); + return Pair.of(recovered, waitTime); + } catch (AzureBlobFileSystemException ex) { + LOG.debug("Rename operation failed. 
", ex); + checkException(source, ex); + // never reached + return null; + } + + } + } + @Override public boolean delete(final Path f, final boolean recursive) throws IOException { LOG.debug( "AzureBlobFileSystem.delete path: {} recursive: {}", f.toString(), recursive); - + statIncrement(CALL_DELETE); Path qualifiedPath = makeQualified(f); if (f.isRoot()) { @@ -340,7 +571,10 @@ public boolean delete(final Path f, final boolean recursive) throws IOException } try { - abfsStore.delete(qualifiedPath, recursive); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.DELETE, tracingHeaderFormat, + listener); + abfsStore.delete(qualifiedPath, recursive, tracingContext); return true; } catch (AzureBlobFileSystemException ex) { checkException(f, ex, AzureServiceErrorCode.PATH_NOT_FOUND); @@ -353,11 +587,14 @@ public boolean delete(final Path f, final boolean recursive) throws IOException public FileStatus[] listStatus(final Path f) throws IOException { LOG.debug( "AzureBlobFileSystem.listStatus path: {}", f.toString()); - + statIncrement(CALL_LIST_STATUS); Path qualifiedPath = makeQualified(f); try { - FileStatus[] result = abfsStore.listStatus(qualifiedPath); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.LISTSTATUS, true, tracingHeaderFormat, + listener); + FileStatus[] result = abfsStore.listStatus(qualifiedPath, tracingContext); return result; } catch (AzureBlobFileSystemException ex) { checkException(f, ex); @@ -365,6 +602,26 @@ public FileStatus[] listStatus(final Path f) throws IOException { } } + /** + * Increment of an Abfs statistic. + * + * @param statistic AbfsStatistic that needs increment. + */ + private void statIncrement(AbfsStatistic statistic) { + incrementStatistic(statistic); + } + + /** + * Method for incrementing AbfsStatistic by a long value. + * + * @param statistic the Statistic to be incremented. 
+ */ + private void incrementStatistic(AbfsStatistic statistic) { + if (abfsCounters != null) { + abfsCounters.incrementCounter(statistic, 1); + } + } + /** * Performs a check for (.) until root in the path to throw an exception. * The purpose is to differentiate between dir/dir1 and dir/dir1. @@ -375,7 +632,7 @@ public FileStatus[] listStatus(final Path f) throws IOException { * @throws IllegalArgumentException if the path has a trailing period (.) */ private void trailingPeriodCheck(Path path) throws IllegalArgumentException { - while (!path.isRoot()){ + while (!path.isRoot()) { String pathToString = path.toString(); if (pathToString.length() != 0) { if (pathToString.charAt(pathToString.length() - 1) == '.') { @@ -383,8 +640,7 @@ private void trailingPeriodCheck(Path path) throws IllegalArgumentException { "ABFS does not allow files or directories to end with a dot."); } path = path.getParent(); - } - else { + } else { break; } } @@ -394,7 +650,7 @@ private void trailingPeriodCheck(Path path) throws IllegalArgumentException { public boolean mkdirs(final Path f, final FsPermission permission) throws IOException { LOG.debug( "AzureBlobFileSystem.mkdirs path: {} permissions: {}", f, permission); - + statIncrement(CALL_MKDIRS); trailingPeriodCheck(f); final Path parentFolder = f.getParent(); @@ -406,11 +662,16 @@ public boolean mkdirs(final Path f, final FsPermission permission) throws IOExce Path qualifiedPath = makeQualified(f); try { - abfsStore.createDirectory(qualifiedPath, permission == null ? FsPermission.getDirDefault() : permission, - FsPermission.getUMask(getConf())); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.MKDIR, false, tracingHeaderFormat, + listener); + abfsStore.createDirectory(qualifiedPath, + permission == null ? 
FsPermission.getDirDefault() : permission, + FsPermission.getUMask(getConf()), tracingContext); + statIncrement(DIRECTORIES_CREATED); return true; } catch (AzureBlobFileSystemException ex) { - checkException(f, ex, AzureServiceErrorCode.PATH_ALREADY_EXISTS); + checkException(f, ex); return true; } } @@ -423,24 +684,64 @@ public synchronized void close() throws IOException { // does all the delete-on-exit calls, and may be slow. super.close(); LOG.debug("AzureBlobFileSystem.close"); + if (getConf() != null) { + String iostatisticsLoggingLevel = + getConf().getTrimmed(IOSTATISTICS_LOGGING_LEVEL, + IOSTATISTICS_LOGGING_LEVEL_DEFAULT); + logIOStatisticsAtLevel(LOG, iostatisticsLoggingLevel, getIOStatistics()); + } IOUtils.cleanupWithLogger(LOG, abfsStore, delegationTokenManager); this.isClosed = true; + if (LOG.isDebugEnabled()) { + LOG.debug("Closing Abfs: {}", toString()); + } } @Override public FileStatus getFileStatus(final Path f) throws IOException { - LOG.debug("AzureBlobFileSystem.getFileStatus path: {}", f); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.GET_FILESTATUS, tracingHeaderFormat, + listener); + return getFileStatus(f, tracingContext); + } - Path qualifiedPath = makeQualified(f); + private FileStatus getFileStatus(final Path path, + TracingContext tracingContext) throws IOException { + LOG.debug("AzureBlobFileSystem.getFileStatus path: {}", path); + statIncrement(CALL_GET_FILE_STATUS); + Path qualifiedPath = makeQualified(path); try { - return abfsStore.getFileStatus(qualifiedPath); - } catch(AzureBlobFileSystemException ex) { - checkException(f, ex); + return abfsStore.getFileStatus(qualifiedPath, tracingContext); + } catch (AzureBlobFileSystemException ex) { + checkException(path, ex); return null; } } + /** + * Break the current lease on an ABFS file if it exists. A lease that is broken cannot be + * renewed. A new lease may be obtained on the file immediately. 
+ * + * @param f file name + * @throws IOException on any exception while breaking the lease + */ + public void breakLease(final Path f) throws IOException { + LOG.debug("AzureBlobFileSystem.breakLease path: {}", f); + + Path qualifiedPath = makeQualified(f); + + try (DurationInfo ignored = new DurationInfo(LOG, false, "Break lease for %s", + qualifiedPath)) { + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.BREAK_LEASE, tracingHeaderFormat, + listener); + abfsStore.breakLease(qualifiedPath, tracingContext); + } catch (AzureBlobFileSystemException ex) { + checkException(f, ex); + } + } + /** * Qualify a path to one which uses this FileSystem and, if relative, * made absolute. @@ -463,7 +764,6 @@ public Path makeQualified(Path path) { return super.makeQualified(path); } - @Override public Path getWorkingDirectory() { return this.workingDir; @@ -486,8 +786,8 @@ public String getScheme() { @Override public Path getHomeDirectory() { return makeQualified(new Path( - FileSystemConfigurations.USER_HOME_DIRECTORY_PREFIX - + "/" + abfsStore.getUser())); + FileSystemConfigurations.USER_HOME_DIRECTORY_PREFIX + + "/" + abfsStore.getUser())); } /** @@ -511,8 +811,8 @@ public BlockLocation[] getFileBlockLocations(FileStatus file, } final String blobLocationHost = abfsStore.getAbfsConfiguration().getAzureBlockLocationHost(); - final String[] name = { blobLocationHost }; - final String[] host = { blobLocationHost }; + final String[] name = {blobLocationHost}; + final String[] host = {blobLocationHost}; long blockSize = file.getBlockSize(); if (blockSize <= 0) { throw new IllegalArgumentException( @@ -567,6 +867,11 @@ private boolean deleteRoot() throws IOException { @Override public Void call() throws Exception { delete(fs.getPath(), fs.isDirectory()); + if (fs.isDirectory()) { + statIncrement(DIRECTORIES_DELETED); + } else { + statIncrement(FILES_DELETED); + } return null; } }); @@ -582,15 +887,14 @@ public Void call() 
throws Exception { } }); } - } - finally { + } finally { executorService.shutdownNow(); } return true; } - /** + /** * Set owner of a path (i.e. a file or a directory). * The parameters owner and group cannot both be null. * @@ -603,7 +907,11 @@ public void setOwner(final Path path, final String owner, final String group) throws IOException { LOG.debug( "AzureBlobFileSystem.setOwner path: {}", path); - if (!getIsNamespaceEnabled()) { + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.SET_OWNER, true, tracingHeaderFormat, + listener); + + if (!getIsNamespaceEnabled(tracingContext)) { super.setOwner(path, owner, group); return; } @@ -616,8 +924,9 @@ public void setOwner(final Path path, final String owner, final String group) try { abfsStore.setOwner(qualifiedPath, - owner, - group); + owner, + group, + tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -634,7 +943,10 @@ public void setOwner(final Path path, final String owner, final String group) * @throws IllegalArgumentException If name is null or empty or if value is null */ @Override - public void setXAttr(final Path path, final String name, final byte[] value, final EnumSet flag) + public void setXAttr(final Path path, + final String name, + final byte[] value, + final EnumSet flag) throws IOException { LOG.debug("AzureBlobFileSystem.setXAttr path: {}", path); @@ -642,15 +954,21 @@ public void setXAttr(final Path path, final String name, final byte[] value, fin throw new IllegalArgumentException("A valid name and value must be specified."); } + Path qualifiedPath = makeQualified(path); + try { - Hashtable properties = abfsStore.getPathStatus(path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.SET_ATTR, true, tracingHeaderFormat, + listener); + Hashtable properties = abfsStore + .getPathStatus(qualifiedPath, tracingContext); String xAttrName = 
ensureValidAttributeName(name); boolean xAttrExists = properties.containsKey(xAttrName); XAttrSetFlag.validate(name, xAttrExists, flag); String xAttrValue = abfsStore.decodeAttribute(value); properties.put(xAttrName, xAttrValue); - abfsStore.setPathProperties(path, properties); + abfsStore.setPathProperties(qualifiedPath, properties, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -675,9 +993,15 @@ public byte[] getXAttr(final Path path, final String name) throw new IllegalArgumentException("A valid name must be specified."); } + Path qualifiedPath = makeQualified(path); + byte[] value = null; try { - Hashtable properties = abfsStore.getPathStatus(path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.GET_ATTR, true, tracingHeaderFormat, + listener); + Hashtable properties = abfsStore + .getPathStatus(qualifiedPath, tracingContext); String xAttrName = ensureValidAttributeName(name); if (properties.containsKey(xAttrName)) { String xAttrValue = properties.get(xAttrName); @@ -704,7 +1028,10 @@ private static String ensureValidAttributeName(String attribute) { public void setPermission(final Path path, final FsPermission permission) throws IOException { LOG.debug("AzureBlobFileSystem.setPermission path: {}", path); - if (!getIsNamespaceEnabled()) { + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.SET_PERMISSION, true, tracingHeaderFormat, listener); + + if (!getIsNamespaceEnabled(tracingContext)) { super.setPermission(path, permission); return; } @@ -716,8 +1043,7 @@ public void setPermission(final Path path, final FsPermission permission) Path qualifiedPath = makeQualified(path); try { - abfsStore.setPermission(qualifiedPath, - permission); + abfsStore.setPermission(qualifiedPath, permission, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -736,12 +1062,15 @@ public 
void setPermission(final Path path, final FsPermission permission) @Override public void modifyAclEntries(final Path path, final List aclSpec) throws IOException { - LOG.debug("AzureBlobFileSystem.modifyAclEntries path: {}", path.toString()); + LOG.debug("AzureBlobFileSystem.modifyAclEntries path: {}", path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.MODIFY_ACL, true, tracingHeaderFormat, + listener); - if (!getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "modifyAclEntries is only supported by storage accounts with the " - + "hierarchical namespace enabled."); + + "hierarchical namespace enabled."); } if (aclSpec == null || aclSpec.isEmpty()) { @@ -751,8 +1080,7 @@ public void modifyAclEntries(final Path path, final List aclSpec) Path qualifiedPath = makeQualified(path); try { - abfsStore.modifyAclEntries(qualifiedPath, - aclSpec); + abfsStore.modifyAclEntries(qualifiedPath, aclSpec, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -770,11 +1098,14 @@ public void modifyAclEntries(final Path path, final List aclSpec) public void removeAclEntries(final Path path, final List aclSpec) throws IOException { LOG.debug("AzureBlobFileSystem.removeAclEntries path: {}", path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.REMOVE_ACL_ENTRIES, true, + tracingHeaderFormat, listener); - if (!getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "removeAclEntries is only supported by storage accounts with the " - + "hierarchical namespace enabled."); + + "hierarchical namespace enabled."); } if (aclSpec == null || aclSpec.isEmpty()) { @@ -784,7 +1115,7 @@ public void removeAclEntries(final Path path, final List aclSpec) Path qualifiedPath = makeQualified(path); try { - 
abfsStore.removeAclEntries(qualifiedPath, aclSpec); + abfsStore.removeAclEntries(qualifiedPath, aclSpec, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -799,17 +1130,20 @@ public void removeAclEntries(final Path path, final List aclSpec) @Override public void removeDefaultAcl(final Path path) throws IOException { LOG.debug("AzureBlobFileSystem.removeDefaultAcl path: {}", path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.REMOVE_DEFAULT_ACL, true, + tracingHeaderFormat, listener); - if (!getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "removeDefaultAcl is only supported by storage accounts with the " - + "hierarchical namespace enabled."); + + "hierarchical namespace enabled."); } Path qualifiedPath = makeQualified(path); try { - abfsStore.removeDefaultAcl(qualifiedPath); + abfsStore.removeDefaultAcl(qualifiedPath, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -826,17 +1160,20 @@ public void removeDefaultAcl(final Path path) throws IOException { @Override public void removeAcl(final Path path) throws IOException { LOG.debug("AzureBlobFileSystem.removeAcl path: {}", path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.REMOVE_ACL, true, tracingHeaderFormat, + listener); - if (!getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "removeAcl is only supported by storage accounts with the " - + "hierarchical namespace enabled."); + + "hierarchical namespace enabled."); } Path qualifiedPath = makeQualified(path); try { - abfsStore.removeAcl(qualifiedPath); + abfsStore.removeAcl(qualifiedPath, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -856,11 +1193,14 @@ public void removeAcl(final Path 
path) throws IOException { public void setAcl(final Path path, final List aclSpec) throws IOException { LOG.debug("AzureBlobFileSystem.setAcl path: {}", path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.SET_ACL, true, tracingHeaderFormat, + listener); - if (!getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "setAcl is only supported by storage accounts with the hierarchical " - + "namespace enabled."); + + "namespace enabled."); } if (aclSpec == null || aclSpec.size() == 0) { @@ -870,7 +1210,7 @@ public void setAcl(final Path path, final List aclSpec) Path qualifiedPath = makeQualified(path); try { - abfsStore.setAcl(qualifiedPath, aclSpec); + abfsStore.setAcl(qualifiedPath, aclSpec, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); } @@ -885,18 +1225,20 @@ public void setAcl(final Path path, final List aclSpec) */ @Override public AclStatus getAclStatus(final Path path) throws IOException { - LOG.debug("AzureBlobFileSystem.getAclStatus path: {}", path.toString()); + LOG.debug("AzureBlobFileSystem.getAclStatus path: {}", path); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.GET_ACL_STATUS, true, tracingHeaderFormat, listener); - if (!getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "getAclStatus is only supported by storage account with the " - + "hierarchical namespace enabled."); + + "hierarchical namespace enabled."); } Path qualifiedPath = makeQualified(path); try { - return abfsStore.getAclStatus(qualifiedPath); + return abfsStore.getAclStatus(qualifiedPath, tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(path, ex); return null; @@ -920,26 +1262,91 @@ public void access(final Path path, final FsAction mode) throws IOException { 
LOG.debug("AzureBlobFileSystem.access path : {}, mode : {}", path, mode); Path qualifiedPath = makeQualified(path); try { - this.abfsStore.access(qualifiedPath, mode); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.ACCESS, tracingHeaderFormat, + listener); + this.abfsStore.access(qualifiedPath, mode, tracingContext); } catch (AzureBlobFileSystemException ex) { checkCheckAccessException(path, ex); } } - private FileStatus tryGetFileStatus(final Path f) { + /** + * Incrementing exists() calls from superclass for statistic collection. + * + * @param f source path. + * @return true if the path exists. + * @throws IOException + */ + @Override + public boolean exists(Path f) throws IOException { + statIncrement(CALL_EXIST); + return super.exists(f); + } + + @Override + public RemoteIterator listStatusIterator(Path path) + throws IOException { + LOG.debug("AzureBlobFileSystem.listStatusIterator path : {}", path); + if (abfsStore.getAbfsConfiguration().enableAbfsListIterator()) { + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.LISTSTATUS, true, tracingHeaderFormat, listener); + AbfsListStatusRemoteIterator abfsLsItr = + new AbfsListStatusRemoteIterator(path, abfsStore, + tracingContext); + return RemoteIterators.typeCastingRemoteIterator(abfsLsItr); + } else { + return super.listStatusIterator(path); + } + } + + /** + * Incremental listing of located status entries, + * preserving etags. + * @param path path to list + * @param filter a path filter + * @return iterator of results. + * @throws FileNotFoundException source path not found. + * @throws IOException other values. 
+ */ + @Override + protected RemoteIterator listLocatedStatus( + final Path path, + final PathFilter filter) + throws FileNotFoundException, IOException { + + LOG.debug("AzureBlobFileSystem.listStatusIterator path : {}", path); + // get a paged iterator over the source data, filtering out non-matching + // entries. + final RemoteIterator sourceEntries = filteringRemoteIterator( + listStatusIterator(path), + (st) -> filter.accept(st.getPath())); + // and then map that to a remote iterator of located file status + // entries, propagating any etags. + return mappingRemoteIterator(sourceEntries, + st -> new AbfsLocatedFileStatus(st, + st.isFile() + ? getFileBlockLocations(st, 0, st.getLen()) + : null)); + } + + private FileStatus tryGetFileStatus(final Path f, TracingContext tracingContext) { try { - return getFileStatus(f); + return getFileStatus(f, tracingContext); } catch (IOException ex) { LOG.debug("File not found {}", f); + statIncrement(ERROR_IGNORED); return null; } } private boolean fileSystemExists() throws IOException { LOG.debug( - "AzureBlobFileSystem.fileSystemExists uri: {}", uri); + "AzureBlobFileSystem.fileSystemExists uri: {}", uri); try { - abfsStore.getFilesystemProperties(); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.TEST_OP, tracingHeaderFormat, listener); + abfsStore.getFilesystemProperties(tracingContext); } catch (AzureBlobFileSystemException ex) { try { checkException(null, ex); @@ -947,17 +1354,18 @@ private boolean fileSystemExists() throws IOException { // there is not way to get the storage error code // workaround here is to check its status code. 
} catch (FileNotFoundException e) { + statIncrement(ERROR_IGNORED); return false; } } return true; } - private void createFileSystem() throws IOException { + private void createFileSystem(TracingContext tracingContext) throws IOException { LOG.debug( "AzureBlobFileSystem.createFileSystem uri: {}", uri); try { - abfsStore.createFilesystem(); + abfsStore.createFilesystem(tracingContext); } catch (AzureBlobFileSystemException ex) { checkException(null, ex); } @@ -1058,25 +1466,31 @@ private void checkCheckAccessException(final Path path, * @param allowedErrorCodesList varargs list of error codes. * @throws IOException if the exception error code is not on the allowed list. */ - private void checkException(final Path path, - final AzureBlobFileSystemException exception, - final AzureServiceErrorCode... allowedErrorCodesList) throws IOException { + @VisibleForTesting + public static void checkException(final Path path, + final AzureBlobFileSystemException exception, + final AzureServiceErrorCode... allowedErrorCodesList) throws IOException { if (exception instanceof AbfsRestOperationException) { AbfsRestOperationException ere = (AbfsRestOperationException) exception; if (ArrayUtils.contains(allowedErrorCodesList, ere.getErrorCode())) { return; } - int statusCode = ere.getStatusCode(); - //AbfsRestOperationException.getMessage() contains full error info including path/uri. 
- if (statusCode == HttpURLConnection.HTTP_NOT_FOUND) { - throw (IOException) new FileNotFoundException(ere.getMessage()) + String message = ere.getMessage(); + + switch (ere.getStatusCode()) { + case HttpURLConnection.HTTP_NOT_FOUND: + throw (IOException) new FileNotFoundException(message) .initCause(exception); - } else if (statusCode == HttpURLConnection.HTTP_CONFLICT) { - throw (IOException) new FileAlreadyExistsException(ere.getMessage()) + case HttpURLConnection.HTTP_CONFLICT: + throw (IOException) new FileAlreadyExistsException(message) .initCause(exception); - } else { + case HttpURLConnection.HTTP_FORBIDDEN: + case HttpURLConnection.HTTP_UNAUTHORIZED: + throw (IOException) new AccessDeniedException(message) + .initCause(exception); + default: throw ere; } } else if (exception instanceof SASTokenProviderException) { @@ -1120,6 +1534,7 @@ private Throwable getRootCause(Throwable throwable) { */ @Override public synchronized Token getDelegationToken(final String renewer) throws IOException { + statIncrement(CALL_GET_DELEGATION_TOKEN); return this.delegationTokenEnabled ? 
this.delegationTokenManager.getDelegationToken(renewer) : super.getDelegationToken(renewer); } @@ -1143,6 +1558,11 @@ FileSystem.Statistics getFsStatistics() { return this.statistics; } + @VisibleForTesting + void setListenerOperation(FSOperationType operation) { + listener.setOperation(operation); + } + @VisibleForTesting static class FileSystemOperation { private final T result; @@ -1159,7 +1579,7 @@ public boolean failed() { } @VisibleForTesting - AzureBlobFileSystemStore getAbfsStore() { + public AzureBlobFileSystemStore getAbfsStore() { return abfsStore; } @@ -1178,8 +1598,30 @@ AbfsDelegationTokenManager getDelegationTokenManager() { } @VisibleForTesting - boolean getIsNamespaceEnabled() throws AzureBlobFileSystemException { - return abfsStore.getIsNamespaceEnabled(); + boolean getIsNamespaceEnabled(TracingContext tracingContext) + throws AzureBlobFileSystemException { + return abfsStore.getIsNamespaceEnabled(tracingContext); + } + + /** + * Returns the counter() map in IOStatistics containing all the counters + * and their values. + * + * @return Map of IOStatistics counters. 
+ */ + @VisibleForTesting + Map getInstrumentationMap() { + return abfsCounters.toMap(); + } + + @VisibleForTesting + String getFileSystemId() { + return fileSystemId; + } + + @VisibleForTesting + String getClientCorrelationId() { + return clientCorrelationId; } @Override @@ -1190,11 +1632,32 @@ public boolean hasPathCapability(final Path path, final String capability) switch (validatePathCapabilityArgs(p, capability)) { case CommonPathCapabilities.FS_PERMISSIONS: case CommonPathCapabilities.FS_APPEND: + case CommonPathCapabilities.ETAGS_AVAILABLE: return true; + + case CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME: case CommonPathCapabilities.FS_ACLS: - return getIsNamespaceEnabled(); + return getIsNamespaceEnabled( + new TracingContext(clientCorrelationId, fileSystemId, + FSOperationType.HAS_PATH_CAPABILITY, tracingHeaderFormat, + listener)); + + // probe for presence of the HADOOP-18546 readahead fix. + case CAPABILITY_SAFE_READAHEAD: + return true; + default: return super.hasPathCapability(p, capability); } } + + /** + * Getter for IOStatistic instance in AzureBlobFilesystem. + * + * @return the IOStatistic instance from abfsCounters. + */ + @Override + public IOStatistics getIOStatistics() { + return abfsCounters != null ? 
abfsCounters.getIOStatistics() : null; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index bff0e455cf00c..cd33da401c9d3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.lang.reflect.InvocationTargetException; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.MalformedURLException; @@ -34,11 +35,11 @@ import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; -import java.text.ParseException; import java.text.SimpleDateFormat; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -46,23 +47,35 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; +import java.util.WeakHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import 
org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.EtagSource; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; +import org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConcurrentWriteOperationDetectedException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.FileSystemOperationUnhandledException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidFileSystemPropertyException; @@ -71,87 +84,137 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator; import 
org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformer; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.services.AbfsAclHelper; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientContext; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientContextBuilder; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientRenameResult; +import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamContext; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamStatisticsImpl; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStreamContext; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStreamStatisticsImpl; import org.apache.hadoop.fs.azurebfs.services.AbfsPermission; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; +import org.apache.hadoop.fs.azurebfs.services.AbfsLease; import org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials; import org.apache.hadoop.fs.azurebfs.services.AbfsPerfTracker; import org.apache.hadoop.fs.azurebfs.services.AbfsPerfInfo; +import org.apache.hadoop.fs.azurebfs.services.ListingSupport; import org.apache.hadoop.fs.azurebfs.utils.Base64; import org.apache.hadoop.fs.azurebfs.utils.CRC64; +import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; +import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; import 
org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.store.DataBlocks; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.BlockingThreadPoolExecutorService; +import org.apache.hadoop.util.SemaphoredDelegatingExecutor; +import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.apache.http.client.utils.URIBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.METADATA_INCOMPLETE_RENAME_FAILURES; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_RECOVERY; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_EQUALS; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_FORWARD_SLASH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_HYPHEN; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_PLUS; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_STAR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_UNDERSCORE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TOKEN_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_ABFS_ENDPOINT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BUFFERED_PREAD_DISABLE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_IDENTITY_TRANSFORM_CLASS; 
/** * Provides the bridging logic between Hadoop's abstract filesystem and Azure Storage. */ @InterfaceAudience.Public @InterfaceStability.Evolving -public class AzureBlobFileSystemStore implements Closeable { +public class AzureBlobFileSystemStore implements Closeable, ListingSupport { private static final Logger LOG = LoggerFactory.getLogger(AzureBlobFileSystemStore.class); private AbfsClient client; private URI uri; private String userName; private String primaryUserGroup; - private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss z"; private static final String TOKEN_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSS'Z'"; private static final String XMS_PROPERTIES_ENCODING = "ISO-8859-1"; private static final int GET_SET_AGGREGATE_COUNT = 2; + private final Map leaseRefs; + private final AbfsConfiguration abfsConfiguration; private final Set azureAtomicRenameDirSet; - private boolean isNamespaceEnabledSet; - private boolean isNamespaceEnabled; + private Set azureInfiniteLeaseDirSet; + private Trilean isNamespaceEnabled; private final AuthType authType; private final UserGroupInformation userGroupInformation; - private final IdentityTransformer identityTransformer; + private final IdentityTransformerInterface identityTransformer; private final AbfsPerfTracker abfsPerfTracker; + private final AbfsCounters abfsCounters; + + /** + * The set of directories where we should store files as append blobs. + */ + private Set appendBlobDirSet; + + /** BlockFactory being used by this instance.*/ + private DataBlocks.BlockFactory blockFactory; + /** Number of active data blocks per AbfsOutputStream */ + private int blockOutputActiveBlocks; + /** Bounded ThreadPool for this instance. */ + private ExecutorService boundedThreadPool; - public AzureBlobFileSystemStore(URI uri, boolean isSecureScheme, Configuration configuration) - throws IOException { - this.uri = uri; + /** + * FileSystem Store for {@link AzureBlobFileSystem} for Abfs operations. 
+ * Built using the {@link AzureBlobFileSystemStoreBuilder} with parameters + * required. + * @param abfsStoreBuilder Builder for AzureBlobFileSystemStore. + * @throws IOException Throw IOE in case of failure during constructing. + */ + public AzureBlobFileSystemStore( + AzureBlobFileSystemStoreBuilder abfsStoreBuilder) throws IOException { + this.uri = abfsStoreBuilder.uri; String[] authorityParts = authorityParts(uri); final String fileSystemName = authorityParts[0]; final String accountName = authorityParts[1]; + leaseRefs = Collections.synchronizedMap(new WeakHashMap<>()); + try { - this.abfsConfiguration = new AbfsConfiguration(configuration, accountName); + this.abfsConfiguration = new AbfsConfiguration(abfsStoreBuilder.configuration, accountName); } catch (IllegalAccessException exception) { throw new FileSystemOperationUnhandledException(exception); } LOG.trace("AbfsConfiguration init complete"); + this.isNamespaceEnabled = abfsConfiguration.getIsNamespaceEnabledAccount(); + this.userGroupInformation = UserGroupInformation.getCurrentUser(); this.userName = userGroupInformation.getShortUserName(); LOG.trace("UGI init complete"); @@ -170,13 +233,47 @@ public AzureBlobFileSystemStore(URI uri, boolean isSecureScheme, Configuration c this.azureAtomicRenameDirSet = new HashSet<>(Arrays.asList( abfsConfiguration.getAzureAtomicRenameDirs().split(AbfsHttpConstants.COMMA))); + updateInfiniteLeaseDirs(); this.authType = abfsConfiguration.getAuthType(accountName); boolean usingOauth = (authType == AuthType.OAuth); - boolean useHttps = (usingOauth || abfsConfiguration.isHttpsAlwaysUsed()) ? true : isSecureScheme; + boolean useHttps = (usingOauth || abfsConfiguration.isHttpsAlwaysUsed()) ? 
true : abfsStoreBuilder.isSecureScheme; this.abfsPerfTracker = new AbfsPerfTracker(fileSystemName, accountName, this.abfsConfiguration); + this.abfsCounters = abfsStoreBuilder.abfsCounters; initializeClient(uri, fileSystemName, accountName, useHttps); - this.identityTransformer = new IdentityTransformer(abfsConfiguration.getRawConfiguration()); + final Class identityTransformerClass = + abfsStoreBuilder.configuration.getClass(FS_AZURE_IDENTITY_TRANSFORM_CLASS, IdentityTransformer.class, + IdentityTransformerInterface.class); + try { + this.identityTransformer = + identityTransformerClass.getConstructor(Configuration.class).newInstance(abfsStoreBuilder.configuration); + } catch (IllegalAccessException | InstantiationException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException e) { + throw new IOException(e); + } LOG.trace("IdentityTransformer init complete"); + + // Extract the directories that should contain append blobs + String appendBlobDirs = abfsConfiguration.getAppendBlobDirs(); + if (appendBlobDirs.trim().isEmpty()) { + this.appendBlobDirSet = new HashSet(); + } else { + this.appendBlobDirSet = new HashSet<>(Arrays.asList( + abfsConfiguration.getAppendBlobDirs().split(AbfsHttpConstants.COMMA))); + } + this.blockFactory = abfsStoreBuilder.blockFactory; + this.blockOutputActiveBlocks = abfsStoreBuilder.blockOutputActiveBlocks; + this.boundedThreadPool = BlockingThreadPoolExecutorService.newInstance( + abfsConfiguration.getWriteMaxConcurrentRequestCount(), + abfsConfiguration.getMaxWriteRequestsToQueue(), + 10L, TimeUnit.SECONDS, + "abfs-bounded"); + } + + /** + * Checks if the given key in Azure Storage should be stored as a page + * blob instead of block blob. 
+ */ + public boolean isAppendBlobKey(String key) { + return isKeyForDirectorySet(key, appendBlobDirSet); } /** @@ -195,7 +292,28 @@ public String getPrimaryGroup() { @Override public void close() throws IOException { - IOUtils.cleanupWithLogger(LOG, client); + List> futures = new ArrayList<>(); + for (AbfsLease lease : leaseRefs.keySet()) { + if (lease == null) { + continue; + } + ListenableFuture future = client.submit(() -> lease.free()); + futures.add(future); + } + try { + Futures.allAsList(futures).get(); + // shutdown the threadPool and set it to null. + HadoopExecutors.shutdown(boundedThreadPool, LOG, + 30, TimeUnit.SECONDS); + boundedThreadPool = null; + } catch (InterruptedException e) { + LOG.error("Interrupted freeing leases", e); + Thread.currentThread().interrupt(); + } catch (ExecutionException e) { + LOG.error("Error freeing leases", e); + } finally { + IOUtils.cleanupWithLogger(LOG, client); + } } byte[] encodeAttribute(String value) throws UnsupportedEncodingException { @@ -230,27 +348,35 @@ private String[] authorityParts(URI uri) throws InvalidUriAuthorityException, In return authorityParts; } - public boolean getIsNamespaceEnabled() throws AzureBlobFileSystemException { - if (!isNamespaceEnabledSet) { + public boolean getIsNamespaceEnabled(TracingContext tracingContext) + throws AzureBlobFileSystemException { + try { + return this.isNamespaceEnabled.toBoolean(); + } catch (TrileanConversionException e) { + LOG.debug("isNamespaceEnabled is UNKNOWN; fall back and determine through" + + " getAcl server call", e); + } - LOG.debug("Get root ACL status"); - try (AbfsPerfInfo perfInfo = startTracking("getIsNamespaceEnabled", "getAclStatus")) { - AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + AbfsHttpConstants.ROOT_PATH); - perfInfo.registerResult(op.getResult()); - isNamespaceEnabled = true; - perfInfo.registerSuccess(true); - } catch (AbfsRestOperationException ex) { - // Get ACL status is a HEAD request, its response 
doesn't contain errorCode - // So can only rely on its status code to determine its account type. - if (HttpURLConnection.HTTP_BAD_REQUEST != ex.getStatusCode()) { - throw ex; - } - isNamespaceEnabled = false; + LOG.debug("Get root ACL status"); + try (AbfsPerfInfo perfInfo = startTracking("getIsNamespaceEnabled", + "getAclStatus")) { + AbfsRestOperation op = client + .getAclStatus(AbfsHttpConstants.ROOT_PATH, tracingContext); + perfInfo.registerResult(op.getResult()); + isNamespaceEnabled = Trilean.getTrilean(true); + perfInfo.registerSuccess(true); + } catch (AbfsRestOperationException ex) { + // Get ACL status is a HEAD request, its response doesn't contain + // errorCode + // So can only rely on its status code to determine its account type. + if (HttpURLConnection.HTTP_BAD_REQUEST != ex.getStatusCode()) { + throw ex; } - isNamespaceEnabledSet = true; + + isNamespaceEnabled = Trilean.getTrilean(false); } - return isNamespaceEnabled; + return isNamespaceEnabled.toBoolean(); } @VisibleForTesting @@ -288,7 +414,8 @@ public AbfsConfiguration getAbfsConfiguration() { return this.abfsConfiguration; } - public Hashtable getFilesystemProperties() throws AzureBlobFileSystemException { + public Hashtable getFilesystemProperties( + TracingContext tracingContext) throws AzureBlobFileSystemException { try (AbfsPerfInfo perfInfo = startTracking("getFilesystemProperties", "getFilesystemProperties")) { LOG.debug("getFilesystemProperties for filesystem: {}", @@ -296,7 +423,8 @@ public Hashtable getFilesystemProperties() throws AzureBlobFileS final Hashtable parsedXmsProperties; - final AbfsRestOperation op = client.getFilesystemProperties(); + final AbfsRestOperation op = client + .getFilesystemProperties(tracingContext); perfInfo.registerResult(op.getResult()); final String xMsProperties = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_PROPERTIES); @@ -308,7 +436,8 @@ public Hashtable getFilesystemProperties() throws AzureBlobFileS } } - public void 
setFilesystemProperties(final Hashtable properties) + public void setFilesystemProperties( + final Hashtable properties, TracingContext tracingContext) throws AzureBlobFileSystemException { if (properties == null || properties.isEmpty()) { LOG.trace("setFilesystemProperties no properties present"); @@ -328,19 +457,22 @@ public void setFilesystemProperties(final Hashtable properties) throw new InvalidAbfsRestOperationException(ex); } - final AbfsRestOperation op = client.setFilesystemProperties(commaSeparatedProperties); + final AbfsRestOperation op = client + .setFilesystemProperties(commaSeparatedProperties, tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public Hashtable getPathStatus(final Path path) throws AzureBlobFileSystemException { + public Hashtable getPathStatus(final Path path, + TracingContext tracingContext) throws AzureBlobFileSystemException { try (AbfsPerfInfo perfInfo = startTracking("getPathStatus", "getPathStatus")){ LOG.debug("getPathStatus for filesystem: {} path: {}", client.getFileSystem(), path); final Hashtable parsedXmsProperties; - final AbfsRestOperation op = client.getPathStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path)); + final AbfsRestOperation op = client + .getPathStatus(getRelativePath(path), true, tracingContext); perfInfo.registerResult(op.getResult()); final String xMsProperties = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_PROPERTIES); @@ -353,7 +485,9 @@ public Hashtable getPathStatus(final Path path) throws AzureBlob } } - public void setPathProperties(final Path path, final Hashtable properties) throws AzureBlobFileSystemException { + public void setPathProperties(final Path path, + final Hashtable properties, TracingContext tracingContext) + throws AzureBlobFileSystemException { try (AbfsPerfInfo perfInfo = startTracking("setPathProperties", "setPathProperties")){ LOG.debug("setFilesystemProperties for filesystem: {} path: {} with properties: {}", 
client.getFileSystem(), @@ -366,62 +500,225 @@ public void setPathProperties(final Path path, final Hashtable p } catch (CharacterCodingException ex) { throw new InvalidAbfsRestOperationException(ex); } - final AbfsRestOperation op = client.setPathProperties(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), commaSeparatedProperties); + final AbfsRestOperation op = client + .setPathProperties(getRelativePath(path), commaSeparatedProperties, + tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public void createFilesystem() throws AzureBlobFileSystemException { + public void createFilesystem(TracingContext tracingContext) + throws AzureBlobFileSystemException { try (AbfsPerfInfo perfInfo = startTracking("createFilesystem", "createFilesystem")){ LOG.debug("createFilesystem for filesystem: {}", client.getFileSystem()); - final AbfsRestOperation op = client.createFilesystem(); + final AbfsRestOperation op = client.createFilesystem(tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public void deleteFilesystem() throws AzureBlobFileSystemException { + public void deleteFilesystem(TracingContext tracingContext) + throws AzureBlobFileSystemException { try (AbfsPerfInfo perfInfo = startTracking("deleteFilesystem", "deleteFilesystem")) { LOG.debug("deleteFilesystem for filesystem: {}", client.getFileSystem()); - final AbfsRestOperation op = client.deleteFilesystem(); + final AbfsRestOperation op = client.deleteFilesystem(tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public OutputStream createFile(final Path path, final boolean overwrite, final FsPermission permission, - final FsPermission umask) throws AzureBlobFileSystemException { + public OutputStream createFile(final Path path, + final FileSystem.Statistics statistics, final boolean overwrite, + final FsPermission permission, final FsPermission umask, + TracingContext tracingContext) throws IOException { 
try (AbfsPerfInfo perfInfo = startTracking("createFile", "createPath")) { - boolean isNamespaceEnabled = getIsNamespaceEnabled(); + boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); LOG.debug("createFile filesystem: {} path: {} overwrite: {} permission: {} umask: {} isNamespaceEnabled: {}", client.getFileSystem(), path, overwrite, - permission.toString(), - umask.toString(), + permission, + umask, isNamespaceEnabled); - final AbfsRestOperation op = client.createPath(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), true, overwrite, - isNamespaceEnabled ? getOctalNotation(permission) : null, - isNamespaceEnabled ? getOctalNotation(umask) : null); + String relativePath = getRelativePath(path); + boolean isAppendBlob = false; + if (isAppendBlobKey(path.toString())) { + isAppendBlob = true; + } + + // if "fs.azure.enable.conditional.create.overwrite" is enabled and + // is a create request with overwrite=true, create will follow different + // flow. + boolean triggerConditionalCreateOverwrite = false; + if (overwrite + && abfsConfiguration.isConditionalCreateOverwriteEnabled()) { + triggerConditionalCreateOverwrite = true; + } + + AbfsRestOperation op; + if (triggerConditionalCreateOverwrite) { + op = conditionalCreateOverwriteFile(relativePath, + statistics, + isNamespaceEnabled ? getOctalNotation(permission) : null, + isNamespaceEnabled ? getOctalNotation(umask) : null, + isAppendBlob, + tracingContext + ); + + } else { + op = client.createPath(relativePath, true, + overwrite, + isNamespaceEnabled ? getOctalNotation(permission) : null, + isNamespaceEnabled ? 
getOctalNotation(umask) : null, + isAppendBlob, + null, + tracingContext); + + } perfInfo.registerResult(op.getResult()).registerSuccess(true); + AbfsLease lease = maybeCreateLease(relativePath, tracingContext); + return new AbfsOutputStream( + populateAbfsOutputStreamContext( + isAppendBlob, + lease, client, - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), + statistics, + relativePath, 0, - abfsConfiguration.getWriteBufferSize(), - abfsConfiguration.isFlushEnabled(), - abfsConfiguration.isOutputStreamFlushDisabled()); + tracingContext)); } } - public void createDirectory(final Path path, final FsPermission permission, final FsPermission umask) + /** + * Conditional create overwrite flow ensures that create overwrites is done + * only if there is match for eTag of existing file. + * @param relativePath + * @param statistics + * @param permission + * @param umask + * @param isAppendBlob + * @return + * @throws AzureBlobFileSystemException + */ + private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePath, + final FileSystem.Statistics statistics, + final String permission, + final String umask, + final boolean isAppendBlob, + TracingContext tracingContext) throws AzureBlobFileSystemException { + AbfsRestOperation op; + + try { + // Trigger a create with overwrite=false first so that eTag fetch can be + // avoided for cases when no pre-existing file is present (major portion + // of create file traffic falls into the case of no pre-existing file). 
+ op = client.createPath(relativePath, true, false, permission, umask, + isAppendBlob, null, tracingContext); + + } catch (AbfsRestOperationException e) { + if (e.getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { + // File pre-exists, fetch eTag + try { + op = client.getPathStatus(relativePath, false, tracingContext); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { + // Is a parallel access case, as file which was found to be + // present went missing by this request. + throw new ConcurrentWriteOperationDetectedException( + "Parallel access to the create path detected. Failing request " + + "to honor single writer semantics"); + } else { + throw ex; + } + } + + String eTag = op.getResult() + .getResponseHeader(HttpHeaderConfigurations.ETAG); + + try { + // overwrite only if eTag matches with the file properties fetched befpre + op = client.createPath(relativePath, true, true, permission, umask, + isAppendBlob, eTag, tracingContext); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HttpURLConnection.HTTP_PRECON_FAILED) { + // Is a parallel access case, as file with eTag was just queried + // and precondition failure can happen only when another file with + // different etag got created. + throw new ConcurrentWriteOperationDetectedException( + "Parallel access to the create path detected. Failing request " + + "to honor single writer semantics"); + } else { + throw ex; + } + } + } else { + throw e; + } + } + + return op; + } + + /** + * Method to populate AbfsOutputStreamContext with different parameters to + * be used to construct {@link AbfsOutputStream}. + * + * @param isAppendBlob is Append blob support enabled? + * @param lease instance of AbfsLease for this AbfsOutputStream. + * @param client AbfsClient. + * @param statistics FileSystem statistics. + * @param path Path for AbfsOutputStream. 
+ * @param position Position or offset of the file being opened, set to 0 + * when creating a new file, but needs to be set for APPEND + * calls on the same file. + * @param tracingContext instance of TracingContext for this AbfsOutputStream. + * @return AbfsOutputStreamContext instance with the desired parameters. + */ + private AbfsOutputStreamContext populateAbfsOutputStreamContext( + boolean isAppendBlob, + AbfsLease lease, + AbfsClient client, + FileSystem.Statistics statistics, + String path, + long position, + TracingContext tracingContext) { + int bufferSize = abfsConfiguration.getWriteBufferSize(); + if (isAppendBlob && bufferSize > FileSystemConfigurations.APPENDBLOB_MAX_WRITE_BUFFER_SIZE) { + bufferSize = FileSystemConfigurations.APPENDBLOB_MAX_WRITE_BUFFER_SIZE; + } + return new AbfsOutputStreamContext(abfsConfiguration.getSasTokenRenewPeriodForStreamsInSeconds()) + .withWriteBufferSize(bufferSize) + .enableExpectHeader(abfsConfiguration.isExpectHeaderEnabled()) + .enableFlush(abfsConfiguration.isFlushEnabled()) + .enableSmallWriteOptimization(abfsConfiguration.isSmallWriteOptimizationEnabled()) + .disableOutputStreamFlush(abfsConfiguration.isOutputStreamFlushDisabled()) + .withStreamStatistics(new AbfsOutputStreamStatisticsImpl()) + .withAppendBlob(isAppendBlob) + .withWriteMaxConcurrentRequestCount(abfsConfiguration.getWriteMaxConcurrentRequestCount()) + .withMaxWriteRequestsToQueue(abfsConfiguration.getMaxWriteRequestsToQueue()) + .withLease(lease) + .withBlockFactory(blockFactory) + .withBlockOutputActiveBlocks(blockOutputActiveBlocks) + .withClient(client) + .withPosition(position) + .withFsStatistics(statistics) + .withPath(path) + .withExecutorService(new SemaphoredDelegatingExecutor(boundedThreadPool, + blockOutputActiveBlocks, true)) + .withTracingContext(tracingContext) + .build(); + } + + public void createDirectory(final Path path, final FsPermission permission, + final FsPermission umask, TracingContext tracingContext) throws 
AzureBlobFileSystemException { try (AbfsPerfInfo perfInfo = startTracking("createDirectory", "createPath")) { - boolean isNamespaceEnabled = getIsNamespaceEnabled(); + boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); LOG.debug("createDirectory filesystem: {} path: {} permission: {} umask: {} isNamespaceEnabled: {}", client.getFileSystem(), path, @@ -429,54 +726,114 @@ public void createDirectory(final Path path, final FsPermission permission, fina umask, isNamespaceEnabled); - final AbfsRestOperation op = client.createPath(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), false, true, + boolean overwrite = + !isNamespaceEnabled || abfsConfiguration.isEnabledMkdirOverwrite(); + final AbfsRestOperation op = client.createPath(getRelativePath(path), + false, overwrite, isNamespaceEnabled ? getOctalNotation(permission) : null, - isNamespaceEnabled ? getOctalNotation(umask) : null); + isNamespaceEnabled ? getOctalNotation(umask) : null, false, null, + tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public AbfsInputStream openFileForRead(final Path path, final FileSystem.Statistics statistics) - throws AzureBlobFileSystemException { - try (AbfsPerfInfo perfInfo = startTracking("openFileForRead", "getPathStatus")) { - LOG.debug("openFileForRead filesystem: {} path: {}", - client.getFileSystem(), - path); - - final AbfsRestOperation op = client.getPathStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path)); - perfInfo.registerResult(op.getResult()); + public AbfsInputStream openFileForRead(final Path path, + final FileSystem.Statistics statistics, TracingContext tracingContext) + throws IOException { + return openFileForRead(path, Optional.empty(), statistics, + tracingContext); + } - final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); - final long contentLength = 
Long.parseLong(op.getResult().getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH)); - final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); + public AbfsInputStream openFileForRead(Path path, + final Optional parameters, + final FileSystem.Statistics statistics, TracingContext tracingContext) + throws IOException { + try (AbfsPerfInfo perfInfo = startTracking("openFileForRead", + "getPathStatus")) { + LOG.debug("openFileForRead filesystem: {} path: {}", + client.getFileSystem(), path); + + FileStatus fileStatus = parameters.map(OpenFileParameters::getStatus) + .orElse(null); + String relativePath = getRelativePath(path); + String resourceType, eTag; + long contentLength; + if (fileStatus instanceof VersionedFileStatus) { + path = path.makeQualified(this.uri, path); + Preconditions.checkArgument(fileStatus.getPath().equals(path), + String.format( + "Filestatus path [%s] does not match with given path [%s]", + fileStatus.getPath(), path)); + resourceType = fileStatus.isFile() ? 
FILE : DIRECTORY; + contentLength = fileStatus.getLen(); + eTag = ((VersionedFileStatus) fileStatus).getVersion(); + } else { + if (fileStatus != null) { + LOG.debug( + "Fallback to getPathStatus REST call as provided filestatus " + + "is not of type VersionedFileStatus"); + } + AbfsHttpOperation op = client.getPathStatus(relativePath, false, + tracingContext).getResult(); + resourceType = op.getResponseHeader( + HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + contentLength = Long.parseLong( + op.getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH)); + eTag = op.getResponseHeader(HttpHeaderConfigurations.ETAG); + } if (parseIsDirectory(resourceType)) { throw new AbfsRestOperationException( - AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), - AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), - "openFileForRead must be used with files and not directories", - null); + AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), + AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), + "openFileForRead must be used with files and not directories", + null); } perfInfo.registerSuccess(true); // Add statistics for InputStream - return new AbfsInputStream(client, statistics, - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), contentLength, - abfsConfiguration.getReadBufferSize(), abfsConfiguration.getReadAheadQueueDepth(), - abfsConfiguration.getTolerateOobAppends(), eTag); + return new AbfsInputStream(client, statistics, relativePath, + contentLength, populateAbfsInputStreamContext( + parameters.map(OpenFileParameters::getOptions)), + eTag, tracingContext); } } - public OutputStream openFileForWrite(final Path path, final boolean overwrite) throws - AzureBlobFileSystemException { + private AbfsInputStreamContext populateAbfsInputStreamContext( + Optional options) { + boolean bufferedPreadDisabled = options + .map(c -> c.getBoolean(FS_AZURE_BUFFERED_PREAD_DISABLE, false)) + .orElse(false); + return new 
AbfsInputStreamContext(abfsConfiguration.getSasTokenRenewPeriodForStreamsInSeconds()) + .withReadBufferSize(abfsConfiguration.getReadBufferSize()) + .withReadAheadQueueDepth(abfsConfiguration.getReadAheadQueueDepth()) + .withTolerateOobAppends(abfsConfiguration.getTolerateOobAppends()) + .isReadAheadEnabled(abfsConfiguration.isReadAheadEnabled()) + .withReadSmallFilesCompletely(abfsConfiguration.readSmallFilesCompletely()) + .withOptimizeFooterRead(abfsConfiguration.optimizeFooterRead()) + .withReadAheadRange(abfsConfiguration.getReadAheadRange()) + .withStreamStatistics(new AbfsInputStreamStatisticsImpl()) + .withShouldReadBufferSizeAlways( + abfsConfiguration.shouldReadBufferSizeAlways()) + .withReadAheadBlockSize(abfsConfiguration.getReadAheadBlockSize()) + .withBufferedPreadDisabled(bufferedPreadDisabled) + .build(); + } + + public OutputStream openFileForWrite(final Path path, + final FileSystem.Statistics statistics, final boolean overwrite, + TracingContext tracingContext) throws IOException { try (AbfsPerfInfo perfInfo = startTracking("openFileForWrite", "getPathStatus")) { LOG.debug("openFileForWrite filesystem: {} path: {} overwrite: {}", client.getFileSystem(), path, overwrite); - final AbfsRestOperation op = client.getPathStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path)); + String relativePath = getRelativePath(path); + + final AbfsRestOperation op = client + .getPathStatus(relativePath, false, tracingContext); perfInfo.registerResult(op.getResult()); final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); @@ -494,17 +851,54 @@ public OutputStream openFileForWrite(final Path path, final boolean overwrite) t perfInfo.registerSuccess(true); + boolean isAppendBlob = false; + if (isAppendBlobKey(path.toString())) { + isAppendBlob = true; + } + + AbfsLease lease = maybeCreateLease(relativePath, tracingContext); + return new AbfsOutputStream( + populateAbfsOutputStreamContext( + isAppendBlob, + 
lease, client, - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), + statistics, + relativePath, offset, - abfsConfiguration.getWriteBufferSize(), - abfsConfiguration.isFlushEnabled(), - abfsConfiguration.isOutputStreamFlushDisabled()); + tracingContext)); } } - public void rename(final Path source, final Path destination) throws + /** + * Break any current lease on an ABFS file. + * + * @param path file name + * @param tracingContext TracingContext instance to track correlation IDs + * @throws AzureBlobFileSystemException on any exception while breaking the lease + */ + public void breakLease(final Path path, final TracingContext tracingContext) throws AzureBlobFileSystemException { + LOG.debug("lease path: {}", path); + + client.breakLease(getRelativePath(path), tracingContext); + } + + /** + * Rename a file or directory. + * If a source etag is passed in, the operation will attempt to recover + * from a missing source file by probing the destination for + * existence and comparing etags. + * @param source path to source file + * @param destination destination of rename. + * @param tracingContext trace context + * @param sourceEtag etag of source file. may be null or empty + * @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures. + * @return true if recovery was needed and succeeded. + */ + public boolean rename(final Path source, + final Path destination, + final TracingContext tracingContext, + final String sourceEtag) throws AzureBlobFileSystemException { final Instant startAggregate = abfsPerfTracker.getLatencyInstant(); long countAggregate = 0; @@ -522,25 +916,38 @@ public void rename(final Path source, final Path destination) throws String continuation = null; + String sourceRelativePath = getRelativePath(source); + String destinationRelativePath = getRelativePath(destination); + // was any operation recovered from? 
+ boolean recovered = false; + do { try (AbfsPerfInfo perfInfo = startTracking("rename", "renamePath")) { - AbfsRestOperation op = client.renamePath(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(source), - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(destination), continuation); + boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); + final AbfsClientRenameResult abfsClientRenameResult = + client.renamePath(sourceRelativePath, destinationRelativePath, + continuation, tracingContext, sourceEtag, false, + isNamespaceEnabled); + + AbfsRestOperation op = abfsClientRenameResult.getOp(); perfInfo.registerResult(op.getResult()); continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); perfInfo.registerSuccess(true); countAggregate++; shouldContinue = continuation != null && !continuation.isEmpty(); - + // update the recovery flag. + recovered |= abfsClientRenameResult.isRenameRecovered(); + populateRenameRecoveryStatistics(abfsClientRenameResult); if (!shouldContinue) { perfInfo.registerAggregates(startAggregate, countAggregate); } } } while (shouldContinue); + return recovered; } - public void delete(final Path path, final boolean recursive) - throws AzureBlobFileSystemException { + public void delete(final Path path, final boolean recursive, + TracingContext tracingContext) throws AzureBlobFileSystemException { final Instant startAggregate = abfsPerfTracker.getLatencyInstant(); long countAggregate = 0; boolean shouldContinue = true; @@ -552,10 +959,12 @@ public void delete(final Path path, final boolean recursive) String continuation = null; + String relativePath = getRelativePath(path); + do { try (AbfsPerfInfo perfInfo = startTracking("delete", "deletePath")) { - AbfsRestOperation op = client.deletePath( - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), recursive, continuation); + AbfsRestOperation op = client + .deletePath(relativePath, recursive, continuation, tracingContext); 
perfInfo.registerResult(op.getResult()); continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); perfInfo.registerSuccess(true); @@ -569,9 +978,10 @@ public void delete(final Path path, final boolean recursive) } while (shouldContinue); } - public FileStatus getFileStatus(final Path path) throws IOException { + public FileStatus getFileStatus(final Path path, + TracingContext tracingContext) throws IOException { try (AbfsPerfInfo perfInfo = startTracking("getFileStatus", "undetermined")) { - boolean isNamespaceEnabled = getIsNamespaceEnabled(); + boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); LOG.debug("getFileStatus filesystem: {} path: {} isNamespaceEnabled: {}", client.getFileSystem(), path, @@ -581,21 +991,21 @@ public FileStatus getFileStatus(final Path path) throws IOException { if (path.isRoot()) { if (isNamespaceEnabled) { perfInfo.registerCallee("getAclStatus"); - op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + AbfsHttpConstants.ROOT_PATH); + op = client.getAclStatus(getRelativePath(path), tracingContext); } else { perfInfo.registerCallee("getFilesystemProperties"); - op = client.getFilesystemProperties(); + op = client.getFilesystemProperties(tracingContext); } } else { perfInfo.registerCallee("getPathStatus"); - op = client.getPathStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path)); + op = client.getPathStatus(getRelativePath(path), false, tracingContext); } perfInfo.registerResult(op.getResult()); final long blockSize = abfsConfiguration.getAzureBlockSize(); final AbfsHttpOperation result = op.getResult(); - final String eTag = result.getResponseHeader(HttpHeaderConfigurations.ETAG); + String eTag = extractEtagHeader(result); final String lastModified = result.getResponseHeader(HttpHeaderConfigurations.LAST_MODIFIED); final String permissions = result.getResponseHeader((HttpHeaderConfigurations.X_MS_PERMISSIONS)); final boolean hasAcl = 
AbfsPermission.isExtendedAcl(permissions); @@ -632,7 +1042,7 @@ public FileStatus getFileStatus(final Path path) throws IOException { resourceIsDir, 1, blockSize, - parseLastModifiedTime(lastModified), + DateTimeUtils.parseLastModifiedTime(lastModified), path, eTag); } @@ -640,10 +1050,12 @@ public FileStatus getFileStatus(final Path path) throws IOException { /** * @param path The list path. + * @param tracingContext Tracks identifiers for request header * @return the entries in the path. * */ - public FileStatus[] listStatus(final Path path) throws IOException { - return listStatus(path, null); + @Override + public FileStatus[] listStatus(final Path path, TracingContext tracingContext) throws IOException { + return listStatus(path, null, tracingContext); } /** @@ -654,11 +1066,21 @@ public FileStatus[] listStatus(final Path path) throws IOException { * Notice that if startFrom is a non-existent entry name, then the list response contains * all entries after this non-existent entry in lexical order: * listStatus(Path("/folder"), "cfile") will return "/folder/hfile" and "/folder/ifile". - * + * @param tracingContext Tracks identifiers for request header * @return the entries in the path start from "startFrom" in lexical order. 
* */ @InterfaceStability.Unstable - public FileStatus[] listStatus(final Path path, final String startFrom) throws IOException { + @Override + public FileStatus[] listStatus(final Path path, final String startFrom, TracingContext tracingContext) throws IOException { + List fileStatuses = new ArrayList<>(); + listStatus(path, startFrom, fileStatuses, true, null, tracingContext); + return fileStatuses.toArray(new FileStatus[fileStatuses.size()]); + } + + @Override + public String listStatus(final Path path, final String startFrom, + List fileStatuses, final boolean fetchAll, + String continuation, TracingContext tracingContext) throws IOException { final Instant startAggregate = abfsPerfTracker.getLatencyInstant(); long countAggregate = 0; boolean shouldContinue = true; @@ -668,21 +1090,22 @@ public FileStatus[] listStatus(final Path path, final String startFrom) throws I path, startFrom); - final String relativePath = path.isRoot() ? AbfsHttpConstants.EMPTY_STRING : getRelativePath(path); - String continuation = null; + final String relativePath = getRelativePath(path); - // generate continuation token if a valid startFrom is provided. - if (startFrom != null && !startFrom.isEmpty()) { - continuation = getIsNamespaceEnabled() - ? generateContinuationTokenForXns(startFrom) - : generateContinuationTokenForNonXns(path.isRoot() ? ROOT_PATH : relativePath, startFrom); + if (continuation == null || continuation.isEmpty()) { + // generate continuation token if a valid startFrom is provided. + if (startFrom != null && !startFrom.isEmpty()) { + continuation = getIsNamespaceEnabled(tracingContext) + ? 
generateContinuationTokenForXns(startFrom) + : generateContinuationTokenForNonXns(relativePath, startFrom); + } } - ArrayList fileStatuses = new ArrayList<>(); do { try (AbfsPerfInfo perfInfo = startTracking("listStatus", "listPath")) { AbfsRestOperation op = client.listPath(relativePath, false, - abfsConfiguration.getListMaxResults(), continuation); + abfsConfiguration.getListMaxResults(), continuation, + tracingContext); perfInfo.registerResult(op.getResult()); continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); ListResultSchema retrievedSchema = op.getResult().getListResultSchema(); @@ -708,7 +1131,8 @@ public FileStatus[] listStatus(final Path path, final String startFrom) throws I long contentLength = entry.contentLength() == null ? 0 : entry.contentLength(); boolean isDirectory = entry.isDirectory() == null ? false : entry.isDirectory(); if (entry.lastModified() != null && !entry.lastModified().isEmpty()) { - lastModifiedMillis = parseLastModifiedTime(entry.lastModified()); + lastModifiedMillis = DateTimeUtils.parseLastModifiedTime( + entry.lastModified()); } Path entryPath = new Path(File.separator + entry.name()); @@ -731,7 +1155,8 @@ public FileStatus[] listStatus(final Path path, final String startFrom) throws I perfInfo.registerSuccess(true); countAggregate++; - shouldContinue = continuation != null && !continuation.isEmpty(); + shouldContinue = + fetchAll && continuation != null && !continuation.isEmpty(); if (!shouldContinue) { perfInfo.registerAggregates(startAggregate, countAggregate); @@ -739,7 +1164,7 @@ public FileStatus[] listStatus(final Path path, final String startFrom) throws I } } while (shouldContinue); - return fileStatuses.toArray(new FileStatus[fileStatuses.size()]); + return continuation; } // generate continuation token for xns account @@ -763,12 +1188,13 @@ private String generateContinuationTokenForXns(final String firstEntryName) { } // generate continuation token for non-xns account - 
private String generateContinuationTokenForNonXns(final String path, final String firstEntryName) { + private String generateContinuationTokenForNonXns(String path, final String firstEntryName) { Preconditions.checkArgument(!Strings.isNullOrEmpty(firstEntryName) && !firstEntryName.startsWith(AbfsHttpConstants.ROOT_PATH), "startFrom must be a dir/file name and it can not be a full path"); // Notice: non-xns continuation token requires full path (first "/" is not included) for startFrom + path = AbfsClient.getDirectoryQueryParameter(path); final String startFrom = (path.isEmpty() || path.equals(ROOT_PATH)) ? firstEntryName : path + ROOT_PATH + firstEntryName; @@ -797,9 +1223,9 @@ private String generateContinuationTokenForNonXns(final String path, final Strin return encodedTokenBuilder.toString(); } - public void setOwner(final Path path, final String owner, final String group) throws - AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void setOwner(final Path path, final String owner, final String group, + TracingContext tracingContext) throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -809,25 +1235,25 @@ public void setOwner(final Path path, final String owner, final String group) th LOG.debug( "setOwner filesystem: {} path: {} owner: {} group: {}", client.getFileSystem(), - path.toString(), + path, owner, group); final String transformedOwner = identityTransformer.transformUserOrGroupForSetRequest(owner); final String transformedGroup = identityTransformer.transformUserOrGroupForSetRequest(group); - final AbfsRestOperation op = client.setOwner( - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), + final AbfsRestOperation op = client.setOwner(getRelativePath(path), transformedOwner, - transformedGroup); + transformedGroup, + tracingContext); 
perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public void setPermission(final Path path, final FsPermission permission) throws - AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void setPermission(final Path path, final FsPermission permission, + TracingContext tracingContext) throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -837,20 +1263,20 @@ public void setPermission(final Path path, final FsPermission permission) throws LOG.debug( "setPermission filesystem: {} path: {} permission: {}", client.getFileSystem(), - path.toString(), - permission.toString()); + path, + permission); - final AbfsRestOperation op = client.setPermission( - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), - String.format(AbfsHttpConstants.PERMISSION_FORMAT, permission.toOctal())); + final AbfsRestOperation op = client.setPermission(getRelativePath(path), + String.format(AbfsHttpConstants.PERMISSION_FORMAT, + permission.toOctal()), tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } - public void modifyAclEntries(final Path path, final List aclSpec) throws - AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void modifyAclEntries(final Path path, final List aclSpec, + TracingContext tracingContext) throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -860,14 +1286,17 @@ public void modifyAclEntries(final Path path, final List aclSpec) thro LOG.debug( "modifyAclEntries filesystem: {} path: {} aclSpec: {}", client.getFileSystem(), - path.toString(), + path, AclEntry.aclSpecToString(aclSpec)); 
identityTransformer.transformAclEntriesForSetRequest(aclSpec); final Map modifyAclEntries = AbfsAclHelper.deserializeAclSpec(AclEntry.aclSpecToString(aclSpec)); boolean useUpn = AbfsAclHelper.isUpnFormatAclEntries(modifyAclEntries); - final AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), useUpn); + String relativePath = getRelativePath(path); + + final AbfsRestOperation op = client + .getAclStatus(relativePath, useUpn, tracingContext); perfInfoGet.registerResult(op.getResult()); final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); @@ -878,9 +1307,9 @@ public void modifyAclEntries(final Path path, final List aclSpec) thro perfInfoGet.registerSuccess(true).finishTracking(); try (AbfsPerfInfo perfInfoSet = startTracking("modifyAclEntries", "setAcl")) { - final AbfsRestOperation setAclOp - = client.setAcl(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), - AbfsAclHelper.serializeAclSpec(aclEntries), eTag); + final AbfsRestOperation setAclOp = client + .setAcl(relativePath, AbfsAclHelper.serializeAclSpec(aclEntries), + eTag, tracingContext); perfInfoSet.registerResult(setAclOp.getResult()) .registerSuccess(true) .registerAggregates(perfInfoGet.getTrackingStart(), GET_SET_AGGREGATE_COUNT); @@ -888,8 +1317,9 @@ public void modifyAclEntries(final Path path, final List aclSpec) thro } } - public void removeAclEntries(final Path path, final List aclSpec) throws AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void removeAclEntries(final Path path, final List aclSpec, + TracingContext tracingContext) throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -899,14 +1329,17 @@ public void removeAclEntries(final Path path, final List aclSpec) thro LOG.debug( "removeAclEntries 
filesystem: {} path: {} aclSpec: {}", client.getFileSystem(), - path.toString(), + path, AclEntry.aclSpecToString(aclSpec)); identityTransformer.transformAclEntriesForSetRequest(aclSpec); final Map removeAclEntries = AbfsAclHelper.deserializeAclSpec(AclEntry.aclSpecToString(aclSpec)); boolean isUpnFormat = AbfsAclHelper.isUpnFormatAclEntries(removeAclEntries); - final AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), isUpnFormat); + String relativePath = getRelativePath(path); + + final AbfsRestOperation op = client + .getAclStatus(relativePath, isUpnFormat, tracingContext); perfInfoGet.registerResult(op.getResult()); final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); @@ -917,9 +1350,9 @@ public void removeAclEntries(final Path path, final List aclSpec) thro perfInfoGet.registerSuccess(true).finishTracking(); try (AbfsPerfInfo perfInfoSet = startTracking("removeAclEntries", "setAcl")) { - final AbfsRestOperation setAclOp = - client.setAcl(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), - AbfsAclHelper.serializeAclSpec(aclEntries), eTag); + final AbfsRestOperation setAclOp = client + .setAcl(relativePath, AbfsAclHelper.serializeAclSpec(aclEntries), + eTag, tracingContext); perfInfoSet.registerResult(setAclOp.getResult()) .registerSuccess(true) .registerAggregates(perfInfoGet.getTrackingStart(), GET_SET_AGGREGATE_COUNT); @@ -927,8 +1360,9 @@ public void removeAclEntries(final Path path, final List aclSpec) thro } } - public void removeDefaultAcl(final Path path) throws AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void removeDefaultAcl(final Path path, TracingContext tracingContext) + throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -938,9 +1372,12 @@ public 
void removeDefaultAcl(final Path path) throws AzureBlobFileSystemExceptio LOG.debug( "removeDefaultAcl filesystem: {} path: {}", client.getFileSystem(), - path.toString()); + path); + + String relativePath = getRelativePath(path); - final AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true)); + final AbfsRestOperation op = client + .getAclStatus(relativePath, tracingContext); perfInfoGet.registerResult(op.getResult()); final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); final Map aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL)); @@ -957,9 +1394,9 @@ public void removeDefaultAcl(final Path path) throws AzureBlobFileSystemExceptio perfInfoGet.registerSuccess(true).finishTracking(); try (AbfsPerfInfo perfInfoSet = startTracking("removeDefaultAcl", "setAcl")) { - final AbfsRestOperation setAclOp = - client.setAcl(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), - AbfsAclHelper.serializeAclSpec(aclEntries), eTag); + final AbfsRestOperation setAclOp = client + .setAcl(relativePath, AbfsAclHelper.serializeAclSpec(aclEntries), + eTag, tracingContext); perfInfoSet.registerResult(setAclOp.getResult()) .registerSuccess(true) .registerAggregates(perfInfoGet.getTrackingStart(), GET_SET_AGGREGATE_COUNT); @@ -967,8 +1404,9 @@ public void removeDefaultAcl(final Path path) throws AzureBlobFileSystemExceptio } } - public void removeAcl(final Path path) throws AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void removeAcl(final Path path, TracingContext tracingContext) + throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -978,9 +1416,12 @@ public void removeAcl(final Path path) throws AzureBlobFileSystemException { 
LOG.debug( "removeAcl filesystem: {} path: {}", client.getFileSystem(), - path.toString()); + path); + + String relativePath = getRelativePath(path); - final AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true)); + final AbfsRestOperation op = client + .getAclStatus(relativePath, tracingContext); perfInfoGet.registerResult(op.getResult()); final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); @@ -994,9 +1435,9 @@ public void removeAcl(final Path path) throws AzureBlobFileSystemException { perfInfoGet.registerSuccess(true).finishTracking(); try (AbfsPerfInfo perfInfoSet = startTracking("removeAcl", "setAcl")) { - final AbfsRestOperation setAclOp = - client.setAcl(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), - AbfsAclHelper.serializeAclSpec(newAclEntries), eTag); + final AbfsRestOperation setAclOp = client + .setAcl(relativePath, AbfsAclHelper.serializeAclSpec(newAclEntries), + eTag, tracingContext); perfInfoSet.registerResult(setAclOp.getResult()) .registerSuccess(true) .registerAggregates(perfInfoGet.getTrackingStart(), GET_SET_AGGREGATE_COUNT); @@ -1004,8 +1445,9 @@ public void removeAcl(final Path path) throws AzureBlobFileSystemException { } } - public void setAcl(final Path path, final List aclSpec) throws AzureBlobFileSystemException { - if (!getIsNamespaceEnabled()) { + public void setAcl(final Path path, final List aclSpec, + TracingContext tracingContext) throws AzureBlobFileSystemException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -1015,14 +1457,17 @@ public void setAcl(final Path path, final List aclSpec) throws AzureBl LOG.debug( "setAcl filesystem: {} path: {} aclspec: {}", client.getFileSystem(), - path.toString(), + path, AclEntry.aclSpecToString(aclSpec)); 
identityTransformer.transformAclEntriesForSetRequest(aclSpec); final Map aclEntries = AbfsAclHelper.deserializeAclSpec(AclEntry.aclSpecToString(aclSpec)); final boolean isUpnFormat = AbfsAclHelper.isUpnFormatAclEntries(aclEntries); - final AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), isUpnFormat); + String relativePath = getRelativePath(path); + + final AbfsRestOperation op = client + .getAclStatus(relativePath, isUpnFormat, tracingContext); perfInfoGet.registerResult(op.getResult()); final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); @@ -1034,8 +1479,8 @@ public void setAcl(final Path path, final List aclSpec) throws AzureBl try (AbfsPerfInfo perfInfoSet = startTracking("setAcl", "setAcl")) { final AbfsRestOperation setAclOp = - client.setAcl(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true), - AbfsAclHelper.serializeAclSpec(aclEntries), eTag); + client.setAcl(relativePath, + AbfsAclHelper.serializeAclSpec(aclEntries), eTag, tracingContext); perfInfoSet.registerResult(setAclOp.getResult()) .registerSuccess(true) .registerAggregates(perfInfoGet.getTrackingStart(), GET_SET_AGGREGATE_COUNT); @@ -1043,8 +1488,9 @@ public void setAcl(final Path path, final List aclSpec) throws AzureBl } } - public AclStatus getAclStatus(final Path path) throws IOException { - if (!getIsNamespaceEnabled()) { + public AclStatus getAclStatus(final Path path, TracingContext tracingContext) + throws IOException { + if (!getIsNamespaceEnabled(tracingContext)) { throw new UnsupportedOperationException( "This operation is only valid for storage accounts with the hierarchical namespace enabled."); } @@ -1054,9 +1500,10 @@ public AclStatus getAclStatus(final Path path) throws IOException { LOG.debug( "getAclStatus filesystem: {} path: {}", client.getFileSystem(), - path.toString()); + path); - AbfsRestOperation op = client.getAclStatus(AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, 
true)); + AbfsRestOperation op = client + .getAclStatus(getRelativePath(path), tracingContext); AbfsHttpOperation result = op.getResult(); perfInfo.registerResult(result); @@ -1089,21 +1536,19 @@ public AclStatus getAclStatus(final Path path) throws IOException { } } - public void access(final Path path, final FsAction mode) - throws AzureBlobFileSystemException { + public void access(final Path path, final FsAction mode, + TracingContext tracingContext) throws AzureBlobFileSystemException { LOG.debug("access for filesystem: {}, path: {}, mode: {}", this.client.getFileSystem(), path, mode); if (!this.abfsConfiguration.isCheckAccessEnabled() - || !getIsNamespaceEnabled()) { + || !getIsNamespaceEnabled(tracingContext)) { LOG.debug("Returning; either check access is not enabled or the account" + " used is not namespace enabled"); return; } try (AbfsPerfInfo perfInfo = startTracking("access", "checkAccess")) { - String relativePath = - AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path, true); final AbfsRestOperation op = this.client - .checkAccess(relativePath, mode.SYMBOL); + .checkAccess(getRelativePath(path), mode.SYMBOL, tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } @@ -1112,7 +1557,26 @@ public boolean isAtomicRenameKey(String key) { return isKeyForDirectorySet(key, azureAtomicRenameDirSet); } - private void initializeClient(URI uri, String fileSystemName, String accountName, boolean isSecure) + public boolean isInfiniteLeaseKey(String key) { + if (azureInfiniteLeaseDirSet.isEmpty()) { + return false; + } + return isKeyForDirectorySet(key, azureInfiniteLeaseDirSet); + } + + /** + * A on-off operation to initialize AbfsClient for AzureBlobFileSystem + * Operations. + * + * @param uri Uniform resource identifier for Abfs. + * @param fileSystemName Name of the fileSystem being used. + * @param accountName Name of the account being used to access Azure + * data store. + * @param isSecure Tells if https is being used or http. 
+ * @throws IOException + */ + private void initializeClient(URI uri, String fileSystemName, + String accountName, boolean isSecure) throws IOException { if (this.client != null) { return; @@ -1133,6 +1597,10 @@ private void initializeClient(URI uri, String fileSystemName, String accountName AccessTokenProvider tokenProvider = null; SASTokenProvider sasTokenProvider = null; + if (authType == AuthType.OAuth) { + AzureADAuthenticator.init(abfsConfiguration); + } + if (authType == AuthType.SharedKey) { LOG.trace("Fetching SharedKey credentials"); int dotIndex = accountName.indexOf(AbfsHttpConstants.DOT); @@ -1155,38 +1623,38 @@ private void initializeClient(URI uri, String fileSystemName, String accountName LOG.trace("Initializing AbfsClient for {}", baseUrl); if (tokenProvider != null) { this.client = new AbfsClient(baseUrl, creds, abfsConfiguration, - new ExponentialRetryPolicy(abfsConfiguration.getMaxIoRetries()), - tokenProvider, abfsPerfTracker); + tokenProvider, + populateAbfsClientContext()); } else { this.client = new AbfsClient(baseUrl, creds, abfsConfiguration, - new ExponentialRetryPolicy(abfsConfiguration.getMaxIoRetries()), - sasTokenProvider, abfsPerfTracker); + sasTokenProvider, + populateAbfsClientContext()); } LOG.trace("AbfsClient init complete"); } + /** + * Populate a new AbfsClientContext instance with the desired properties. + * + * @return an instance of AbfsClientContext. 
+ */ + private AbfsClientContext populateAbfsClientContext() { + return new AbfsClientContextBuilder() + .withExponentialRetryPolicy( + new ExponentialRetryPolicy(abfsConfiguration)) + .withAbfsCounters(abfsCounters) + .withAbfsPerfTracker(abfsPerfTracker) + .build(); + } + private String getOctalNotation(FsPermission fsPermission) { Preconditions.checkNotNull(fsPermission, "fsPermission"); return String.format(AbfsHttpConstants.PERMISSION_FORMAT, fsPermission.toOctal()); } private String getRelativePath(final Path path) { - return getRelativePath(path, false); - } - - private String getRelativePath(final Path path, final boolean allowRootPath) { Preconditions.checkNotNull(path, "path"); - final String relativePath = path.toUri().getPath(); - - if (relativePath.length() == 0 || (relativePath.length() == 1 && relativePath.charAt(0) == Path.SEPARATOR_CHAR)) { - return allowRootPath ? AbfsHttpConstants.ROOT_PATH : AbfsHttpConstants.EMPTY_STRING; - } - - if (relativePath.charAt(0) == Path.SEPARATOR_CHAR) { - return relativePath.substring(1); - } - - return relativePath; + return path.toUri().getPath(); } private long parseContentLength(final String contentLength) { @@ -1202,18 +1670,6 @@ private boolean parseIsDirectory(final String resourceType) { && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY); } - private long parseLastModifiedTime(final String lastModifiedTime) { - long parsedTime = 0; - try { - Date utcDate = new SimpleDateFormat(DATE_TIME_PATTERN, Locale.US).parse(lastModifiedTime); - parsedTime = utcDate.getTime(); - } catch (ParseException e) { - LOG.error("Failed to parse the date {}", lastModifiedTime); - } finally { - return parsedTime; - } - } - private String convertXmsPropertiesToCommaSeparatedString(final Hashtable properties) throws CharacterCodingException { StringBuilder commaSeparatedProperties = new StringBuilder(); @@ -1307,10 +1763,27 @@ private AbfsPerfInfo startTracking(String callerName, String calleeName) { return new 
AbfsPerfInfo(abfsPerfTracker, callerName, calleeName); } - private static class VersionedFileStatus extends FileStatus { - private final String version; + /** + * A File status with version info extracted from the etag value returned + * in a LIST or HEAD request. + * The etag is included in the java serialization. + */ + private static final class VersionedFileStatus extends FileStatus + implements EtagSource { + + /** + * The superclass is declared serializable; this subclass can also + * be serialized. + */ + private static final long serialVersionUID = -2009013240419749458L; - VersionedFileStatus( + /** + * The etag of an object. + * Not-final so that serialization via reflection will preserve the value. + */ + private String version; + + private VersionedFileStatus( final String owner, final String group, final FsPermission fsPermission, final boolean hasAcl, final long length, final boolean isdir, final int blockReplication, final long blocksize, final long modificationTime, final Path path, @@ -1371,6 +1844,11 @@ public String getVersion() { return this.version; } + @Override + public String getEtag() { + return getVersion(); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder( @@ -1382,8 +1860,139 @@ public String toString() { } } + /** + * A builder class for AzureBlobFileSystemStore. 
+ */ + public static final class AzureBlobFileSystemStoreBuilder { + + private URI uri; + private boolean isSecureScheme; + private Configuration configuration; + private AbfsCounters abfsCounters; + private DataBlocks.BlockFactory blockFactory; + private int blockOutputActiveBlocks; + + public AzureBlobFileSystemStoreBuilder withUri(URI value) { + this.uri = value; + return this; + } + + public AzureBlobFileSystemStoreBuilder withSecureScheme(boolean value) { + this.isSecureScheme = value; + return this; + } + + public AzureBlobFileSystemStoreBuilder withConfiguration( + Configuration value) { + this.configuration = value; + return this; + } + + public AzureBlobFileSystemStoreBuilder withAbfsCounters( + AbfsCounters value) { + this.abfsCounters = value; + return this; + } + + public AzureBlobFileSystemStoreBuilder withBlockFactory( + DataBlocks.BlockFactory value) { + this.blockFactory = value; + return this; + } + + public AzureBlobFileSystemStoreBuilder withBlockOutputActiveBlocks( + int value) { + this.blockOutputActiveBlocks = value; + return this; + } + + public AzureBlobFileSystemStoreBuilder build() { + return this; + } + } + @VisibleForTesting - AbfsClient getClient() { + public AbfsClient getClient() { return this.client; } -} \ No newline at end of file + + @VisibleForTesting + void setClient(AbfsClient client) { + this.client = client; + } + + @VisibleForTesting + void setNamespaceEnabled(Trilean isNamespaceEnabled){ + this.isNamespaceEnabled = isNamespaceEnabled; + } + + private void updateInfiniteLeaseDirs() { + this.azureInfiniteLeaseDirSet = new HashSet<>(Arrays.asList( + abfsConfiguration.getAzureInfiniteLeaseDirs().split(AbfsHttpConstants.COMMA))); + // remove the empty string, since isKeyForDirectory returns true for empty strings + // and we don't want to default to enabling infinite lease dirs + this.azureInfiniteLeaseDirSet.remove(""); + } + + private AbfsLease maybeCreateLease(String relativePath, TracingContext tracingContext) + throws 
AzureBlobFileSystemException { + boolean enableInfiniteLease = isInfiniteLeaseKey(relativePath); + if (!enableInfiniteLease) { + return null; + } + AbfsLease lease = new AbfsLease(client, relativePath, tracingContext); + leaseRefs.put(lease, null); + return lease; + } + + @VisibleForTesting + boolean areLeasesFreed() { + for (AbfsLease lease : leaseRefs.keySet()) { + if (lease != null && !lease.isFreed()) { + return false; + } + } + return true; + } + + /** + * Get the etag header from a response, stripping any quotations. + * see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag + * @param result response to process. + * @return the quote-unwrapped etag. + */ + public static String extractEtagHeader(AbfsHttpOperation result) { + String etag = result.getResponseHeader(HttpHeaderConfigurations.ETAG); + if (etag != null) { + // strip out any wrapper "" quotes which come back, for consistency with + // list calls + if (etag.startsWith("W/\"")) { + // Weak etag + etag = etag.substring(3); + } else if (etag.startsWith("\"")) { + // strong etag + etag = etag.substring(1); + } + if (etag.endsWith("\"")) { + // trailing quote + etag = etag.substring(0, etag.length() - 1); + } + } + return etag; + } + + /** + * Increment rename recovery based counters in IOStatistics. + * + * @param abfsClientRenameResult Result of an ABFS rename operation. 
+ */ + private void populateRenameRecoveryStatistics( + AbfsClientRenameResult abfsClientRenameResult) { + if (abfsClientRenameResult.isRenameRecovered()) { + abfsCounters.incrementCounter(RENAME_RECOVERY, 1); + } + if (abfsClientRenameResult.isIncompleteMetadataState()) { + abfsCounters.incrementCounter(METADATA_INCOMPLETE_RENAME_FAILURES, 1); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/AbfsManifestStoreOperations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/AbfsManifestStoreOperations.java new file mode 100644 index 0000000000000..6bfab3a8515a9 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/AbfsManifestStoreOperations.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.io.IOException; +import java.time.Duration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.files.FileEntry; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.impl.ManifestStoreOperationsThroughFileSystem; + +/** + * Extension of StoreOperationsThroughFileSystem with ABFS awareness. + * Purely for use by jobs committing work through the manifest committer. + * The {@link AzureManifestCommitterFactory} will configure + * this as the binding to the FS. + * + * ADLS Gen2 stores support etag-recovery on renames, but not WASB + * stores. + */ +@InterfaceAudience.LimitedPrivate("mapreduce") +@InterfaceStability.Unstable +public class AbfsManifestStoreOperations extends + ManifestStoreOperationsThroughFileSystem { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsManifestStoreOperations.class); + + /** + * Classname, which can be declared in jpb configurations. + */ + public static final String NAME = AbfsManifestStoreOperations.class.getName(); + + /** + * Resilient rename calls; only available on an ADLS Gen2 store. + * Will be null after binding if the FS isn't compatible. + */ + private ResilientCommitByRename resilientCommitByRename; + + /** + * Are etags preserved in renames? + */ + private boolean etagsPreserved; + + @Override + public AzureBlobFileSystem getFileSystem() { + return (AzureBlobFileSystem) super.getFileSystem(); + } + + /** + * Bind to the store. + * + * @param filesystem FS. 
+ * @param path path to work under + * @throws IOException binding problems. + */ + @Override + public void bindToFileSystem(FileSystem filesystem, Path path) throws IOException { + if (!(filesystem instanceof AzureBlobFileSystem)) { + throw new PathIOException(path.toString(), + "Not an abfs filesystem: " + filesystem.getClass()); + } + super.bindToFileSystem(filesystem, path); + try { + resilientCommitByRename = getFileSystem().createResilientCommitSupport(path); + // this also means that etags are preserved. + etagsPreserved = true; + LOG.debug("Bonded to filesystem with resilient commits under path {}", path); + } catch (UnsupportedOperationException e) { + LOG.debug("No resilient commit support under path {}", path); + } + } + + /** + * Etags are preserved through Gen2 stores, but not wasb stores. + * @param path path to probe. + * @return true if this store preserves etags. + */ + @Override + public boolean storePreservesEtagsThroughRenames(final Path path) { + return etagsPreserved; + } + + /** + * Resilient commits available on hierarchical stores. + * @return true if the FS can use etags on renames. + */ + @Override + public boolean storeSupportsResilientCommit() { + return resilientCommitByRename != null; + } + + /** + * Commit a file through an internal ABFS operation. + * If resilient commit is unavailable, invokes the superclass, which + * will raise an UnsupportedOperationException + * @param entry entry to commit + * @return the outcome + * @throws IOException any failure in resilient commit. + * @throws UnsupportedOperationException if not available. 
+ */ + @Override + public CommitFileResult commitFile(final FileEntry entry) throws IOException { + + if (resilientCommitByRename != null) { + final Pair result = + resilientCommitByRename.commitSingleFileByRename( + entry.getSourcePath(), + entry.getDestPath(), + entry.getEtag()); + return CommitFileResult.fromResilientCommit(result.getLeft(), + result.getRight()); + } else { + return super.commitFile(entry); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/AzureManifestCommitterFactory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/AzureManifestCommitterFactory.java new file mode 100644 index 0000000000000..b760fa7a4ac53 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/AzureManifestCommitterFactory.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitter; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterFactory; + +import static org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterConstants.OPT_STORE_OPERATIONS_CLASS; + +/** + * A Committer for the manifest committer which performs all bindings needed + * to work best with abfs. + * This includes, at a minimum, switching to the abfs-specific manifest store operations. + * + * This classname is referenced in configurations, so MUST NOT change. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class AzureManifestCommitterFactory extends ManifestCommitterFactory { + + /** + * Classname, which can be declared in job configurations. 
+ */ + public static final String NAME = ManifestCommitterFactory.class.getName(); + + @Override + public ManifestCommitter createOutputCommitter(final Path outputPath, + final TaskAttemptContext context) throws IOException { + final Configuration conf = context.getConfiguration(); + // use ABFS Store operations + conf.set(OPT_STORE_OPERATIONS_CLASS, + AbfsManifestStoreOperations.NAME); + return super.createOutputCommitter(outputPath, context); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/ResilientCommitByRename.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/ResilientCommitByRename.java new file mode 100644 index 0000000000000..2e91392a661b1 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/ResilientCommitByRename.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.time.Duration; +import javax.annotation.Nullable; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +/** + * API exclusively for committing files. + * + * This is only for use by {@link AbfsManifestStoreOperations}, + * and is intended to be implemented by ABFS. + * To ensure that there is no need to add mapreduce JARs to the + * classpath just to work with ABFS, this interface + * MUST NOT refer to anything in the + * {@code org.apache.hadoop.mapreduce} package. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface ResilientCommitByRename extends IOStatisticsSource { + + /** + * Rename source file to dest path *Exactly*; no subdirectory games here. + * If the method does not raise an exception, then + * the data at dest is the data which was at source. + * + * Requirements + * + * <pre>
+   *   exists(FS, source) else raise FileNotFoundException
+   *   source != dest else raise PathIOException
+   *   not exists(FS, dest)
+   *   isDir(FS, dest.getParent)
+ * </pre> + * <ol> + * <li>source != dest else raise PathIOException</li> + * <li>source must exist else raise FileNotFoundException</li> + * <li>source must exist and be a file</li> + * <li>dest must not exist</li> + * <li>dest.getParent() must be a dir</li> + * <li>if sourceEtag is non-empty, it MAY be used to qualify/validate the rename.</li> + * </ol> + * + * The outcome of the operation is undefined if source is not a file, dest exists, + * dest.getParent() doesn't exist/is a file. + * That is: implementations SHOULD assume that the code calling this method has + * set up the destination directory tree and is only invoking this call on a file. + * Accordingly: implementations MAY skip validation checks. + * + * Post Conditions on a successful operation: + * <pre>
+   * FS' where:
+   *     not exists(FS', source)
+   *     and exists(FS', dest)
+   *     and data(FS', dest) == data (FS, source)
+ * </pre> + * This is exactly the same outcome as {@code FileSystem.rename()} when the same preconditions + * are met. This API call simply restricts the operation to file rename with strict + * conditions, (no need to be 'clever' about dest path calculation) and the ability + * to pass in the etag of the source file. + * + * @param source path to source file + * @param dest destination of rename. + * @param sourceEtag etag of source file. may be null or empty + * @return a pair: first element is true if recovery was needed; second is a duration whose meaning is set by the implementation (TODO confirm). + * @throws FileNotFoundException source file not found + * @throws PathIOException failure, including source and dest being the same path + * @throws IOException any other exception + */ + Pair<Boolean, Duration> commitSingleFileByRename( + Path source, + Path dest, + @Nullable String sourceEtag) throws IOException; + + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/package-info.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/package-info.java new file mode 100644 index 0000000000000..3567377350d6b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/commit/package-info.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Support for manifest committer. + * Unless otherwise stated: classes are private. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index c6ade9cb99d75..7e4ddfa675a4c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -39,13 +39,22 @@ public final class AbfsHttpConstants { public static final String GET_ACCESS_CONTROL = "getAccessControl"; public static final String CHECK_ACCESS = "checkAccess"; public static final String GET_STATUS = "getStatus"; + public static final String ACQUIRE_LEASE_ACTION = "acquire"; + public static final String BREAK_LEASE_ACTION = "break"; + public static final String RELEASE_LEASE_ACTION = "release"; + public static final String RENEW_LEASE_ACTION = "renew"; + public static final String DEFAULT_LEASE_BREAK_PERIOD = "0"; public static final String DEFAULT_TIMEOUT = "90"; + public static final String APPEND_BLOB_TYPE = "appendblob"; public static final String TOKEN_VERSION = "2"; + public static final String JAVA_VENDOR = "java.vendor"; public static final String JAVA_VERSION = "java.version"; public static final String OS_NAME = "os.name"; public static final String OS_VERSION = "os.version"; + public static final String OS_ARCH = "os.arch"; + public static final String APN_VERSION = "APN/1.0"; public static final 
String CLIENT_VERSION = "Azure Blob FS/" + VersionInfo.getVersion(); // Abfs Http Verb @@ -55,6 +64,11 @@ public final class AbfsHttpConstants { public static final String HTTP_METHOD_PATCH = "PATCH"; public static final String HTTP_METHOD_POST = "POST"; public static final String HTTP_METHOD_PUT = "PUT"; + /** + * All status codes less than http 100 signify error + * and should qualify for retry. + */ + public static final int HTTP_CONTINUE = 100; // Abfs generic constants public static final String SINGLE_WHITE_SPACE = " "; @@ -71,6 +85,8 @@ public final class AbfsHttpConstants { public static final String SEMICOLON = ";"; public static final String AT = "@"; public static final String HTTP_HEADER_PREFIX = "x-ms-"; + public static final String HASH = "#"; + public static final String TRUE = "true"; public static final String PLUS_ENCODE = "%20"; public static final String FORWARD_SLASH_ENCODE = "%2F"; @@ -92,6 +108,9 @@ public final class AbfsHttpConstants { public static final String DEFAULT_SCOPE = "default:"; public static final String PERMISSION_FORMAT = "%04d"; public static final String SUPER_USER = "$superuser"; + // The HTTP 100 Continue informational status response code indicates that everything so far + // is OK and that the client should continue with the request or ignore it if it is already finished. + public static final String HUNDRED_CONTINUE = "100-continue"; public static final char CHAR_FORWARD_SLASH = '/'; public static final char CHAR_EXCLAMATION_POINT = '!'; @@ -100,6 +119,17 @@ public final class AbfsHttpConstants { public static final char CHAR_EQUALS = '='; public static final char CHAR_STAR = '*'; public static final char CHAR_PLUS = '+'; + /** + * Value that differentiates categories of the http_status. + *
    +   * 100 - 199 : Informational responses
    +   * 200 - 299 : Successful responses
    +   * 300 - 399 : Redirection messages
    +   * 400 - 499 : Client error responses
    +   * 500 - 599 : Server error responses
    +   * 
    + */ + public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100; private AbfsHttpConstants() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index a63e95353497f..872364a8e6167 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -20,6 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileSystem; /** * Responsible to keep all the Azure Blob File System configurations keys in Hadoop configuration file. @@ -27,19 +28,91 @@ @InterfaceAudience.Public @InterfaceStability.Evolving public final class ConfigurationKeys { + + /** + * Config to specify if the configured account is HNS enabled or not. If + * this config is not set, getacl call is made on account filesystem root + * path to determine HNS status. + */ + public static final String FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "fs.azure.account.hns.enabled"; + /** + * Enable or disable expect hundred continue header. + * Value: {@value}. 
+ */ + public static final String FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = "fs.azure.account.expect.header.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; + public static final String FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = "fs.azure.account.throttling.enabled"; // Retry strategy defined by the user public static final String AZURE_MIN_BACKOFF_INTERVAL = "fs.azure.io.retry.min.backoff.interval"; public static final String AZURE_MAX_BACKOFF_INTERVAL = "fs.azure.io.retry.max.backoff.interval"; public static final String AZURE_BACKOFF_INTERVAL = "fs.azure.io.retry.backoff.interval"; public static final String AZURE_MAX_IO_RETRIES = "fs.azure.io.retry.max.retries"; + public static final String AZURE_CUSTOM_TOKEN_FETCH_RETRY_COUNT = "fs.azure.custom.token.fetch.retry.count"; + + // Retry strategy for getToken calls + public static final String AZURE_OAUTH_TOKEN_FETCH_RETRY_COUNT = "fs.azure.oauth.token.fetch.retry.max.retries"; + public static final String AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF = "fs.azure.oauth.token.fetch.retry.min.backoff.interval"; + public static final String AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF = "fs.azure.oauth.token.fetch.retry.max.backoff.interval"; + public static final String AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF = "fs.azure.oauth.token.fetch.retry.delta.backoff"; // Read and write buffer sizes defined by the user + public static final String AZURE_WRITE_MAX_CONCURRENT_REQUESTS = "fs.azure.write.max.concurrent.requests"; + public static final String AZURE_WRITE_MAX_REQUESTS_TO_QUEUE = "fs.azure.write.max.requests.to.queue"; public static final String AZURE_WRITE_BUFFER_SIZE = "fs.azure.write.request.size"; + + /** + * Maximum Number of blocks a single output stream can have + * active (uploading, 
or queued to the central FileSystem + * instance's pool of queued operations. + * This stops a single stream overloading the shared thread pool. + * {@value} + *

    + * Default is {@link FileSystemConfigurations#BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT} + */ + public static final String FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS = + "fs.azure.block.upload.active.blocks"; + + /** + * Buffer directory path for uploading AbfsOutputStream data blocks. + * Value: {@value} + */ + public static final String FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR = + "fs.azure.buffer.dir"; + + /** + * What data block buffer to use. + *
    + * Options include: "disk"(Default), "array", and "bytebuffer". + *
    + * Default is {@link FileSystemConfigurations#DATA_BLOCKS_BUFFER_DEFAULT}. + * Value: {@value} + */ + public static final String DATA_BLOCKS_BUFFER = + "fs.azure.data.blocks.buffer"; + + /** If the data size written by Hadoop app is small, i.e. data size : + * (a) before any of HFlush/HSync call is made or + * (b) between 2 HFlush/Hsync API calls + * is less than write buffer size, 2 separate calls, one for append and + * another for flush are made. + * By enabling the small write optimization, a single call will be made to + * perform both append and flush operations and hence reduce request count. + */ + public static final String AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION = "fs.azure.write.enableappendwithflush"; public static final String AZURE_READ_BUFFER_SIZE = "fs.azure.read.request.size"; + public static final String AZURE_READ_SMALL_FILES_COMPLETELY = "fs.azure.read.smallfilescompletely"; + public static final String AZURE_READ_OPTIMIZE_FOOTER_READ = "fs.azure.read.optimizefooterread"; + + /** + * Read ahead range parameter which can be set by user. + * Default value is {@link FileSystemConfigurations#DEFAULT_READ_AHEAD_RANGE}. + * This might reduce number of calls to remote as next requested + * data could already be present in buffer {@value}. 
+ */ + public static final String AZURE_READ_AHEAD_RANGE = "fs.azure.readahead.range"; public static final String AZURE_BLOCK_SIZE_PROPERTY_NAME = "fs.azure.block.size"; public static final String AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME = "fs.azure.block.location.impersonatedhost"; public static final String AZURE_CONCURRENT_CONNECTION_VALUE_OUT = "fs.azure.concurrentRequestCount.out"; @@ -49,9 +122,30 @@ public final class ConfigurationKeys { public static final String AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION = "fs.azure.createRemoteFileSystemDuringInitialization"; public static final String AZURE_SKIP_USER_GROUP_METADATA_DURING_INITIALIZATION = "fs.azure.skipUserGroupMetadataDuringInitialization"; public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling"; + public static final String FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT = "fs.azure.account.operation.idle.timeout"; + public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period"; public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https"; public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key"; + /** This config ensures that during create overwrite an existing file will be + * overwritten only if there is a match on the eTag of existing file. + */ + public static final String FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE = "fs.azure.enable.conditional.create.overwrite"; + public static final String FS_AZURE_ENABLE_MKDIR_OVERWRITE = "fs.azure.enable.mkdir.overwrite"; + /** Provides a config to provide comma separated path prefixes on which Appendblob based files are created + * Default is empty. **/ + public static final String FS_AZURE_APPEND_BLOB_KEY = "fs.azure.appendblob.directories"; + /** Provides a config to provide comma separated path prefixes which support infinite leases. 
+ * Files under these paths will be leased when created or opened for writing and the lease will + * be released when the file is closed. The lease may be broken with the breakLease method on + * AzureBlobFileSystem. Default is empty. + * **/ + public static final String FS_AZURE_INFINITE_LEASE_KEY = "fs.azure.infinite-lease.directories"; + /** Provides a number of threads to use for lease operations for infinite lease directories. + * Must be set to a minimum of 1 if infinite lease directories are to be used. Default is 0. **/ + public static final String FS_AZURE_LEASE_THREADS = "fs.azure.lease.threads"; public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth"; + public static final String FS_AZURE_ALWAYS_READ_BUFFER_SIZE = "fs.azure.read.alwaysReadBufferSize"; + public static final String FS_AZURE_READ_AHEAD_BLOCK_SIZE = "fs.azure.read.readahead.blocksize"; /** Provides a config control to enable or disable ABFS Flush operations - * HFlush and HSync. Default is true. **/ public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush"; @@ -62,6 +156,14 @@ public final class ConfigurationKeys { * Default value of this config is true. **/ public static final String FS_AZURE_DISABLE_OUTPUTSTREAM_FLUSH = "fs.azure.disable.outputstream.flush"; public static final String FS_AZURE_USER_AGENT_PREFIX_KEY = "fs.azure.user.agent.prefix"; + /** + * The client correlation ID provided over config that will be added to + * x-ms-client-request-Id header. Defaults to empty string if the length and + * character constraints are not satisfied. 
**/ + public static final String FS_AZURE_CLIENT_CORRELATIONID = "fs.azure.client.correlationid"; + public static final String FS_AZURE_TRACINGHEADER_FORMAT = "fs.azure.tracingheader.format"; + public static final String FS_AZURE_CLUSTER_NAME = "fs.azure.cluster.name"; + public static final String FS_AZURE_CLUSTER_TYPE = "fs.azure.cluster.type"; public static final String FS_AZURE_SSL_CHANNEL_MODE_KEY = "fs.azure.ssl.channel.mode"; /** Provides a config to enable/disable the checkAccess API. * By default this will be @@ -93,6 +195,17 @@ public final class ConfigurationKeys { public static final String AZURE_KEY_ACCOUNT_KEYPROVIDER = "fs.azure.account.keyprovider"; public static final String AZURE_KEY_ACCOUNT_SHELLKEYPROVIDER_SCRIPT = "fs.azure.shellkeyprovider.script"; + /** + * Enable or disable readahead buffer in AbfsInputStream. + * Value: {@value}. + */ + public static final String FS_AZURE_ENABLE_READAHEAD = "fs.azure.enable.readahead"; + + /** Setting this true will make the driver use its own RemoteIterator implementation. */ + public static final String FS_AZURE_ENABLE_ABFS_LIST_ITERATOR = "fs.azure.enable.abfslistiterator"; + /** Client-provided encryption key: {@value}. */ + public static final String FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY = "fs.azure.client-provided-encryption-key"; + /** End point of ABFS account: {@value}. */ public static final String AZURE_ABFS_ENDPOINT = "fs.azure.abfs.endpoint"; /** Key for auth type properties: {@value}. */ @@ -122,6 +235,12 @@ public final class ConfigurationKeys { /** Key for enabling the tracking of ABFS API latency and sending the latency numbers to the ABFS API service */ public static final String FS_AZURE_ABFS_LATENCY_TRACK = "fs.azure.abfs.latency.track"; + /** Key for rate limit capacity, as used by IO operations which try to throttle themselves. */ + public static final String FS_AZURE_ABFS_IO_RATE_LIMIT = "fs.azure.io.rate.limit"; + + /** Add extra resilience to rename failures, at the expense of performance. 
*/ + public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience"; + public static String accountProperty(String property, String account) { return property + "." + account; } @@ -132,5 +251,21 @@ public static String accountProperty(String property, String account) { /** Key for SAS token provider **/ public static final String FS_AZURE_SAS_TOKEN_PROVIDER_TYPE = "fs.azure.sas.token.provider.type"; + /** For performance, AbfsInputStream/AbfsOutputStream re-use SAS tokens until the expiry is within this number of seconds. **/ + public static final String FS_AZURE_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS = "fs.azure.sas.token.renew.period.for.streams"; + + /** Key to enable custom identity transformation. */ + public static final String FS_AZURE_IDENTITY_TRANSFORM_CLASS = "fs.azure.identity.transformer.class"; + /** Key for Local User to Service Principal file location. */ + public static final String FS_AZURE_LOCAL_USER_SP_MAPPING_FILE_PATH = "fs.azure.identity.transformer.local.service.principal.mapping.file.path"; + /** Key for Local Group to Service Group file location. */ + public static final String FS_AZURE_LOCAL_GROUP_SG_MAPPING_FILE_PATH = "fs.azure.identity.transformer.local.service.group.mapping.file.path"; + /** + * Optional config to enable a lock free pread which will bypass buffer in AbfsInputStream. + * This is not a config which can be set at cluster level. It can be used as + * an option on FutureDataInputStreamBuilder. 
+ * @see FileSystem#openFile(org.apache.hadoop.fs.Path) + */ + public static final String FS_AZURE_BUFFERED_PREAD_DISABLE = "fs.azure.buffered.pread.disable"; private ConfigurationKeys() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java new file mode 100644 index 0000000000000..6b6e98c9c7082 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.constants; + +public enum FSOperationType { + ACCESS("AS"), + APPEND("AP"), + BREAK_LEASE("BL"), + CREATE("CR"), + CREATE_FILESYSTEM("CF"), + CREATE_NON_RECURSIVE("CN"), + DELETE("DL"), + GET_ACL_STATUS("GA"), + GET_ATTR("GR"), + GET_FILESTATUS("GF"), + LISTSTATUS("LS"), + MKDIR("MK"), + MODIFY_ACL("MA"), + OPEN("OP"), + HAS_PATH_CAPABILITY("PC"), + SET_PERMISSION("SP"), + READ("RE"), + RELEASE_LEASE("RL"), + REMOVE_ACL("RA"), + REMOVE_ACL_ENTRIES("RT"), + REMOVE_DEFAULT_ACL("RD"), + RENAME("RN"), + SET_ATTR("SR"), + SET_OWNER("SO"), + SET_ACL("SA"), + TEST_OP("TS"), + WRITE("WR"); + + private final String code; /* short code which toString() returns for this operation */ + + FSOperationType(String code) { + this.code = code; + } + + @Override + public String toString() { + return this.code; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index c6b308ed5f889..32f9966e30ae9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -22,31 +22,54 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; + /** * Responsible to keep all the Azure Blob File System related configurations. 
*/ @InterfaceAudience.Public @InterfaceStability.Evolving public final class FileSystemConfigurations { + + public static final String DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED = ""; + public static final boolean DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = true; public static final String USER_HOME_DIRECTORY_PREFIX = "/user"; + private static final int SIXTY_SECONDS = 60 * 1000; + // Retry parameter defaults. public static final int DEFAULT_MIN_BACKOFF_INTERVAL = 3 * 1000; // 3s public static final int DEFAULT_MAX_BACKOFF_INTERVAL = 30 * 1000; // 30s public static final int DEFAULT_BACKOFF_INTERVAL = 3 * 1000; // 3s public static final int DEFAULT_MAX_RETRY_ATTEMPTS = 30; + public static final int DEFAULT_CUSTOM_TOKEN_FETCH_RETRY_COUNT = 3; - private static final int ONE_KB = 1024; - private static final int ONE_MB = ONE_KB * ONE_KB; + // Retry parameter defaults. + public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS = 5; + public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF_INTERVAL = 0; + public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL = SIXTY_SECONDS; + public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF = 2; + + public static final int ONE_KB = 1024; + public static final int ONE_MB = ONE_KB * ONE_KB; // Default upload and download buffer size public static final int DEFAULT_WRITE_BUFFER_SIZE = 8 * ONE_MB; // 8 MB + public static final int APPENDBLOB_MAX_WRITE_BUFFER_SIZE = 4 * ONE_MB; // 4 MB + public static final boolean DEFAULT_AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION = false; public static final int DEFAULT_READ_BUFFER_SIZE = 4 * ONE_MB; // 4 MB + public static final boolean DEFAULT_READ_SMALL_FILES_COMPLETELY = false; + public static final boolean DEFAULT_OPTIMIZE_FOOTER_READ = false; + public static final boolean DEFAULT_ALWAYS_READ_BUFFER_SIZE = false; + public static final int DEFAULT_READ_AHEAD_BLOCK_SIZE = 4 * ONE_MB; + public static final int 
DEFAULT_READ_AHEAD_RANGE = 64 * ONE_KB; // 64 KB public static final int MIN_BUFFER_SIZE = 16 * ONE_KB; // 16 KB public static final int MAX_BUFFER_SIZE = 100 * ONE_MB; // 100 MB public static final long MAX_AZURE_BLOCK_SIZE = 256 * 1024 * 1024L; // changing default abfs blocksize to 256MB public static final String AZURE_BLOCK_LOCATION_HOST_DEFAULT = "localhost"; - public static final int DEFAULT_AZURE_LIST_MAX_RESULTS = 500; + public static final int DEFAULT_AZURE_LIST_MAX_RESULTS = 5000; + + public static final String SERVER_SIDE_ENCRYPTION_ALGORITHM = "AES256"; public static final int MAX_CONCURRENT_READ_THREADS = 12; public static final int MAX_CONCURRENT_WRITE_THREADS = 8; @@ -55,11 +78,25 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_AZURE_SKIP_USER_GROUP_METADATA_DURING_INITIALIZATION = false; public static final String DEFAULT_FS_AZURE_ATOMIC_RENAME_DIRECTORIES = "/hbase"; + public static final boolean DEFAULT_FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE = true; + public static final boolean DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE = true; + public static final String DEFAULT_FS_AZURE_APPEND_BLOB_DIRECTORIES = ""; + public static final String DEFAULT_FS_AZURE_INFINITE_LEASE_DIRECTORIES = ""; + public static final int DEFAULT_LEASE_THREADS = 0; + public static final int MIN_LEASE_THREADS = 0; + public static final int DEFAULT_LEASE_DURATION = -1; + public static final int INFINITE_LEASE_DURATION = -1; + public static final int MIN_LEASE_DURATION = 15; + public static final int MAX_LEASE_DURATION = 60; + + public static final int DEFAULT_READ_AHEAD_QUEUE_DEPTH = 2; - public static final int DEFAULT_READ_AHEAD_QUEUE_DEPTH = -1; public static final boolean DEFAULT_ENABLE_FLUSH = true; public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true; public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true; + public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; + public static 
final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; + public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000; public static final DelegatingSSLSocketFactory.SSLChannelMode DEFAULT_FS_AZURE_SSL_CHANNEL_MODE = DelegatingSSLSocketFactory.SSLChannelMode.Default; @@ -68,8 +105,43 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_ENABLE_HTTPS = true; public static final boolean DEFAULT_USE_UPN = false; - public static final boolean DEFAULT_ENABLE_CHECK_ACCESS = false; + public static final boolean DEFAULT_ENABLE_CHECK_ACCESS = true; public static final boolean DEFAULT_ABFS_LATENCY_TRACK = false; + public static final long DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120; + + public static final boolean DEFAULT_ENABLE_READAHEAD = true; + public static final String DEFAULT_FS_AZURE_USER_AGENT_PREFIX = EMPTY_STRING; + public static final String DEFAULT_VALUE_UNKNOWN = "UNKNOWN"; + + public static final boolean DEFAULT_DELETE_CONSIDERED_IDEMPOTENT = true; + public static final int DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS = 5 * 60 * 1000; // 5 mins + + public static final int STREAM_ID_LEN = 12; + public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true; + public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true; + + /** + * Limit of queued block upload operations before writes + * block for an OutputStream. Value: {@value} + */ + public static final int BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT = 20; + + /** + * Buffer blocks to disk. + * Capacity is limited to available disk space. + */ + public static final String DATA_BLOCKS_BUFFER_DISK = "disk"; + + /** + * Default buffer option: {@value}. + */ + public static final String DATA_BLOCKS_BUFFER_DEFAULT = + DATA_BLOCKS_BUFFER_DISK; + + /** + * IO rate limit. 
Value: {@value} + */ + public static final int RATE_LIMIT_DEFAULT = 10_000; private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java index 79bba094f0e44..b123e90170e69 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java @@ -43,6 +43,7 @@ public final class HttpHeaderConfigurations { public static final String USER_AGENT = "User-Agent"; public static final String X_HTTP_METHOD_OVERRIDE = "X-HTTP-Method-Override"; public static final String X_MS_CLIENT_REQUEST_ID = "x-ms-client-request-id"; + public static final String X_MS_EXISTING_RESOURCE_TYPE = "x-ms-existing-resource-type"; public static final String X_MS_DATE = "x-ms-date"; public static final String X_MS_REQUEST_ID = "x-ms-request-id"; public static final String X_MS_VERSION = "x-ms-version"; @@ -59,6 +60,17 @@ public final class HttpHeaderConfigurations { public static final String X_MS_UMASK = "x-ms-umask"; public static final String X_MS_NAMESPACE_ENABLED = "x-ms-namespace-enabled"; public static final String X_MS_ABFS_CLIENT_LATENCY = "x-ms-abfs-client-latency"; + public static final String X_MS_ENCRYPTION_KEY = "x-ms-encryption-key"; + public static final String X_MS_ENCRYPTION_KEY_SHA256 = "x-ms-encryption-key-sha256"; + public static final String X_MS_ENCRYPTION_ALGORITHM = "x-ms-encryption-algorithm"; + public static final String X_MS_REQUEST_SERVER_ENCRYPTED = "x-ms-request-server-encrypted"; + public static final String X_MS_SERVER_ENCRYPTED = "x-ms-server-encrypted"; + public static final String X_MS_LEASE_ACTION = "x-ms-lease-action"; + public static final String X_MS_LEASE_DURATION = 
"x-ms-lease-duration"; + public static final String X_MS_LEASE_ID = "x-ms-lease-id"; + public static final String X_MS_PROPOSED_LEASE_ID = "x-ms-proposed-lease-id"; + public static final String X_MS_LEASE_BREAK_PERIOD = "x-ms-lease-break-period"; + public static final String EXPECT = "Expect"; private HttpHeaderConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java index 9f735f729cb56..e9bb95cad21cd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java @@ -36,8 +36,16 @@ public final class HttpQueryParams { public static final String QUERY_PARAM_POSITION = "position"; public static final String QUERY_PARAM_TIMEOUT = "timeout"; public static final String QUERY_PARAM_RETAIN_UNCOMMITTED_DATA = "retainUncommittedData"; + public static final String QUERY_PARAM_FLUSH = "flush"; public static final String QUERY_PARAM_CLOSE = "close"; public static final String QUERY_PARAM_UPN = "upn"; + public static final String QUERY_PARAM_BLOBTYPE = "blobtype"; + + //query params for SAS + public static final String QUERY_PARAM_SAOID = "saoid"; + public static final String QUERY_PARAM_SKOID = "skoid"; + public static final String QUERY_PARAM_SUOID = "suoid"; + public static final String QUERY_PARAM_SIGNATURE = "sig"; private HttpQueryParams() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/InternalConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/InternalConstants.java new file mode 100644 index 0000000000000..85603b0bfd8ab --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/InternalConstants.java 
@@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.constants; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Constants which are used internally and which don't fit into the other + * classes. + * For use within the {@code hadoop-azure} module only. + */ +@InterfaceAudience.Private +public final class InternalConstants { + + private InternalConstants() { + } + + /** + * Does this version of the store have safe readahead? + * Possible combinations of this and the probe + * {@code "fs.capability.etags.available"}. + *

      <ol> + * <li>{@value}: store is safe</li> + * <li>no etags: store is safe</li> + * <li>etags and not {@value}: store is UNSAFE</li> + * </ol>
    + */ + public static final String CAPABILITY_SAFE_READAHEAD = + "fs.azure.capability.readahead.safe"; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/annotations/ConfigurationValidationAnnotations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/annotations/ConfigurationValidationAnnotations.java index 82c571a3b03b3..9fbe5a22cdf77 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/annotations/ConfigurationValidationAnnotations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/annotations/ConfigurationValidationAnnotations.java @@ -46,6 +46,22 @@ public class ConfigurationValidationAnnotations { boolean ThrowIfInvalid() default false; } + @Target({ ElementType.FIELD }) + @Retention(RetentionPolicy.RUNTIME) + public @interface IntegerWithOutlierConfigurationValidatorAnnotation { + String ConfigurationKey(); + + int MaxValue() default Integer.MAX_VALUE; + + int MinValue() default Integer.MIN_VALUE; + + int OutlierValue() default Integer.MIN_VALUE; + + int DefaultValue(); + + boolean ThrowIfInvalid() default false; + } + /** * Describes the requirements when validating the annotated long field. 
*/ diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java index 73b98942d026a..6c53762363840 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java @@ -87,7 +87,7 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) { "Operation failed: \"%1$s\", %2$s, HEAD, %3$s", abfsHttpOperation.getStatusDescription(), abfsHttpOperation.getStatusCode(), - abfsHttpOperation.getUrl().toString()); + abfsHttpOperation.getMaskedUrl()); } return String.format( @@ -95,7 +95,7 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) { abfsHttpOperation.getStatusDescription(), abfsHttpOperation.getStatusCode(), abfsHttpOperation.getMethod(), - abfsHttpOperation.getUrl().toString(), + abfsHttpOperation.getMaskedUrl(), abfsHttpOperation.getStorageErrorCode(), // Remove break line to ensure the request id and timestamp can be shown in console. 
abfsHttpOperation.getStorageErrorMessage().replaceAll("\\n", " ")); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AzureBlobFileSystemException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AzureBlobFileSystemException.java index 9b1bead886e6c..d829c5ac6779c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AzureBlobFileSystemException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AzureBlobFileSystemException.java @@ -37,6 +37,10 @@ public AzureBlobFileSystemException(final String message, final Exception innerE super(message, innerException); } + public AzureBlobFileSystemException(final String message, final Throwable innerThrowable) { + super(message, innerThrowable); + } + @Override public String toString() { if (this.getMessage() == null && this.getCause() == null) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/ConcurrentWriteOperationDetectedException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/ConcurrentWriteOperationDetectedException.java new file mode 100644 index 0000000000000..79813ddfe6400 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/ConcurrentWriteOperationDetectedException.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contracts.exceptions; + +/** + * Thrown when a concurrent write operation is detected. + */ +@org.apache.hadoop.classification.InterfaceAudience.Public +@org.apache.hadoop.classification.InterfaceStability.Evolving +public class ConcurrentWriteOperationDetectedException + extends AzureBlobFileSystemException { + + public ConcurrentWriteOperationDetectedException(String message) { + super(message); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java index aba1d8c1efa2b..285297024c710 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java @@ -24,17 +24,39 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; /** - * Exception to wrap invalid Azure service error responses. + * Exception to wrap invalid Azure service error responses and exceptions + * raised on network IO. 
*/ @InterfaceAudience.Public @InterfaceStability.Evolving public class InvalidAbfsRestOperationException extends AbfsRestOperationException { + + private static final String ERROR_MESSAGE = "InvalidAbfsRestOperationException"; + public InvalidAbfsRestOperationException( final Exception innerException) { super( AzureServiceErrorCode.UNKNOWN.getStatusCode(), AzureServiceErrorCode.UNKNOWN.getErrorCode(), - "InvalidAbfsRestOperationException", + innerException != null + ? innerException.toString() + : ERROR_MESSAGE, innerException); } + + /** + * Adds the retry count along with the exception. + * @param innerException The inner exception which is originally caught. + * @param retryCount The retry count when the exception was thrown. + */ + public InvalidAbfsRestOperationException( + final Exception innerException, int retryCount) { + super( + AzureServiceErrorCode.UNKNOWN.getStatusCode(), + AzureServiceErrorCode.UNKNOWN.getErrorCode(), + innerException != null + ? innerException.toString() + : ERROR_MESSAGE + " RetryCount: " + retryCount, + innerException); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/TrileanConversionException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/TrileanConversionException.java new file mode 100644 index 0000000000000..87eb05cdc6270 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/TrileanConversionException.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p>

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contracts.exceptions; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Thrown when tried to convert Trilean.UNKNOWN to boolean. Only Trilean.TRUE + * and Trilean.FALSE can be converted to boolean. + */ +@InterfaceAudience.Public +@InterfaceStability.Stable +public final class TrileanConversionException + extends AzureBlobFileSystemException { + public TrileanConversionException() { + super("Cannot convert Trilean.UNKNOWN to boolean"); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java new file mode 100644 index 0000000000000..57e559a60ec84 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contracts.services; + +/** + * Saves the different request parameters for append + */ +public class AppendRequestParameters { + public enum Mode { + APPEND_MODE, + FLUSH_MODE, + FLUSH_CLOSE_MODE + } + + private final long position; + private final int offset; + private final int length; + private final Mode mode; + private final boolean isAppendBlob; + private final String leaseId; + private boolean isExpectHeaderEnabled; + + public AppendRequestParameters(final long position, + final int offset, + final int length, + final Mode mode, + final boolean isAppendBlob, + final String leaseId, + final boolean isExpectHeaderEnabled) { + this.position = position; + this.offset = offset; + this.length = length; + this.mode = mode; + this.isAppendBlob = isAppendBlob; + this.leaseId = leaseId; + this.isExpectHeaderEnabled = isExpectHeaderEnabled; + } + + public long getPosition() { + return this.position; + } + + public int getoffset() { + return this.offset; + } + + public int getLength() { + return this.length; + } + + public Mode getMode() { + return this.mode; + } + + public boolean isAppendBlob() { + return this.isAppendBlob; + } + + public String getLeaseId() { + return this.leaseId; + } + + public boolean isExpectHeaderEnabled() { + return isExpectHeaderEnabled; + } + + public void setExpectHeaderEnabled(boolean expectHeaderEnabled) { + isExpectHeaderEnabled = expectHeaderEnabled; + } +} diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java index 8bc31c4f92b2a..8a5e9db8553d8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java @@ -66,6 +66,10 @@ public String getErrorCode() { return this.errorCode; } + public String getErrorMessage() { + return this.errorMessage; + } + public static List getAzureServiceCode(int httpStatusCode) { List errorCodes = new ArrayList<>(); if (httpStatusCode == UNKNOWN.httpStatusCode) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultEntrySchema.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultEntrySchema.java index 1de9dfaeeb910..a9883dd2ce5fc 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultEntrySchema.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultEntrySchema.java @@ -18,7 +18,8 @@ package org.apache.hadoop.fs.azurebfs.contracts.services; -import org.codehaus.jackson.annotate.JsonProperty; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.hadoop.classification.InterfaceStability; @@ -26,6 +27,7 @@ * The ListResultEntrySchema model. */ @InterfaceStability.Evolving +@JsonIgnoreProperties(ignoreUnknown = true) public class ListResultEntrySchema { /** * The name property. 
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultSchema.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultSchema.java index 32597423c86ff..dc7da04b5bd4f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultSchema.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResultSchema.java @@ -20,7 +20,8 @@ import java.util.List; -import org.codehaus.jackson.annotate.JsonProperty; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.hadoop.classification.InterfaceStability; @@ -28,6 +29,7 @@ * The ListResultSchema model. */ @InterfaceStability.Evolving +@JsonIgnoreProperties(ignoreUnknown = true) public class ListResultSchema { /** * The paths property. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/IntegerConfigurationBasicValidator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/IntegerConfigurationBasicValidator.java index 26c7d2f0ac19c..9d4beb74bbe3f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/IntegerConfigurationBasicValidator.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/IntegerConfigurationBasicValidator.java @@ -31,11 +31,18 @@ public class IntegerConfigurationBasicValidator extends ConfigurationBasicValidator implements ConfigurationValidator { private final int min; private final int max; + private final int outlier; public IntegerConfigurationBasicValidator(final int min, final int max, final int defaultVal, final String configKey, final boolean throwIfInvalid) { + this(min, min, max, defaultVal, configKey, throwIfInvalid); + } + + public 
IntegerConfigurationBasicValidator(final int outlier, final int min, final int max, + final int defaultVal, final String configKey, final boolean throwIfInvalid) { super(configKey, defaultVal, throwIfInvalid); this.min = min; this.max = max; + this.outlier = outlier; } public Integer validate(final String configValue) throws InvalidConfigurationValueException { @@ -47,10 +54,14 @@ public Integer validate(final String configValue) throws InvalidConfigurationVal try { result = Integer.parseInt(configValue); // throw an exception if a 'within bounds' value is missing - if (getThrowIfInvalid() && (result < this.min || result > this.max)) { + if (getThrowIfInvalid() && (result != outlier) && (result < this.min || result > this.max)) { throw new InvalidConfigurationValueException(getConfigKey()); } + if (result == outlier) { + return result; + } + // set the value to the nearest bound if it's out of bounds if (result < this.min) { return this.min; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/Trilean.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/Trilean.java new file mode 100644 index 0000000000000..dc5f43913fb09 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/Trilean.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p>

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.enums; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; + +/** + * Enum to represent 3 values, TRUE, FALSE and UNKNOWN. Can be used where + * boolean is not enough to hold the information. + */ +public enum Trilean { + + FALSE, TRUE, UNKNOWN; + + private static final String TRUE_STR = "true"; + private static final String FALSE_STR = "false"; + + /** + * Converts boolean to Trilean. + * + * @param isTrue the boolean to convert. + * @return the corresponding Trilean for the passed boolean isTrue. + */ + public static Trilean getTrilean(final boolean isTrue) { + if (isTrue) { + return Trilean.TRUE; + } + + return Trilean.FALSE; + } + + /** + * Converts String to Trilean. + * + * @param str the string to convert. + * @return the corresponding Trilean for the passed string str. + */ + public static Trilean getTrilean(String str) { + if (TRUE_STR.equalsIgnoreCase(str)) { + return Trilean.TRUE; + } + + if (FALSE_STR.equalsIgnoreCase(str)) { + return Trilean.FALSE; + } + + return Trilean.UNKNOWN; + } + + /** + * Converts the Trilean enum to boolean. + * + * @return the corresponding boolean. + * @throws TrileanConversionException when tried to convert Trilean.UNKNOWN. 
+ */ + public boolean toBoolean() throws TrileanConversionException { + if (this == Trilean.UNKNOWN) { + throw new TrileanConversionException(); + } + + return Boolean.valueOf(this.name()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/package-info.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/package-info.java new file mode 100644 index 0000000000000..b2a9b0f468de3 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +package org.apache.hadoop.fs.azurebfs.enums; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java index 9cfe2bc12ed10..a2cd292b0b230 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java @@ -32,22 +32,23 @@ @InterfaceStability.Unstable public interface SASTokenProvider { - String CONCAT_SOURCE_OPERATION = "concat-source"; - String CONCAT_TARGET_OPERATION = "concat-target"; - String CREATEFILE_OPERATION = "create"; + String CHECK_ACCESS_OPERATION = "check-access"; + String CREATE_DIRECTORY_OPERATION = "create-directory"; + String CREATE_FILE_OPERATION = "create-file"; String DELETE_OPERATION = "delete"; - String EXECUTE_OPERATION = "execute"; - String GETACL_OPERATION = "getaclstatus"; - String GETFILESTATUS_OPERATION = "getfilestatus"; - String LISTSTATUS_OPERATION = "liststatus"; - String MKDIR_OPERATION = "mkdir"; + String DELETE_RECURSIVE_OPERATION = "delete-recursive"; + String GET_ACL_OPERATION = "get-acl"; + String GET_STATUS_OPERATION = "get-status"; + String GET_PROPERTIES_OPERATION = "get-properties"; + String LIST_OPERATION = "list"; String READ_OPERATION = "read"; String RENAME_SOURCE_OPERATION = "rename-source"; String RENAME_DESTINATION_OPERATION = "rename-destination"; - String SETACL_OPERATION = "setacl"; - String SETOWNER_OPERATION = "setowner"; - String SETPERMISSION_OPERATION = "setpermission"; - String APPEND_OPERATION = "write"; + String SET_ACL_OPERATION = "set-acl"; + String SET_OWNER_OPERATION = "set-owner"; + String 
SET_PERMISSION_OPERATION = "set-permission"; + String SET_PROPERTIES_OPERATION = "set-properties"; + String WRITE_OPERATION = "write"; /** * Initialize authorizer for Azure Blob File System. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java index 93c40a7120677..9f1aa8e576d32 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java @@ -18,22 +18,25 @@ package org.apache.hadoop.fs.azurebfs.oauth2; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; +import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.Hashtable; import java.util.Map; -import com.google.common.base.Preconditions; -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.azurebfs.services.AbfsIoUtils; @@ -52,13 +55,20 @@ public final class AzureADAuthenticator { private static final Logger LOG = LoggerFactory.getLogger(AzureADAuthenticator.class); private static final String RESOURCE_NAME = "https://storage.azure.com/"; + private static final String SCOPE = 
"https://storage.azure.com/.default"; private static final int CONNECT_TIMEOUT = 30 * 1000; private static final int READ_TIMEOUT = 30 * 1000; + private static ExponentialRetryPolicy tokenFetchRetryPolicy; + private AzureADAuthenticator() { // no operation } + public static void init(AbfsConfiguration abfsConfiguration) { + tokenFetchRetryPolicy = abfsConfiguration.getOauthTokenFetchRetryPolicy(); + } + /** * gets Azure Active Directory token using the user ID and password of * a service principal (that is, Web App in Azure Active Directory). @@ -80,14 +90,18 @@ private AzureADAuthenticator() { * @throws IOException throws IOException if there is a failure in connecting to Azure AD */ public static AzureADToken getTokenUsingClientCreds(String authEndpoint, - String clientId, String clientSecret) - throws IOException { + String clientId, String clientSecret) throws IOException { Preconditions.checkNotNull(authEndpoint, "authEndpoint"); Preconditions.checkNotNull(clientId, "clientId"); Preconditions.checkNotNull(clientSecret, "clientSecret"); + boolean isVersion2AuthenticationEndpoint = authEndpoint.contains("/oauth2/v2.0/"); QueryParams qp = new QueryParams(); - qp.add("resource", RESOURCE_NAME); + if (isVersion2AuthenticationEndpoint) { + qp.add("scope", SCOPE); + } else { + qp.add("resource", RESOURCE_NAME); + } qp.add("grant_type", "client_credentials"); qp.add("client_id", clientId); qp.add("client_secret", clientSecret); @@ -230,12 +244,23 @@ public String getMessage() { final StringBuilder sb = new StringBuilder(); sb.append("HTTP Error "); sb.append(httpErrorCode); - sb.append("; url='").append(url).append('\''); - sb.append(' '); + if (!url.isEmpty()) { + sb.append("; url='").append(url).append('\'').append(' '); + } + sb.append(super.getMessage()); - sb.append("; requestId='").append(requestId).append('\''); - sb.append("; contentType='").append(contentType).append('\''); - sb.append("; response '").append(body).append('\''); + if (!requestId.isEmpty()) { + 
sb.append("; requestId='").append(requestId).append('\''); + } + + if (!contentType.isEmpty()) { + sb.append("; contentType='").append(contentType).append('\''); + } + + if (!body.isEmpty()) { + sb.append("; response '").append(body).append('\''); + } + return sb.toString(); } } @@ -266,13 +291,14 @@ private static AzureADToken getTokenCall(String authEndpoint, String body, Hashtable headers, String httpMethod, boolean isMsi) throws IOException { AzureADToken token = null; - ExponentialRetryPolicy retryPolicy - = new ExponentialRetryPolicy(3, 0, 1000, 2); int httperror = 0; IOException ex = null; boolean succeeded = false; + boolean isRecoverableFailure = true; int retryCount = 0; + boolean shouldRetry; + LOG.trace("First execution of REST operation getTokenSingleCall"); do { httperror = 0; ex = null; @@ -282,17 +308,38 @@ private static AzureADToken getTokenCall(String authEndpoint, String body, httperror = e.httpErrorCode; ex = e; } catch (IOException e) { - ex = e; + httperror = -1; + isRecoverableFailure = isRecoverableFailure(e); + ex = new HttpException(httperror, "", String + .format("AzureADAuthenticator.getTokenCall threw %s : %s", + e.getClass().getTypeName(), e.getMessage()), authEndpoint, "", + ""); } succeeded = ((httperror == 0) && (ex == null)); + shouldRetry = !succeeded && isRecoverableFailure + && tokenFetchRetryPolicy.shouldRetry(retryCount, httperror); retryCount++; - } while (!succeeded && retryPolicy.shouldRetry(retryCount, httperror)); + if (shouldRetry) { + LOG.debug("Retrying getTokenSingleCall. 
RetryCount = {}", retryCount); + try { + Thread.sleep(tokenFetchRetryPolicy.getRetryInterval(retryCount)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + } while (shouldRetry); if (!succeeded) { throw ex; } return token; } + private static boolean isRecoverableFailure(IOException e) { + return !(e instanceof MalformedURLException + || e instanceof FileNotFoundException); + } + private static AzureADToken getTokenSingleCall(String authEndpoint, String payload, Hashtable headers, String httpMethod, boolean isMsi) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/ClientCredsTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/ClientCredsTokenProvider.java index 9a46018ec6217..d432e3b2ce0c5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/ClientCredsTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/ClientCredsTokenProvider.java @@ -20,7 +20,7 @@ import java.io.IOException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/CustomTokenProviderAdapter.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/CustomTokenProviderAdapter.java index 37cfa6f1d2910..889976041d46e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/CustomTokenProviderAdapter.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/CustomTokenProviderAdapter.java @@ -22,7 +22,8 @@ import java.io.IOException; import java.net.URI; -import com.google.common.base.Preconditions; +import org.apache.hadoop.classification.VisibleForTesting; +import 
org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +31,7 @@ import org.apache.hadoop.fs.azurebfs.extensions.BoundDTExtension; import org.apache.hadoop.fs.azurebfs.extensions.CustomTokenProviderAdaptee; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; +import org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator.HttpException; /** * Provides tokens based on custom implementation, following the Adapter Design @@ -38,6 +40,7 @@ public final class CustomTokenProviderAdapter extends AccessTokenProvider implements BoundDTExtension { + private final int fetchTokenRetryCount; private CustomTokenProviderAdaptee adaptee; private static final Logger LOG = LoggerFactory.getLogger(AccessTokenProvider.class); @@ -45,17 +48,57 @@ public final class CustomTokenProviderAdapter extends AccessTokenProvider * Constructs a token provider based on the custom token provider. * * @param adaptee the custom token provider + * @param customTokenFetchRetryCount max retry count for customTokenFetch */ - public CustomTokenProviderAdapter(CustomTokenProviderAdaptee adaptee) { + public CustomTokenProviderAdapter(CustomTokenProviderAdaptee adaptee, int customTokenFetchRetryCount) { Preconditions.checkNotNull(adaptee, "adaptee"); this.adaptee = adaptee; + fetchTokenRetryCount = customTokenFetchRetryCount; } protected AzureADToken refreshToken() throws IOException { LOG.debug("AADToken: refreshing custom based token"); AzureADToken azureADToken = new AzureADToken(); - azureADToken.setAccessToken(adaptee.getAccessToken()); + + String accessToken = null; + + Exception ex; + boolean succeeded = false; + // Custom token providers should have their own retry policies, + // Providing a linear retry option for the the retry count + // mentioned in config "fs.azure.custom.token.fetch.retry.count" + int retryCount = fetchTokenRetryCount; + do { + ex = null; + try { + accessToken = adaptee.getAccessToken(); + 
LOG.trace("CustomTokenProvider Access token fetch was successful with retry count {}", + (fetchTokenRetryCount - retryCount)); + } catch (Exception e) { + LOG.debug("CustomTokenProvider Access token fetch failed with retry count {}", + (fetchTokenRetryCount - retryCount)); + ex = e; + } + + succeeded = (ex == null); + retryCount--; + } while (!succeeded && (retryCount) >= 0); + + if (!succeeded) { + HttpException httpEx = new HttpException( + -1, + "", + String.format("CustomTokenProvider getAccessToken threw %s : %s", + ex.getClass().getTypeName(), ex.getMessage()), + "", + "", + "" + ); + throw httpEx; + } + + azureADToken.setAccessToken(accessToken); azureADToken.setExpiry(adaptee.getExpiryTime()); return azureADToken; @@ -96,4 +139,9 @@ public String getUserAgentSuffix() { String suffix = ExtensionHelper.getUserAgentSuffix(adaptee, ""); return suffix != null ? suffix : ""; } + + @VisibleForTesting + protected CustomTokenProviderAdaptee getCustomTokenProviderAdaptee() { + return adaptee; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformer.java index 6844afb9b2a54..dea2f96b60f0a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformer.java @@ -21,7 +21,7 @@ import java.util.List; import java.util.Locale; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +42,7 @@ /** * Perform transformation for Azure Active Directory identities used in owner, group and acls. 
*/ -public class IdentityTransformer { +public class IdentityTransformer implements IdentityTransformerInterface { private static final Logger LOG = LoggerFactory.getLogger(IdentityTransformer.class); private boolean isSecure; @@ -100,7 +100,8 @@ public IdentityTransformer(Configuration configuration) throws IOException { * @param localIdentity the local user or group, should be parsed from UserGroupInformation. * @return owner or group after transformation. * */ - public String transformIdentityForGetRequest(String originalIdentity, boolean isUserName, String localIdentity) { + public String transformIdentityForGetRequest(String originalIdentity, boolean isUserName, String localIdentity) + throws IOException { if (originalIdentity == null) { originalIdentity = localIdentity; // localIdentity might be a full name, so continue the transformation. @@ -198,7 +199,7 @@ public void transformAclEntriesForSetRequest(final List aclEntries) { if (isInSubstitutionList(name)) { transformedName = servicePrincipalId; } else if (aclEntry.getType().equals(AclEntryType.USER) // case 2: when the owner is a short name - && shouldUseFullyQualifiedUserName(name)) { // of the user principal name (UPN). + && shouldUseFullyQualifiedUserName(name)) { // of the user principal name (UPN). // Notice: for group type ACL entry, if name is shortName. // It won't be converted to Full Name. This is // to make the behavior consistent with HDI. 
@@ -242,7 +243,8 @@ && shouldUseFullyQualifiedUserName(name)) { // of the user principal * @param localUser local user name * @param localGroup local primary group * */ - public void transformAclEntriesForGetRequest(final List aclEntries, String localUser, String localGroup) { + public void transformAclEntriesForGetRequest(final List aclEntries, String localUser, String localGroup) + throws IOException { if (skipUserIdentityReplacement) { return; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformerInterface.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformerInterface.java new file mode 100644 index 0000000000000..00f93eae30bd4 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/IdentityTransformerInterface.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azurebfs.oauth2; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.fs.permission.AclEntry; + +/** + * {@code IdentityTransformerInterface} defines the set of translation + * operations that any identity transformer implementation must provide. + */ +public interface IdentityTransformerInterface { + + /** + * Perform identity transformation for the Get request. + * @param originalIdentity the original user or group in the get request. + * @param isUserName indicate whether the input originalIdentity is an owner name or owning group name. + * @param localIdentity the local user or group, should be parsed from UserGroupInformation. + * @return owner or group after transformation. + */ + String transformIdentityForGetRequest(String originalIdentity, boolean isUserName, String localIdentity) + throws IOException; + + /** + * Perform Identity transformation when setting owner on a path. + * @param userOrGroup the user or group to be set as owner. + * @return user or group after transformation. + */ + String transformUserOrGroupForSetRequest(String userOrGroup); + + /** + * Perform Identity transformation when calling setAcl(),removeAclEntries() and modifyAclEntries(). + * @param aclEntries list of AclEntry. + */ + void transformAclEntriesForSetRequest(final List aclEntries); + + /** + * Perform Identity transformation when calling GetAclStatus(). + * @param aclEntries list of AclEntry. + * @param localUser local user name. + * @param localGroup local primary group. 
+ */ + void transformAclEntriesForGetRequest(final List aclEntries, String localUser, String localGroup) + throws IOException; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/LocalIdentityTransformer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/LocalIdentityTransformer.java new file mode 100644 index 0000000000000..5d5371014b761 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/LocalIdentityTransformer.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azurebfs.oauth2; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.utils.IdentityHandler; +import org.apache.hadoop.fs.azurebfs.utils.TextFileBasedIdentityHandler; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LOCAL_USER_SP_MAPPING_FILE_PATH; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LOCAL_GROUP_SG_MAPPING_FILE_PATH; + + +/** + * A subclass of {@link IdentityTransformer} that translates an AAD identity to a local + * identity using an {@link IdentityHandler}. + * + * {@link TextFileBasedIdentityHandler} is an {@link IdentityHandler} that implements + * the translation operation, which returns the identity mapped to an AAD identity. + */ +public class LocalIdentityTransformer extends IdentityTransformer { + + private static final Logger LOG = LoggerFactory.getLogger(LocalIdentityTransformer.class); + + private IdentityHandler localToAadIdentityLookup; + + public LocalIdentityTransformer(Configuration configuration) throws IOException { + super(configuration); + this.localToAadIdentityLookup = + new TextFileBasedIdentityHandler(configuration.get(FS_AZURE_LOCAL_USER_SP_MAPPING_FILE_PATH), + configuration.get(FS_AZURE_LOCAL_GROUP_SG_MAPPING_FILE_PATH)); + } + + /** + * Perform identity transformation for the Get request results. + * @param originalIdentity the original user or group in the get request results: FileStatus, AclStatus. + * @param isUserName indicate whether the input originalIdentity is an owner name or owning group name. + * @param localIdentity the local user or group, should be parsed from UserGroupInformation. + * @return local identity. + */ + @Override + public String transformIdentityForGetRequest(String originalIdentity, boolean isUserName, String localIdentity) + throws IOException { + String localIdentityForOrig = isUserName ?
localToAadIdentityLookup.lookupForLocalUserIdentity(originalIdentity) + : localToAadIdentityLookup.lookupForLocalGroupIdentity(originalIdentity); + + if (localIdentityForOrig == null || localIdentityForOrig.isEmpty()) { + return super.transformIdentityForGetRequest(originalIdentity, isUserName, localIdentity); + } + + return localIdentityForOrig; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/RefreshTokenBasedTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/RefreshTokenBasedTokenProvider.java index 1c1bd2b3b5511..4f35d1522d99f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/RefreshTokenBasedTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/RefreshTokenBasedTokenProvider.java @@ -20,7 +20,7 @@ import java.io.IOException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/UserPasswordTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/UserPasswordTokenProvider.java index 3dad32ec6f51a..3d9d8b865a059 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/UserPasswordTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/UserPasswordTokenProvider.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/security/AbfsDelegationTokenManager.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/security/AbfsDelegationTokenManager.java index c8d6b803f4650..46d5505217378 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/security/AbfsDelegationTokenManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/security/AbfsDelegationTokenManager.java @@ -23,8 +23,8 @@ import java.io.IOException; import java.net.URI; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 6e1de68b5de51..1767274f36068 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -21,66 +21,125 @@ import java.io.Closeable; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Base64; import java.util.List; import java.util.Locale; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import 
org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.fs.store.LogExactlyOnce; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableScheduledFuture; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningScheduledExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; -import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; -import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; -import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import 
org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import org.apache.hadoop.util.concurrent.HadoopExecutors; +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.apache.commons.lang3.StringUtils.isNotEmpty; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; +import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.SERVER_SIDE_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; /** * AbfsClient. 
*/ public class AbfsClient implements Closeable { public static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); + private final URL baseUrl; private final SharedKeyCredentials sharedKeyCredentials; - private final String xMsVersion = "2018-11-09"; + private final String xMsVersion = "2019-12-12"; private final ExponentialRetryPolicy retryPolicy; private final String filesystem; private final AbfsConfiguration abfsConfiguration; private final String userAgent; private final AbfsPerfTracker abfsPerfTracker; + private final String clientProvidedEncryptionKey; + private final String clientProvidedEncryptionKeySHA; private final String accountName; private final AuthType authType; private AccessTokenProvider tokenProvider; private SASTokenProvider sasTokenProvider; + private final AbfsCounters abfsCounters; + private final AbfsThrottlingIntercept intercept; + + private final ListeningScheduledExecutorService executorService; + + private boolean renameResilience; + + /** + * logging the rename failure if metadata is in an incomplete state. 
+ */ + private static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE = new LogExactlyOnce(LOG); private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, final AbfsConfiguration abfsConfiguration, - final ExponentialRetryPolicy exponentialRetryPolicy, - final AbfsPerfTracker abfsPerfTracker) { + final AbfsClientContext abfsClientContext) + throws IOException { this.baseUrl = baseUrl; this.sharedKeyCredentials = sharedKeyCredentials; String baseUrlString = baseUrl.toString(); this.filesystem = baseUrlString.substring(baseUrlString.lastIndexOf(FORWARD_SLASH) + 1); this.abfsConfiguration = abfsConfiguration; - this.retryPolicy = exponentialRetryPolicy; + this.retryPolicy = abfsClientContext.getExponentialRetryPolicy(); this.accountName = abfsConfiguration.getAccountName().substring(0, abfsConfiguration.getAccountName().indexOf(AbfsHttpConstants.DOT)); this.authType = abfsConfiguration.getAuthType(accountName); + this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration); + this.renameResilience = abfsConfiguration.getRenameResilience(); + + String encryptionKey = this.abfsConfiguration + .getClientProvidedEncryptionKey(); + if (encryptionKey != null) { + this.clientProvidedEncryptionKey = getBase64EncodedString(encryptionKey); + this.clientProvidedEncryptionKeySHA = getBase64EncodedString( + getSHA256Hash(encryptionKey)); + } else { + this.clientProvidedEncryptionKey = null; + this.clientProvidedEncryptionKeySHA = null; + } String sslProviderName = null; @@ -98,32 +157,56 @@ private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCreden } this.userAgent = initializeUserAgent(abfsConfiguration, sslProviderName); - this.abfsPerfTracker = abfsPerfTracker; + this.abfsPerfTracker = abfsClientContext.getAbfsPerfTracker(); + this.abfsCounters = abfsClientContext.getAbfsCounters(); + + ThreadFactory tf = + new ThreadFactoryBuilder().setNameFormat("AbfsClient Lease 
Ops").setDaemon(true).build(); + this.executorService = MoreExecutors.listeningDecorator( + HadoopExecutors.newScheduledThreadPool(this.abfsConfiguration.getNumLeaseThreads(), tf)); } public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, final AbfsConfiguration abfsConfiguration, - final ExponentialRetryPolicy exponentialRetryPolicy, final AccessTokenProvider tokenProvider, - final AbfsPerfTracker abfsPerfTracker) { - this(baseUrl, sharedKeyCredentials, abfsConfiguration, exponentialRetryPolicy, abfsPerfTracker); + final AbfsClientContext abfsClientContext) + throws IOException { + this(baseUrl, sharedKeyCredentials, abfsConfiguration, abfsClientContext); this.tokenProvider = tokenProvider; } public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, final AbfsConfiguration abfsConfiguration, - final ExponentialRetryPolicy exponentialRetryPolicy, final SASTokenProvider sasTokenProvider, - final AbfsPerfTracker abfsPerfTracker) { - this(baseUrl, sharedKeyCredentials, abfsConfiguration, exponentialRetryPolicy, abfsPerfTracker); + final AbfsClientContext abfsClientContext) + throws IOException { + this(baseUrl, sharedKeyCredentials, abfsConfiguration, abfsClientContext); this.sasTokenProvider = sasTokenProvider; } + private byte[] getSHA256Hash(String key) throws IOException { + try { + final MessageDigest digester = MessageDigest.getInstance("SHA-256"); + return digester.digest(key.getBytes(StandardCharsets.UTF_8)); + } catch (NoSuchAlgorithmException e) { + throw new IOException(e); + } + } + + private String getBase64EncodedString(String key) { + return getBase64EncodedString(key.getBytes(StandardCharsets.UTF_8)); + } + + private String getBase64EncodedString(byte[] bytes) { + return Base64.getEncoder().encodeToString(bytes); + } + @Override public void close() throws IOException { if (tokenProvider instanceof Closeable) { IOUtils.cleanupWithLogger(LOG, (Closeable) tokenProvider); } + 
HadoopExecutors.shutdown(executorService, LOG, 0, TimeUnit.SECONDS); } public String getFileSystem() { @@ -142,6 +225,10 @@ SharedKeyCredentials getSharedKeyCredentials() { return sharedKeyCredentials; } + AbfsThrottlingIntercept getIntercept() { + return intercept; + } + List createDefaultHeaders() { final List requestHeaders = new ArrayList(); requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion)); @@ -154,13 +241,25 @@ List createDefaultHeaders() { return requestHeaders; } + private void addCustomerProvidedKeyHeaders( + final List requestHeaders) { + if (clientProvidedEncryptionKey != null) { + requestHeaders.add( + new AbfsHttpHeader(X_MS_ENCRYPTION_KEY, clientProvidedEncryptionKey)); + requestHeaders.add(new AbfsHttpHeader(X_MS_ENCRYPTION_KEY_SHA256, + clientProvidedEncryptionKeySHA)); + requestHeaders.add(new AbfsHttpHeader(X_MS_ENCRYPTION_ALGORITHM, + SERVER_SIDE_ENCRYPTION_ALGORITHM)); + } + } + AbfsUriQueryBuilder createDefaultUriQueryBuilder() { final AbfsUriQueryBuilder abfsUriQueryBuilder = new AbfsUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_TIMEOUT, DEFAULT_TIMEOUT); return abfsUriQueryBuilder; } - public AbfsRestOperation createFilesystem() throws AzureBlobFileSystemException { + public AbfsRestOperation createFilesystem(TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = new AbfsUriQueryBuilder(); @@ -173,11 +272,11 @@ public AbfsRestOperation createFilesystem() throws AzureBlobFileSystemException HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation setFilesystemProperties(final String properties) throws AzureBlobFileSystemException { + public AbfsRestOperation setFilesystemProperties(final String properties, TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); // JDK7 
does not support PATCH, so to workaround the issue we will use // PUT and specify the real method in the X-Http-Method-Override header. @@ -197,23 +296,23 @@ public AbfsRestOperation setFilesystemProperties(final String properties) throws HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } public AbfsRestOperation listPath(final String relativePath, final boolean recursive, final int listMaxResults, - final String continuation) throws AzureBlobFileSystemException { + final String continuation, TracingContext tracingContext) + throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_DIRECTORY, relativePath == null ? AbfsHttpConstants.EMPTY_STRING - : relativePath); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_DIRECTORY, getDirectoryQueryParameter(relativePath)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); abfsUriQueryBuilder.addQuery(QUERY_PARAM_MAXRESULTS, String.valueOf(listMaxResults)); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(abfsConfiguration.isUpnUsed())); - appendSASTokenToQuery(relativePath, SASTokenProvider.LISTSTATUS_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(relativePath, SASTokenProvider.LIST_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -222,11 +321,11 @@ public AbfsRestOperation listPath(final String relativePath, final boolean recur HTTP_METHOD_GET, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation getFilesystemProperties() throws AzureBlobFileSystemException { + public 
AbfsRestOperation getFilesystemProperties(TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); @@ -239,11 +338,11 @@ public AbfsRestOperation getFilesystemProperties() throws AzureBlobFileSystemExc HTTP_METHOD_HEAD, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation deleteFilesystem() throws AzureBlobFileSystemException { + public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); @@ -256,13 +355,18 @@ public AbfsRestOperation deleteFilesystem() throws AzureBlobFileSystemException HTTP_METHOD_DELETE, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } public AbfsRestOperation createPath(final String path, final boolean isFile, final boolean overwrite, - final String permission, final String umask) throws AzureBlobFileSystemException { + final String permission, final String umask, + final boolean isAppendBlob, final String eTag, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + if (isFile) { + addCustomerProvidedKeyHeaders(requestHeaders); + } if (!overwrite) { requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, AbfsHttpConstants.STAR)); } @@ -275,12 +379,19 @@ public AbfsRestOperation createPath(final String path, final boolean isFile, fin requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_UMASK, umask)); } + if (eTag != null && !eTag.isEmpty()) { + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.IF_MATCH, eTag)); + } + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); 
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, isFile ? FILE : DIRECTORY); + if (isAppendBlob) { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_BLOBTYPE, APPEND_BLOB_TYPE); + } String operation = isFile - ? SASTokenProvider.CREATEFILE_OPERATION - : SASTokenProvider.MKDIR_OPERATION; + ? SASTokenProvider.CREATE_FILE_OPERATION + : SASTokenProvider.CREATE_DIRECTORY_OPERATION; appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); @@ -290,14 +401,164 @@ public AbfsRestOperation createPath(final String path, final boolean isFile, fin HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException ex) { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw ex; + } + if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { + String existingResource = + op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); + if (existingResource != null && existingResource.equals(DIRECTORY)) { + return op; //don't throw ex on mkdirs for existing directory + } + } + throw ex; + } return op; } - public AbfsRestOperation renamePath(String source, final String destination, final String continuation) - throws AzureBlobFileSystemException { + public AbfsRestOperation acquireLease(final String path, int duration, TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, ACQUIRE_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_DURATION, Integer.toString(duration))); + requestHeaders.add(new AbfsHttpHeader(X_MS_PROPOSED_LEASE_ID, UUID.randomUUID().toString())); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final 
AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.LeasePath, + this, + HTTP_METHOD_POST, + url, + requestHeaders); + op.execute(tracingContext); + return op; + } + + public AbfsRestOperation renewLease(final String path, final String leaseId, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RENEW_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.LeasePath, + this, + HTTP_METHOD_POST, + url, + requestHeaders); + op.execute(tracingContext); + return op; + } + + public AbfsRestOperation releaseLease(final String path, + final String leaseId, TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RELEASE_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.LeasePath, + this, + HTTP_METHOD_POST, + url, + requestHeaders); + op.execute(tracingContext); + return op; + } + + public AbfsRestOperation breakLease(final String path, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, BREAK_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_BREAK_PERIOD, DEFAULT_LEASE_BREAK_PERIOD)); + + final AbfsUriQueryBuilder 
abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.LeasePath, + this, + HTTP_METHOD_POST, + url, + requestHeaders); + op.execute(tracingContext); + return op; + } + + + /** + * Rename a file or directory. + * If a source etag is passed in, the operation will attempt to recover + * from a missing source file by probing the destination for + * existence and comparing etags. + * The second value in the result will be true to indicate that this + * took place. + * As rename recovery is only attempted if the source etag is non-empty, + * in normal rename operations rename recovery will never happen. + * + * @param source path to source file + * @param destination destination of rename. + * @param continuation continuation. + * @param tracingContext trace context + * @param sourceEtag etag of source file. may be null or empty + * @param isMetadataIncompleteState was there a rename failure due to + * incomplete metadata state? + * @param isNamespaceEnabled whether namespace enabled account or not + * @return AbfsClientRenameResult result of rename operation indicating the + * AbfsRest operation, rename recovery and incomplete metadata state failure. + * @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures. 
+ */ + public AbfsClientRenameResult renamePath( + final String source, + final String destination, + final String continuation, + final TracingContext tracingContext, + String sourceEtag, + boolean isMetadataIncompleteState, + boolean isNamespaceEnabled) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + final boolean hasEtag = !isEmpty(sourceEtag); + + boolean shouldAttemptRecovery = renameResilience && isNamespaceEnabled; + if (!hasEtag && shouldAttemptRecovery) { + // in case eTag is already not supplied to the API + // and rename resilience is expected and it is an HNS enabled account + // fetch the source etag to be used later in recovery + try { + final AbfsRestOperation srcStatusOp = getPathStatus(source, + false, tracingContext); + if (srcStatusOp.hasResult()) { + final AbfsHttpOperation result = srcStatusOp.getResult(); + sourceEtag = extractEtagHeader(result); + // and update the directory status. + boolean isDir = checkIsDir(result); + shouldAttemptRecovery = !isDir; + LOG.debug("Retrieved etag of source for rename recovery: {}; isDir={}", sourceEtag, isDir); + } + } catch (AbfsRestOperationException e) { + throw new AbfsRestOperationException(e.getStatusCode(), SOURCE_PATH_NOT_FOUND.getErrorCode(), + e.getMessage(), e); + } + + } + String encodedRenameSource = urlEncode(FORWARD_SLASH + this.getFileSystem() + source); if (authType == AuthType.SAS) { final AbfsUriQueryBuilder srcQueryBuilder = new AbfsUriQueryBuilder(); @@ -314,54 +575,325 @@ public AbfsRestOperation renamePath(String source, final String destination, fin appendSASTokenToQuery(destination, SASTokenProvider.RENAME_DESTINATION_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(destination, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = new AbfsRestOperation( + final AbfsRestOperation op = createRenameRestOperation(url, requestHeaders); + try { + incrementAbfsRenamePath(); + op.execute(tracingContext); + // 
AbfsClientResult contains the AbfsOperation, If recovery happened or + // not, and the incompleteMetaDataState is true or false. + // If we successfully rename a path and isMetadataIncompleteState was + // true, then rename was recovered, else it didn't, this is why + // isMetadataIncompleteState is used for renameRecovery(as the 2nd param). + return new AbfsClientRenameResult(op, isMetadataIncompleteState, isMetadataIncompleteState); + } catch (AzureBlobFileSystemException e) { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw e; + } + + // ref: HADOOP-18242. Rename failure occurring due to a rare case of + // tracking metadata being in incomplete state. + if (op.getResult().getStorageErrorCode() + .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) + && !isMetadataIncompleteState) { + //Logging + ABFS_METADATA_INCOMPLETE_RENAME_FAILURE + .info("Rename Failure attempting to resolve tracking metadata state and retrying."); + // rename recovery should be attempted in this case also + shouldAttemptRecovery = true; + isMetadataIncompleteState = true; + String sourceEtagAfterFailure = sourceEtag; + if (isEmpty(sourceEtagAfterFailure)) { + // Doing a HEAD call resolves the incomplete metadata state and + // then we can retry the rename operation. + AbfsRestOperation sourceStatusOp = getPathStatus(source, false, + tracingContext); + // Extract the sourceEtag, using the status Op, and set it + // for future rename recovery. + AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); + sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); + } + renamePath(source, destination, continuation, tracingContext, + sourceEtagAfterFailure, isMetadataIncompleteState, isNamespaceEnabled); + } + // if we get out of the condition without a successful rename, then + // it isn't metadata incomplete state issue. 
+ isMetadataIncompleteState = false; + + // setting default rename recovery success to false + boolean etagCheckSucceeded = false; + if (shouldAttemptRecovery) { + etagCheckSucceeded = renameIdempotencyCheckOp( + source, + sourceEtag, op, destination, tracingContext); + } + if (!etagCheckSucceeded) { + // idempotency did not return different result + // throw back the exception + throw e; + } + return new AbfsClientRenameResult(op, true, isMetadataIncompleteState); + } + } + + private boolean checkIsDir(AbfsHttpOperation result) { + String resourceType = result.getResponseHeader( + HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + return resourceType != null + && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY); + } + + @VisibleForTesting + AbfsRestOperation createRenameRestOperation(URL url, List requestHeaders) { + AbfsRestOperation op = new AbfsRestOperation( AbfsRestOperationType.RenamePath, this, HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); return op; } - public AbfsRestOperation append(final String path, final long position, final byte[] buffer, final int offset, - final int length) throws AzureBlobFileSystemException { + private void incrementAbfsRenamePath() { + abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1); + } + + /** + * Check if the rename request failure is post a retry and if earlier rename + * request might have succeeded at back-end. + * + * If a source etag was passed in, and the error was 404, get the + * etag of any file at the destination. + * If it matches the source etag, then the rename is considered + * a success. + * Exceptions raised in the probe of the destination are swallowed, + * so that they do not interfere with the original rename failures. + * @param source source path + * @param op Rename request REST operation response with non-null HTTP response + * @param destination rename destination path + * @param sourceEtag etag of source file. 
may be null or empty + * @param tracingContext Tracks identifiers for request header + * @return true if the file was successfully copied + */ + public boolean renameIdempotencyCheckOp( + final String source, + final String sourceEtag, + final AbfsRestOperation op, + final String destination, + TracingContext tracingContext) { + Preconditions.checkArgument(op.hasResult(), "Operations has null HTTP response"); + + // removing isDir from debug logs as it can be misleading + LOG.debug("rename({}, {}) failure {}; retry={} etag {}", + source, destination, op.getResult().getStatusCode(), op.isARetriedRequest(), sourceEtag); + if (!(op.isARetriedRequest() + && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND))) { + // only attempt recovery if the failure was a 404 on a retried rename request. + return false; + } + + if (isNotEmpty(sourceEtag)) { + // Server has returned HTTP 404, we have an etag, so see + // if the rename has actually taken place, + LOG.info("rename {} to {} failed, checking etag of destination", + source, destination); + try { + final AbfsRestOperation destStatusOp = getPathStatus(destination, false, tracingContext); + final AbfsHttpOperation result = destStatusOp.getResult(); + + final boolean recovered = result.getStatusCode() == HttpURLConnection.HTTP_OK + && sourceEtag.equals(extractEtagHeader(result)); + LOG.info("File rename has taken place: recovery {}", + recovered ? "succeeded" : "failed"); + return recovered; + + } catch (AzureBlobFileSystemException ex) { + // GetFileStatus on the destination failed, the rename did not take place + // or some other failure. log and swallow. 
+ LOG.debug("Failed to get status of path {}", destination, ex); + } + } else { + LOG.debug("No source etag; unable to probe for the operation's success"); + } + return false; + } + + @VisibleForTesting + boolean isSourceDestEtagEqual(String sourceEtag, AbfsHttpOperation result) { + return sourceEtag.equals(extractEtagHeader(result)); + } + + public AbfsRestOperation append(final String path, final byte[] buffer, + AppendRequestParameters reqParams, final String cachedSasToken, TracingContext tracingContext) + throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + addCustomerProvidedKeyHeaders(requestHeaders); + if (reqParams.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } // JDK7 does not support PATCH, so to workaround the issue we will use // PUT and specify the real method in the X-Http-Method-Override header. requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); + HTTP_METHOD_PATCH)); + if (reqParams.getLeaseId() != null) { + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, reqParams.getLeaseId())); + } final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); - appendSASTokenToQuery(path, SASTokenProvider.APPEND_OPERATION, abfsUriQueryBuilder); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(reqParams.getPosition())); + + if ((reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_MODE) || ( + reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_CLOSE_MODE)) { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_FLUSH, TRUE); + if (reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_CLOSE_MODE) { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, TRUE); + } + } + + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String 
sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, + abfsUriQueryBuilder, cachedSasToken); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = new AbfsRestOperation( - AbfsRestOperationType.Append, - this, + final AbfsRestOperation op = getAbfsRestOperationForAppend(AbfsRestOperationType.Append, HTTP_METHOD_PUT, url, - requestHeaders, buffer, offset, length); - op.execute(); + requestHeaders, + buffer, + reqParams.getoffset(), + reqParams.getLength(), + sasTokenForReuse); + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException e) { + /* + If the http response code indicates a user error we retry + the same append request with expect header being disabled. + When "100-continue" header is enabled but a non Http 100 response comes, + the response message might not get set correctly by the server. + So, this handling is to avoid breaking of backward compatibility + if someone has taken dependency on the exception message, + which is created using the error string present in the response header. + */ + int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode(); + if (checkUserError(responseStatusCode) && reqParams.isExpectHeaderEnabled()) { + LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path); + reqParams.setExpectHeaderEnabled(false); + return this.append(path, buffer, reqParams, cachedSasToken, + tracingContext); + } + // If we have no HTTP response, throw the original exception. 
+ if (!op.hasResult()) { + throw e; + } + if (reqParams.isAppendBlob() + && appendSuccessCheckOp(op, path, + (reqParams.getPosition() + reqParams.getLength()), tracingContext)) { + final AbfsRestOperation successOp = getAbfsRestOperationForAppend( + AbfsRestOperationType.Append, + HTTP_METHOD_PUT, + url, + requestHeaders, + buffer, + reqParams.getoffset(), + reqParams.getLength(), + sasTokenForReuse); + successOp.hardSetResult(HttpURLConnection.HTTP_OK); + return successOp; + } + throw e; + } + return op; } - public AbfsRestOperation flush(final String path, final long position, boolean retainUncommittedData, boolean isClose) - throws AzureBlobFileSystemException { + /** + * Returns the rest operation for append. + * @param operationType The AbfsRestOperationType. + * @param httpMethod specifies the httpMethod. + * @param url specifies the url. + * @param requestHeaders This includes the list of request headers. + * @param buffer The buffer to write into. + * @param bufferOffset The buffer offset. + * @param bufferLength The buffer Length. + * @param sasTokenForReuse The sasToken. + * @return AbfsRestOperation op. + */ + @VisibleForTesting + AbfsRestOperation getAbfsRestOperationForAppend(final AbfsRestOperationType operationType, + final String httpMethod, + final URL url, + final List requestHeaders, + final byte[] buffer, + final int bufferOffset, + final int bufferLength, + final String sasTokenForReuse) { + return new AbfsRestOperation( + operationType, + this, + httpMethod, + url, + requestHeaders, + buffer, + bufferOffset, + bufferLength, sasTokenForReuse); + } + + /** + * Returns true if the status code lies in the range of user error. + * @param responseStatusCode http response status code. + * @return True or False. 
+ */ + private boolean checkUserError(int responseStatusCode) { + return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST + && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR); + } + + // For AppendBlob its possible that the append succeeded in the backend but the request failed. + // However a retry would fail with an InvalidQueryParameterValue + // (as the current offset would be unacceptable). + // Hence, we pass/succeed the appendblob append call + // in case we are doing a retry after checking the length of the file + public boolean appendSuccessCheckOp(AbfsRestOperation op, final String path, + final long length, TracingContext tracingContext) throws AzureBlobFileSystemException { + if ((op.isARetriedRequest()) + && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_BAD_REQUEST)) { + final AbfsRestOperation destStatusOp = getPathStatus(path, false, tracingContext); + if (destStatusOp.getResult().getStatusCode() == HttpURLConnection.HTTP_OK) { + String fileLength = destStatusOp.getResult().getResponseHeader( + HttpHeaderConfigurations.CONTENT_LENGTH); + if (length <= Long.parseLong(fileLength)) { + LOG.debug("Returning success response from append blob idempotency code"); + return true; + } + } + } + return false; + } + + public AbfsRestOperation flush(final String path, final long position, + boolean retainUncommittedData, boolean isClose, + final String cachedSasToken, final String leaseId, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + addCustomerProvidedKeyHeaders(requestHeaders); // JDK7 does not support PATCH, so to workaround the issue we will use // PUT and specify the real method in the X-Http-Method-Override header. 
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (leaseId != null) { + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); + } final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, FLUSH_ACTION); abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, String.valueOf(retainUncommittedData)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); - appendSASTokenToQuery(path, SASTokenProvider.APPEND_OPERATION, abfsUriQueryBuilder); + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, + abfsUriQueryBuilder, cachedSasToken); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -369,14 +901,16 @@ public AbfsRestOperation flush(final String path, final long position, boolean r this, HTTP_METHOD_PUT, url, - requestHeaders); - op.execute(); + requestHeaders, sasTokenForReuse); + op.execute(tracingContext); return op; } - public AbfsRestOperation setPathProperties(final String path, final String properties) + public AbfsRestOperation setPathProperties(final String path, final String properties, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + addCustomerProvidedKeyHeaders(requestHeaders); // JDK7 does not support PATCH, so to workaround the issue we will use // PUT and specify the real method in the X-Http-Method-Override header. 
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, @@ -386,6 +920,7 @@ public AbfsRestOperation setPathProperties(final String path, final String prope final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, SET_PROPERTIES_ACTION); + appendSASTokenToQuery(path, SASTokenProvider.SET_PROPERTIES_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -394,16 +929,27 @@ public AbfsRestOperation setPathProperties(final String path, final String prope HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation getPathStatus(final String path) throws AzureBlobFileSystemException { + public AbfsRestOperation getPathStatus(final String path, final boolean includeProperties, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + String operation = SASTokenProvider.GET_PROPERTIES_OPERATION; + if (!includeProperties) { + // The default action (operation) is implicitly to get properties and this action requires read permission + // because it reads user defined properties. If the action is getStatus or getAclStatus, then + // only traversal (execute) permission is required. 
+ abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.GET_STATUS); + operation = SASTokenProvider.GET_STATUS_OPERATION; + } else { + addCustomerProvidedKeyHeaders(requestHeaders); + } abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(abfsConfiguration.isUpnUsed())); - appendSASTokenToQuery(path, SASTokenProvider.GETFILESTATUS_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -412,22 +958,25 @@ public AbfsRestOperation getPathStatus(final String path) throws AzureBlobFileSy HTTP_METHOD_HEAD, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } public AbfsRestOperation read(final String path, final long position, final byte[] buffer, final int bufferOffset, - final int bufferLength, final String eTag) throws AzureBlobFileSystemException { + final int bufferLength, final String eTag, String cachedSasToken, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + addCustomerProvidedKeyHeaders(requestHeaders); requestHeaders.add(new AbfsHttpHeader(RANGE, String.format("bytes=%d-%d", position, position + bufferLength - 1))); requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, abfsUriQueryBuilder); + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, + abfsUriQueryBuilder, cachedSasToken); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = new AbfsRestOperation( AbfsRestOperationType.ReadFile, this, @@ -436,20 +985,22 @@ public 
AbfsRestOperation read(final String path, final long position, final byte requestHeaders, buffer, bufferOffset, - bufferLength); - op.execute(); + bufferLength, sasTokenForReuse); + op.execute(tracingContext); return op; } - public AbfsRestOperation deletePath(final String path, final boolean recursive, final String continuation) + public AbfsRestOperation deletePath(final String path, final boolean recursive, final String continuation, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); - appendSASTokenToQuery(path, SASTokenProvider.DELETE_OPERATION, abfsUriQueryBuilder); + String operation = recursive ? SASTokenProvider.DELETE_RECURSIVE_OPERATION : SASTokenProvider.DELETE_OPERATION; + appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -458,11 +1009,65 @@ public AbfsRestOperation deletePath(final String path, final boolean recursive, HTTP_METHOD_DELETE, url, requestHeaders); - op.execute(); + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException e) { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw e; + } + final AbfsRestOperation idempotencyOp = deleteIdempotencyCheckOp(op); + if (idempotencyOp.getResult().getStatusCode() + == op.getResult().getStatusCode()) { + // idempotency did not return different result + // throw back the exception + throw e; + } else { + return idempotencyOp; + } + } + + return op; + } + + /** + * Check if the delete request failure is post a retry and if delete failure + * qualifies to be a success response assuming idempotency. 
+ * + * There are below scenarios where delete could be incorrectly deducted as + * success post request retry: + * 1. Target was originally not existing and initial delete request had to be + * re-tried. + * 2. Parallel delete issued from any other store interface rather than + * delete issued from this filesystem instance. + * These are few corner cases and usually returning a success at this stage + * should help the job to continue. + * @param op Delete request REST operation response with non-null HTTP response + * @return REST operation response post idempotency check + */ + public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) { + Preconditions.checkArgument(op.hasResult(), "Operations has null HTTP response"); + if ((op.isARetriedRequest()) + && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) + && DEFAULT_DELETE_CONSIDERED_IDEMPOTENT) { + // Server has returned HTTP 404, which means path no longer + // exists. Assuming delete result to be idempotent, return success. 
+ final AbfsRestOperation successOp = new AbfsRestOperation( + AbfsRestOperationType.DeletePath, + this, + HTTP_METHOD_DELETE, + op.getUrl(), + op.getRequestHeaders()); + successOp.hardSetResult(HttpURLConnection.HTTP_OK); + LOG.debug("Returning success response from delete idempotency logic"); + return successOp; + } + return op; } - public AbfsRestOperation setOwner(final String path, final String owner, final String group) + public AbfsRestOperation setOwner(final String path, final String owner, final String group, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); // JDK7 does not support PATCH, so to workaround the issue we will use @@ -479,7 +1084,7 @@ public AbfsRestOperation setOwner(final String path, final String owner, final S final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.SET_ACCESS_CONTROL); - appendSASTokenToQuery(path, SASTokenProvider.SETOWNER_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(path, SASTokenProvider.SET_OWNER_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -488,11 +1093,12 @@ public AbfsRestOperation setOwner(final String path, final String owner, final S AbfsHttpConstants.HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation setPermission(final String path, final String permission) + public AbfsRestOperation setPermission(final String path, final String permission, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); // JDK7 does not support PATCH, so to workaround the issue we will use @@ -504,7 +1110,7 @@ public AbfsRestOperation setPermission(final String path, final String permissio final 
AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.SET_ACCESS_CONTROL); - appendSASTokenToQuery(path, SASTokenProvider.SETPERMISSION_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(path, SASTokenProvider.SET_PERMISSION_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -513,15 +1119,17 @@ public AbfsRestOperation setPermission(final String path, final String permissio AbfsHttpConstants.HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation setAcl(final String path, final String aclSpecString) throws AzureBlobFileSystemException { - return setAcl(path, aclSpecString, AbfsHttpConstants.EMPTY_STRING); + public AbfsRestOperation setAcl(final String path, final String aclSpecString, + TracingContext tracingContext) throws AzureBlobFileSystemException { + return setAcl(path, aclSpecString, AbfsHttpConstants.EMPTY_STRING, tracingContext); } - public AbfsRestOperation setAcl(final String path, final String aclSpecString, final String eTag) + public AbfsRestOperation setAcl(final String path, final String aclSpecString, final String eTag, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); // JDK7 does not support PATCH, so to workaround the issue we will use @@ -537,7 +1145,7 @@ public AbfsRestOperation setAcl(final String path, final String aclSpecString, f final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.SET_ACCESS_CONTROL); - appendSASTokenToQuery(path, SASTokenProvider.SETACL_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(path, SASTokenProvider.SET_ACL_OPERATION, abfsUriQueryBuilder); 
final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -546,21 +1154,23 @@ public AbfsRestOperation setAcl(final String path, final String aclSpecString, f AbfsHttpConstants.HTTP_METHOD_PUT, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } - public AbfsRestOperation getAclStatus(final String path) throws AzureBlobFileSystemException { - return getAclStatus(path, abfsConfiguration.isUpnUsed()); + public AbfsRestOperation getAclStatus(final String path, TracingContext tracingContext) + throws AzureBlobFileSystemException { + return getAclStatus(path, abfsConfiguration.isUpnUsed(), tracingContext); } - public AbfsRestOperation getAclStatus(final String path, final boolean useUPN) throws AzureBlobFileSystemException { + public AbfsRestOperation getAclStatus(final String path, final boolean useUPN, + TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.GET_ACCESS_CONTROL); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(useUPN)); - appendSASTokenToQuery(path, SASTokenProvider.GETACL_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(path, SASTokenProvider.GET_ACL_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -569,7 +1179,7 @@ public AbfsRestOperation getAclStatus(final String path, final boolean useUPN) t AbfsHttpConstants.HTTP_METHOD_HEAD, url, requestHeaders); - op.execute(); + op.execute(tracingContext); return op; } @@ -579,39 +1189,87 @@ public AbfsRestOperation getAclStatus(final String path, final boolean useUPN) t * * @param path Path for which access check needs to be performed * @param 
rwx The permission to be checked on the path + * @param tracingContext Tracks identifiers for request header * @return The {@link AbfsRestOperation} object for the operation * @throws AzureBlobFileSystemException in case of bad requests */ - public AbfsRestOperation checkAccess(String path, String rwx) + public AbfsRestOperation checkAccess(String path, String rwx, TracingContext tracingContext) throws AzureBlobFileSystemException { AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, CHECK_ACCESS); abfsUriQueryBuilder.addQuery(QUERY_FS_ACTION, rwx); + appendSASTokenToQuery(path, SASTokenProvider.CHECK_ACCESS_OPERATION, abfsUriQueryBuilder); URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); AbfsRestOperation op = new AbfsRestOperation( AbfsRestOperationType.CheckAccess, this, AbfsHttpConstants.HTTP_METHOD_HEAD, url, createDefaultHeaders()); - op.execute(); + op.execute(tracingContext); return op; } + /** + * Get the directory query parameter used by the List Paths REST API and used + * as the path in the continuation token. If the input path is null or the + * root path "/", empty string is returned. If the input path begins with '/', + * the return value is the substring beginning at offset 1. Otherwise, the + * input path is returned. + * @param path the path to be listed. + * @return the value of the directory query parameter + */ + public static String getDirectoryQueryParameter(final String path) { + String directory = path; + if (Strings.isNullOrEmpty(directory)) { + directory = AbfsHttpConstants.EMPTY_STRING; + } else if (directory.charAt(0) == '/') { + directory = directory.substring(1); + } + return directory; + } + /** * If configured for SAS AuthType, appends SAS token to queryBuilder * @param path * @param operation * @param queryBuilder + * @return sasToken - returned for optional re-use. 
* @throws SASTokenProviderException */ - private void appendSASTokenToQuery(String path, String operation, AbfsUriQueryBuilder queryBuilder) throws SASTokenProviderException { + private String appendSASTokenToQuery(String path, String operation, AbfsUriQueryBuilder queryBuilder) throws SASTokenProviderException { + return appendSASTokenToQuery(path, operation, queryBuilder, null); + } + + /** + * If configured for SAS AuthType, appends SAS token to queryBuilder + * @param path + * @param operation + * @param queryBuilder + * @param cachedSasToken - previously acquired SAS token to be reused. + * @return sasToken - returned for optional re-use. + * @throws SASTokenProviderException + */ + private String appendSASTokenToQuery(String path, + String operation, + AbfsUriQueryBuilder queryBuilder, + String cachedSasToken) + throws SASTokenProviderException { + String sasToken = null; if (this.authType == AuthType.SAS) { try { LOG.trace("Fetch SAS token for {} on {}", operation, path); - String sasToken = sasTokenProvider.getSASToken(this.accountName, - this.filesystem, path, operation); - if ((sasToken == null) || sasToken.isEmpty()) { - throw new UnsupportedOperationException("SASToken received is empty or null"); + if (cachedSasToken == null) { + sasToken = sasTokenProvider.getSASToken(this.accountName, + this.filesystem, path, operation); + if ((sasToken == null) || sasToken.isEmpty()) { + throw new UnsupportedOperationException("SASToken received is empty or null"); + } + } else { + sasToken = cachedSasToken; + LOG.trace("Using cached SAS token."); + } + // if SAS Token contains a prefix of ?, it should be removed + if (sasToken.charAt(0) == '?') { + sasToken = sasToken.substring(1); } - queryBuilder.setSASToken(sasToken); LOG.trace("SAS token fetch complete for {} on {}", operation, path); } catch (Exception ex) { @@ -621,13 +1279,15 @@ private void appendSASTokenToQuery(String path, String operation, AbfsUriQueryBu ex.toString())); } } + return sasToken; } private 
URL createRequestUrl(final String query) throws AzureBlobFileSystemException { return createRequestUrl(EMPTY_STRING, query); } - private URL createRequestUrl(final String path, final String query) + @VisibleForTesting + protected URL createRequestUrl(final String path, final String query) throws AzureBlobFileSystemException { final String base = baseUrl.toString(); String encodedPath = path; @@ -679,32 +1339,60 @@ public AuthType getAuthType() { @VisibleForTesting String initializeUserAgent(final AbfsConfiguration abfsConfiguration, - final String sslProviderName) { + final String sslProviderName) { + StringBuilder sb = new StringBuilder(); - sb.append("(JavaJRE "); + + sb.append(APN_VERSION); + sb.append(SINGLE_WHITE_SPACE); + sb.append(CLIENT_VERSION); + sb.append(SINGLE_WHITE_SPACE); + + sb.append("("); + + sb.append(System.getProperty(JAVA_VENDOR) + .replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)); + sb.append(SINGLE_WHITE_SPACE); + sb.append("JavaJRE"); + sb.append(SINGLE_WHITE_SPACE); sb.append(System.getProperty(JAVA_VERSION)); - sb.append("; "); - sb.append( - System.getProperty(OS_NAME).replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)); - sb.append(" "); + sb.append(SEMICOLON); + sb.append(SINGLE_WHITE_SPACE); + + sb.append(System.getProperty(OS_NAME) + .replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)); + sb.append(SINGLE_WHITE_SPACE); sb.append(System.getProperty(OS_VERSION)); - if (sslProviderName != null && !sslProviderName.isEmpty()) { - sb.append("; "); - sb.append(sslProviderName); - } - String tokenProviderField = - ExtensionHelper.getUserAgentSuffix(tokenProvider, ""); - if (!tokenProviderField.isEmpty()) { - sb.append("; ").append(tokenProviderField); - } + sb.append(FORWARD_SLASH); + sb.append(System.getProperty(OS_ARCH)); + sb.append(SEMICOLON); + + appendIfNotEmpty(sb, sslProviderName, true); + appendIfNotEmpty(sb, + ExtensionHelper.getUserAgentSuffix(tokenProvider, EMPTY_STRING), true); + + sb.append(SINGLE_WHITE_SPACE); + 
sb.append(abfsConfiguration.getClusterName()); + sb.append(FORWARD_SLASH); + sb.append(abfsConfiguration.getClusterType()); + sb.append(")"); - final String userAgentComment = sb.toString(); - String customUserAgentId = abfsConfiguration.getCustomUserAgentPrefix(); - if (customUserAgentId != null && !customUserAgentId.isEmpty()) { - return String.format(Locale.ROOT, CLIENT_VERSION + " %s %s", - userAgentComment, customUserAgentId); + + appendIfNotEmpty(sb, abfsConfiguration.getCustomUserAgentPrefix(), false); + + return String.format(Locale.ROOT, sb.toString()); + } + + private void appendIfNotEmpty(StringBuilder sb, String regEx, + boolean shouldAppendSemiColon) { + if (regEx == null || regEx.trim().isEmpty()) { + return; + } + sb.append(SINGLE_WHITE_SPACE); + sb.append(regEx); + if (shouldAppendSemiColon) { + sb.append(SEMICOLON); } - return String.format(Locale.ROOT, CLIENT_VERSION + " %s", userAgentComment); } @VisibleForTesting @@ -716,4 +1404,42 @@ URL getBaseUrl() { public SASTokenProvider getSasTokenProvider() { return this.sasTokenProvider; } + + /** + * Getter for abfsCounters from AbfsClient. + * @return AbfsCounters instance. + */ + protected AbfsCounters getAbfsCounters() { + return abfsCounters; + } + + /** + * Getter for abfsConfiguration from AbfsClient. 
+ * @return AbfsConfiguration instance + */ + protected AbfsConfiguration getAbfsConfiguration() { + return abfsConfiguration; + } + + public int getNumLeaseThreads() { + return abfsConfiguration.getNumLeaseThreads(); + } + + public ListenableScheduledFuture schedule(Callable callable, long delay, + TimeUnit timeUnit) { + return executorService.schedule(callable, delay, timeUnit); + } + + public ListenableFuture submit(Runnable runnable) { + return executorService.submit(runnable); + } + + public void addCallback(ListenableFuture future, FutureCallback callback) { + Futures.addCallback(future, callback, executorService); + } + + @VisibleForTesting + protected AccessTokenProvider getTokenProvider() { + return tokenProvider; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java new file mode 100644 index 0000000000000..ad20550af7c3f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +/** + * Class to hold extra configurations for AbfsClient and further classes + * inside AbfsClient. + */ +public class AbfsClientContext { + + private final ExponentialRetryPolicy exponentialRetryPolicy; + private final AbfsPerfTracker abfsPerfTracker; + private final AbfsCounters abfsCounters; + + AbfsClientContext( + ExponentialRetryPolicy exponentialRetryPolicy, + AbfsPerfTracker abfsPerfTracker, + AbfsCounters abfsCounters) { + this.exponentialRetryPolicy = exponentialRetryPolicy; + this.abfsPerfTracker = abfsPerfTracker; + this.abfsCounters = abfsCounters; + } + + public ExponentialRetryPolicy getExponentialRetryPolicy() { + return exponentialRetryPolicy; + } + + public AbfsPerfTracker getAbfsPerfTracker() { + return abfsPerfTracker; + } + + public AbfsCounters getAbfsCounters() { + return abfsCounters; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java new file mode 100644 index 0000000000000..00513f7138d53 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +/** + * A builder for AbfsClientContext class with different options to select and + * build from. + */ +public class AbfsClientContextBuilder { + + private ExponentialRetryPolicy exponentialRetryPolicy; + private AbfsPerfTracker abfsPerfTracker; + private AbfsCounters abfsCounters; + + public AbfsClientContextBuilder withExponentialRetryPolicy( + final ExponentialRetryPolicy exponentialRetryPolicy) { + this.exponentialRetryPolicy = exponentialRetryPolicy; + return this; + } + + public AbfsClientContextBuilder withAbfsPerfTracker( + final AbfsPerfTracker abfsPerfTracker) { + this.abfsPerfTracker = abfsPerfTracker; + return this; + } + + public AbfsClientContextBuilder withAbfsCounters(final AbfsCounters abfsCounters) { + this.abfsCounters = abfsCounters; + return this; + } + + /** + * Build the context and get the instance with the properties selected. + * + * @return an instance of AbfsClientContext. 
+ */ + public AbfsClientContext build() { + //validate the values + return new AbfsClientContext(exponentialRetryPolicy, abfsPerfTracker, + abfsCounters); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java new file mode 100644 index 0000000000000..76648cfc44b59 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +/** + * A class to store the Result of an AbfsClient rename operation, signifying the + * AbfsRestOperation result and the rename recovery. + */ +public class AbfsClientRenameResult { + + /** Abfs Rest Operation. */ + private final AbfsRestOperation op; + /** Flag indicating recovery took place. */ + private final boolean renameRecovered; + /** Abfs storage tracking metadata is in an incomplete state. */ + private final boolean isIncompleteMetadataState; + + /** + * Constructing an ABFS rename operation result. 
+ * @param op The AbfsRestOperation. + * @param renameRecovered Did rename recovery take place? + * @param isIncompleteMetadataState Did the rename fail due to incomplete + * metadata state and have to be retried? + */ + public AbfsClientRenameResult( + AbfsRestOperation op, + boolean renameRecovered, + boolean isIncompleteMetadataState) { + this.op = op; + this.renameRecovered = renameRecovered; + this.isIncompleteMetadataState = isIncompleteMetadataState; + } + + public AbfsRestOperation getOp() { + return op; + } + + public boolean isRenameRecovered() { + return renameRecovered; + } + + public boolean isIncompleteMetadataState() { + return isIncompleteMetadataState; + } + + @Override + public String toString() { + return "AbfsClientRenameResult{" + + "op=" + + op + + ", renameRecovered=" + + renameRecovered + + ", isIncompleteMetadataState=" + + isIncompleteMetadataState + + '}'; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java index f1e5aaae6835c..f1eb3a2a77476 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java @@ -20,20 +20,23 @@ import java.util.Timer; import java.util.TimerTask; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.util.Preconditions; import
org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.util.Time.now; + class AbfsClientThrottlingAnalyzer { private static final Logger LOG = LoggerFactory.getLogger( AbfsClientThrottlingAnalyzer.class); - private static final int DEFAULT_ANALYSIS_PERIOD_MS = 10 * 1000; private static final int MIN_ANALYSIS_PERIOD_MS = 1000; private static final int MAX_ANALYSIS_PERIOD_MS = 30000; private static final double MIN_ACCEPTABLE_ERROR_PERCENTAGE = .1; @@ -50,42 +53,38 @@ class AbfsClientThrottlingAnalyzer { private String name = null; private Timer timer = null; private AtomicReference blobMetrics = null; + private AtomicLong lastExecutionTime = null; + private final AtomicBoolean isOperationOnAccountIdle = new AtomicBoolean(false); + private AbfsConfiguration abfsConfiguration = null; + private boolean accountLevelThrottlingEnabled = true; private AbfsClientThrottlingAnalyzer() { // hide default constructor } - /** - * Creates an instance of the AbfsClientThrottlingAnalyzer class with - * the specified name. - * - * @param name a name used to identify this instance. - * @throws IllegalArgumentException if name is null or empty. - */ - AbfsClientThrottlingAnalyzer(String name) throws IllegalArgumentException { - this(name, DEFAULT_ANALYSIS_PERIOD_MS); - } - /** * Creates an instance of the AbfsClientThrottlingAnalyzer class with * the specified name and period. * * @param name A name used to identify this instance. - * @param period The frequency, in milliseconds, at which metrics are - * analyzed. + * @param abfsConfiguration The configuration set. * @throws IllegalArgumentException If name is null or empty. * If period is less than 1000 or greater than 30000 milliseconds. 
*/ - AbfsClientThrottlingAnalyzer(String name, int period) + AbfsClientThrottlingAnalyzer(String name, AbfsConfiguration abfsConfiguration) throws IllegalArgumentException { Preconditions.checkArgument( StringUtils.isNotEmpty(name), "The argument 'name' cannot be null or empty."); + int period = abfsConfiguration.getAnalysisPeriod(); Preconditions.checkArgument( period >= MIN_ANALYSIS_PERIOD_MS && period <= MAX_ANALYSIS_PERIOD_MS, "The argument 'period' must be between 1000 and 30000."); this.name = name; - this.analysisPeriodMs = period; + this.abfsConfiguration = abfsConfiguration; + this.accountLevelThrottlingEnabled = abfsConfiguration.accountThrottlingEnabled(); + this.analysisPeriodMs = abfsConfiguration.getAnalysisPeriod(); + this.lastExecutionTime = new AtomicLong(now()); this.blobMetrics = new AtomicReference( new AbfsOperationMetrics(System.currentTimeMillis())); this.timer = new Timer( @@ -95,6 +94,47 @@ private AbfsClientThrottlingAnalyzer() { analysisPeriodMs); } + /** + * Resumes the timer if it was stopped. + */ + private void resumeTimer() { + blobMetrics = new AtomicReference( + new AbfsOperationMetrics(System.currentTimeMillis())); + timer.schedule(new TimerTaskImpl(), + analysisPeriodMs, + analysisPeriodMs); + isOperationOnAccountIdle.set(false); + } + + /** + * Synchronized method to suspend or resume timer. + * @param timerFunctionality resume or suspend. + * @param timerTask The TimerTask to cancel when suspending. + * @return true if the timer task was cancelled (timer suspended), false otherwise.
+ */ + private synchronized boolean timerOrchestrator(TimerFunctionality timerFunctionality, + TimerTask timerTask) { + switch (timerFunctionality) { + case RESUME: + if (isOperationOnAccountIdle.get()) { + resumeTimer(); + } + break; + case SUSPEND: + if (accountLevelThrottlingEnabled && (System.currentTimeMillis() + - lastExecutionTime.get() >= getOperationIdleTimeout())) { + isOperationOnAccountIdle.set(true); + timerTask.cancel(); + timer.purge(); + return true; + } + break; + default: + break; + } + return false; + } + /** * Updates metrics with results from the current storage operation. * @@ -104,26 +144,32 @@ private AbfsClientThrottlingAnalyzer() { public void addBytesTransferred(long count, boolean isFailedOperation) { AbfsOperationMetrics metrics = blobMetrics.get(); if (isFailedOperation) { - metrics.bytesFailed.addAndGet(count); - metrics.operationsFailed.incrementAndGet(); + metrics.addBytesFailed(count); + metrics.incrementOperationsFailed(); } else { - metrics.bytesSuccessful.addAndGet(count); - metrics.operationsSuccessful.incrementAndGet(); + metrics.addBytesSuccessful(count); + metrics.incrementOperationsSuccessful(); } + blobMetrics.set(metrics); } /** * Suspends the current storage operation, as necessary, to reduce throughput. + * @return true if Thread sleeps(Throttling occurs) else false. 
*/ - public void suspendIfNecessary() { + public boolean suspendIfNecessary() { + lastExecutionTime.set(now()); + timerOrchestrator(TimerFunctionality.RESUME, null); int duration = sleepDuration; if (duration > 0) { try { Thread.sleep(duration); + return true; } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } + return false; } @VisibleForTesting @@ -131,19 +177,27 @@ int getSleepDuration() { return sleepDuration; } + int getOperationIdleTimeout() { + return abfsConfiguration.getAccountOperationIdleTimeout(); + } + + AtomicBoolean getIsOperationOnAccountIdle() { + return isOperationOnAccountIdle; + } + private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics, int sleepDuration) { final double percentageConversionFactor = 100; - double bytesFailed = metrics.bytesFailed.get(); - double bytesSuccessful = metrics.bytesSuccessful.get(); - double operationsFailed = metrics.operationsFailed.get(); - double operationsSuccessful = metrics.operationsSuccessful.get(); + double bytesFailed = metrics.getBytesFailed().get(); + double bytesSuccessful = metrics.getBytesSuccessful().get(); + double operationsFailed = metrics.getOperationsFailed().get(); + double operationsSuccessful = metrics.getOperationsSuccessful().get(); double errorPercentage = (bytesFailed <= 0) ? 
0 : (percentageConversionFactor * bytesFailed / (bytesFailed + bytesSuccessful)); - long periodMs = metrics.endTime - metrics.startTime; + long periodMs = metrics.getEndTime() - metrics.getStartTime(); double newSleepDuration; @@ -235,10 +289,13 @@ public void run() { } long now = System.currentTimeMillis(); - if (now - blobMetrics.get().startTime >= analysisPeriodMs) { + if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) { + return; + } + if (now - blobMetrics.get().getStartTime() >= analysisPeriodMs) { AbfsOperationMetrics oldMetrics = blobMetrics.getAndSet( new AbfsOperationMetrics(now)); - oldMetrics.endTime = now; + oldMetrics.setEndTime(now); sleepDuration = analyzeMetricsAndUpdateSleepDuration(oldMetrics, sleepDuration); } @@ -249,24 +306,4 @@ public void run() { } } } - - /** - * Stores Abfs operation metrics during each analysis period. - */ - static class AbfsOperationMetrics { - private AtomicLong bytesFailed; - private AtomicLong bytesSuccessful; - private AtomicLong operationsFailed; - private AtomicLong operationsSuccessful; - private long endTime; - private long startTime; - - AbfsOperationMetrics(long startTime) { - this.startTime = startTime; - this.bytesFailed = new AtomicLong(); - this.bytesSuccessful = new AtomicLong(); - this.operationsFailed = new AtomicLong(); - this.operationsSuccessful = new AtomicLong(); - } - } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java index 1c6ce17a38c3c..3bb225d4be862 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java @@ -19,12 +19,17 @@ package org.apache.hadoop.fs.azurebfs.services; import 
java.net.HttpURLConnection; +import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; + /** * Throttles Azure Blob File System read and write operations to achieve maximum * throughput by minimizing errors. The errors occur when the account ingress @@ -37,35 +42,101 @@ * and sleeps just enough to minimize errors, allowing optimal ingress and/or * egress throughput. */ -public final class AbfsClientThrottlingIntercept { +public final class AbfsClientThrottlingIntercept implements AbfsThrottlingIntercept { private static final Logger LOG = LoggerFactory.getLogger( AbfsClientThrottlingIntercept.class); private static final String RANGE_PREFIX = "bytes="; - private static AbfsClientThrottlingIntercept singleton = null; - private AbfsClientThrottlingAnalyzer readThrottler = null; - private AbfsClientThrottlingAnalyzer writeThrottler = null; - private static boolean isAutoThrottlingEnabled = false; + private static AbfsClientThrottlingIntercept singleton; // singleton, initialized in static initialization block + private static final ReentrantLock LOCK = new ReentrantLock(); + private final AbfsClientThrottlingAnalyzer readThrottler; + private final AbfsClientThrottlingAnalyzer writeThrottler; + private final String accountName; // Hide default constructor - private AbfsClientThrottlingIntercept() { - readThrottler = new AbfsClientThrottlingAnalyzer("read"); - writeThrottler = new AbfsClientThrottlingAnalyzer("write"); + public AbfsClientThrottlingIntercept(String accountName, AbfsConfiguration abfsConfiguration) { + this.accountName = accountName; + this.readThrottler = setAnalyzer("read " + accountName, abfsConfiguration); + this.writeThrottler = setAnalyzer("write " + accountName, 
abfsConfiguration); + LOG.debug("Client-side throttling is enabled for the ABFS file system for the account : {}", accountName); } - public static synchronized void initializeSingleton(boolean enableAutoThrottling) { - if (!enableAutoThrottling) { - return; - } + // Hide default constructor + private AbfsClientThrottlingIntercept(AbfsConfiguration abfsConfiguration) { + // Account name is left empty because the same instance is shared across all accounts. + this.accountName = ""; + this.readThrottler = setAnalyzer("read", abfsConfiguration); + this.writeThrottler = setAnalyzer("write", abfsConfiguration); + LOG.debug("Client-side throttling is enabled for the ABFS file system using singleton intercept"); + } + + /** + * Creates an analyzer for the intercept. + * @param name Name of the analyzer. + * @param abfsConfiguration The configuration. + * @return AbfsClientThrottlingAnalyzer instance. + */ + private AbfsClientThrottlingAnalyzer setAnalyzer(String name, AbfsConfiguration abfsConfiguration) { + return new AbfsClientThrottlingAnalyzer(name, abfsConfiguration); + } + + /** + * Returns the analyzer for read operations. + * @return AbfsClientThrottlingAnalyzer for read. + */ + AbfsClientThrottlingAnalyzer getReadThrottler() { + return readThrottler; + } + + /** + * Returns the analyzer for write operations. + * @return AbfsClientThrottlingAnalyzer for write. + */ + AbfsClientThrottlingAnalyzer getWriteThrottler() { + return writeThrottler; + } + + /** + * Creates a singleton object of the AbfsClientThrottlingIntercept + * which is shared across all filesystem instances. + * @param abfsConfiguration configuration set. + * @return singleton object of intercept.
+ */ + static AbfsClientThrottlingIntercept initializeSingleton(AbfsConfiguration abfsConfiguration) { if (singleton == null) { - singleton = new AbfsClientThrottlingIntercept(); - isAutoThrottlingEnabled = true; - LOG.debug("Client-side throttling is enabled for the ABFS file system."); + LOCK.lock(); + try { + if (singleton == null) { + singleton = new AbfsClientThrottlingIntercept(abfsConfiguration); + LOG.debug("Client-side throttling is enabled for the ABFS file system."); + } + } finally { + LOCK.unlock(); + } } + return singleton; } - static void updateMetrics(AbfsRestOperationType operationType, - AbfsHttpOperation abfsHttpOperation) { - if (!isAutoThrottlingEnabled || abfsHttpOperation == null) { + /** + * Checks whether the response code signifies throttling while there are + * still some expected bytes to be sent. This method only evaluates the + * condition; it does not update any metrics itself. + * @param isThrottledOperation true if status code is HTTP_UNAVAILABLE + * @param abfsHttpOperation Supplies the expected bytes to be sent. + * @return true if the operation is throttled and has some bytes to transfer. + */ + private boolean updateBytesTransferred(boolean isThrottledOperation, + AbfsHttpOperation abfsHttpOperation) { + return isThrottledOperation && abfsHttpOperation.getExpectedBytesToBeSent() > 0; + } + + /** + * Updates the metrics for successful and failed read and write operations. + * @param operationType Only applicable for read and write operations. + * @param abfsHttpOperation Used for status code and data transferred. + */ + @Override + public void updateMetrics(AbfsRestOperationType operationType, + AbfsHttpOperation abfsHttpOperation) { + if (abfsHttpOperation == null) { return; } @@ -77,11 +148,24 @@ static void updateMetrics(AbfsRestOperationType operationType, boolean isFailedOperation = (status < HttpURLConnection.HTTP_OK || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); + // If status code is 503, it is considered as a throttled operation.
+ boolean isThrottledOperation = (status == HTTP_UNAVAILABLE); + switch (operationType) { case Append: contentLength = abfsHttpOperation.getBytesSent(); + if (contentLength == 0) { + /* + Signifies the case where we could not update the bytesSent due to + throttling but there were some expectedBytesToBeSent. + */ + if (updateBytesTransferred(isThrottledOperation, abfsHttpOperation)) { + LOG.debug("Updating metrics due to throttling for path {}", abfsHttpOperation.getConnUrl().getPath()); + contentLength = abfsHttpOperation.getExpectedBytesToBeSent(); + } + } if (contentLength > 0) { - singleton.writeThrottler.addBytesTransferred(contentLength, + writeThrottler.addBytesTransferred(contentLength, isFailedOperation); } break; @@ -89,7 +173,7 @@ static void updateMetrics(AbfsRestOperationType operationType, String range = abfsHttpOperation.getConnection().getRequestProperty(HttpHeaderConfigurations.RANGE); contentLength = getContentLengthIfKnown(range); if (contentLength > 0) { - singleton.readThrottler.addBytesTransferred(contentLength, + readThrottler.addBytesTransferred(contentLength, isFailedOperation); } break; @@ -103,17 +187,21 @@ static void updateMetrics(AbfsRestOperationType operationType, * uses this to suspend the request, if necessary, to minimize errors and * maximize throughput. 
*/ - static void sendingRequest(AbfsRestOperationType operationType) { - if (!isAutoThrottlingEnabled) { - return; - } - + @Override + public void sendingRequest(AbfsRestOperationType operationType, + AbfsCounters abfsCounters) { switch (operationType) { case ReadFile: - singleton.readThrottler.suspendIfNecessary(); + if (readThrottler.suspendIfNecessary() + && abfsCounters != null) { + abfsCounters.incrementCounter(AbfsStatistic.READ_THROTTLES, 1); + } break; case Append: - singleton.writeThrottler.suspendIfNecessary(); + if (writeThrottler.suspendIfNecessary() + && abfsCounters != null) { + abfsCounters.incrementCounter(AbfsStatistic.WRITE_THROTTLES, 1); + } break; default: break; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java new file mode 100644 index 0000000000000..2dac63b166adc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.Map; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.azurebfs.AbfsStatistic; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +/** + * An interface for Abfs counters. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface AbfsCounters extends IOStatisticsSource, DurationTrackerFactory { + + /** + * Increment an AbfsStatistic by a long value. + * + * @param statistic AbfsStatistic to be incremented. + * @param value the value to increment the statistic by. + */ + void incrementCounter(AbfsStatistic statistic, long value); + + /** + * Form a String of all the statistics, presented in an organized manner. + * + * @param prefix the prefix to be set. + * @param separator the separator between the statistic name and value. + * @param suffix the suffix to be used. + * @param all whether all the statistics should be displayed. + * @return String of all the statistics and their values. + */ + String formString(String prefix, String separator, String suffix, + boolean all); + + /** + * Convert all the statistics into a key-value pair map to be used for + * testing. + * + * @return map with statistic name as key and statistic value as the map + * value. + */ + @VisibleForTesting + Map toMap(); + + /** + * Start a DurationTracker for a request. + * + * @param key Name of the DurationTracker statistic. + * @return an instance of DurationTracker.
+ */ + @Override + DurationTracker trackDuration(String key); +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java new file mode 100644 index 0000000000000..e15795efee68d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; + +/** + * ABFS error constants. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class AbfsErrors { + public static final String ERR_WRITE_WITHOUT_LEASE = "Attempted to write to file without lease"; + public static final String ERR_LEASE_EXPIRED = "A lease ID was specified, but the lease for the" + + " resource has expired"; + public static final String ERR_NO_LEASE_ID_SPECIFIED = "There is currently a lease on the " + + "resource and no lease ID was specified in the request"; + public static final String ERR_PARALLEL_ACCESS_DETECTED = "Parallel access to the create path " + + "detected. Failing request to honor single writer semantics"; + public static final String ERR_ACQUIRING_LEASE = "Unable to acquire lease"; + public static final String ERR_LEASE_ALREADY_PRESENT = "There is already a lease present"; + public static final String ERR_LEASE_NOT_PRESENT = "There is currently no lease on the resource"; + public static final String ERR_LEASE_ID_NOT_PRESENT = "The lease ID is not present with the " + + "specified lease operation"; + public static final String ERR_LEASE_DID_NOT_MATCH = "The lease ID specified did not match the " + + "lease ID for the resource with the specified lease operation"; + public static final String ERR_LEASE_BROKEN = "The lease ID matched, but the lease has been " + + "broken explicitly and cannot be renewed"; + public static final String ERR_LEASE_FUTURE_EXISTS = "There is already an existing lease " + + "operation"; + public static final String ERR_NO_LEASE_THREADS = "Lease desired but no lease threads " + + "configured, set " + FS_AZURE_LEASE_THREADS; + + private AbfsErrors() {} +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 881d41f65f27d..a47720ab6972c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java 
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -21,29 +21,29 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; -import java.net.URLEncoder; import java.util.List; -import java.util.UUID; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLSocketFactory; -import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; -import org.codehaus.jackson.map.ObjectMapper; - +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.services.AbfsPerfLoggable; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; /** * Represents an HTTP operation. 
@@ -61,18 +61,21 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { private final String method; private final URL url; + private String maskedUrl; + private String maskedEncodedUrl; private HttpURLConnection connection; private int statusCode; private String statusDescription; private String storageErrorCode = ""; private String storageErrorMessage = ""; - private String clientRequestId = ""; private String requestId = ""; + private String expectedAppendPos = ""; private ListResultSchema listResultSchema = null; // metrics private int bytesSent; + private int expectedBytesToBeSent; private long bytesReceived; // optional trace enabled metrics @@ -80,6 +83,31 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { private long connectionTimeMs; private long sendRequestTimeMs; private long recvResponseTimeMs; + private boolean shouldMask = false; + + public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult( + final URL url, + final String method, + final int httpStatus) { + AbfsHttpOperationWithFixedResult httpOp + = new AbfsHttpOperationWithFixedResult(url, method, httpStatus); + return httpOp; + } + + /** + * Constructor for FixedResult instance, avoiding connection init. 
+ * @param url request url + * @param method Http method + * @param httpStatus HttpStatus + */ + protected AbfsHttpOperation(final URL url, + final String method, + final int httpStatus) { + this.isTraceEnabled = LOG.isTraceEnabled(); + this.url = url; + this.method = method; + this.statusCode = httpStatus; + } protected HttpURLConnection getConnection() { return connection; @@ -89,8 +117,8 @@ public String getMethod() { return method; } - public URL getUrl() { - return url; + public String getHost() { + return url.getHost(); } public int getStatusCode() { @@ -110,17 +138,30 @@ public String getStorageErrorMessage() { } public String getClientRequestId() { - return clientRequestId; + return this.connection + .getRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID); + } + + public String getExpectedAppendPos() { + return expectedAppendPos; } public String getRequestId() { return requestId; } + public void setMaskForSAS() { + shouldMask = true; + } + public int getBytesSent() { return bytesSent; } + public int getExpectedBytesToBeSent() { + return expectedBytesToBeSent; + } + public long getBytesReceived() { return bytesReceived; } @@ -136,13 +177,14 @@ public String getResponseHeader(String httpHeader) { // Returns a trace message for the request @Override public String toString() { - final String urlStr = url.toString(); final StringBuilder sb = new StringBuilder(); sb.append(statusCode); sb.append(","); sb.append(storageErrorCode); + sb.append(","); + sb.append(expectedAppendPos); sb.append(",cid="); - sb.append(clientRequestId); + sb.append(getClientRequestId()); sb.append(",rid="); sb.append(requestId); if (isTraceEnabled) { @@ -160,19 +202,12 @@ public String toString() { sb.append(","); sb.append(method); sb.append(","); - sb.append(urlStr); + sb.append(getMaskedUrl()); return sb.toString(); } // Returns a trace message for the ABFS API logging service to consume public String getLogString() { - String urlStr = null; - - try { - urlStr = 
URLEncoder.encode(url.toString(), "UTF-8"); - } catch(UnsupportedEncodingException e) { - urlStr = "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl"; - } final StringBuilder sb = new StringBuilder(); sb.append("s=") @@ -180,7 +215,7 @@ public String getLogString() { .append(" e=") .append(storageErrorCode) .append(" ci=") - .append(clientRequestId) + .append(getClientRequestId()) .append(" ri=") .append(requestId); @@ -200,11 +235,30 @@ public String getLogString() { .append(" m=") .append(method) .append(" u=") - .append(urlStr); + .append(getMaskedEncodedUrl()); return sb.toString(); } + public String getMaskedUrl() { + if (!shouldMask) { + return url.toString(); + } + if (maskedUrl != null) { + return maskedUrl; + } + maskedUrl = UriUtils.getMaskedUrl(url); + return maskedUrl; + } + + public String getMaskedEncodedUrl() { + if (maskedEncodedUrl != null) { + return maskedEncodedUrl; + } + maskedEncodedUrl = UriUtils.encodedUrlStr(getMaskedUrl()); + return maskedEncodedUrl; + } + /** * Initializes a new HTTP request and opens the connection. * @@ -219,7 +273,6 @@ public AbfsHttpOperation(final URL url, final String method, final List= 0) ? 
readAheadQueueDepth : Runtime.getRuntime().availableProcessors(); - this.tolerateOobAppends = tolerateOobAppends; + this.bufferSize = abfsInputStreamContext.getReadBufferSize(); + this.readAheadQueueDepth = abfsInputStreamContext.getReadAheadQueueDepth(); + this.tolerateOobAppends = abfsInputStreamContext.isTolerateOobAppends(); this.eTag = eTag; - this.readAheadEnabled = true; + this.readAheadRange = abfsInputStreamContext.getReadAheadRange(); + this.readAheadEnabled = abfsInputStreamContext.isReadAheadEnabled(); + this.alwaysReadBufferSize + = abfsInputStreamContext.shouldReadBufferSizeAlways(); + this.bufferedPreadDisabled = abfsInputStreamContext + .isBufferedPreadDisabled(); + this.cachedSasToken = new CachedSASToken( + abfsInputStreamContext.getSasTokenRenewPeriodForStreamsInSeconds()); + this.streamStatistics = abfsInputStreamContext.getStreamStatistics(); + this.inputStreamId = createInputStreamId(); + this.tracingContext = new TracingContext(tracingContext); + this.tracingContext.setOperation(FSOperationType.READ); + this.tracingContext.setStreamID(inputStreamId); + this.context = abfsInputStreamContext; + readAheadBlockSize = abfsInputStreamContext.getReadAheadBlockSize(); + + // Propagate the config values to ReadBufferManager so that the first instance + // to initialize can set the readAheadBlockSize + ReadBufferManager.setReadBufferManagerConfigs(readAheadBlockSize); + if (streamStatistics != null) { + ioStatistics = streamStatistics.getIOStatistics(); + } } public String getPath() { return path; } + private String createInputStreamId() { + return StringUtils.right(UUID.randomUUID().toString(), STREAM_ID_LEN); + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + // When bufferedPreadDisabled = true, this API does not use any shared buffer, + // cursor position etc. So this is implemented as NOT synchronized. 
HBase + // kind of random reads on a shared file input stream will greatly get + // benefited by such implementation. + // Strict close check at the begin of the API only not for the entire flow. + synchronized (this) { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + } + LOG.debug("pread requested offset = {} len = {} bufferedPreadDisabled = {}", + offset, length, bufferedPreadDisabled); + if (!bufferedPreadDisabled) { + return super.read(position, buffer, offset, length); + } + validatePositionedReadArgs(position, buffer, offset, length); + if (length == 0) { + return 0; + } + if (streamStatistics != null) { + streamStatistics.readOperationStarted(); + } + int bytesRead = readRemote(position, buffer, offset, length, tracingContext); + if (statistics != null) { + statistics.incrementBytesRead(bytesRead); + } + if (streamStatistics != null) { + streamStatistics.bytesRead(bytesRead); + } + return bytesRead; + } + @Override public int read() throws IOException { byte[] b = new byte[1]; @@ -97,12 +217,52 @@ public int read() throws IOException { @Override public synchronized int read(final byte[] b, final int off, final int len) throws IOException { + // check if buffer is null before logging the length + if (b != null) { + LOG.debug("read requested b.length = {} offset = {} len = {}", b.length, + off, len); + } else { + LOG.debug("read requested b = null offset = {} len = {}", off, len); + } + int currentOff = off; int currentLen = len; int lastReadBytes; int totalReadBytes = 0; + if (streamStatistics != null) { + streamStatistics.readOperationStarted(); + } + incrementReadOps(); do { - lastReadBytes = readOneBlock(b, currentOff, currentLen); + + // limit is the maximum amount of data present in buffer. + // fCursor is the current file pointer. Thus maximum we can + // go back and read from buffer is fCursor - limit. + // There maybe case that we read less than requested data. 
+ long filePosAtStartOfBuffer = fCursor - limit; + if (nextReadPos >= filePosAtStartOfBuffer && nextReadPos <= fCursor) { + // Determining position in buffer from where data is to be read. + bCursor = (int) (nextReadPos - filePosAtStartOfBuffer); + + // When bCursor == limit, buffer will be filled again. + // So in this case we are not actually reading from buffer. + if (bCursor != limit && streamStatistics != null) { + streamStatistics.seekInBuffer(); + } + } else { + // Clearing the buffer and setting the file pointer + // based on previous seek() call. + fCursor = nextReadPos; + limit = 0; + bCursor = 0; + } + if (shouldReadFully()) { + lastReadBytes = readFileCompletely(b, currentOff, currentLen); + } else if (shouldReadLastBlock()) { + lastReadBytes = readLastBlock(b, currentOff, currentLen); + } else { + lastReadBytes = readOneBlock(b, currentOff, currentLen); + } if (lastReadBytes > 0) { currentOff += lastReadBytes; currentLen -= lastReadBytes; @@ -115,25 +275,24 @@ public synchronized int read(final byte[] b, final int off, final int len) throw return totalReadBytes > 0 ? 
totalReadBytes : lastReadBytes; } - private int readOneBlock(final byte[] b, final int off, final int len) throws IOException { - if (closed) { - throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); - } + private boolean shouldReadFully() { + return this.firstRead && this.context.readSmallFilesCompletely() + && this.contentLength <= this.bufferSize; + } - Preconditions.checkNotNull(b); + private boolean shouldReadLastBlock() { + long footerStart = max(0, this.contentLength - FOOTER_SIZE); + return this.firstRead && this.context.optimizeFooterRead() + && this.fCursor >= footerStart; + } + private int readOneBlock(final byte[] b, final int off, final int len) throws IOException { if (len == 0) { return 0; } - - if (this.available() == 0) { + if (!validate(b, off, len)) { return -1; } - - if (off < 0 || len < 0 || len > b.length - off) { - throw new IndexOutOfBoundsException(); - } - //If buffer is empty, then fill the buffer. if (bCursor == limit) { //If EOF, then return -1 @@ -146,14 +305,26 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO bCursor = 0; limit = 0; if (buffer == null) { + LOG.debug("created new buffer size {}", bufferSize); buffer = new byte[bufferSize]; } - // Enable readAhead when reading sequentially - if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) { + if (alwaysReadBufferSize) { bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false); } else { - bytesRead = readInternal(fCursor, buffer, 0, b.length, true); + // Enable readAhead when reading sequentially + if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) { + LOG.debug("Sequential read with read ahead size of {}", bufferSize); + bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false); + } else { + // Enabling read ahead for random reads as well to reduce number of remote calls. 
+ int lengthWithReadAhead = Math.min(b.length + readAheadRange, bufferSize); + LOG.debug("Random read with read ahead size of {}", lengthWithReadAhead); + bytesRead = readInternal(fCursor, buffer, 0, lengthWithReadAhead, true); + } + } + if (firstRead) { + firstRead = false; } if (bytesRead == -1) { @@ -164,20 +335,137 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO fCursor += bytesRead; fCursorAfterLastRead = fCursor; } + return copyToUserBuffer(b, off, len); + } + + private int readFileCompletely(final byte[] b, final int off, final int len) + throws IOException { + if (len == 0) { + return 0; + } + if (!validate(b, off, len)) { + return -1; + } + savePointerState(); + // data need to be copied to user buffer from index bCursor, bCursor has + // to be the current fCursor + bCursor = (int) fCursor; + return optimisedRead(b, off, len, 0, contentLength); + } + + private int readLastBlock(final byte[] b, final int off, final int len) + throws IOException { + if (len == 0) { + return 0; + } + if (!validate(b, off, len)) { + return -1; + } + savePointerState(); + // data need to be copied to user buffer from index bCursor, + // AbfsInputStream buffer is going to contain data from last block start.
In + // that case bCursor will be set to fCursor - lastBlockStart + long lastBlockStart = max(0, contentLength - bufferSize); + bCursor = (int) (fCursor - lastBlockStart); + // 0 if contentlength is < buffersize + long actualLenToRead = min(bufferSize, contentLength); + return optimisedRead(b, off, len, lastBlockStart, actualLenToRead); + } + + private int optimisedRead(final byte[] b, final int off, final int len, + final long readFrom, final long actualLen) throws IOException { + fCursor = readFrom; + int totalBytesRead = 0; + int lastBytesRead = 0; + try { + buffer = new byte[bufferSize]; + for (int i = 0; + i < MAX_OPTIMIZED_READ_ATTEMPTS && fCursor < contentLength; i++) { + lastBytesRead = readInternal(fCursor, buffer, limit, + (int) actualLen - limit, true); + if (lastBytesRead > 0) { + totalBytesRead += lastBytesRead; + limit += lastBytesRead; + fCursor += lastBytesRead; + fCursorAfterLastRead = fCursor; + } + } + } catch (IOException e) { + LOG.debug("Optimized read failed. Defaulting to readOneBlock {}", e); + restorePointerState(); + return readOneBlock(b, off, len); + } finally { + firstRead = false; + } + if (totalBytesRead < 1) { + restorePointerState(); + return -1; + } + // If the read was partial and the user requested part of data has + // not read then fallback to readoneblock. When limit is smaller than + // bCursor that means the user requested data has not been read. 
+ if (fCursor < contentLength && bCursor > limit) { + restorePointerState(); + return readOneBlock(b, off, len); + } + return copyToUserBuffer(b, off, len); + } + + private void savePointerState() { + // Saving the current state for fall back in case optimization fails + this.limitBkp = this.limit; + this.fCursorBkp = this.fCursor; + this.fCursorAfterLastReadBkp = this.fCursorAfterLastRead; + this.bCursorBkp = this.bCursor; + } + + private void restorePointerState() { + // Restoring the saved state as a fall back in case optimization fails + this.limit = this.limitBkp; + this.fCursor = this.fCursorBkp; + this.fCursorAfterLastRead = this.fCursorAfterLastReadBkp; + this.bCursor = this.bCursorBkp; + } + + private boolean validate(final byte[] b, final int off, final int len) + throws IOException { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + + Preconditions.checkNotNull(b); + LOG.debug("read one block requested b.length = {} off {} len {}", b.length, + off, len); + + if (this.available() == 0) { + return false; + } + if (off < 0 || len < 0 || len > b.length - off) { + throw new IndexOutOfBoundsException(); + } + return true; + } + + private int copyToUserBuffer(byte[] b, int off, int len){ //If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer) //(bytes returned may be less than requested) int bytesRemaining = limit - bCursor; - int bytesToRead = Math.min(len, bytesRemaining); + int bytesToRead = min(len, bytesRemaining); System.arraycopy(buffer, bCursor, b, off, bytesToRead); bCursor += bytesToRead; + nextReadPos += bytesToRead; if (statistics != null) { statistics.incrementBytesRead(bytesToRead); } + if (streamStatistics != null) { + // Bytes read from the local buffer.
+ streamStatistics.bytesReadFromBuffer(bytesToRead); + streamStatistics.bytesRead(bytesToRead); + } return bytesToRead; } - private int readInternal(final long position, final byte[] b, final int offset, final int length, final boolean bypassReadAhead) throws IOException { if (readAheadEnabled && !bypassReadAhead) { @@ -189,30 +477,46 @@ private int readInternal(final long position, final byte[] b, final int offset, // queue read-aheads int numReadAheads = this.readAheadQueueDepth; - long nextSize; long nextOffset = position; + // First read to queue needs to be of readBufferSize and later + // of readAhead Block size + long nextSize = min((long) bufferSize, contentLength - nextOffset); + LOG.debug("read ahead enabled issuing readheads num = {}", numReadAheads); + TracingContext readAheadTracingContext = new TracingContext(tracingContext); + readAheadTracingContext.setPrimaryRequestID(); while (numReadAheads > 0 && nextOffset < contentLength) { - nextSize = Math.min((long) bufferSize, contentLength - nextOffset); - ReadBufferManager.getBufferManager().queueReadAhead(this, nextOffset, (int) nextSize); + LOG.debug("issuing read ahead requestedOffset = {} requested size {}", + nextOffset, nextSize); + ReadBufferManager.getBufferManager().queueReadAhead(this, nextOffset, (int) nextSize, + new TracingContext(readAheadTracingContext)); nextOffset = nextOffset + nextSize; numReadAheads--; + // From next round onwards should be of readahead block size. 
+ nextSize = min((long) readAheadBlockSize, contentLength - nextOffset); } // try reading from buffers first receivedBytes = ReadBufferManager.getBufferManager().getBlock(this, position, length, b); + bytesFromReadAhead += receivedBytes; if (receivedBytes > 0) { + incrementReadOps(); + LOG.debug("Received data from read ahead, not doing remote read"); + if (streamStatistics != null) { + streamStatistics.readAheadBytesRead(receivedBytes); + } return receivedBytes; } // got nothing from read-ahead, do our own read now - receivedBytes = readRemote(position, b, offset, length); + receivedBytes = readRemote(position, b, offset, length, new TracingContext(tracingContext)); return receivedBytes; } else { - return readRemote(position, b, offset, length); + LOG.debug("read ahead disabled, reading remote"); + return readRemote(position, b, offset, length, new TracingContext(tracingContext)); } } - int readRemote(long position, byte[] b, int offset, int length) throws IOException { + int readRemote(long position, byte[] b, int offset, int length, TracingContext tracingContext) throws IOException { if (position < 0) { throw new IllegalArgumentException("attempting to read from negative offset"); } @@ -234,8 +538,17 @@ int readRemote(long position, byte[] b, int offset, int length) throws IOExcepti final AbfsRestOperation op; AbfsPerfTracker tracker = client.getAbfsPerfTracker(); try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, "readRemote", "read")) { - op = client.read(path, position, b, offset, length, tolerateOobAppends ? "*" : eTag); + if (streamStatistics != null) { + streamStatistics.remoteReadOperation(); + } + LOG.trace("Trigger client.read for path={} position={} offset={} length={}", path, position, offset, length); + op = client.read(path, position, b, offset, length, + tolerateOobAppends ? 
"*" : eTag, cachedSasToken.get(), tracingContext); + cachedSasToken.update(op.getSasToken()); + LOG.debug("issuing HTTP GET request params position = {} b.length = {} " + + "offset = {} length = {}", position, b.length, offset, length); perfInfo.registerResult(op.getResult()).registerSuccess(true); + incrementReadOps(); } catch (AzureBlobFileSystemException ex) { if (ex instanceof AbfsRestOperationException) { AbfsRestOperationException ere = (AbfsRestOperationException) ex; @@ -246,12 +559,26 @@ int readRemote(long position, byte[] b, int offset, int length) throws IOExcepti throw new IOException(ex); } long bytesRead = op.getResult().getBytesReceived(); + if (streamStatistics != null) { + streamStatistics.remoteBytesRead(bytesRead); + } if (bytesRead > Integer.MAX_VALUE) { throw new IOException("Unexpected Content-Length"); } + LOG.debug("HTTP request read bytes = {}", bytesRead); + bytesFromRemoteRead += bytesRead; return (int) bytesRead; } + /** + * Increment Read Operations. + */ + private void incrementReadOps() { + if (statistics != null) { + statistics.incrementReadOps(1); + } + } + /** * Seek to given position in stream. 
* @param n position to seek to @@ -260,6 +587,7 @@ int readRemote(long position, byte[] b, int offset, int length) throws IOExcepti */ @Override public synchronized void seek(long n) throws IOException { + LOG.debug("requested seek to position {}", n); if (closed) { throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); } @@ -270,17 +598,13 @@ public synchronized void seek(long n) throws IOException { throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); } - if (n>=fCursor-limit && n<=fCursor) { // within buffer - bCursor = (int) (n-(fCursor-limit)); - return; + if (streamStatistics != null) { + streamStatistics.seek(n, fCursor); } // next read will read from here - fCursor = n; - - //invalidate buffer - limit = 0; - bCursor = 0; + nextReadPos = n; + LOG.debug("set nextReadPos to {}", nextReadPos); } @Override @@ -351,7 +675,11 @@ public synchronized long getPos() throws IOException { if (closed) { throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); } - return fCursor - limit + bCursor; + return nextReadPos < 0 ? 
0 : nextReadPos; + } + + public TracingContext getTracingContext() { + return tracingContext; } /** @@ -366,8 +694,10 @@ public boolean seekToNewSource(long l) throws IOException { @Override public synchronized void close() throws IOException { + LOG.debug("Closing {}", this); closed = true; buffer = null; // de-reference the buffer so it can be GC'ed sooner + ReadBufferManager.getBufferManager().purgeBuffersForStream(this); } /** @@ -415,4 +745,116 @@ public boolean hasCapability(String capability) { byte[] getBuffer() { return buffer; } + + @VisibleForTesting + public boolean isReadAheadEnabled() { + return readAheadEnabled; + } + + @VisibleForTesting + public int getReadAheadRange() { + return readAheadRange; + } + + @VisibleForTesting + protected void setCachedSasToken(final CachedSASToken cachedSasToken) { + this.cachedSasToken = cachedSasToken; + } + + @VisibleForTesting + public String getStreamID() { + return inputStreamId; + } + + /** + * Getter for AbfsInputStreamStatistics. + * + * @return an instance of AbfsInputStreamStatistics. + */ + @VisibleForTesting + public AbfsInputStreamStatistics getStreamStatistics() { + return streamStatistics; + } + + @VisibleForTesting + public void registerListener(Listener listener1) { + listener = listener1; + tracingContext.setListener(listener); + } + + /** + * Getter for bytes read from readAhead buffer that fills asynchronously. + * + * @return value of the counter in long. + */ + @VisibleForTesting + public long getBytesFromReadAhead() { + return bytesFromReadAhead; + } + + /** + * Getter for bytes read remotely from the data store. + * + * @return value of the counter in long. 
+ */ + @VisibleForTesting + public long getBytesFromRemoteRead() { + return bytesFromRemoteRead; + } + + @VisibleForTesting + public int getBufferSize() { + return bufferSize; + } + + @VisibleForTesting + public int getReadAheadQueueDepth() { + return readAheadQueueDepth; + } + + @VisibleForTesting + public boolean shouldAlwaysReadBufferSize() { + return alwaysReadBufferSize; + } + + @Override + public IOStatistics getIOStatistics() { + return ioStatistics; + } + + /** + * Get the statistics of the stream. + * @return a string value. + */ + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(super.toString()); + sb.append("AbfsInputStream@(").append(this.hashCode()).append("){"); + sb.append("[" + CAPABILITY_SAFE_READAHEAD + "]"); + if (streamStatistics != null) { + sb.append(", ").append(streamStatistics); + } + sb.append("}"); + return sb.toString(); + } + + @VisibleForTesting + int getBCursor() { + return this.bCursor; + } + + @VisibleForTesting + long getFCursor() { + return this.fCursor; + } + + @VisibleForTesting + long getFCursorAfterLastRead() { + return this.fCursorAfterLastRead; + } + + @VisibleForTesting + long getLimit() { + return this.limit; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java new file mode 100644 index 0000000000000..05afc7b9858da --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+
+/**
+ * Class to hold extra input stream configs.
+ *
+ * Values are supplied through chained setter calls, each of which returns
+ * this same instance, and are checked by {@link #build()}.
+ */
+public class AbfsInputStreamContext extends AbfsStreamContext {
+  // Retaining logger of AbfsInputStream
+  private static final Logger LOG = LoggerFactory.getLogger(AbfsInputStream.class);
+
+  private int readBufferSize;
+
+  private int readAheadQueueDepth;
+
+  private boolean tolerateOobAppends;
+
+  // The only option whose initial value is not the Java default.
+  private boolean isReadAheadEnabled = true;
+
+  private boolean alwaysReadBufferSize;
+
+  private int readAheadBlockSize;
+
+  private int readAheadRange;
+
+  private AbfsInputStreamStatistics streamStatistics;
+
+  private boolean readSmallFilesCompletely;
+
+  private boolean optimizeFooterRead;
+
+  private boolean bufferedPreadDisabled;
+
+  public AbfsInputStreamContext(final long sasTokenRenewPeriodForStreamsInSeconds) {
+    super(sasTokenRenewPeriodForStreamsInSeconds);
+  }
+
+  public AbfsInputStreamContext withReadBufferSize(final int readBufferSize) {
+    this.readBufferSize = readBufferSize;
+    return this;
+  }
+
+  /**
+   * Set the read-ahead queue depth.
+   *
+   * @param readAheadQueueDepth requested depth; a negative value is replaced
+   *                            by the number of available processors.
+   * @return this instance.
+   */
+  public AbfsInputStreamContext withReadAheadQueueDepth(
+      final int readAheadQueueDepth) {
+    this.readAheadQueueDepth = (readAheadQueueDepth >= 0)
+        ? readAheadQueueDepth
+        : Runtime.getRuntime().availableProcessors();
+    return this;
+  }
+
+  public AbfsInputStreamContext withTolerateOobAppends(
+      final boolean tolerateOobAppends) {
+    this.tolerateOobAppends = tolerateOobAppends;
+    return this;
+  }
+
+  /**
+   * Setter for the read-ahead flag. Despite the {@code is} prefix this is a
+   * chained setter; the no-argument {@link #isReadAheadEnabled()} is the getter.
+   *
+   * @param isReadAheadEnabled new value of the flag.
+   * @return this instance.
+   */
+  public AbfsInputStreamContext isReadAheadEnabled(
+      final boolean isReadAheadEnabled) {
+    this.isReadAheadEnabled = isReadAheadEnabled;
+    return this;
+  }
+
+  public AbfsInputStreamContext withReadAheadRange(
+      final int readAheadRange) {
+    this.readAheadRange = readAheadRange;
+    return this;
+  }
+
+  public AbfsInputStreamContext withStreamStatistics(
+      final AbfsInputStreamStatistics streamStatistics) {
+    this.streamStatistics = streamStatistics;
+    return this;
+  }
+
+  public AbfsInputStreamContext withReadSmallFilesCompletely(
+      final boolean readSmallFilesCompletely) {
+    this.readSmallFilesCompletely = readSmallFilesCompletely;
+    return this;
+  }
+
+  public AbfsInputStreamContext withOptimizeFooterRead(
+      final boolean optimizeFooterRead) {
+    this.optimizeFooterRead = optimizeFooterRead;
+    return this;
+  }
+
+  public AbfsInputStreamContext withShouldReadBufferSizeAlways(
+      final boolean alwaysReadBufferSize) {
+    this.alwaysReadBufferSize = alwaysReadBufferSize;
+    return this;
+  }
+
+  public AbfsInputStreamContext withReadAheadBlockSize(
+      final int readAheadBlockSize) {
+    this.readAheadBlockSize = readAheadBlockSize;
+    return this;
+  }
+
+  public AbfsInputStreamContext withBufferedPreadDisabled(
+      final boolean bufferedPreadDisabled) {
+    this.bufferedPreadDisabled = bufferedPreadDisabled;
+    return this;
+  }
+
+  /**
+   * Finalise the context: raise the read-ahead block size to the read buffer
+   * size when the buffer is the larger of the two, then validate the
+   * read-ahead range.
+   *
+   * @return this instance.
+   * @throws IllegalArgumentException from {@code Preconditions.checkArgument}
+   *                                  when the read-ahead range is not positive.
+   */
+  public AbfsInputStreamContext build() {
+    if (readBufferSize > readAheadBlockSize) {
+      LOG.debug(
+          "fs.azure.read.request.size[={}] is configured for higher size than "
+              + "fs.azure.read.readahead.blocksize[={}]. Auto-align "
+              + "readAhead block size to be same as readRequestSize.",
+          readBufferSize, readAheadBlockSize);
+      readAheadBlockSize = readBufferSize;
+    }
+    // Validation of parameters to be done here.
+    Preconditions.checkArgument(readAheadRange > 0,
+        "Read ahead range should be greater than 0");
+    return this;
+  }
+
+  public int getReadBufferSize() {
+    return readBufferSize;
+  }
+
+  public int getReadAheadQueueDepth() {
+    return readAheadQueueDepth;
+  }
+
+  public boolean isTolerateOobAppends() {
+    return tolerateOobAppends;
+  }
+
+  public boolean isReadAheadEnabled() {
+    return isReadAheadEnabled;
+  }
+
+  public int getReadAheadRange() {
+    return readAheadRange;
+  }
+
+  public AbfsInputStreamStatistics getStreamStatistics() {
+    return streamStatistics;
+  }
+
+  public boolean readSmallFilesCompletely() {
+    return this.readSmallFilesCompletely;
+  }
+
+  public boolean optimizeFooterRead() {
+    return this.optimizeFooterRead;
+  }
+
+  public boolean shouldReadBufferSizeAlways() {
+    return alwaysReadBufferSize;
+  }
+
+  public int getReadAheadBlockSize() {
+    return readAheadBlockSize;
+  }
+
+  public boolean isBufferedPreadDisabled() {
+    return bufferedPreadDisabled;
+  }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamStatistics.java
new file mode 100644
index 0000000000000..00663467fe233
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamStatistics.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsSource;
+
+/**
+ * Interface for statistics for the AbfsInputStream.
+ */
+@InterfaceStability.Unstable
+public interface AbfsInputStreamStatistics extends IOStatisticsSource {
+  /**
+   * Seek backwards, incrementing the seek and backward seek counters.
+   *
+   * NOTE(review): AbfsInputStreamStatisticsImpl#seek calls this with the
+   * positive distance {@code currentPos - seekTo}; confirm which sign
+   * implementations are really expected to receive.
+   *
+   * @param negativeOffset how far was the seek?
+   *                       This is expected to be negative.
+   */
+  void seekBackwards(long negativeOffset);
+
+  /**
+   * Record a forward seek, adding a seek operation, a forward
+   * seek operation, and any bytes skipped.
+   *
+   * @param skipped number of bytes skipped by reading from the stream.
+   *                If the seek was implemented by a close + reopen, set this to zero.
+   */
+  void seekForwards(long skipped);
+
+  /**
+   * Record a forward or backward seek, adding a seek operation, a forward or
+   * a backward seek operation, and number of bytes skipped.
+   *
+   * @param seekTo     seek to the position.
+   * @param currentPos current position.
+   */
+  void seek(long seekTo, long currentPos);
+
+  /**
+   * Increment the bytes read counter by the number of bytes;
+   * no-op if the argument is negative.
+   *
+   * @param bytes number of bytes read.
+   */
+  void bytesRead(long bytes);
+
+  /**
+   * Record the total bytes read from buffer.
+   *
+   * @param bytes number of bytes that are read from buffer.
+   */
+  void bytesReadFromBuffer(long bytes);
+
+  /**
+   * Records the total number of seeks done in the buffer.
+   */
+  void seekInBuffer();
+
+  /**
+   * A {@code read(byte[] buf, int off, int len)} operation has started.
+   */
+  void readOperationStarted();
+
+  /**
+   * Records a successful remote read operation.
+   */
+  void remoteReadOperation();
+
+  /**
+   * Records the bytes read from readAhead buffer.
+   * @param bytes the bytes to be incremented.
+   */
+  void readAheadBytesRead(long bytes);
+
+  /**
+   * Records bytes read remotely after nothing from readAheadBuffer was read.
+   * @param bytes the bytes to be incremented.
+   */
+  void remoteBytesRead(long bytes);
+
+  /**
+   * Get the IOStatisticsStore instance from AbfsInputStreamStatistics.
+   * @return instance of IOStatisticsStore which extends IOStatistics.
+   */
+  IOStatistics getIOStatistics();
+
+  /**
+   * Makes the string of all the AbfsInputStream statistics.
+   * @return the string with all the statistics.
+   */
+  @Override
+  String toString();
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamStatisticsImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamStatisticsImpl.java
new file mode 100644
index 0000000000000..bd09762976d7f
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamStatisticsImpl.java
@@ -0,0 +1,277 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StreamStatisticNames; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_GET_REQUEST; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MEAN; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; + +/** + * Stats for the AbfsInputStream. 
+ */
+public class AbfsInputStreamStatisticsImpl
+    implements AbfsInputStreamStatistics {
+
+  private final IOStatisticsStore ioStatisticsStore = iostatisticsStore()
+      .withCounters(
+          StreamStatisticNames.STREAM_READ_SEEK_OPERATIONS,
+          StreamStatisticNames.STREAM_READ_SEEK_FORWARD_OPERATIONS,
+          StreamStatisticNames.STREAM_READ_SEEK_BACKWARD_OPERATIONS,
+          StreamStatisticNames.STREAM_READ_BYTES,
+          StreamStatisticNames.STREAM_READ_SEEK_BYTES_SKIPPED,
+          StreamStatisticNames.STREAM_READ_OPERATIONS,
+          StreamStatisticNames.STREAM_READ_SEEK_BYTES_BACKWARDS,
+          StreamStatisticNames.SEEK_IN_BUFFER,
+          StreamStatisticNames.BYTES_READ_BUFFER,
+          StreamStatisticNames.REMOTE_READ_OP,
+          StreamStatisticNames.READ_AHEAD_BYTES_READ,
+          StreamStatisticNames.REMOTE_BYTES_READ
+      )
+      .withDurationTracking(ACTION_HTTP_GET_REQUEST)
+      .build();
+
+  /* Reference to the atomic counter for frequently updated counters to avoid
+   * cost of the map lookup on every increment.
+   */
+  private final AtomicLong bytesRead =
+      ioStatisticsStore.getCounterReference(StreamStatisticNames.STREAM_READ_BYTES);
+  private final AtomicLong readOps =
+      ioStatisticsStore.getCounterReference(StreamStatisticNames.STREAM_READ_OPERATIONS);
+  private final AtomicLong seekOps =
+      ioStatisticsStore.getCounterReference(StreamStatisticNames.STREAM_READ_SEEK_OPERATIONS);
+
+  /**
+   * Seek backwards, incrementing the seek and backward seek counters.
+   *
+   * The distance is recorded as a magnitude, so both a negative offset (as
+   * the contract describes) and the positive distance passed by
+   * {@link #seek(long, long)} are counted.
+   *
+   * @param negativeOffset how far was the seek?
+   *                       This is expected to be negative.
+   */
+  @Override
+  public void seekBackwards(long negativeOffset) {
+    seekOps.incrementAndGet();
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.STREAM_READ_SEEK_BACKWARD_OPERATIONS);
+    // seek() hands over currentPos - seekTo (positive) while the documented
+    // contract is a negative offset; Math.abs accepts either convention.
+    // NOTE(review): the store presumably ignores negative increments - confirm.
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.STREAM_READ_SEEK_BYTES_BACKWARDS,
+        Math.abs(negativeOffset));
+  }
+
+  /**
+   * Record a forward seek, adding a seek operation, a forward
+   * seek operation, and any bytes skipped.
+   *
+   * @param skipped number of bytes skipped by reading from the stream.
+   *                If the seek was implemented by a close + reopen, set this to zero.
+   */
+  @Override
+  public void seekForwards(long skipped) {
+    seekOps.incrementAndGet();
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.STREAM_READ_SEEK_FORWARD_OPERATIONS);
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.STREAM_READ_SEEK_BYTES_SKIPPED, skipped);
+  }
+
+  /**
+   * Record a forward or backward seek, adding a seek operation, a forward or
+   * a backward seek operation, and number of bytes skipped.
+   * The seek direction will be calculated based on the parameters.
+   *
+   * @param seekTo     seek to the position.
+   * @param currentPos current position.
+   */
+  @Override
+  public void seek(long seekTo, long currentPos) {
+    if (seekTo >= currentPos) {
+      this.seekForwards(seekTo - currentPos);
+    } else {
+      this.seekBackwards(currentPos - seekTo);
+    }
+  }
+
+  /**
+   * Increment the bytes read counter by the number of bytes;
+   * no-op if the argument is negative.
+   *
+   * @param bytes number of bytes read.
+   */
+  @Override
+  public void bytesRead(long bytes) {
+    // Enforce the documented no-op: without the guard a negative value
+    // would be subtracted from the running total.
+    if (bytes > 0) {
+      bytesRead.addAndGet(bytes);
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * Total bytes read from the buffer.
+   *
+   * @param bytes number of bytes that are read from buffer.
+   */
+  @Override
+  public void bytesReadFromBuffer(long bytes) {
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.BYTES_READ_BUFFER, bytes);
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * Increment the number of seeks in the buffer.
+   */
+  @Override
+  public void seekInBuffer() {
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.SEEK_IN_BUFFER);
+  }
+
+  /**
+   * A {@code read(byte[] buf, int off, int len)} operation has started.
+   */
+  @Override
+  public void readOperationStarted() {
+    readOps.incrementAndGet();
+  }
+
+  /**
+   * Total bytes read from readAhead buffer during a read operation.
+   *
+   * @param bytes the bytes to be incremented.
+   */
+  @Override
+  public void readAheadBytesRead(long bytes) {
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.READ_AHEAD_BYTES_READ, bytes);
+  }
+
+  /**
+   * Total bytes read remotely after nothing was read from readAhead buffer.
+   *
+   * @param bytes the bytes to be incremented.
+   */
+  @Override
+  public void remoteBytesRead(long bytes) {
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.REMOTE_BYTES_READ, bytes);
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * Increment the counter when a remote read operation occurs.
+   */
+  @Override
+  public void remoteReadOperation() {
+    ioStatisticsStore.incrementCounter(StreamStatisticNames.REMOTE_READ_OP);
+  }
+
+  /**
+   * Getter for IOStatistics instance used.
+   * @return IOStatisticsStore instance which extends IOStatistics.
+   */
+  @Override
+  public IOStatistics getIOStatistics() {
+    return ioStatisticsStore;
+  }
+
+  @VisibleForTesting
+  public long getSeekOperations() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_SEEK_OPERATIONS);
+  }
+
+  @VisibleForTesting
+  public long getForwardSeekOperations() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_SEEK_FORWARD_OPERATIONS);
+  }
+
+  @VisibleForTesting
+  public long getBackwardSeekOperations() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_SEEK_BACKWARD_OPERATIONS);
+  }
+
+  @VisibleForTesting
+  public long getBytesRead() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_BYTES);
+  }
+
+  @VisibleForTesting
+  public long getBytesSkippedOnSeek() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_SEEK_BYTES_SKIPPED);
+  }
+
+  @VisibleForTesting
+  public long getBytesBackwardsOnSeek() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_SEEK_BYTES_BACKWARDS);
+  }
+
+  @VisibleForTesting
+  public long getSeekInBuffer() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.SEEK_IN_BUFFER);
+
+  }
+
+  @VisibleForTesting
+  public long getReadOperations() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.STREAM_READ_OPERATIONS);
+  }
+
+  @VisibleForTesting
+  public long getBytesReadFromBuffer() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.BYTES_READ_BUFFER);
+  }
+
+  @VisibleForTesting
+  public long getRemoteReadOperations() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.REMOTE_READ_OP);
+  }
+
+  @VisibleForTesting
+  public long getReadAheadBytesRead() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.READ_AHEAD_BYTES_READ);
+  }
+
+  @VisibleForTesting
+  public long getRemoteBytesRead() {
+    return ioStatisticsStore.counters().get(StreamStatisticNames.REMOTE_BYTES_READ);
+  }
+
+  /**
+   * Getter for the mean value of the time taken to complete a HTTP GET
+   * request by AbfsInputStream.
+   * @return mean value.
+   */
+  @VisibleForTesting
+  public double getActionHttpGetRequest() {
+    return ioStatisticsStore.meanStatistics().
+        get(ACTION_HTTP_GET_REQUEST + SUFFIX_MEAN).mean();
+  }
+
+  /**
+   * String operator describes all the current statistics.
+   * Important: there are no guarantees as to the stability
+   * of this value.
+   *
+   * @return the current values of the stream statistics.
+   */
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder(
+        "StreamStatistics{");
+    sb.append(ioStatisticsStore.toString());
+    sb.append('}');
+    return sb.toString();
+  }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsIoUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsIoUtils.java
index be2dcc54ed8b7..44fa2d8d8bd56 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsIoUtils.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsIoUtils.java
@@ -58,6 +58,9 @@ public static void dumpHeadersToDebugLog(final String origin,
       if (key.contains("Cookie")) {
         values = "*cookie info*";
       }
+      if (key.equals("sig")) {
+        values = "XXXX";
+      }
       LOG.debug(" {}={}", key, values);
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsLease.java
new file mode 100644
index 0000000000000..2e97598ef04f3
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsLease.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableScheduledFuture; +import org.apache.hadoop.thirdparty.org.checkerframework.checker.nullness.qual.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.INFINITE_LEASE_DURATION; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_ACQUIRING_LEASE; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_LEASE_FUTURE_EXISTS; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_NO_LEASE_THREADS; + +/** + * AbfsLease manages an Azure blob lease. It acquires an infinite lease on instantiation and + * releases the lease when free() is called. Use it to prevent writes to the blob by other + * processes that don't have the lease. + * + * Creating a new Lease object blocks the caller until the Azure blob lease is acquired. It will + * retry a fixed number of times before failing if there is a problem acquiring the lease. + * + * Call free() to release the Lease. 
If the holder process dies, AzureBlobFileSystem breakLease
+ * will need to be called before another client will be able to write to the file.
+ */
+public final class AbfsLease {
+  private static final Logger LOG = LoggerFactory.getLogger(AbfsLease.class);
+
+  // Number of retries for acquiring lease
+  static final int DEFAULT_LEASE_ACQUIRE_MAX_RETRIES = 7;
+  // Retry interval for acquiring lease in secs
+  static final int DEFAULT_LEASE_ACQUIRE_RETRY_INTERVAL = 10;
+
+  private final AbfsClient client;
+  private final String path;
+  private final TracingContext tracingContext;
+
+  // Lease status variables
+  private volatile boolean leaseFreed;
+  private volatile String leaseID = null;
+  private volatile Throwable exception = null;
+  private volatile int acquireRetryCount = 0;
+  // Typed so that the callback registered in acquireLease() receives an
+  // AbfsRestOperation rather than a bare Object.
+  private volatile ListenableScheduledFuture<AbfsRestOperation> future = null;
+
+  /**
+   * Exception raised when the lease cannot be set up or acquired.
+   */
+  public static class LeaseException extends AzureBlobFileSystemException {
+    public LeaseException(Throwable t) {
+      super(ERR_ACQUIRING_LEASE + ": " + t, t);
+    }
+
+    public LeaseException(String s) {
+      super(s);
+    }
+  }
+
+  /**
+   * Acquire a lease on the path using the default retry count and interval.
+   *
+   * @param client         client used to schedule and issue the lease calls.
+   * @param path           path to lease.
+   * @param tracingContext tracing context copied for the lease operations.
+   * @throws AzureBlobFileSystemException if the lease could not be acquired.
+   */
+  public AbfsLease(AbfsClient client, String path, TracingContext tracingContext) throws AzureBlobFileSystemException {
+    this(client, path, DEFAULT_LEASE_ACQUIRE_MAX_RETRIES,
+        DEFAULT_LEASE_ACQUIRE_RETRY_INTERVAL, tracingContext);
+  }
+
+  /**
+   * Acquire a lease on the path, blocking until either a lease ID or a
+   * terminal failure has been recorded by the acquire callback.
+   *
+   * @param client               client used to schedule and issue the lease calls.
+   * @param path                 path to lease.
+   * @param acquireMaxRetries    maximum number of acquire retries.
+   * @param acquireRetryInterval delay between retries, in seconds.
+   * @param tracingContext       tracing context copied for the lease operations.
+   * @throws AzureBlobFileSystemException if the client has no lease threads
+   *                                      or the lease could not be acquired.
+   */
+  @VisibleForTesting
+  public AbfsLease(AbfsClient client, String path, int acquireMaxRetries,
+      int acquireRetryInterval, TracingContext tracingContext) throws AzureBlobFileSystemException {
+    this.leaseFreed = false;
+    this.client = client;
+    this.path = path;
+    this.tracingContext = tracingContext;
+
+    if (client.getNumLeaseThreads() < 1) {
+      throw new LeaseException(ERR_NO_LEASE_THREADS);
+    }
+
+    // Try to get the lease a specified number of times, else throw an error
+    RetryPolicy retryPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
+        acquireMaxRetries, acquireRetryInterval, TimeUnit.SECONDS);
+    acquireLease(retryPolicy, 0, acquireRetryInterval, 0,
+        new TracingContext(tracingContext));
+
+    // The callback replaces 'future' on every retry, so keep waiting on
+    // whichever future is current until an outcome has been published.
+    // NOTE(review): an InterruptedException is only logged here and the
+    // interrupt status is not restored - confirm this is intentional.
+    while (leaseID == null && exception == null) {
+      try {
+        future.get();
+      } catch (Exception e) {
+        LOG.debug("Got exception waiting for acquire lease future. Checking if lease ID or "
+            + "exception have been set", e);
+      }
+    }
+    if (exception != null) {
+      LOG.error("Failed to acquire lease on {}", path);
+      throw new LeaseException(exception);
+    }
+
+    LOG.debug("Acquired lease {} on {}", leaseID, path);
+  }
+
+  /**
+   * Schedule one acquire attempt and register a callback that records the
+   * lease ID on success, or re-schedules / records the failure otherwise.
+   *
+   * @param retryPolicy    policy deciding whether a failed attempt is retried.
+   * @param numRetries     number of retries made so far.
+   * @param retryInterval  delay, in seconds, used for subsequent retries.
+   * @param delay          delay, in seconds, before this attempt runs.
+   * @param tracingContext tracing context passed to the acquire call.
+   * @throws LeaseException if a previous attempt is still pending.
+   */
+  private void acquireLease(RetryPolicy retryPolicy, int numRetries,
+      int retryInterval, long delay, TracingContext tracingContext)
+      throws LeaseException {
+    LOG.debug("Attempting to acquire lease on {}, retry {}", path, numRetries);
+    if (future != null && !future.isDone()) {
+      throw new LeaseException(ERR_LEASE_FUTURE_EXISTS);
+    }
+    future = client.schedule(() -> client.acquireLease(path,
+        INFINITE_LEASE_DURATION, tracingContext),
+        delay, TimeUnit.SECONDS);
+    client.addCallback(future, new FutureCallback<AbfsRestOperation>() {
+      @Override
+      public void onSuccess(@Nullable AbfsRestOperation op) {
+        leaseID = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID);
+        LOG.debug("Acquired lease {} on {}", leaseID, path);
+      }
+
+      @Override
+      public void onFailure(Throwable throwable) {
+        try {
+          if (RetryPolicy.RetryAction.RetryDecision.RETRY
+              == retryPolicy.shouldRetry(null, numRetries, 0, true).action) {
+            LOG.debug("Failed to acquire lease on {}, retrying: {}", path, throwable);
+            acquireRetryCount++;
+            acquireLease(retryPolicy, numRetries + 1, retryInterval,
+                retryInterval, tracingContext);
+          } else {
+            exception = throwable;
+          }
+        } catch (Exception e) {
+          // Scheduling the retry failed; surface the original acquire failure.
+          exception = throwable;
+        }
+      }
+    });
+  }
+
+  /**
+   * Cancel future and free the lease. If an exception occurs while releasing the lease, the error
+   * will be logged. If the lease cannot be released, AzureBlobFileSystem breakLease will need to
+   * be called before another client will be able to write to the file.
+   */
+  public void free() {
+    if (leaseFreed) {
+      return;
+    }
+    try {
+      LOG.debug("Freeing lease: path {}, lease id {}", path, leaseID);
+      if (future != null && !future.isDone()) {
+        future.cancel(true);
+      }
+      TracingContext tracingContext = new TracingContext(this.tracingContext);
+      tracingContext.setOperation(FSOperationType.RELEASE_LEASE);
+      client.releaseLease(path, leaseID, tracingContext);
+    } catch (IOException e) {
+      LOG.warn("Exception when trying to release lease {} on {}. Lease will need to be broken: {}",
+          leaseID, path, e.getMessage());
+    } finally {
+      // Even if releasing the lease fails (e.g. because the file was deleted),
+      // make sure to record that we freed the lease
+      leaseFreed = true;
+      LOG.debug("Freed lease {} on {}", leaseID, path);
+    }
+  }
+
+  public boolean isFreed() {
+    return leaseFreed;
+  }
+
+  public String getLeaseID() {
+    return leaseID;
+  }
+
+  @VisibleForTesting
+  public int getAcquireRetryCount() {
+    return acquireRetryCount;
+  }
+
+  @VisibleForTesting
+  public TracingContext getTracingContext() {
+    return tracingContext;
+  }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsListResult.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsListResult.java
new file mode 100644
index 0000000000000..0b63a342936a8
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsListResult.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; + +import org.apache.hadoop.fs.FileStatus; + +/** + * Class to store listStatus results for AbfsListStatusRemoteIterator. The + * results can either be of type Iterator or an exception thrown during the + * operation + */ +public class AbfsListResult { + private IOException listException = null; + + private Iterator fileStatusIterator + = Collections.emptyIterator(); + + AbfsListResult(IOException ex) { + this.listException = ex; + } + + AbfsListResult(Iterator fileStatusIterator) { + this.fileStatusIterator = fileStatusIterator; + } + + IOException getListingException() { + return listException; + } + + Iterator getFileStatusIterator() { + return fileStatusIterator; + } + + boolean isFailedListing() { + return (listException != null); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsListStatusRemoteIterator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsListStatusRemoteIterator.java new file mode 100644 index 0000000000000..3fecb1f059115 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsListStatusRemoteIterator.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +public class AbfsListStatusRemoteIterator + implements RemoteIterator { + + private static final Logger LOG = LoggerFactory + .getLogger(AbfsListStatusRemoteIterator.class); + + private static final boolean FETCH_ALL_FALSE = false; + private static final int MAX_QUEUE_SIZE = 10; + private static final long POLL_WAIT_TIME_IN_MS = 250; + + private final Path path; + private final ListingSupport listingSupport; + private final ArrayBlockingQueue listResultQueue; + private final TracingContext tracingContext; + + private volatile boolean isAsyncInProgress = false; + private boolean isIterationComplete = false; + private String continuation; + private Iterator currIterator; + + public AbfsListStatusRemoteIterator(final Path path, + final ListingSupport listingSupport, TracingContext tracingContext) + throws IOException { + this.path = path; + this.listingSupport = listingSupport; + 
this.tracingContext = tracingContext; + listResultQueue = new ArrayBlockingQueue<>(MAX_QUEUE_SIZE); + currIterator = Collections.emptyIterator(); + addNextBatchIteratorToQueue(); + fetchBatchesAsync(); + } + + @Override + public boolean hasNext() throws IOException { + if (currIterator.hasNext()) { + return true; + } + currIterator = getNextIterator(); + return currIterator.hasNext(); + } + + @Override + public FileStatus next() throws IOException { + if (!this.hasNext()) { + throw new NoSuchElementException(); + } + return currIterator.next(); + } + + private Iterator getNextIterator() throws IOException { + fetchBatchesAsync(); + try { + AbfsListResult listResult = null; + while (listResult == null + && (!isIterationComplete || !listResultQueue.isEmpty())) { + listResult = listResultQueue.poll(POLL_WAIT_TIME_IN_MS, TimeUnit.MILLISECONDS); + } + if (listResult == null) { + return Collections.emptyIterator(); + } else if (listResult.isFailedListing()) { + throw listResult.getListingException(); + } else { + return listResult.getFileStatusIterator(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("Thread got interrupted: {}", e); + throw new IOException(e); + } + } + + private void fetchBatchesAsync() { + if (isAsyncInProgress || isIterationComplete) { + return; + } + synchronized (this) { + if (isAsyncInProgress || isIterationComplete) { + return; + } + isAsyncInProgress = true; + } + CompletableFuture.runAsync(() -> asyncOp()); + } + + private void asyncOp() { + try { + while (!isIterationComplete && listResultQueue.size() <= MAX_QUEUE_SIZE) { + addNextBatchIteratorToQueue(); + } + } catch (IOException ioe) { + LOG.error("Fetching filestatuses failed", ioe); + try { + listResultQueue.put(new AbfsListResult(ioe)); + } catch (InterruptedException interruptedException) { + Thread.currentThread().interrupt(); + LOG.error("Thread got interrupted: {}", interruptedException); + } + } finally { + synchronized (this) { + 
isAsyncInProgress = false; + } + } + } + + private synchronized void addNextBatchIteratorToQueue() + throws IOException { + List fileStatuses = new ArrayList<>(); + try { + try { + continuation = listingSupport.listStatus(path, null, fileStatuses, + FETCH_ALL_FALSE, continuation, tracingContext); + } catch (AbfsRestOperationException ex) { + AzureBlobFileSystem.checkException(path, ex); + } + if (!fileStatuses.isEmpty()) { + listResultQueue.put(new AbfsListResult(fileStatuses.iterator())); + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + LOG.error("Thread interrupted", ie); + } + if (continuation == null || continuation.isEmpty()) { + isIterationComplete = true; + } + } + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsLocatedFileStatus.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsLocatedFileStatus.java new file mode 100644 index 0000000000000..29da2c504355a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsLocatedFileStatus.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.EtagSource;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+
+import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull;
+
+/**
+ * {@link LocatedFileStatus} extended to also carry an ETag.
+ */
+public class AbfsLocatedFileStatus extends LocatedFileStatus implements EtagSource {
+
+  private static final long serialVersionUID = -8185960773314341594L;
+
+  /**
+   * etag; may be null.
+   */
+  private final String etag;
+
+  /**
+   * Wrap a file status together with its block locations.
+   *
+   * @param status    source status; must not be null. Its etag is copied
+   *                  when it implements {@link EtagSource}, otherwise the
+   *                  etag is left null.
+   * @param locations block locations passed to the superclass.
+   */
+  public AbfsLocatedFileStatus(FileStatus status, BlockLocation[] locations) {
+    super(checkNotNull(status), locations);
+    if (status instanceof EtagSource) {
+      this.etag = ((EtagSource) status).getEtag();
+    } else {
+      this.etag = null;
+    }
+  }
+
+  @Override
+  public String getEtag() {
+    return etag;
+  }
+
+  @Override
+  public String toString() {
+    return "AbfsLocatedFileStatus{"
+        + "etag='" + etag + '\'' + "} "
+        + super.toString();
+  }
+  // equals() and hashCode() overridden to avoid FindBugs warning.
+  // Base implementation is equality on Path only, which is still appropriate.
+
+  @Override
+  public boolean equals(Object o) {
+    return super.equals(o);
+  }
+
+  @Override
+  public int hashCode() {
+    return super.hashCode();
+  }
+
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java
new file mode 100644
index 0000000000000..6b84e583c337a
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+/** An {@link AbfsThrottlingIntercept} whose two callbacks have empty bodies. */
+final class AbfsNoOpThrottlingIntercept implements AbfsThrottlingIntercept {
+  /** The only instance this class creates: the constructor is private. */
+  public static final AbfsNoOpThrottlingIntercept INSTANCE = new AbfsNoOpThrottlingIntercept();
+  /** Private constructor: in this class it is invoked only to build {@link #INSTANCE}. */
+  private AbfsNoOpThrottlingIntercept() {
+  }
+  /** Empty body: neither argument is used. */
+  @Override
+  public void updateMetrics(final AbfsRestOperationType operationType,
+      final AbfsHttpOperation abfsHttpOperation) {
+  }
+  /** Empty body: neither argument is used. */
+  @Override
+  public void sendingRequest(final AbfsRestOperationType operationType,
+      final AbfsCounters abfsCounters) {
+  }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java
new file mode 100644
index 0000000000000..2e53367d39fd2
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Stores Abfs operation metrics during each analysis period.
+ */
+class AbfsOperationMetrics {
+
+  /** Bytes that could not be transferred because the operation failed. */
+  private final AtomicLong bytesFailed = new AtomicLong();
+
+  /** Bytes transferred by operations that succeeded. */
+  private final AtomicLong bytesSuccessful = new AtomicLong();
+
+  /** Total number of failed operations. */
+  private final AtomicLong operationsFailed = new AtomicLong();
+
+  /** Total number of successful operations. */
+  private final AtomicLong operationsSuccessful = new AtomicLong();
+
+  /** Time when the collection of metrics started; fixed at construction. */
+  private final long startTime;
+
+  /** Time when the collection of metrics ended; see {@link #setEndTime(long)}. */
+  private long endTime;
+
+  /**
+   * Creates a holder whose four counters all start at zero.
+   * @param startTime time when the collection of metrics started.
+   */
+  AbfsOperationMetrics(long startTime) {
+    this.startTime = startTime;
+  }
+
+  /**
+   * Gives access to the counter object itself rather than a copy of its value.
+   * @return bytes failed to transfer.
+   */
+  AtomicLong getBytesFailed() {
+    return bytesFailed;
+  }
+
+  /**
+   * Gives access to the counter object itself rather than a copy of its value.
+   * @return bytes successfully transferred.
+   */
+  AtomicLong getBytesSuccessful() {
+    return bytesSuccessful;
+  }
+
+  /**
+   * Gives access to the counter object itself rather than a copy of its value.
+   * @return no of operations failed.
+   */
+  AtomicLong getOperationsFailed() {
+    return operationsFailed;
+  }
+
+  /**
+   * Gives access to the counter object itself rather than a copy of its value.
+   * @return no of successful operations.
+   */
+  AtomicLong getOperationsSuccessful() {
+    return operationsSuccessful;
+  }
+
+  /**
+   * Reads back whatever was last stored through {@link #setEndTime(long)}.
+   * @return end time of metric collection.
+   */
+  long getEndTime() {
+    return endTime;
+  }
+
+  /**
+   * Records when the collection of these metrics ended.
+   * @param endTime sets the end time.
+   */
+  void setEndTime(final long endTime) {
+    this.endTime = endTime;
+  }
+
+  /**
+   * Reads the value that was supplied to the constructor.
+   * @return start time of metric collection.
+   */
+  long getStartTime() {
+    return startTime;
+  }
+
+  /**
+   * Adds {@code bytes} to the failed-bytes counter.
+   */
+  void addBytesFailed(long bytes) {
+    getBytesFailed().addAndGet(bytes);
+  }
+
+  /**
+   * Adds {@code bytes} to the successful-bytes counter.
+   */
+  void addBytesSuccessful(long bytes) {
+    getBytesSuccessful().addAndGet(bytes);
+  }
+
+  /**
+   * Adds one to the failed-operations counter.
+   */
+  void incrementOperationsFailed() {
+    getOperationsFailed().incrementAndGet();
+  }
+
+  /**
+   * Adds one to the successful-operations counter.
+   */
+  void incrementOperationsSuccessful() {
+    getOperationsSuccessful().incrementAndGet();
+  }
+
+}
+
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java index 7e9746d118ce8..d989a94afaa9e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java @@ -20,41 +20,67 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InterruptedIOException; import java.io.OutputStream; import java.net.HttpURLConnection; -import java.nio.ByteBuffer; -import java.util.Locale; import java.util.concurrent.ConcurrentLinkedDeque; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.Callable; import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - +import java.util.UUID; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory;
+ +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -import org.apache.hadoop.io.ElasticByteBufferPool; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.CachedSASToken; +import org.apache.hadoop.fs.azurebfs.utils.Listener; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.store.DataBlocks; +import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.STREAM_ID_LEN; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_WRITE_WITHOUT_LEASE; +import static org.apache.hadoop.fs.impl.StoreImplementationUtils.isProbeForSyncable; import static org.apache.hadoop.io.IOUtils.wrapException; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters.Mode.APPEND_MODE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters.Mode.FLUSH_CLOSE_MODE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters.Mode.FLUSH_MODE; +import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkState; /** * The BlobFsOutputStream for Rest AbfsClient. 
*/ -public class AbfsOutputStream extends OutputStream implements Syncable, StreamCapabilities { +public class AbfsOutputStream extends OutputStream implements Syncable, + StreamCapabilities, IOStatisticsSource { + private final AbfsClient client; private final String path; + /** The position in the file being uploaded, where the next block would be + * uploaded. + * This is used in constructing the AbfsClient requests to ensure that, + * even if blocks are uploaded out of order, they are reassembled in + * correct order. + * */ private long position; private boolean closed; private boolean supportFlush; private boolean disableOutputStreamFlush; + private boolean enableSmallWriteOptimization; + private boolean isAppendBlob; + private boolean isExpectHeaderEnabled; private volatile IOException lastError; private long lastFlushOffset; @@ -63,50 +89,94 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCa private final int bufferSize; private byte[] buffer; private int bufferIndex; + private int numOfAppendsToServerSinceLastFlush; private final int maxConcurrentRequestCount; + private final int maxRequestsThatCanBeQueued; private ConcurrentLinkedDeque writeOperations; - private final ThreadPoolExecutor threadExecutor; - private final ExecutorCompletionService completionService; - /** - * Queue storing buffers with the size of the Azure block ready for - * reuse. The pool allows reusing the blocks instead of allocating new - * blocks. 
After the data is sent to the service, the buffer is returned - * back to the queue - */ - private final ElasticByteBufferPool byteBufferPool - = new ElasticByteBufferPool(); - - public AbfsOutputStream( - final AbfsClient client, - final String path, - final long position, - final int bufferSize, - final boolean supportFlush, - final boolean disableOutputStreamFlush) { - this.client = client; - this.path = path; - this.position = position; + // SAS tokens can be re-used until they expire + private CachedSASToken cachedSasToken; + private final String outputStreamId; + private final TracingContext tracingContext; + private Listener listener; + + private AbfsLease lease; + private String leaseId; + + private final Statistics statistics; + private final AbfsOutputStreamStatistics outputStreamStatistics; + private IOStatistics ioStatistics; + + private static final Logger LOG = + LoggerFactory.getLogger(AbfsOutputStream.class); + + /** Factory for blocks. */ + private final DataBlocks.BlockFactory blockFactory; + + /** Current data block. Null means none currently active. */ + private DataBlocks.DataBlock activeBlock; + + /** Count of blocks uploaded. */ + private long blockCount = 0; + + /** The size of a single block. */ + private final int blockSize; + + /** Executor service to carry out the parallel upload requests. 
*/ + private final ListeningExecutorService executorService; + + public AbfsOutputStream(AbfsOutputStreamContext abfsOutputStreamContext) + throws IOException { + this.client = abfsOutputStreamContext.getClient(); + this.statistics = abfsOutputStreamContext.getStatistics(); + this.path = abfsOutputStreamContext.getPath(); + this.position = abfsOutputStreamContext.getPosition(); this.closed = false; - this.supportFlush = supportFlush; - this.disableOutputStreamFlush = disableOutputStreamFlush; + this.supportFlush = abfsOutputStreamContext.isEnableFlush(); + this.isExpectHeaderEnabled = abfsOutputStreamContext.isExpectHeaderEnabled(); + this.disableOutputStreamFlush = abfsOutputStreamContext + .isDisableOutputStreamFlush(); + this.enableSmallWriteOptimization + = abfsOutputStreamContext.isEnableSmallWriteOptimization(); + this.isAppendBlob = abfsOutputStreamContext.isAppendBlob(); this.lastError = null; this.lastFlushOffset = 0; - this.bufferSize = bufferSize; - this.buffer = byteBufferPool.getBuffer(false, bufferSize).array(); + this.bufferSize = abfsOutputStreamContext.getWriteBufferSize(); this.bufferIndex = 0; + this.numOfAppendsToServerSinceLastFlush = 0; this.writeOperations = new ConcurrentLinkedDeque<>(); + this.outputStreamStatistics = abfsOutputStreamContext.getStreamStatistics(); - this.maxConcurrentRequestCount = 4 * Runtime.getRuntime().availableProcessors(); + if (this.isAppendBlob) { + this.maxConcurrentRequestCount = 1; + } else { + this.maxConcurrentRequestCount = abfsOutputStreamContext + .getWriteMaxConcurrentRequestCount(); + } + this.maxRequestsThatCanBeQueued = abfsOutputStreamContext + .getMaxWriteRequestsToQueue(); + + this.lease = abfsOutputStreamContext.getLease(); + this.leaseId = abfsOutputStreamContext.getLeaseId(); + this.executorService = + MoreExecutors.listeningDecorator(abfsOutputStreamContext.getExecutorService()); + this.cachedSasToken = new CachedSASToken( + abfsOutputStreamContext.getSasTokenRenewPeriodForStreamsInSeconds()); + 
this.outputStreamId = createOutputStreamId(); + this.tracingContext = new TracingContext(abfsOutputStreamContext.getTracingContext()); + this.tracingContext.setStreamID(outputStreamId); + this.tracingContext.setOperation(FSOperationType.WRITE); + this.ioStatistics = outputStreamStatistics.getIOStatistics(); + this.blockFactory = abfsOutputStreamContext.getBlockFactory(); + this.blockSize = bufferSize; + // create that first block. This guarantees that an open + close sequence + // writes a 0-byte entry. + createBlockIfNeeded(); + } - this.threadExecutor - = new ThreadPoolExecutor(maxConcurrentRequestCount, - maxConcurrentRequestCount, - 10L, - TimeUnit.SECONDS, - new LinkedBlockingQueue<>()); - this.completionService = new ExecutorCompletionService<>(this.threadExecutor); + private String createOutputStreamId() { + return StringUtils.right(UUID.randomUUID().toString(), STREAM_ID_LEN); } /** @@ -117,13 +187,7 @@ public AbfsOutputStream( */ @Override public boolean hasCapability(String capability) { - switch (capability.toLowerCase(Locale.ENGLISH)) { - case StreamCapabilities.HSYNC: - case StreamCapabilities.HFLUSH: - return supportFlush; - default: - return false; - } + return supportFlush && isProbeForSyncable(capability); } /** @@ -154,32 +218,201 @@ public void write(final int byteVal) throws IOException { @Override public synchronized void write(final byte[] data, final int off, final int length) throws IOException { + // validate if data is not null and index out of bounds. 
+ DataBlocks.validateWriteArgs(data, off, length); maybeThrowLastError(); - Preconditions.checkArgument(data != null, "null data"); - if (off < 0 || length < 0 || length > data.length - off) { throw new IndexOutOfBoundsException(); } - int currentOffset = off; - int writableBytes = bufferSize - bufferIndex; - int numberOfBytesToWrite = length; - - while (numberOfBytesToWrite > 0) { - if (writableBytes <= numberOfBytesToWrite) { - System.arraycopy(data, currentOffset, buffer, bufferIndex, writableBytes); - bufferIndex += writableBytes; - writeCurrentBufferToService(); - currentOffset += writableBytes; - numberOfBytesToWrite = numberOfBytesToWrite - writableBytes; - } else { - System.arraycopy(data, currentOffset, buffer, bufferIndex, numberOfBytesToWrite); - bufferIndex += numberOfBytesToWrite; - numberOfBytesToWrite = 0; + if (hasLease() && isLeaseFreed()) { + throw new PathIOException(path, ERR_WRITE_WITHOUT_LEASE); + } + DataBlocks.DataBlock block = createBlockIfNeeded(); + int written = block.write(data, off, length); + int remainingCapacity = block.remainingCapacity(); + + if (written < length) { + // Number of bytes to write is more than the data block capacity, + // trigger an upload and then write on the next block. + LOG.debug("writing more data than block capacity -triggering upload"); + uploadCurrentBlock(); + // tail recursion is mildly expensive, but given buffer sizes must be MB. + // it's unlikely to recurse very deeply. + this.write(data, off + written, length - written); + } else { + if (remainingCapacity == 0) { + // the whole buffer is done, trigger an upload + uploadCurrentBlock(); } + } + incrementWriteOps(); + } - writableBytes = bufferSize - bufferIndex; + /** + * Demand create a destination block. + * + * @return the active block; null if there isn't one. 
+ * @throws IOException on any failure to create + */ + private synchronized DataBlocks.DataBlock createBlockIfNeeded() + throws IOException { + if (activeBlock == null) { + blockCount++; + activeBlock = blockFactory + .create(blockCount, this.blockSize, outputStreamStatistics); + } + return activeBlock; + } + + /** + * Start an asynchronous upload of the current block. + * + * @throws IOException Problems opening the destination for upload, + * initializing the upload, or if a previous operation has failed. + */ + private synchronized void uploadCurrentBlock() throws IOException { + checkState(hasActiveBlock(), "No active block"); + LOG.debug("Writing block # {}", blockCount); + try { + uploadBlockAsync(getActiveBlock(), false, false); + } finally { + // set the block to null, so the next write will create a new block. + clearActiveBlock(); + } + } + + /** + * Upload a block of data. + * This will take the block. + * + * @param blockToUpload block to upload. + * @throws IOException upload failure + */ + private void uploadBlockAsync(DataBlocks.DataBlock blockToUpload, + boolean isFlush, boolean isClose) + throws IOException { + if (this.isAppendBlob) { + writeAppendBlobCurrentBufferToService(); + return; + } + if (!blockToUpload.hasData()) { + return; + } + numOfAppendsToServerSinceLastFlush++; + + final int bytesLength = blockToUpload.dataSize(); + final long offset = position; + position += bytesLength; + outputStreamStatistics.bytesToUpload(bytesLength); + outputStreamStatistics.writeCurrentBuffer(); + DataBlocks.BlockUploadData blockUploadData = blockToUpload.startUpload(); + final Future job = + executorService.submit(() -> { + AbfsPerfTracker tracker = + client.getAbfsPerfTracker(); + try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, + "writeCurrentBufferToService", "append")) { + AppendRequestParameters.Mode + mode = APPEND_MODE; + if (isFlush & isClose) { + mode = FLUSH_CLOSE_MODE; + } else if (isFlush) { + mode = FLUSH_MODE; + } + /* + * Parameters 
Required for an APPEND call. + * offset(here) - refers to the position in the file. + * bytesLength - Data to be uploaded from the block. + * mode - If it's append, flush or flush_close. + * leaseId - The AbfsLeaseId for this request. + */ + AppendRequestParameters reqParams = new AppendRequestParameters( + offset, 0, bytesLength, mode, false, leaseId, isExpectHeaderEnabled); + AbfsRestOperation op = + client.append(path, blockUploadData.toByteArray(), reqParams, + cachedSasToken.get(), new TracingContext(tracingContext)); + cachedSasToken.update(op.getSasToken()); + perfInfo.registerResult(op.getResult()); + perfInfo.registerSuccess(true); + outputStreamStatistics.uploadSuccessful(bytesLength); + return null; + } finally { + IOUtils.close(blockUploadData); + } + }); + writeOperations.add(new WriteOperation(job, offset, bytesLength)); + + // Try to shrink the queue + shrinkWriteOperationQueue(); + } + + /** + * A method to set the lastError if an exception is caught. + * @param ex Exception caught. + * @throws IOException Throws the lastError. + */ + private void failureWhileSubmit(Exception ex) throws IOException { + if (ex instanceof AbfsRestOperationException) { + if (((AbfsRestOperationException) ex).getStatusCode() + == HttpURLConnection.HTTP_NOT_FOUND) { + throw new FileNotFoundException(ex.getMessage()); + } + } + if (ex instanceof IOException) { + lastError = (IOException) ex; + } else { + lastError = new IOException(ex); + } + throw lastError; + } + + /** + * Synchronized accessor to the active block. + * + * @return the active block; null if there isn't one. + */ + private synchronized DataBlocks.DataBlock getActiveBlock() { + return activeBlock; + } + + /** + * Predicate to query whether or not there is an active block. + * + * @return true if there is an active block. + */ + private synchronized boolean hasActiveBlock() { + return activeBlock != null; + } + + /** + * Is there an active block and is there any data in it to upload? 
+ * + * @return true if there is some data to upload in an active block else false. + */ + private boolean hasActiveBlockDataToUpload() { + return hasActiveBlock() && getActiveBlock().hasData(); + } + + /** + * Clear the active block. + */ + private void clearActiveBlock() { + if (activeBlock != null) { + LOG.debug("Clearing active block"); + } + synchronized (this) { + activeBlock = null; + } + } + + /** + * Increment Write Operations. + */ + private void incrementWriteOps() { + if (statistics != null) { + statistics.incrementWriteOps(1); } } @@ -231,6 +464,15 @@ public void hflush() throws IOException { } } + public String getStreamID() { + return outputStreamId; + } + + public void registerListener(Listener listener1) { + listener = listener1; + tracingContext.setListener(listener); + } + /** * Force all data in the output stream to be written to Azure storage. * Wait to return until this is complete. Close the access to the stream and @@ -247,7 +489,6 @@ public synchronized void close() throws IOException { try { flushInternal(true); - threadExecutor.shutdown(); } catch (IOException e) { // Problems surface in try-with-resources clauses if // the exception thrown in a close == the one already thrown @@ -255,72 +496,108 @@ public synchronized void close() throws IOException { // See HADOOP-16785 throw wrapException(path, e.getMessage(), e); } finally { + if (hasLease()) { + lease.free(); + lease = null; + } lastError = new IOException(FSExceptionMessages.STREAM_IS_CLOSED); buffer = null; bufferIndex = 0; closed = true; writeOperations.clear(); - if (!threadExecutor.isShutdown()) { - threadExecutor.shutdownNow(); + if (hasActiveBlock()) { + clearActiveBlock(); } } + LOG.debug("Closing AbfsOutputStream : {}", this); } private synchronized void flushInternal(boolean isClose) throws IOException { maybeThrowLastError(); - writeCurrentBufferToService(); + + // if its a flush post write < buffersize, send flush parameter in append + if (!isAppendBlob + && 
enableSmallWriteOptimization + && (numOfAppendsToServerSinceLastFlush == 0) // there are no ongoing store writes + && (writeOperations.size() == 0) // double checking no appends in progress + && hasActiveBlockDataToUpload()) { // there is + // some data that is pending to be written + smallWriteOptimizedflushInternal(isClose); + return; + } + + if (hasActiveBlockDataToUpload()) { + uploadCurrentBlock(); + } flushWrittenBytesToService(isClose); + numOfAppendsToServerSinceLastFlush = 0; + } + + private synchronized void smallWriteOptimizedflushInternal(boolean isClose) throws IOException { + // writeCurrentBufferToService will increment numOfAppendsToServerSinceLastFlush + uploadBlockAsync(getActiveBlock(), true, isClose); + waitForAppendsToComplete(); + shrinkWriteOperationQueue(); + maybeThrowLastError(); + numOfAppendsToServerSinceLastFlush = 0; } private synchronized void flushInternalAsync() throws IOException { maybeThrowLastError(); - writeCurrentBufferToService(); + if (hasActiveBlockDataToUpload()) { + uploadCurrentBlock(); + } + waitForAppendsToComplete(); flushWrittenBytesToServiceAsync(); } - private synchronized void writeCurrentBufferToService() throws IOException { - if (bufferIndex == 0) { + /** + * Appending the current active data block to service. Clearing the active + * data block and releasing all buffered data. + * @throws IOException if there is any failure while starting an upload for + * the dataBlock or while closing the BlockUploadData. + */ + private void writeAppendBlobCurrentBufferToService() throws IOException { + DataBlocks.DataBlock activeBlock = getActiveBlock(); + // No data, return. 
+ if (!hasActiveBlockDataToUpload()) { return; } - final byte[] bytes = buffer; - final int bytesLength = bufferIndex; - buffer = byteBufferPool.getBuffer(false, bufferSize).array(); - bufferIndex = 0; + final int bytesLength = activeBlock.dataSize(); + DataBlocks.BlockUploadData uploadData = activeBlock.startUpload(); + clearActiveBlock(); + outputStreamStatistics.writeCurrentBuffer(); + outputStreamStatistics.bytesToUpload(bytesLength); final long offset = position; position += bytesLength; - - if (threadExecutor.getQueue().size() >= maxConcurrentRequestCount * 2) { - waitForTaskToComplete(); + AbfsPerfTracker tracker = client.getAbfsPerfTracker(); + try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, + "writeCurrentBufferToService", "append")) { + AppendRequestParameters reqParams = new AppendRequestParameters(offset, 0, + bytesLength, APPEND_MODE, true, leaseId, isExpectHeaderEnabled); + AbfsRestOperation op = client.append(path, uploadData.toByteArray(), reqParams, + cachedSasToken.get(), new TracingContext(tracingContext)); + cachedSasToken.update(op.getSasToken()); + outputStreamStatistics.uploadSuccessful(bytesLength); + + perfInfo.registerResult(op.getResult()); + perfInfo.registerSuccess(true); + return; + } catch (Exception ex) { + outputStreamStatistics.uploadFailed(bytesLength); + failureWhileSubmit(ex); + } finally { + IOUtils.close(uploadData); } - - final Future job = completionService.submit(new Callable() { - @Override - public Void call() throws Exception { - AbfsPerfTracker tracker = client.getAbfsPerfTracker(); - try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, - "writeCurrentBufferToService", "append")) { - AbfsRestOperation op = client.append(path, offset, bytes, 0, - bytesLength); - perfInfo.registerResult(op.getResult()); - byteBufferPool.putBuffer(ByteBuffer.wrap(bytes)); - perfInfo.registerSuccess(true); - return null; - } - } - }); - - writeOperations.add(new WriteOperation(job, offset, bytesLength)); - - // Try to shrink the 
queue - shrinkWriteOperationQueue(); } - private synchronized void flushWrittenBytesToService(boolean isClose) throws IOException { + private synchronized void waitForAppendsToComplete() throws IOException { for (WriteOperation writeOperation : writeOperations) { try { writeOperation.task.get(); } catch (Exception ex) { + outputStreamStatistics.uploadFailed(writeOperation.length); if (ex.getCause() instanceof AbfsRestOperationException) { if (((AbfsRestOperationException) ex.getCause()).getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { throw new FileNotFoundException(ex.getMessage()); @@ -334,6 +611,10 @@ private synchronized void flushWrittenBytesToService(boolean isClose) throws IOE throw lastError; } } + } + + private synchronized void flushWrittenBytesToService(boolean isClose) throws IOException { + waitForAppendsToComplete(); flushWrittenBytesToServiceInternal(position, false, isClose); } @@ -348,10 +629,17 @@ private synchronized void flushWrittenBytesToServiceAsync() throws IOException { private synchronized void flushWrittenBytesToServiceInternal(final long offset, final boolean retainUncommitedData, final boolean isClose) throws IOException { + // flush is called for appendblob only on close + if (this.isAppendBlob && !isClose) { + return; + } + AbfsPerfTracker tracker = client.getAbfsPerfTracker(); try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, "flushWrittenBytesToServiceInternal", "flush")) { - AbfsRestOperation op = client.flush(path, offset, retainUncommitedData, isClose); + AbfsRestOperation op = client.flush(path, offset, retainUncommitedData, isClose, + cachedSasToken.get(), leaseId, new TracingContext(tracingContext)); + cachedSasToken.update(op.getSasToken()); perfInfo.registerResult(op.getResult()).registerSuccess(true); } catch (AzureBlobFileSystemException ex) { if (ex instanceof AbfsRestOperationException) { @@ -359,7 +647,8 @@ private synchronized void flushWrittenBytesToServiceInternal(final long offset, throw new 
FileNotFoundException(ex.getMessage()); } } - throw new IOException(ex); + lastError = new IOException(ex); + throw lastError; } this.lastFlushOffset = offset; } @@ -370,10 +659,14 @@ private synchronized void flushWrittenBytesToServiceInternal(final long offset, */ private synchronized void shrinkWriteOperationQueue() throws IOException { try { - while (writeOperations.peek() != null && writeOperations.peek().task.isDone()) { - writeOperations.peek().task.get(); - lastTotalAppendOffset += writeOperations.peek().length; + WriteOperation peek = writeOperations.peek(); + while (peek != null && peek.task.isDone()) { + peek.task.get(); + lastTotalAppendOffset += peek.length; writeOperations.remove(); + peek = writeOperations.peek(); + // Incrementing statistics to indicate queue has been shrunk. + outputStreamStatistics.queueShrunk(); } } catch (Exception e) { if (e.getCause() instanceof AzureBlobFileSystemException) { @@ -385,22 +678,6 @@ private synchronized void shrinkWriteOperationQueue() throws IOException { } } - private void waitForTaskToComplete() throws IOException { - boolean completed; - for (completed = false; completionService.poll() != null; completed = true) { - // keep polling until there is no data - } - - if (!completed) { - try { - completionService.take(); - } catch (InterruptedException e) { - lastError = (IOException) new InterruptedIOException(e.toString()).initCause(e); - throw lastError; - } - } - } - private static class WriteOperation { private final Future task; private final long startOffset; @@ -419,6 +696,74 @@ private static class WriteOperation { @VisibleForTesting public synchronized void waitForPendingUploads() throws IOException { - waitForTaskToComplete(); + waitForAppendsToComplete(); + } + + /** + * Getter method for AbfsOutputStream statistics. + * + * @return statistics for AbfsOutputStream. 
+ */ + @VisibleForTesting + public AbfsOutputStreamStatistics getOutputStreamStatistics() { + return outputStreamStatistics; + } + + /** + * Getter to get the size of the task queue. + * + * @return the number of writeOperations in AbfsOutputStream. + */ + @VisibleForTesting + public int getWriteOperationsSize() { + return writeOperations.size(); + } + + @VisibleForTesting + int getMaxConcurrentRequestCount() { + return this.maxConcurrentRequestCount; + } + + @VisibleForTesting + int getMaxRequestsThatCanBeQueued() { + return maxRequestsThatCanBeQueued; + } + + @VisibleForTesting + Boolean isAppendBlobStream() { + return isAppendBlob; + } + + @Override + public IOStatistics getIOStatistics() { + return ioStatistics; + } + + @VisibleForTesting + public boolean isLeaseFreed() { + if (lease == null) { + return true; + } + return lease.isFreed(); + } + + @VisibleForTesting + public boolean hasLease() { + return lease != null; + } + + /** + * Appending AbfsOutputStream statistics to base toString(). + * + * @return String with AbfsOutputStream statistics. + */ + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(super.toString()); + sb.append("AbfsOutputStream@").append(this.hashCode()); + sb.append("){"); + sb.append(outputStreamStatistics.toString()); + sb.append("}"); + return sb.toString(); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java new file mode 100644 index 0000000000000..ed89733036741 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java @@ -0,0 +1,264 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.concurrent.ExecutorService; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.store.DataBlocks; + +/** + * Class to hold extra output stream configs. 
+ */ +public class AbfsOutputStreamContext extends AbfsStreamContext { + + private int writeBufferSize; + + private boolean enableFlush; + + private boolean enableExpectHeader; + + private boolean enableSmallWriteOptimization; + + private boolean disableOutputStreamFlush; + + private AbfsOutputStreamStatistics streamStatistics; + + private boolean isAppendBlob; + + private int writeMaxConcurrentRequestCount; + + private int maxWriteRequestsToQueue; + + private AbfsLease lease; + + private DataBlocks.BlockFactory blockFactory; + + private int blockOutputActiveBlocks; + + private AbfsClient client; + + private long position; + + private FileSystem.Statistics statistics; + + private String path; + + private ExecutorService executorService; + + private TracingContext tracingContext; + + public AbfsOutputStreamContext(final long sasTokenRenewPeriodForStreamsInSeconds) { + super(sasTokenRenewPeriodForStreamsInSeconds); + } + + public AbfsOutputStreamContext withWriteBufferSize( + final int writeBufferSize) { + this.writeBufferSize = writeBufferSize; + return this; + } + + public AbfsOutputStreamContext enableFlush(final boolean enableFlush) { + this.enableFlush = enableFlush; + return this; + } + + public AbfsOutputStreamContext enableExpectHeader(final boolean enableExpectHeader) { + this.enableExpectHeader = enableExpectHeader; + return this; + } + + public AbfsOutputStreamContext enableSmallWriteOptimization(final boolean enableSmallWriteOptimization) { + this.enableSmallWriteOptimization = enableSmallWriteOptimization; + return this; + } + + public AbfsOutputStreamContext disableOutputStreamFlush( + final boolean disableOutputStreamFlush) { + this.disableOutputStreamFlush = disableOutputStreamFlush; + return this; + } + + public AbfsOutputStreamContext withStreamStatistics( + final AbfsOutputStreamStatistics streamStatistics) { + this.streamStatistics = streamStatistics; + return this; + } + + public AbfsOutputStreamContext withAppendBlob( + final boolean 
isAppendBlob) { + this.isAppendBlob = isAppendBlob; + return this; + } + + public AbfsOutputStreamContext withBlockFactory( + final DataBlocks.BlockFactory blockFactory) { + this.blockFactory = blockFactory; + return this; + } + + public AbfsOutputStreamContext withBlockOutputActiveBlocks( + final int blockOutputActiveBlocks) { + this.blockOutputActiveBlocks = blockOutputActiveBlocks; + return this; + } + + + public AbfsOutputStreamContext withClient( + final AbfsClient client) { + this.client = client; + return this; + } + + public AbfsOutputStreamContext withPosition( + final long position) { + this.position = position; + return this; + } + + public AbfsOutputStreamContext withFsStatistics( + final FileSystem.Statistics statistics) { + this.statistics = statistics; + return this; + } + + public AbfsOutputStreamContext withPath( + final String path) { + this.path = path; + return this; + } + + public AbfsOutputStreamContext withExecutorService( + final ExecutorService executorService) { + this.executorService = executorService; + return this; + } + + public AbfsOutputStreamContext withTracingContext( + final TracingContext tracingContext) { + this.tracingContext = tracingContext; + return this; + } + + public AbfsOutputStreamContext build() { + // Validation of parameters to be done here. 
+ if (streamStatistics == null) { + streamStatistics = new AbfsOutputStreamStatisticsImpl(); + } + return this; + } + + + public AbfsOutputStreamContext withWriteMaxConcurrentRequestCount( + final int writeMaxConcurrentRequestCount) { + this.writeMaxConcurrentRequestCount = writeMaxConcurrentRequestCount; + return this; + } + + public AbfsOutputStreamContext withMaxWriteRequestsToQueue( + final int maxWriteRequestsToQueue) { + this.maxWriteRequestsToQueue = maxWriteRequestsToQueue; + return this; + } + + public AbfsOutputStreamContext withLease(final AbfsLease lease) { + this.lease = lease; + return this; + } + + public int getWriteBufferSize() { + return writeBufferSize; + } + + public boolean isEnableFlush() { + return enableFlush; + } + + public boolean isExpectHeaderEnabled() { + return enableExpectHeader; + } + + public boolean isDisableOutputStreamFlush() { + return disableOutputStreamFlush; + } + + public AbfsOutputStreamStatistics getStreamStatistics() { + return streamStatistics; + } + + public boolean isAppendBlob() { + return isAppendBlob; + } + + public int getWriteMaxConcurrentRequestCount() { + return this.writeMaxConcurrentRequestCount; + } + + public int getMaxWriteRequestsToQueue() { + return this.maxWriteRequestsToQueue; + } + + public boolean isEnableSmallWriteOptimization() { + return this.enableSmallWriteOptimization; + } + + public AbfsLease getLease() { + return this.lease; + } + + public String getLeaseId() { + if (this.lease == null) { + return null; + } + return this.lease.getLeaseID(); + } + + public DataBlocks.BlockFactory getBlockFactory() { + return blockFactory; + } + + public int getBlockOutputActiveBlocks() { + return blockOutputActiveBlocks; + } + + public AbfsClient getClient() { + return client; + } + + public FileSystem.Statistics getStatistics() { + return statistics; + } + + public String getPath() { + return path; + } + + public long getPosition() { + return position; + } + + public ExecutorService getExecutorService() { + 
return executorService; + } + + public TracingContext getTracingContext() { + return tracingContext; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamStatistics.java new file mode 100644 index 0000000000000..a9e088c025b45 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamStatistics.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.store.BlockUploadStatistics; + +/** + * Interface for {@link AbfsOutputStream} statistics. + */ +@InterfaceStability.Unstable +public interface AbfsOutputStreamStatistics extends IOStatisticsSource, + BlockUploadStatistics { + + /** + * Number of bytes to be uploaded. + * + * @param bytes number of bytes to upload. 
+ */ + void bytesToUpload(long bytes); + + /** + * Records a successful upload and the number of bytes uploaded. + * + * @param bytes number of bytes that were successfully uploaded. + */ + void uploadSuccessful(long bytes); + + /** + * Records that upload is failed and the number of bytes. + * + * @param bytes number of bytes that failed to upload. + */ + void uploadFailed(long bytes); + + /** + * Time spent in waiting for tasks to be completed in the blocking queue. + * @return instance of the DurationTracker that tracks the time for waiting. + */ + DurationTracker timeSpentTaskWait(); + + /** + * Number of times task queue is shrunk. + */ + void queueShrunk(); + + /** + * Number of times buffer is written to the service after a write operation. + */ + void writeCurrentBuffer(); + + /** + * Get the IOStatisticsStore instance from AbfsOutputStreamStatistics. + * @return instance of IOStatisticsStore which extends IOStatistics. + */ + IOStatistics getIOStatistics(); + + /** + * Method to form a string of all AbfsOutputStream statistics and their + * values. + * + * @return AbfsOutputStream statistics. + */ + @Override + String toString(); + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamStatisticsImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamStatisticsImpl.java new file mode 100644 index 0000000000000..cb054e2915ddd --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamStatisticsImpl.java @@ -0,0 +1,223 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; +import org.apache.hadoop.fs.statistics.StreamStatisticNames; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; + +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; + +/** + * OutputStream statistics implementation for Abfs. + */ +public class AbfsOutputStreamStatisticsImpl + implements AbfsOutputStreamStatistics { + + private final IOStatisticsStore ioStatisticsStore = iostatisticsStore() + .withCounters( + StreamStatisticNames.BYTES_TO_UPLOAD, + StreamStatisticNames.BYTES_UPLOAD_SUCCESSFUL, + StreamStatisticNames.BYTES_UPLOAD_FAILED, + StreamStatisticNames.QUEUE_SHRUNK_OPS, + StreamStatisticNames.WRITE_CURRENT_BUFFER_OPERATIONS, + StreamStatisticNames.BLOCKS_ALLOCATED, + StreamStatisticNames.BLOCKS_RELEASED + ) + .withDurationTracking( + StreamStatisticNames.TIME_SPENT_ON_PUT_REQUEST, + StreamStatisticNames.TIME_SPENT_ON_TASK_WAIT + ) + .build(); + + /* Reference to the atomic counter for frequently updated counters to avoid + * cost of the map lookup on every increment. 
+ */ + private final AtomicLong bytesUpload = + ioStatisticsStore.getCounterReference(StreamStatisticNames.BYTES_TO_UPLOAD); + private final AtomicLong bytesUploadedSuccessfully = + ioStatisticsStore.getCounterReference(StreamStatisticNames.BYTES_UPLOAD_SUCCESSFUL); + private final AtomicLong writeCurrentBufferOps = + ioStatisticsStore.getCounterReference(StreamStatisticNames.WRITE_CURRENT_BUFFER_OPERATIONS); + + private final AtomicLong blocksAllocated = + ioStatisticsStore.getCounterReference(StreamStatisticNames.BLOCKS_ALLOCATED); + private final AtomicLong blocksReleased = + ioStatisticsStore.getCounterReference(StreamStatisticNames.BLOCKS_RELEASED); + + /** + * Records the need to upload bytes and increments the total bytes that + * needs to be uploaded. + * + * @param bytes total bytes to upload. Negative bytes are ignored. + */ + @Override + public void bytesToUpload(long bytes) { + bytesUpload.addAndGet(bytes); + } + + /** + * Records the total bytes successfully uploaded through AbfsOutputStream. + * + * @param bytes number of bytes that were successfully uploaded. Negative + * bytes are ignored. + */ + @Override + public void uploadSuccessful(long bytes) { + bytesUploadedSuccessfully.addAndGet(bytes); + } + + /** + * Records the total bytes failed to upload through AbfsOutputStream. + * + * @param bytes number of bytes failed to upload. Negative bytes are ignored. + */ + @Override + public void uploadFailed(long bytes) { + ioStatisticsStore.incrementCounter(StreamStatisticNames.BYTES_UPLOAD_FAILED, bytes); + } + + /** + * {@inheritDoc} + * + * Records the total time spent waiting for a task to complete. + * + * When the thread executor has a task queue + * {@link java.util.concurrent.BlockingQueue} of size greater than or + * equal to 2 times the maxConcurrentRequestCounts then, it waits for a + * task in that queue to finish, then do the next task in the queue. 
+ * + * This time spent while waiting for the task to be completed is being + * recorded in this counter. + * + */ + @Override + public DurationTracker timeSpentTaskWait() { + return ioStatisticsStore.trackDuration(StreamStatisticNames.TIME_SPENT_ON_TASK_WAIT); + } + + /** + * {@inheritDoc} + * + * Records the number of times AbfsOutputStream try to remove the completed + * write operations from the beginning of write operation task queue. + */ + @Override + public void queueShrunk() { + ioStatisticsStore.incrementCounter(StreamStatisticNames.QUEUE_SHRUNK_OPS); + } + + /** + * {@inheritDoc} + * + * Records the number of times AbfsOutputStream writes the buffer to the + * service via the AbfsClient and appends the buffer to the service. + */ + @Override + public void writeCurrentBuffer() { + writeCurrentBufferOps.incrementAndGet(); + } + + /** + * Increment the counter to indicate a block has been allocated. + */ + @Override + public void blockAllocated() { + blocksAllocated.incrementAndGet(); + } + + /** + * Increment the counter to indicate a block has been released. + */ + @Override + public void blockReleased() { + blocksReleased.incrementAndGet(); + } + + /** + * {@inheritDoc} + * + * A getter for IOStatisticsStore instance which extends IOStatistics. + * + * @return IOStatisticsStore instance. 
+ */ + @Override + public IOStatistics getIOStatistics() { + return ioStatisticsStore; + } + + @VisibleForTesting + public long getBytesToUpload() { + return ioStatisticsStore.counters().get(StreamStatisticNames.BYTES_TO_UPLOAD); + } + + @VisibleForTesting + public long getBytesUploadSuccessful() { + return ioStatisticsStore.counters().get(StreamStatisticNames.BYTES_UPLOAD_SUCCESSFUL); + } + + @VisibleForTesting + public long getBytesUploadFailed() { + return ioStatisticsStore.counters().get(StreamStatisticNames.BYTES_UPLOAD_FAILED); + } + + @VisibleForTesting + public long getTimeSpentOnTaskWait() { + return ioStatisticsStore.counters().get(StreamStatisticNames.TIME_SPENT_ON_TASK_WAIT); + } + + @VisibleForTesting + public long getQueueShrunkOps() { + return ioStatisticsStore.counters().get(StreamStatisticNames.QUEUE_SHRUNK_OPS); + } + + @VisibleForTesting + public long getWriteCurrentBufferOperations() { + return ioStatisticsStore.counters().get(StreamStatisticNames.WRITE_CURRENT_BUFFER_OPERATIONS); + } + + /** + * Getter for mean value of time taken to complete a PUT request by + * AbfsOutputStream. + * @return mean value. + */ + @VisibleForTesting + public double getTimeSpentOnPutRequest() { + return ioStatisticsStore.meanStatistics().get(StreamStatisticNames.TIME_SPENT_ON_PUT_REQUEST + StoreStatisticNames.SUFFIX_MEAN).mean(); + } + + /** + * String to show AbfsOutputStream statistics values in AbfsOutputStream. + * + * @return String with AbfsOutputStream statistics. 
+ */ + @Override public String toString() { + final StringBuilder outputStreamStats = new StringBuilder( + "OutputStream Statistics{"); + outputStreamStats.append(ioStatisticsStore.toString()); + outputStreamStats.append("}"); + return outputStreamStats.toString(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 445c3665437c7..df58df437c158 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -19,20 +19,27 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; +import java.io.UncheckedIOException; import java.net.HttpURLConnection; import java.net.URL; import java.net.UnknownHostException; import java.util.List; +import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; -import org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator.HttpException; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; /** * The AbfsRestOperation for Rest AbfsClient. 
@@ -42,6 +49,8 @@ public class AbfsRestOperation { private final AbfsRestOperationType operationType; // Blob FS client, which has the credentials, retry policy, and logs. private final AbfsClient client; + // Return intercept instance + private final AbfsThrottlingIntercept intercept; // the HTTP method (PUT, PATCH, POST, GET, HEAD, or DELETE) private final String method; // full URL including query parameters @@ -53,6 +62,9 @@ public class AbfsRestOperation { // request body and all the download methods have a response body. private final boolean hasRequestBody; + // Used only by AbfsInputStream/AbfsOutputStream to reuse SAS tokens. + private final String sasToken; + private static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); // For uploads, this is the request entity body. For downloads, @@ -60,13 +72,50 @@ public class AbfsRestOperation { private byte[] buffer; private int bufferOffset; private int bufferLength; + private int retryCount = 0; private AbfsHttpOperation result; + private AbfsCounters abfsCounters; + + /** + * This variable contains the reason of last API call within the same + * AbfsRestOperation object. + */ + private String failureReason; + + /** + * Checks if there is non-null HTTP response. + * @return true if there is a non-null HTTP response from the ABFS call. + */ + public boolean hasResult() { + return result != null; + } public AbfsHttpOperation getResult() { return result; } + public void hardSetResult(int httpStatus) { + result = AbfsHttpOperation.getAbfsHttpOperationWithFixedResult(this.url, + this.method, httpStatus); + } + + public URL getUrl() { + return url; + } + + public List getRequestHeaders() { + return requestHeaders; + } + + public boolean isARetriedRequest() { + return (retryCount > 0); + } + + String getSasToken() { + return sasToken; + } + /** * Initializes a new REST operation. 
* @@ -80,13 +129,35 @@ public AbfsHttpOperation getResult() { final String method, final URL url, final List requestHeaders) { + this(operationType, client, method, url, requestHeaders, null); + } + + /** + * Initializes a new REST operation. + * + * @param client The Blob FS client. + * @param method The HTTP method (PUT, PATCH, POST, GET, HEAD, or DELETE). + * @param url The full URL including query string parameters. + * @param requestHeaders The HTTP request headers. + * @param sasToken A sasToken for optional re-use by AbfsInputStream/AbfsOutputStream. + */ + AbfsRestOperation(final AbfsRestOperationType operationType, + final AbfsClient client, + final String method, + final URL url, + final List requestHeaders, + final String sasToken) { this.operationType = operationType; this.client = client; this.method = method; this.url = url; this.requestHeaders = requestHeaders; this.hasRequestBody = (AbfsHttpConstants.HTTP_METHOD_PUT.equals(method) + || AbfsHttpConstants.HTTP_METHOD_POST.equals(method) || AbfsHttpConstants.HTTP_METHOD_PATCH.equals(method)); + this.sasToken = sasToken; + this.abfsCounters = client.getAbfsCounters(); + this.intercept = client.getIntercept(); } /** @@ -101,6 +172,7 @@ public AbfsHttpOperation getResult() { * this will hold the response entity body. * @param bufferOffset An offset into the buffer where the data beings. * @param bufferLength The length of the data in the buffer. + * @param sasToken A sasToken for optional re-use by AbfsInputStream/AbfsOutputStream. 
*/ AbfsRestOperation(AbfsRestOperationType operationType, AbfsClient client, @@ -109,30 +181,56 @@ public AbfsHttpOperation getResult() { List requestHeaders, byte[] buffer, int bufferOffset, - int bufferLength) { - this(operationType, client, method, url, requestHeaders); + int bufferLength, + String sasToken) { + this(operationType, client, method, url, requestHeaders, sasToken); this.buffer = buffer; this.bufferOffset = bufferOffset; this.bufferLength = bufferLength; + this.abfsCounters = client.getAbfsCounters(); + } + + /** + * Execute a AbfsRestOperation. Track the Duration of a request if + * abfsCounters isn't null. + * @param tracingContext TracingContext instance to track correlation IDs + */ + public void execute(TracingContext tracingContext) + throws AzureBlobFileSystemException { + + try { + IOStatisticsBinding.trackDurationOfInvocation(abfsCounters, + AbfsStatistic.getStatNameFromHttpCall(method), + () -> completeExecute(tracingContext)); + } catch (AzureBlobFileSystemException aze) { + throw aze; + } catch (IOException e) { + throw new UncheckedIOException("Error while tracking Duration of an " + + "AbfsRestOperation call", e); + } } /** * Executes the REST operation with retry, by issuing one or more * HTTP operations. 
+ * @param tracingContext TracingContext instance to track correlation IDs */ - void execute() throws AzureBlobFileSystemException { + private void completeExecute(TracingContext tracingContext) + throws AzureBlobFileSystemException { // see if we have latency reports from the previous requests - String latencyHeader = this.client.getAbfsPerfTracker().getClientLatency(); + String latencyHeader = getClientLatency(); if (latencyHeader != null && !latencyHeader.isEmpty()) { AbfsHttpHeader httpHeader = new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_ABFS_CLIENT_LATENCY, latencyHeader); requestHeaders.add(httpHeader); } - int retryCount = 0; + retryCount = 0; LOG.debug("First execution of REST operation - {}", operationType); - while (!executeHttpOperation(retryCount++)) { + while (!executeHttpOperation(retryCount, tracingContext)) { try { + ++retryCount; + tracingContext.setRetryCount(retryCount); LOG.debug("Retrying REST operation {}. RetryCount = {}", operationType, retryCount); Thread.sleep(client.getRetryPolicy().getRetryInterval(retryCount)); @@ -141,88 +239,118 @@ void execute() throws AzureBlobFileSystemException { } } - if (result.getStatusCode() >= HttpURLConnection.HTTP_BAD_REQUEST) { + int status = result.getStatusCode(); + /* + If even after exhausting all retries, the http status code has an + invalid value it qualifies for InvalidAbfsRestOperationException. + All http status code less than 1xx range are considered as invalid + status codes. 
+ */ + if (status < HTTP_CONTINUE) { + throw new InvalidAbfsRestOperationException(null, retryCount); + } + + if (status >= HttpURLConnection.HTTP_BAD_REQUEST) { throw new AbfsRestOperationException(result.getStatusCode(), result.getStorageErrorCode(), result.getStorageErrorMessage(), null, result); } - LOG.trace("{} REST operation complete", operationType); } + @VisibleForTesting + String getClientLatency() { + return client.getAbfsPerfTracker().getClientLatency(); + } + /** * Executes a single HTTP operation to complete the REST operation. If it * fails, there may be a retry. The retryCount is incremented with each * attempt. */ - private boolean executeHttpOperation(final int retryCount) throws AzureBlobFileSystemException { - AbfsHttpOperation httpOperation = null; + private boolean executeHttpOperation(final int retryCount, + TracingContext tracingContext) throws AzureBlobFileSystemException { + AbfsHttpOperation httpOperation; + try { // initialize the HTTP request and open the connection - httpOperation = new AbfsHttpOperation(url, method, requestHeaders); - - switch(client.getAuthType()) { - case Custom: - case OAuth: - LOG.debug("Authenticating request with OAuth2 access token"); - httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, - client.getAccessToken()); - break; - case SAS: - // do nothing; the SAS token should already be appended to the query string - break; - case SharedKey: - // sign the HTTP request - LOG.debug("Signing request with shared key"); - // sign the HTTP request - client.getSharedKeyCredentials().signRequest( - httpOperation.getConnection(), - hasRequestBody ? bufferLength : 0); - break; - } + httpOperation = createHttpOperation(); + incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1); + tracingContext.constructHeader(httpOperation, failureReason); + signRequest(httpOperation, hasRequestBody ? 
bufferLength : 0); + + } catch (IOException e) { + LOG.debug("Auth failure: {}, {}", method, url); + throw new AbfsRestOperationException(-1, null, + "Auth failure: " + e.getMessage(), e); + } + + try { // dump the headers AbfsIoUtils.dumpHeadersToDebugLog("Request Headers", httpOperation.getConnection().getRequestProperties()); - AbfsClientThrottlingIntercept.sendingRequest(operationType); - + intercept.sendingRequest(operationType, abfsCounters); if (hasRequestBody) { // HttpUrlConnection requires httpOperation.sendRequest(buffer, bufferOffset, bufferLength); + incrementCounter(AbfsStatistic.SEND_REQUESTS, 1); + incrementCounter(AbfsStatistic.BYTES_SENT, bufferLength); } httpOperation.processResponse(buffer, bufferOffset, bufferLength); - } catch (IOException ex) { - if (ex instanceof UnknownHostException) { - LOG.warn(String.format("Unknown host name: %s. Retrying to resolve the host name...", httpOperation.getUrl().getHost())); + incrementCounter(AbfsStatistic.GET_RESPONSES, 1); + //Only increment bytesReceived counter when the status code is 2XX. + if (httpOperation.getStatusCode() >= HttpURLConnection.HTTP_OK + && httpOperation.getStatusCode() <= HttpURLConnection.HTTP_PARTIAL) { + incrementCounter(AbfsStatistic.BYTES_RECEIVED, + httpOperation.getBytesReceived()); + } else if (httpOperation.getStatusCode() == HttpURLConnection.HTTP_UNAVAILABLE) { + incrementCounter(AbfsStatistic.SERVER_UNAVAILABLE, 1); } - + } catch (UnknownHostException ex) { + String hostname = null; + hostname = httpOperation.getHost(); + failureReason = RetryReason.getAbbreviation(ex, null, null); + LOG.warn("Unknown host name: {}. 
Retrying to resolve the host name...", + hostname); + if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { + throw new InvalidAbfsRestOperationException(ex, retryCount); + } + return false; + } catch (IOException ex) { if (LOG.isDebugEnabled()) { - if (httpOperation != null) { - LOG.debug("HttpRequestFailure: " + httpOperation.toString(), ex); - } else { - LOG.debug("HttpRequestFailure: " + method + "," + url, ex); - } + LOG.debug("HttpRequestFailure: {}, {}", httpOperation, ex); } - if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { - throw new InvalidAbfsRestOperationException(ex); - } + failureReason = RetryReason.getAbbreviation(ex, -1, ""); - // once HttpException is thrown by AzureADAuthenticator, - // it indicates the policy in AzureADAuthenticator determined - // retry is not needed - if (ex instanceof HttpException) { - throw new AbfsRestOperationException((HttpException) ex); + if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { + throw new InvalidAbfsRestOperationException(ex, retryCount); } return false; } finally { - AbfsClientThrottlingIntercept.updateMetrics(operationType, httpOperation); + int status = httpOperation.getStatusCode(); + /* + A status less than 300 (2xx range) or greater than or equal + to 500 (5xx range) should contribute to throttling metrics being updated. + Less than 200 or greater than or equal to 500 show failed operations. 2xx + range contributes to successful operations. 3xx range is for redirects + and 4xx range is for user errors. These should not be a part of + throttling backoff computation. 
+ */ + boolean updateMetricsResponseCode = (status < HttpURLConnection.HTTP_MULT_CHOICE + || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); + if (updateMetricsResponseCode) { + intercept.updateMetrics(operationType, httpOperation); + } } - LOG.debug("HttpRequest: {}", httpOperation.toString()); + LOG.debug("HttpRequest: {}: {}", operationType, httpOperation); if (client.getRetryPolicy().shouldRetry(retryCount, httpOperation.getStatusCode())) { + int status = httpOperation.getStatusCode(); + failureReason = RetryReason.getAbbreviation(null, status, httpOperation.getStorageErrorMessage()); return false; } @@ -230,4 +358,56 @@ private boolean executeHttpOperation(final int retryCount) throws AzureBlobFileS return true; } + + /** + * Sign an operation. + * @param httpOperation operation to sign + * @param bytesToSign how many bytes to sign for shared key auth. + * @throws IOException failure + */ + @VisibleForTesting + public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign) throws IOException { + switch(client.getAuthType()) { + case Custom: + case OAuth: + LOG.debug("Authenticating request with OAuth2 access token"); + httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + break; + case SAS: + // do nothing; the SAS token should already be appended to the query string + httpOperation.setMaskForSAS(); //mask sig/oid from url for logs + break; + case SharedKey: + default: + // sign the HTTP request + LOG.debug("Signing request with shared key"); + // sign the HTTP request + client.getSharedKeyCredentials().signRequest( + httpOperation.getConnection(), + bytesToSign); + break; + } + } + + /** + * Creates new object of {@link AbfsHttpOperation} with the url, method, and + * requestHeaders fields of the AbfsRestOperation object. 
+ */ + @VisibleForTesting + AbfsHttpOperation createHttpOperation() throws IOException { + return new AbfsHttpOperation(url, method, requestHeaders); + } + + /** + * Incrementing Abfs counters with a long value. + * + * @param statistic the Abfs statistic that needs to be incremented. + * @param value the value to be incremented by. + */ + private void incrementCounter(AbfsStatistic statistic, long value) { + if (abfsCounters != null) { + abfsCounters.incrementCounter(statistic, value); + } + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java index d3031860dd1c2..830297f381b91 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java @@ -40,5 +40,6 @@ public enum AbfsRestOperationType { Flush, ReadFile, DeletePath, - CheckAccess + CheckAccess, + LeasePath, } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsStreamContext.java new file mode 100644 index 0000000000000..9cd858cde818c --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsStreamContext.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +/** + * Base stream configuration class which is going + * to store common configs among input and output streams. + */ +public abstract class AbfsStreamContext { + private long sasTokenRenewPeriodForStreamsInSeconds; + + // hide default constructor + private AbfsStreamContext() { + } + + public AbfsStreamContext(final long sasTokenRenewPeriodForStreamsInSeconds) { + this.sasTokenRenewPeriodForStreamsInSeconds = sasTokenRenewPeriodForStreamsInSeconds; + } + + public long getSasTokenRenewPeriodForStreamsInSeconds() { + return sasTokenRenewPeriodForStreamsInSeconds; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java new file mode 100644 index 0000000000000..57b5095bb3219 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An interface for Abfs Throttling Interface. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface AbfsThrottlingIntercept { + + /** + * Updates the metrics for successful and failed read and write operations. + * @param operationType Only applicable for read and write operations. + * @param abfsHttpOperation Used for status code and data transferred. + */ + void updateMetrics(AbfsRestOperationType operationType, + AbfsHttpOperation abfsHttpOperation); + + /** + * Called before the request is sent. Client-side throttling + * uses this to suspend the request, if necessary, to minimize errors and + * maximize throughput. + * @param operationType Only applicable for read and write operations. + * @param abfsCounters Used for counters. 
+ */ + void sendingRequest(AbfsRestOperationType operationType, + AbfsCounters abfsCounters); + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java new file mode 100644 index 0000000000000..279b7a318caf0 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.util.WeakReferenceMap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class to get an instance of throttling intercept class per account. + */ +final class AbfsThrottlingInterceptFactory { + + private AbfsThrottlingInterceptFactory() { + } + + private static AbfsConfiguration abfsConfig; + + /** + * List of references notified of loss. 
+ */ + private static List lostReferences = new ArrayList<>(); + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsThrottlingInterceptFactory.class); + + /** + * Map which stores instance of ThrottlingIntercept class per account. + */ + private static WeakReferenceMap + interceptMap = new WeakReferenceMap<>( + AbfsThrottlingInterceptFactory::factory, + AbfsThrottlingInterceptFactory::referenceLost); + + /** + * Returns instance of throttling intercept. + * @param accountName Account name. + * @return instance of throttling intercept. + */ + private static AbfsClientThrottlingIntercept factory(final String accountName) { + return new AbfsClientThrottlingIntercept(accountName, abfsConfig); + } + + /** + * Reference lost callback. + * @param accountName key lost. + */ + private static void referenceLost(String accountName) { + lostReferences.add(accountName); + } + + /** + * Returns an instance of AbfsThrottlingIntercept. + * + * @param accountName The account for which we need instance of throttling intercept. + @param abfsConfiguration The object of abfsconfiguration class. + * @return Instance of AbfsThrottlingIntercept. 
+ */ + static synchronized AbfsThrottlingIntercept getInstance(String accountName, + AbfsConfiguration abfsConfiguration) { + abfsConfig = abfsConfiguration; + AbfsThrottlingIntercept intercept; + if (!abfsConfiguration.isAutoThrottlingEnabled()) { + return AbfsNoOpThrottlingIntercept.INSTANCE; + } + // If singleton is enabled use a static instance of the intercept class for all accounts + if (!abfsConfiguration.accountThrottlingEnabled()) { + intercept = AbfsClientThrottlingIntercept.initializeSingleton( + abfsConfiguration); + } else { + // Return the instance from the map + intercept = interceptMap.get(accountName); + if (intercept == null) { + intercept = new AbfsClientThrottlingIntercept(accountName, + abfsConfiguration); + interceptMap.put(accountName, intercept); + } + } + return intercept; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java index b272cf27ca0d7..92a08ea8d203d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java @@ -21,6 +21,11 @@ import java.util.Random; import java.net.HttpURLConnection; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; + /** * Retry policy used by AbfsClient. * */ @@ -87,6 +92,16 @@ public ExponentialRetryPolicy(final int maxIoRetries) { DEFAULT_CLIENT_BACKOFF); } + /** + * Initializes a new instance of the {@link ExponentialRetryPolicy} class. + * + * @param conf The {@link AbfsConfiguration} from which to retrieve retry configuration. 
+ */ + public ExponentialRetryPolicy(AbfsConfiguration conf) { + this(conf.getMaxIoRetries(), conf.getMinBackoffIntervalMilliseconds(), conf.getMaxBackoffIntervalMilliseconds(), + conf.getBackoffIntervalMilliseconds()); + } + /** * Initializes a new instance of the {@link ExponentialRetryPolicy} class. * @@ -105,7 +120,9 @@ public ExponentialRetryPolicy(final int retryCount, final int minBackoff, final /** * Returns if a request should be retried based on the retry count, current response, - * and the current strategy. + * and the current strategy. The valid http status code lies in the range of 1xx-5xx. + * But an invalid status code might be set due to network or timeout kind of issues. + * Such invalid status code also qualify for retry. * * @param retryCount The current retry attempt count. * @param statusCode The status code of the response, or -1 for socket error. @@ -113,7 +130,7 @@ public ExponentialRetryPolicy(final int retryCount, final int minBackoff, final */ public boolean shouldRetry(final int retryCount, final int statusCode) { return retryCount < this.retryCount - && (statusCode == -1 + && (statusCode < HTTP_CONTINUE || statusCode == HttpURLConnection.HTTP_CLIENT_TIMEOUT || (statusCode >= HttpURLConnection.HTTP_INTERNAL_ERROR && statusCode != HttpURLConnection.HTTP_NOT_IMPLEMENTED @@ -138,4 +155,25 @@ public long getRetryInterval(final int retryCount) { return retryInterval; } + + @VisibleForTesting + int getRetryCount() { + return this.retryCount; + } + + @VisibleForTesting + int getMinBackoff() { + return this.minBackoff; + } + + @VisibleForTesting + int getMaxBackoff() { + return maxBackoff; + } + + @VisibleForTesting + int getDeltaBackoff() { + return this.deltaBackoff; + } + } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java new file mode 100644 index 0000000000000..dc070a1d405d8 --- 
/dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface ListingSupport { + + /** + * @param path The list path. + * @param tracingContext TracingContext instance to track identifiers + * @return the entries in the path. + * @throws IOException in case of error + */ + FileStatus[] listStatus(Path path, TracingContext tracingContext) throws IOException; + + /** + * @param path The list path. + * @param startFrom The entry name that list results should start with. + * For example, if folder "/folder" contains four + * files: "afile", "bfile", "hfile", "ifile". Then + * listStatus(Path("/folder"), "hfile") will return + * "/folder/hfile" and "/folder/ifile". Notice that if + * startFrom is a non-existent entry name, then the + * list response contains all entries after this + * non-existent entry in lexical order: listStatus + * (Path("/folder"), "cfile") will return + * "/folder/hfile" and "/folder/ifile". + * @param tracingContext TracingContext instance to track identifiers + * @return the entries in the path starting from "startFrom" in lexical order. 
+ * @throws IOException in case of error + */ + FileStatus[] listStatus(Path path, String startFrom, TracingContext tracingContext) throws IOException; + + /** + * @param path The list path + * @param startFrom The entry name that list results should start with. + * For example, if folder "/folder" contains four + * files: "afile", "bfile", "hfile", "ifile". Then + * listStatus(Path("/folder"), "hfile") will return + * "/folder/hfile" and "/folder/ifile". Notice that if + * startFrom is a non-existent entry name, then the + * list response contains all entries after this + * non-existent entry in lexical order: listStatus + * (Path("/folder"), "cfile") will return + * "/folder/hfile" and "/folder/ifile". + * @param fileStatuses This list has to be filled with the FileStatus objects + * @param fetchAll flag to indicate if the above list needs to be + * filled with just one page of results or the entire + * result. + * @param continuation Continuation token. null means start from the beginning. 
+ * @param tracingContext TracingContext instance to track identifiers + * @return Continuation token + * @throws IOException in case of error + */ + String listStatus(Path path, String startFrom, List fileStatuses, + boolean fetchAll, String continuation, TracingContext tracingContext) throws IOException; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBuffer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBuffer.java index 00e4f008ad0a8..9ce926d841c84 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBuffer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBuffer.java @@ -18,9 +18,13 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.IOException; import java.util.concurrent.CountDownLatch; import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus.READ_FAILED; class ReadBuffer { @@ -33,6 +37,7 @@ class ReadBuffer { private ReadBufferStatus status; // status of the buffer private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client // waiting on this buffer gets unblocked + private TracingContext tracingContext; // fields to help with eviction logic private long timeStamp = 0; // tick at which buffer became available to read @@ -40,6 +45,8 @@ class ReadBuffer { private boolean isLastByteConsumed = false; private boolean isAnyByteConsumed = false; + private IOException errException = null; + public AbfsInputStream getStream() { return stream; } @@ -48,6 +55,14 @@ public void setStream(AbfsInputStream stream) { this.stream = stream; } + public void setTracingContext(TracingContext tracingContext) { + this.tracingContext = tracingContext; + } + + public TracingContext 
getTracingContext() { + return tracingContext; + } + public long getOffset() { return offset; } @@ -88,12 +103,23 @@ public void setBufferindex(int bufferindex) { this.bufferindex = bufferindex; } + public IOException getErrException() { + return errException; + } + + public void setErrException(final IOException errException) { + this.errException = errException; + } + public ReadBufferStatus getStatus() { return status; } public void setStatus(ReadBufferStatus status) { this.status = status; + if (status == READ_FAILED) { + bufferindex = -1; + } } public CountDownLatch getLatch() { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManager.java index 5b71cf05225a8..6f194270e211f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManager.java @@ -21,23 +21,34 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; +import java.util.Iterator; import java.util.LinkedList; +import java.util.List; import java.util.Queue; import java.util.Stack; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * The Read Buffer Manager for Rest AbfsClient. 
*/ final class ReadBufferManager { private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class); + private static final int ONE_KB = 1024; + private static final int ONE_MB = ONE_KB * ONE_KB; private static final int NUM_BUFFERS = 16; - private static final int BLOCK_SIZE = 4 * 1024 * 1024; private static final int NUM_THREADS = 8; - private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold + private static final int DEFAULT_THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold + private static int blockSize = 4 * ONE_MB; + private static int thresholdAgeMilliseconds = DEFAULT_THRESHOLD_AGE_MILLISECONDS; private Thread[] threads = new Thread[NUM_THREADS]; private byte[][] buffers; // array of byte[] buffers, to hold the data that is read private Stack freeList = new Stack<>(); // indices in buffers[] array that are available @@ -45,21 +56,37 @@ final class ReadBufferManager { private Queue readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet private LinkedList inProgressList = new LinkedList<>(); // requests being processed by worker threads private LinkedList completedReadList = new LinkedList<>(); // buffers available for reading - private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block + private static ReadBufferManager bufferManager; // singleton, initialized in static initialization block + private static final ReentrantLock LOCK = new ReentrantLock(); - static { - BUFFER_MANAGER = new ReadBufferManager(); - BUFFER_MANAGER.init(); + static ReadBufferManager getBufferManager() { + if (bufferManager == null) { + LOCK.lock(); + try { + if (bufferManager == null) { + bufferManager = new ReadBufferManager(); + bufferManager.init(); + } + } finally { + LOCK.unlock(); + } + } + return bufferManager; } - static ReadBufferManager getBufferManager() { - return 
BUFFER_MANAGER; + static void setReadBufferManagerConfigs(int readAheadBlockSize) { + if (bufferManager == null) { + LOGGER.debug( + "ReadBufferManager not initialized yet. Overriding readAheadBlockSize as {}", + readAheadBlockSize); + blockSize = readAheadBlockSize; + } } private void init() { buffers = new byte[NUM_BUFFERS][]; for (int i = 0; i < NUM_BUFFERS; i++) { - buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC + buffers[i] = new byte[blockSize]; // same buffers are reused. The byte array never goes back to GC freeList.add(i); } for (int i = 0; i < NUM_THREADS; i++) { @@ -74,6 +101,7 @@ private void init() { // hide instance constructor private ReadBufferManager() { + LOGGER.trace("Creating readbuffer manager with HADOOP-18546 patch"); } @@ -91,7 +119,8 @@ private ReadBufferManager() { * @param requestedOffset The offset in the file which shoukd be read * @param requestedLength The length to read */ - void queueReadAhead(final AbfsInputStream stream, final long requestedOffset, final int requestedLength) { + void queueReadAhead(final AbfsInputStream stream, final long requestedOffset, final int requestedLength, + TracingContext tracingContext) { if (LOGGER.isTraceEnabled()) { LOGGER.trace("Start Queueing readAhead for {} offset {} length {}", stream.getPath(), requestedOffset, requestedLength); @@ -112,6 +141,7 @@ void queueReadAhead(final AbfsInputStream stream, final long requestedOffset, fi buffer.setRequestedLength(requestedLength); buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE); buffer.setLatch(new CountDownLatch(1)); + buffer.setTracingContext(tracingContext); Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already @@ -119,10 +149,10 @@ void queueReadAhead(final AbfsInputStream stream, final long requestedOffset, fi buffer.setBufferindex(bufferIndex); readAheadQueue.add(buffer); notifyAll(); - } - if (LOGGER.isTraceEnabled()) { - LOGGER.trace("Done q-ing 
readAhead for file {} offset {} buffer idx {}", - stream.getPath(), requestedOffset, buffer.getBufferindex()); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}", + stream.getPath(), requestedOffset, buffer.getBufferindex()); + } } } @@ -141,7 +171,8 @@ void queueReadAhead(final AbfsInputStream stream, final long requestedOffset, fi * @param buffer the buffer to read data into. Note that the buffer will be written into from offset 0. * @return the number of bytes read */ - int getBlock(final AbfsInputStream stream, final long position, final int length, final byte[] buffer) { + int getBlock(final AbfsInputStream stream, final long position, final int length, final byte[] buffer) + throws IOException { // not synchronized, so have to be careful with locking if (LOGGER.isTraceEnabled()) { LOGGER.trace("getBlock for file {} position {} thread {}", @@ -213,6 +244,8 @@ private synchronized boolean tryEvict() { return false; // there are no evict-able buffers } + long currentTimeInMs = currentTimeMillis(); + // first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed) for (ReadBuffer buf : completedReadList) { if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) { @@ -237,24 +270,47 @@ private synchronized boolean tryEvict() { } // next, try any old nodes that have not been consumed + // Failed read buffers (with buffer index=-1) that are older than + // thresholdAge should be cleaned up, but at the same time should not + // report successful eviction. + // Queue logic expects that a buffer is freed up for read ahead when + // eviction is successful, whereas a failed ReadBuffer would have released + // its buffer when its status was set to READ_FAILED. 
long earliestBirthday = Long.MAX_VALUE; + ArrayList oldFailedBuffers = new ArrayList<>(); for (ReadBuffer buf : completedReadList) { - if (buf.getTimeStamp() < earliestBirthday) { + if ((buf.getBufferindex() != -1) + && (buf.getTimeStamp() < earliestBirthday)) { nodeToEvict = buf; earliestBirthday = buf.getTimeStamp(); + } else if ((buf.getBufferindex() == -1) + && (currentTimeInMs - buf.getTimeStamp()) > thresholdAgeMilliseconds) { + oldFailedBuffers.add(buf); } } - if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) { + + for (ReadBuffer buf : oldFailedBuffers) { + evict(buf); + } + + if ((currentTimeInMs - earliestBirthday > thresholdAgeMilliseconds) && (nodeToEvict != null)) { return evict(nodeToEvict); } + LOGGER.trace("No buffer eligible for eviction"); // nothing can be evicted return false; } private boolean evict(final ReadBuffer buf) { - freeList.push(buf.getBufferindex()); + // As failed ReadBuffers (bufferIndx = -1) are saved in completedReadList, + // avoid adding it to freeList. + if (buf.getBufferindex() != -1) { + freeList.push(buf.getBufferindex()); + } + completedReadList.remove(buf); + buf.setTracingContext(null); if (LOGGER.isTraceEnabled()) { LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}", buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength()); @@ -289,6 +345,27 @@ private ReadBuffer getFromList(final Collection list, final AbfsInpu return null; } + /** + * Returns buffers that failed or passed from completed queue. 
+ * @param stream + * @param requestedOffset + * @return + */ + private ReadBuffer getBufferFromCompletedQueue(final AbfsInputStream stream, final long requestedOffset) { + for (ReadBuffer buffer : completedReadList) { + // Buffer is returned if the requestedOffset is at or above buffer's + // offset but less than buffer's length or the actual requestedLength + if ((buffer.getStream() == stream) + && (requestedOffset >= buffer.getOffset()) + && ((requestedOffset < buffer.getOffset() + buffer.getLength()) + || (requestedOffset < buffer.getOffset() + buffer.getRequestedLength()))) { + return buffer; + } + } + + return null; + } + private void clearFromReadAheadQueue(final AbfsInputStream stream, final long requestedOffset) { ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset); if (buffer != null) { @@ -299,11 +376,28 @@ private void clearFromReadAheadQueue(final AbfsInputStream stream, final long re } private int getBlockFromCompletedQueue(final AbfsInputStream stream, final long position, final int length, - final byte[] buffer) { - ReadBuffer buf = getFromList(completedReadList, stream, position); - if (buf == null || position >= buf.getOffset() + buf.getLength()) { + final byte[] buffer) throws IOException { + ReadBuffer buf = getBufferFromCompletedQueue(stream, position); + + if (buf == null) { + return 0; + } + + if (buf.getStatus() == ReadBufferStatus.READ_FAILED) { + // To prevent new read requests to fail due to old read-ahead attempts, + // return exception only from buffers that failed within last thresholdAgeMilliseconds + if ((currentTimeMillis() - (buf.getTimeStamp()) < thresholdAgeMilliseconds)) { + throw buf.getErrException(); + } else { + return 0; + } + } + + if ((buf.getStatus() != ReadBufferStatus.AVAILABLE) + || (position >= buf.getOffset() + buf.getLength())) { return 0; } + int cursor = (int) (position - buf.getOffset()); int availableLengthInBuffer = buf.getLength() - cursor; int lengthToCopy = Math.min(length, 
availableLengthInBuffer); @@ -361,21 +455,29 @@ ReadBuffer getNextBlockToRead() throws InterruptedException { */ void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) { if (LOGGER.isTraceEnabled()) { - LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}", - buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead); + LOGGER.trace("ReadBufferWorker completed read file {} for offset {} outcome {} bytes {}", + buffer.getStream().getPath(), buffer.getOffset(), result, bytesActuallyRead); } synchronized (this) { - inProgressList.remove(buffer); - if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) { - buffer.setStatus(ReadBufferStatus.AVAILABLE); + // If this buffer has already been purged during + // close of InputStream then we don't update the lists. + if (inProgressList.contains(buffer)) { + inProgressList.remove(buffer); + if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) { + buffer.setStatus(ReadBufferStatus.AVAILABLE); + buffer.setLength(bytesActuallyRead); + } else { + freeList.push(buffer.getBufferindex()); + // buffer will be deleted as per the eviction policy. + } + // completed list also contains FAILED read buffers + // for sending exception message to clients. 
+ buffer.setStatus(result); buffer.setTimeStamp(currentTimeMillis()); - buffer.setLength(bytesActuallyRead); completedReadList.add(buffer); - } else { - freeList.push(buffer.getBufferindex()); - // buffer should go out of scope after the end of the calling method in ReadBufferWorker, and eligible for GC } } + //outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results buffer.getLatch().countDown(); // wake up waiting threads (if any) } @@ -392,4 +494,157 @@ void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final i private long currentTimeMillis() { return System.nanoTime() / 1000 / 1000; } + + @VisibleForTesting + int getThresholdAgeMilliseconds() { + return thresholdAgeMilliseconds; + } + + @VisibleForTesting + static void setThresholdAgeMilliseconds(int thresholdAgeMs) { + thresholdAgeMilliseconds = thresholdAgeMs; + } + + @VisibleForTesting + int getCompletedReadListSize() { + return completedReadList.size(); + } + + @VisibleForTesting + public synchronized List getCompletedReadListCopy() { + return new ArrayList<>(completedReadList); + } + + @VisibleForTesting + public synchronized List getFreeListCopy() { + return new ArrayList<>(freeList); + } + + @VisibleForTesting + public synchronized List getReadAheadQueueCopy() { + return new ArrayList<>(readAheadQueue); + } + + @VisibleForTesting + public synchronized List getInProgressCopiedList() { + return new ArrayList<>(inProgressList); + } + + @VisibleForTesting + void callTryEvict() { + tryEvict(); + } + + + /** + * Purging the buffers associated with an {@link AbfsInputStream} + * from {@link ReadBufferManager} when stream is closed. + * @param stream input stream. 
+ */ + public synchronized void purgeBuffersForStream(AbfsInputStream stream) { + LOGGER.debug("Purging stale buffers for AbfsInputStream {} ", stream); + readAheadQueue.removeIf(readBuffer -> readBuffer.getStream() == stream); + purgeList(stream, completedReadList); + } + + /** + * Method to remove buffers associated with a {@link AbfsInputStream} + * when its close method is called. + * NOTE: This method is not threadsafe and must be called inside a + * synchronised block. See caller. + * @param stream associated input stream. + * @param list list of buffers like {@link this#completedReadList} + * or {@link this#inProgressList}. + */ + private void purgeList(AbfsInputStream stream, LinkedList list) { + for (Iterator it = list.iterator(); it.hasNext();) { + ReadBuffer readBuffer = it.next(); + if (readBuffer.getStream() == stream) { + it.remove(); + // As failed ReadBuffers (bufferIndex = -1) are already pushed to free + // list in doneReading method, we will skip adding those here again. + if (readBuffer.getBufferindex() != -1) { + freeList.push(readBuffer.getBufferindex()); + } + } + } + } + + /** + * Test method that can clean up the current state of readAhead buffers and + * the lists. Will also trigger a fresh init. + */ + @VisibleForTesting + void testResetReadBufferManager() { + synchronized (this) { + ArrayList completedBuffers = new ArrayList<>(); + for (ReadBuffer buf : completedReadList) { + if (buf != null) { + completedBuffers.add(buf); + } + } + + for (ReadBuffer buf : completedBuffers) { + evict(buf); + } + + readAheadQueue.clear(); + inProgressList.clear(); + completedReadList.clear(); + freeList.clear(); + for (int i = 0; i < NUM_BUFFERS; i++) { + buffers[i] = null; + } + buffers = null; + resetBufferManager(); + } + } + + /** + * Reset buffer manager to null. + */ + @VisibleForTesting + static void resetBufferManager() { + bufferManager = null; + } + + /** + * Reset readAhead buffer to needed readAhead block size and + * thresholdAgeMilliseconds. 
+ * @param readAheadBlockSize + * @param thresholdAgeMilliseconds + */ + @VisibleForTesting + void testResetReadBufferManager(int readAheadBlockSize, int thresholdAgeMilliseconds) { + setBlockSize(readAheadBlockSize); + setThresholdAgeMilliseconds(thresholdAgeMilliseconds); + testResetReadBufferManager(); + } + + @VisibleForTesting + static void setBlockSize(int readAheadBlockSize) { + blockSize = readAheadBlockSize; + } + + @VisibleForTesting + int getReadAheadBlockSize() { + return blockSize; + } + + /** + * Test method that can mimic no free buffers scenario and also add a ReadBuffer + * into completedReadList. This readBuffer will get picked up by TryEvict() + * next time a new queue request comes in. + * @param buf that needs to be added to completedReadlist + */ + @VisibleForTesting + void testMimicFullUseAndAddFailedBuffer(ReadBuffer buf) { + freeList.clear(); + completedReadList.add(buf); + } + + @VisibleForTesting + int getNumBuffers() { + return NUM_BUFFERS; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferWorker.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferWorker.java index af69de0f089e9..a30f06261ef6f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferWorker.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferWorker.java @@ -18,8 +18,10 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.IOException; import java.util.concurrent.CountDownLatch; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus; class ReadBufferWorker implements Runnable { @@ -61,9 +63,22 @@ public void run() { if (buffer != null) { try { // do the actual read, from the file. 
- int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength()); + int bytesRead = buffer.getStream().readRemote( + buffer.getOffset(), + buffer.getBuffer(), + 0, + // If AbfsInputStream was created with bigger buffer size than + // read-ahead buffer size, make sure a valid length is passed + // for remote read + Math.min(buffer.getRequestedLength(), buffer.getBuffer().length), + buffer.getTracingContext()); + bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager + } catch (IOException ex) { + buffer.setErrException(ex); + bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0); } catch (Exception ex) { + buffer.setErrException(new PathIOException(buffer.getStream().getPath(), ex)); bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java new file mode 100644 index 0000000000000..40e8cdc1e07ba --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ClientErrorRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionResetRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionTimeoutRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ReadTimeoutRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.RetryReasonCategory; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ServerErrorRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownHostRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownIOExceptionRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownSocketExceptionRetryReason; + + +/** + * This utility class exposes methods to convert a server response-error to a + * category of error. + */ +final class RetryReason { + + /** + * Linked-list of the implementations of RetryReasonCategory. The objects in the + * list are arranged by the rank of their significance. + *

      + *
    • ServerError (statusCode==5XX), ClientError (statusCode==4XX) are + * independent of other retryReason categories.
    • + *
    • Since {@link java.net.SocketException} is a subclass of + * {@link java.io.IOException}, + * {@link UnknownIOExceptionRetryReason} is placed before + * {@link UnknownSocketExceptionRetryReason}.
    • + *
    • Since connectionTimeout, readTimeout, and connectionReset are + * {@link java.net.SocketTimeoutException} exceptions with different messages, + * {@link ConnectionTimeoutRetryReason}, {@link ReadTimeoutRetryReason}, and + * {@link ConnectionResetRetryReason} rank above {@link UnknownIOExceptionRetryReason}. + * There is no order among the three reasons, as they are differentiated + * by exception message.
    • + *
    • Since {@link java.net.UnknownHostException} is a subclass of + * {@link java.io.IOException}, {@link UnknownHostRetryReason} is ranked + * over {@link UnknownIOExceptionRetryReason}.
    • + *
    + */ + private static List rankedReasonCategories + = new LinkedList() {{ + add(new ServerErrorRetryReason()); + add(new ClientErrorRetryReason()); + add(new UnknownIOExceptionRetryReason()); + add(new UnknownSocketExceptionRetryReason()); + add(new ConnectionTimeoutRetryReason()); + add(new ReadTimeoutRetryReason()); + add(new UnknownHostRetryReason()); + add(new ConnectionResetRetryReason()); + }}; + + private RetryReason() { + + } + + /** + * Method to get the abbreviation for a given set of exception, statusCode, + * storageErrorMessage; the last matching category in the ranked list wins. + * + * @param ex exception caught during server communication. + * @param statusCode statusCode in the server response. + * @param storageErrorMessage storageErrorMessage in the server response. + * + * @return abbreviation for the given set of exception, statusCode, storageErrorMessage; null if no category matches. + */ + static String getAbbreviation(Exception ex, + Integer statusCode, + String storageErrorMessage) { + String result = null; + for (RetryReasonCategory retryReasonCategory : rankedReasonCategories) { + final String abbreviation + = retryReasonCategory.captureAndGetAbbreviation(ex, + statusCode, storageErrorMessage); + if (abbreviation != null) { + result = abbreviation; + } + } + return result; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java new file mode 100644 index 0000000000000..8a0af183e30ae --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +public final class RetryReasonConstants { + + private RetryReasonConstants() { + + } + public static final String CONNECTION_TIMEOUT_JDK_MESSAGE = "connect timed out"; + public static final String READ_TIMEOUT_JDK_MESSAGE = "Read timed out"; + public static final String CONNECTION_RESET_MESSAGE = "Connection reset"; + public static final String OPERATION_BREACH_MESSAGE = "Operations per second is over the account limit."; + public static final String CONNECTION_RESET_ABBREVIATION = "CR"; + public static final String CONNECTION_TIMEOUT_ABBREVIATION = "CT"; + public static final String READ_TIMEOUT_ABBREVIATION = "RT"; + public static final String INGRESS_LIMIT_BREACH_ABBREVIATION = "ING"; + public static final String EGRESS_LIMIT_BREACH_ABBREVIATION = "EGR"; + public static final String OPERATION_LIMIT_BREACH_ABBREVIATION = "OPR"; + public static final String UNKNOWN_HOST_EXCEPTION_ABBREVIATION = "UH"; + public static final String IO_EXCEPTION_ABBREVIATION = "IOE"; + public static final String SOCKET_EXCEPTION_ABBREVIATION = "SE"; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SimpleKeyProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SimpleKeyProvider.java index 727e1b3fd3fdd..e3adc59afac5e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SimpleKeyProvider.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SimpleKeyProvider.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; 
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.KeyProviderException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; +import org.apache.hadoop.fs.azurebfs.diagnostics.Base64StringConfigurationBasicValidator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,12 +44,35 @@ public String getStorageAccountKey(String accountName, Configuration rawConfig) try { AbfsConfiguration abfsConfig = new AbfsConfiguration(rawConfig, accountName); key = abfsConfig.getPasswordString(ConfigurationKeys.FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME); - } catch(IllegalAccessException | InvalidConfigurationValueException e) { - throw new KeyProviderException("Failure to initialize configuration", e); + + // Validating the key. + validateStorageAccountKey(key); + } catch (IllegalAccessException | InvalidConfigurationValueException e) { + LOG.debug("Failure to retrieve storage account key for {}", accountName, + e); + throw new KeyProviderException("Failure to initialize configuration for " + + accountName + + " key =\"" + key + "\"" + + ": " + e, e); } catch(IOException ioe) { - LOG.warn("Unable to get key from credential providers. {}", ioe); + LOG.warn("Unable to get key for {} from credential providers. {}", + accountName, ioe, ioe); } return key; } + + /** + * A method to validate the storage key. + * + * @param key the key to be validated. 
+ * @throws InvalidConfigurationValueException if the validator rejects the key. + */ + private void validateStorageAccountKey(String key) + throws InvalidConfigurationValueException { + Base64StringConfigurationBasicValidator validator = new Base64StringConfigurationBasicValidator( + ConfigurationKeys.FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME, "", true); + + validator.validate(key); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java new file mode 100644 index 0000000000000..bf7da69ec4982 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +public enum TimerFunctionality { + RESUME, + + SUSPEND +} + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java new file mode 100644 index 0000000000000..cf1c47e3eb0dc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT; + +/** + * Category that can capture server-response errors for 4XX status-code. + */ +public class ClientErrorRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (statusCode == null || statusCode / HTTP_STATUS_CATEGORY_QUOTIENT != 4) { + return false; + } + return true; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return statusCode + ""; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java new file mode 100644 index 0000000000000..702f887564632 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; + +/** + * Category that can capture server-response errors for connection-reset exception. + */ +public class ConnectionResetRetryReason extends + RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + return checkExceptionMessage(ex, CONNECTION_RESET_MESSAGE); + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return CONNECTION_RESET_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java new file mode 100644 index 0000000000000..28f35dcc80546 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; + +/** + * Category that can capture server-response errors for connection-timeout. + */ +public class ConnectionTimeoutRetryReason extends + RetryReasonCategory { + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return CONNECTION_TIMEOUT_ABBREVIATION; + } + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + return checkExceptionMessage(ex, CONNECTION_TIMEOUT_JDK_MESSAGE); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java new file mode 100644 index 0000000000000..4663d9a52bbd6 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE; + +/** + * Category that can capture server-response errors for read-timeout. + */ +public class ReadTimeoutRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + return checkExceptionMessage(ex, READ_TIMEOUT_JDK_MESSAGE); + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return READ_TIMEOUT_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java new file mode 100644 index 0000000000000..893451b496f45 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.util.Locale; + +/** + * Provides methods to define if given exception can be categorised to certain category. + * Each category has a different implementation of the abstract class. + */ +public abstract class RetryReasonCategory { + + /** + * Returns if given server response error can be categorised by the implementation. + * + * @param ex exception captured in the server response. + * @param statusCode statusCode on the server response + * @param serverErrorMessage serverErrorMessage on the server response. + * + * @return

    1. true if server response error can be categorised by the implementation
    2. + *
    3. false if response error can not be categorised by the implementation
    + */ + abstract Boolean canCapture(Exception ex, + Integer statusCode, + String serverErrorMessage); + + /** + * Returns the abbreviation corresponding to the server response error. + * + * @param statusCode statusCode on the server response + * @param serverErrorMessage serverErrorMessage on the server response. + * + * @return abbreviation on the basis of the statusCode and the serverErrorMessage + */ + abstract String getAbbreviation(Integer statusCode, String serverErrorMessage); + + /** + * Converts the server-error response to an abbreviation if the response can be + * categorised by the implementation. + * + * @param ex exception received while making API request + * @param statusCode statusCode received in the server-response + * @param serverErrorMessage error-message received in the server-response + * + * @return abbreviation if the server-response can be categorised by the implementation. + * null if the server-response cannot be categorised by the implementation. + */ + public String captureAndGetAbbreviation(Exception ex, + Integer statusCode, + String serverErrorMessage) { + if (canCapture(ex, statusCode, serverErrorMessage)) { + return getAbbreviation(statusCode, serverErrorMessage); + } + return null; + } + + /** + * Checks, ignoring case, if a required search-string is in the exception's message. 
+ */ + Boolean checkExceptionMessage(final Exception exceptionCaptured, + final String search) { + if (search == null) { + return false; + } + if (exceptionCaptured != null + && exceptionCaptured.getMessage() != null + && exceptionCaptured.getMessage() + .toLowerCase(Locale.US) + .contains(search.toLowerCase(Locale.US))) { + return true; + } + return false; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java new file mode 100644 index 0000000000000..dd67a0cb8cbba --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; + +/** + * Category that can capture server-response errors for 5XX status-code. 
+ */ +public class ServerErrorRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (statusCode == null || statusCode / HTTP_STATUS_CATEGORY_QUOTIENT != 5) { + return false; + } + return true; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + if (statusCode == HTTP_UNAVAILABLE && serverErrorMessage != null) { + String splitedServerErrorMessage = serverErrorMessage.split(System.lineSeparator(), + 2)[0]; + if (INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase( + splitedServerErrorMessage)) { + return INGRESS_LIMIT_BREACH_ABBREVIATION; + } + if (EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase( + splitedServerErrorMessage)) { + return EGRESS_LIMIT_BREACH_ABBREVIATION; + } + if (OPERATION_BREACH_MESSAGE.equalsIgnoreCase( + splitedServerErrorMessage)) { + return OPERATION_LIMIT_BREACH_ABBREVIATION; + } + return HTTP_UNAVAILABLE + ""; + } + return statusCode + ""; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java new file mode 100644 index 0000000000000..c329348d81f8d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.net.UnknownHostException; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + +/** + * Category that can capture server-response errors for {@link UnknownHostException}. + */ +public class UnknownHostRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (ex instanceof UnknownHostException) { + return true; + } + return false; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java new file mode 100644 index 0000000000000..8a69ebb928d68 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.io.IOException; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION; + + +/** + * Category that can capture server-response errors for {@link IOException}. + */ +public class UnknownIOExceptionRetryReason extends + RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (ex instanceof IOException) { + return true; + } + return false; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return IO_EXCEPTION_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java new file mode 100644 index 0000000000000..18e9f115feaf6 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.net.SocketException; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION; + +/** + * Category that can capture server-response errors for {@link SocketException}. + */ +public class UnknownSocketExceptionRetryReason extends + RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (ex instanceof SocketException) { + return true; + } + return false; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return SOCKET_EXCEPTION_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java new file mode 100644 index 0000000000000..7d8078620af77 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A retryReasonCategory defines methods applicable on server-response errors. + */ +@Private +@Evolving +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Evolving; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/CachedSASToken.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/CachedSASToken.java new file mode 100644 index 0000000000000..17a9125b06952 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/CachedSASToken.java @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; + +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS; +import static java.time.temporal.ChronoUnit.SECONDS; + +/** + * CachedSASToken provides simple utility for managing renewal + * of SAS tokens used by Input/OutputStream. This enables SAS re-use + * and reduces calls to the SASTokenProvider. + */ +public final class CachedSASToken { + public static final Logger LOG = LoggerFactory.getLogger(CachedSASToken.class); + private final long minExpirationInSeconds; + private String sasToken; + private OffsetDateTime sasExpiry; + + /** + * Create instance with default minimum expiration. SAS tokens are + * automatically renewed when their expiration is within this period. + */ + public CachedSASToken() { + this(DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS); + } + + /** + * Create instance with specified minimum expiration. SAS tokens are + * automatically renewed when their expiration is within this period. + * @param minExpirationInSeconds + */ + public CachedSASToken(long minExpirationInSeconds) { + this.minExpirationInSeconds = minExpirationInSeconds; + } + + /** + * Checks if the SAS token is expired or near expiration. 
+ * @param expiry + * @param minExpiryInSeconds + * @return true if the SAS is near sasExpiry; otherwise false + */ + private static boolean isNearExpiry(OffsetDateTime expiry, long minExpiryInSeconds) { + if (expiry == OffsetDateTime.MIN) { + return true; + } + OffsetDateTime utcNow = OffsetDateTime.now(ZoneOffset.UTC); + return utcNow.until(expiry, SECONDS) <= minExpiryInSeconds; + } + + /** + * Parse the sasExpiry from the SAS token. The sasExpiry is the minimum + * of the ske and se parameters. The se parameter is required and the + * ske parameter is optional. + * @param token an Azure Storage SAS token + * @return the sasExpiry or OffsetDateTime.MIN if invalid. + */ + private static OffsetDateTime getExpiry(String token) { + // return MIN for all invalid input, including a null token + if (token == null) { + return OffsetDateTime.MIN; + } + + String signedExpiry = "se="; + int signedExpiryLen = 3; + + int start = token.indexOf(signedExpiry); + + // return MIN if the required se parameter is absent + if (start == -1) { + return OffsetDateTime.MIN; + } + + start += signedExpiryLen; + + // extract the value of se parameter + int end = token.indexOf("&", start); + String seValue = (end == -1) ? 
token.substring(start) : token.substring(start, end); + + try { + seValue = URLDecoder.decode(seValue, "utf-8"); + } catch (UnsupportedEncodingException ex) { + LOG.error("Error decoding se query parameter ({}) from SAS.", seValue, ex); + return OffsetDateTime.MIN; + } + + // parse the ISO 8601 date value; return MIN if invalid + OffsetDateTime seDate = OffsetDateTime.MIN; + try { + seDate = OffsetDateTime.parse(seValue, DateTimeFormatter.ISO_DATE_TIME); + } catch (DateTimeParseException ex) { + LOG.error("Error parsing se query parameter ({}) from SAS.", seValue, ex); + } + + String signedKeyExpiry = "ske="; + int signedKeyExpiryLen = 4; + + // if ske is present, the sasExpiry is the minimum of ske and se + start = token.indexOf(signedKeyExpiry); + + // return seDate if ske is absent + if (start == -1) { + return seDate; + } + + start += signedKeyExpiryLen; + + // extract the value of ske parameter + end = token.indexOf("&", start); + String skeValue = (end == -1) ? token.substring(start) : token.substring(start, end); + + try { + skeValue = URLDecoder.decode(skeValue, "utf-8"); + } catch (UnsupportedEncodingException ex) { + LOG.error("Error decoding ske query parameter ({}) from SAS.", skeValue, ex); + return OffsetDateTime.MIN; + } + + // parse the ISO 8601 date value; return MIN if invalid + OffsetDateTime skeDate = OffsetDateTime.MIN; + try { + skeDate = OffsetDateTime.parse(skeValue, DateTimeFormatter.ISO_DATE_TIME); + } catch (DateTimeParseException ex) { + LOG.error("Error parsing ske query parameter ({}) from SAS.", skeValue, ex); + return OffsetDateTime.MIN; + } + + return skeDate.isBefore(seDate) ? skeDate : seDate; + } + + /** + * Updates the cached SAS token and expiry. If the token is invalid, the cached value + * is cleared by setting it to null and the expiry to MIN. 
+ * @param token an Azure Storage SAS token + */ + public void update(String token) { + // quickly return if token and cached sasToken are the same reference + // Note: use of operator == is intentional + if (token == sasToken) { + return; + } + OffsetDateTime newExpiry = getExpiry(token); + boolean isInvalid = isNearExpiry(newExpiry, minExpirationInSeconds); + synchronized (this) { + if (isInvalid) { + sasToken = null; + sasExpiry = OffsetDateTime.MIN; + } else { + sasToken = token; + sasExpiry = newExpiry; + } + } + } + + /** + * Gets the token if still valid. + * @return the token or null if it is expired or near sasExpiry. + */ + public String get() { + // quickly return null if not set + if (sasToken == null) { + return null; + } + String token; + OffsetDateTime exp; + synchronized (this) { + token = sasToken; + exp = sasExpiry; + } + boolean isInvalid = isNearExpiry(exp, minExpirationInSeconds); + return isInvalid ? null : token; + } + + @VisibleForTesting + void setForTesting(String token, OffsetDateTime expiry) { + synchronized (this) { + sasToken = token; + sasExpiry = expiry; + } + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/DateTimeUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/DateTimeUtils.java new file mode 100644 index 0000000000000..0461869681252 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/DateTimeUtils.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.util.Date; +import java.util.Locale; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS; + +public final class DateTimeUtils { + private static final Logger LOG = LoggerFactory.getLogger(DateTimeUtils.class); + private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss z"; + + public static long parseLastModifiedTime(final String lastModifiedTime) { + long parsedTime = 0; + try { + Date utcDate = new SimpleDateFormat(DATE_TIME_PATTERN, Locale.US) + .parse(lastModifiedTime); + parsedTime = utcDate.getTime(); + } catch (ParseException e) { + LOG.error("Failed to parse the date {}", lastModifiedTime); + } finally { + return parsedTime; + } + } + + /** + * Tries to identify if an operation was recently executed based on the LMT of + * a file or folder. LMT needs to be more recent than the original request + * start time. To include any clock skew with server, LMT within + * DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS from the request start time is going + * to be considered to qualify for recent operation. 
+ * @param lastModifiedTime File/Folder LMT + * @param expectedLMTUpdateTime original request timestamp which should + * have updated the LMT on target + * @return true if the LMT is within timespan for recent operation, else false + */ + public static boolean isRecentlyModified(final String lastModifiedTime, + final Instant expectedLMTUpdateTime) { + long lmtEpochTime = DateTimeUtils.parseLastModifiedTime(lastModifiedTime); + long currentEpochTime = expectedLMTUpdateTime.toEpochMilli(); + + return ((lmtEpochTime > currentEpochTime) + || ((currentEpochTime - lmtEpochTime) <= DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS)); + } + + private DateTimeUtils() { + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/IdentityHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/IdentityHandler.java new file mode 100644 index 0000000000000..7f866925dfd7c --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/IdentityHandler.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azurebfs.utils; + +import java.io.IOException; + + +/** + * {@code IdentityHandler} defines the set of methods to support various + * identity lookup services. + */ +public interface IdentityHandler { + + /** + * Perform lookup from Service Principal's Object ID to Username. + * @param originalIdentity AAD object ID. + * @return User name, if no name found returns empty string. + * */ + String lookupForLocalUserIdentity(String originalIdentity) throws IOException; + + /** + * Perform lookup from Security Group's Object ID to Security Group name. + * @param originalIdentity AAD object ID. + * @return Security group name, if no name found returns empty string. + * */ + String lookupForLocalGroupIdentity(String originalIdentity) throws IOException; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java new file mode 100644 index 0000000000000..4c2270a87f100 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; + +/** + * Interface for testing identifiers tracked via TracingContext + * Implemented in TracingHeaderValidator + */ + +public interface Listener { + void callTracingHeaderValidator(String header, TracingHeaderFormat format); + void updatePrimaryRequestID(String primaryRequestID); + Listener getClone(); + void setOperation(FSOperationType operation); +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TextFileBasedIdentityHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TextFileBasedIdentityHandler.java new file mode 100644 index 0000000000000..6e57d0d294944 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TextFileBasedIdentityHandler.java @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azurebfs.utils; + +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.LineIterator; +import org.apache.hadoop.io.IOUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HASH; + + +/** + * {@code TextFileBasedIdentityHandler} is a {@link IdentityHandler} implements + * translation operation which returns identity mapped to AAD identity by + * loading the mapping file from the configured location. Location of the + * mapping file should be configured in {@code core-site.xml}. + *

    + * Each record in the user identity file must be colon-delimited, in the format below. + *

    + * # OBJ_ID:USER_NAME:USER_ID:GROUP_ID:SPI_NAME:APP_ID
    + * 
    + * + * Example: + *
    + * a2b27aec-77bd-46dd-8c8c-39611a333331:user1:11000:21000:spi-user1:abcf86e9-5a5b-49e2-a253-f5c9e2afd4ec
    + * 
    + * + * Each record in the group identity file must be colon-delimited, in the format below. + *
    + * # OBJ_ID:GROUP_NAME:GROUP_ID:SGP_NAME
    + * 
    + * + * Example: + *
    + * 1d23024d-957c-4456-aac1-a57f9e2de914:group1:21000:sgp-group1
    + * 
    + */ +public class TextFileBasedIdentityHandler implements IdentityHandler { + private static final Logger LOG = LoggerFactory.getLogger(TextFileBasedIdentityHandler.class); + + /** + * Expected no of fields in the user mapping file. + */ + private static final int NO_OF_FIELDS_USER_MAPPING = 6; + /** + * Expected no of fields in the group mapping file. + */ + private static final int NO_OF_FIELDS_GROUP_MAPPING = 4; + /** + * Array index for the local username. + * Example: + * a2b27aec-77bd-46dd-8c8c-39611a333331:user1:11000:21000:spi-user1:abcf86e9-5a5b-49e2-a253-f5c9e2afd4ec + */ + private static final int ARRAY_INDEX_FOR_LOCAL_USER_NAME = 1; + /** + * Array index for the security group name. + * Example: + * 1d23024d-957c-4456-aac1-a57f9e2de914:group1:21000:sgp-group1 + */ + private static final int ARRAY_INDEX_FOR_LOCAL_GROUP_NAME = 1; + /** + * Array index for the AAD Service Principal's Object ID. + */ + private static final int ARRAY_INDEX_FOR_AAD_SP_OBJECT_ID = 0; + /** + * Array index for the AAD Security Group's Object ID. + */ + private static final int ARRAY_INDEX_FOR_AAD_SG_OBJECT_ID = 0; + private String userMappingFileLocation; + private String groupMappingFileLocation; + private HashMap userMap; + private HashMap groupMap; + + public TextFileBasedIdentityHandler(String userMappingFilePath, String groupMappingFilePath) { + Preconditions.checkArgument(!Strings.isNullOrEmpty(userMappingFilePath), + "Local User to Service Principal mapping filePath cannot by Null or Empty"); + Preconditions.checkArgument(!Strings.isNullOrEmpty(groupMappingFilePath), + "Local Group to Security Group mapping filePath cannot by Null or Empty"); + this.userMappingFileLocation = userMappingFilePath; + this.groupMappingFileLocation = groupMappingFilePath; + //Lazy Loading + this.userMap = new HashMap<>(); + this.groupMap = new HashMap<>(); + } + + /** + * Perform lookup from Service Principal's Object ID to Local Username. + * @param originalIdentity AAD object ID. 
+ * @return Local User name, if no name found or on exception, returns empty string. + * */ + public synchronized String lookupForLocalUserIdentity(String originalIdentity) throws IOException { + if(Strings.isNullOrEmpty(originalIdentity)) { + return EMPTY_STRING; + } + + if (userMap.size() == 0) { + loadMap(userMap, userMappingFileLocation, NO_OF_FIELDS_USER_MAPPING, ARRAY_INDEX_FOR_AAD_SP_OBJECT_ID); + } + + try { + String username = !Strings.isNullOrEmpty(userMap.get(originalIdentity)) + ? userMap.get(originalIdentity).split(COLON)[ARRAY_INDEX_FOR_LOCAL_USER_NAME] : EMPTY_STRING; + + return username; + } catch (ArrayIndexOutOfBoundsException e) { + LOG.error("Error while parsing the line, returning empty string", e); + return EMPTY_STRING; + } + } + + /** + * Perform lookup from Security Group's Object ID to Local Security Group name. + * @param originalIdentity AAD object ID. + * @return Local Security group name, if no name found or on exception, returns empty string. + * */ + public synchronized String lookupForLocalGroupIdentity(String originalIdentity) throws IOException { + if(Strings.isNullOrEmpty(originalIdentity)) { + return EMPTY_STRING; + } + + if (groupMap.size() == 0) { + loadMap(groupMap, groupMappingFileLocation, NO_OF_FIELDS_GROUP_MAPPING, + ARRAY_INDEX_FOR_AAD_SG_OBJECT_ID); + } + + try { + String groupname = + !Strings.isNullOrEmpty(groupMap.get(originalIdentity)) + ? groupMap.get(originalIdentity).split(COLON)[ARRAY_INDEX_FOR_LOCAL_GROUP_NAME] : EMPTY_STRING; + + return groupname; + } catch (ArrayIndexOutOfBoundsException e) { + LOG.error("Error while parsing the line, returning empty string", e); + return EMPTY_STRING; + } + } + + /** + * Creates the map from the file using the key index. + * @param cache Instance of cache object to store the data. + * @param fileLocation Location of the file to be loaded. + * @param keyIndex Index of the key from the data loaded from the key. 
+ */ + private static void loadMap(HashMap cache, String fileLocation, int noOfFields, int keyIndex) + throws IOException { + LOG.debug("Loading identity map from file {}", fileLocation); + int errorRecord = 0; + File file = new File(fileLocation); + LineIterator it = null; + try { + it = FileUtils.lineIterator(file, "UTF-8"); + while (it.hasNext()) { + String line = it.nextLine(); + if (!Strings.isNullOrEmpty(line.trim()) && !line.startsWith(HASH)) { + if (line.split(COLON).length != noOfFields) { + errorRecord += 1; + continue; + } + cache.put(line.split(COLON)[keyIndex], line); + } + } + LOG.debug("Loaded map stats - File: {}, Loaded: {}, Error: {} ", fileLocation, cache.size(), errorRecord); + } catch (ArrayIndexOutOfBoundsException e) { + LOG.error("Error while parsing mapping file", e); + } finally { + IOUtils.cleanupWithLogger(LOG, it); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java new file mode 100644 index 0000000000000..97864e61e0bea --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -0,0 +1,235 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.util.UUID; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; + +/** + * The TracingContext class to correlate Store requests using unique + * identifiers and resources common to requests (e.g. filesystem, stream) + * + * Implementing new HDFS method: + * Create TracingContext instance in method of outer layer of + * ABFS driver (AzureBlobFileSystem/AbfsInputStream/AbfsOutputStream), to be + * passed through ABFS layers up to AbfsRestOperation. + * + * Add new operations to HdfsOperationConstants file. + * + * PrimaryRequestId can be enabled for individual Hadoop API that invoke + * multiple Store calls. + * + * Testing: + * Pass an instance of TracingHeaderValidator to registerListener() of ABFS + * filesystem/stream class before calling the API in tests. 
+ */ + +public class TracingContext { + private final String clientCorrelationID; // passed over config by client + private final String fileSystemID; // GUID for fileSystem instance + private String clientRequestId = EMPTY_STRING; // GUID per http request + //Optional, non-empty for methods that trigger two or more Store calls + private String primaryRequestId; + private String streamID; // appears per stream instance (read/write ops) + private int retryCount; // retry number as recorded by AbfsRestOperation + private FSOperationType opType; // two-lettered code representing Hadoop op + private final TracingHeaderFormat format; // header ID display options + private Listener listener = null; // null except when testing + //final concatenated ID list set into x-ms-client-request-id header + private String header = EMPTY_STRING; + + /** + * If {@link #primaryRequestId} is null, this field shall be set equal + * to the last part of the {@link #clientRequestId}'s UUID + * in {@link #constructHeader(AbfsHttpOperation, String)} only on the + * first API call for an operation. Subsequent retries for that operation + * will not change this field. In case {@link #primaryRequestId} is non-null, + * this field shall not be set. 
+ */ + private String primaryRequestIdForRetry; + + private static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); + public static final int MAX_CLIENT_CORRELATION_ID_LENGTH = 72; + public static final String CLIENT_CORRELATION_ID_PATTERN = "[a-zA-Z0-9-]*"; + + /** + * Initialize TracingContext + * @param clientCorrelationID Provided over config by client + * @param fileSystemID Unique guid for AzureBlobFileSystem instance + * @param opType Code indicating the high-level Hadoop operation that + * triggered the current Store request + * @param tracingHeaderFormat Format of IDs to be printed in header and logs + * @param listener Holds instance of TracingHeaderValidator during testing, + * null otherwise + */ + public TracingContext(String clientCorrelationID, String fileSystemID, + FSOperationType opType, TracingHeaderFormat tracingHeaderFormat, + Listener listener) { + this.fileSystemID = fileSystemID; + this.opType = opType; + this.clientCorrelationID = clientCorrelationID; + streamID = EMPTY_STRING; + retryCount = 0; + primaryRequestId = EMPTY_STRING; + format = tracingHeaderFormat; + this.listener = listener; + } + + public TracingContext(String clientCorrelationID, String fileSystemID, + FSOperationType opType, boolean needsPrimaryReqId, + TracingHeaderFormat tracingHeaderFormat, Listener listener) { + this(clientCorrelationID, fileSystemID, opType, tracingHeaderFormat, + listener); + primaryRequestId = needsPrimaryReqId ? 
UUID.randomUUID().toString() : ""; + if (listener != null) { + listener.updatePrimaryRequestID(primaryRequestId); + } + } + + public TracingContext(TracingContext originalTracingContext) { + this.fileSystemID = originalTracingContext.fileSystemID; + this.streamID = originalTracingContext.streamID; + this.clientCorrelationID = originalTracingContext.clientCorrelationID; + this.opType = originalTracingContext.opType; + this.retryCount = 0; + this.primaryRequestId = originalTracingContext.primaryRequestId; + this.format = originalTracingContext.format; + if (originalTracingContext.listener != null) { + this.listener = originalTracingContext.listener.getClone(); + } + } + + public static String validateClientCorrelationID(String clientCorrelationID) { + if ((clientCorrelationID.length() > MAX_CLIENT_CORRELATION_ID_LENGTH) + || (!clientCorrelationID.matches(CLIENT_CORRELATION_ID_PATTERN))) { + LOG.debug( + "Invalid config provided; correlation id not included in header."); + return EMPTY_STRING; + } + return clientCorrelationID; + } + + public void setPrimaryRequestID() { + primaryRequestId = UUID.randomUUID().toString(); + if (listener != null) { + listener.updatePrimaryRequestID(primaryRequestId); + } + } + + public void setStreamID(String stream) { + streamID = stream; + } + + public void setOperation(FSOperationType operation) { + this.opType = operation; + } + + public int getRetryCount() { + return retryCount; + } + + public void setRetryCount(int retryCount) { + this.retryCount = retryCount; + } + + public void setListener(Listener listener) { + this.listener = listener; + } + + /** + * Concatenate all identifiers separated by (:) into a string and set into + * X_MS_CLIENT_REQUEST_ID header of the http operation + * @param httpOperation AbfsHttpOperation instance to set header into + * connection + * @param previousFailure Failure seen before this API trigger on same operation + * from AbfsClient. 
+ */ + public void constructHeader(AbfsHttpOperation httpOperation, String previousFailure) { + clientRequestId = UUID.randomUUID().toString(); + switch (format) { + case ALL_ID_FORMAT: // Optional IDs (e.g. streamId) may be empty + header = + clientCorrelationID + ":" + clientRequestId + ":" + fileSystemID + ":" + + getPrimaryRequestIdForHeader(retryCount > 0) + ":" + streamID + + ":" + opType + ":" + retryCount; + header = addFailureReasons(header, previousFailure); + break; + case TWO_ID_FORMAT: + header = clientCorrelationID + ":" + clientRequestId; + break; + default: + header = clientRequestId; //case SINGLE_ID_FORMAT + } + if (listener != null) { //for testing + listener.callTracingHeaderValidator(header, format); + } + httpOperation.setRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID, header); + /* + * In case the primaryRequestId is an empty-string and if it is the first try to + * API call (previousFailure shall be null), maintain the last part of clientRequestId's + * UUID in primaryRequestIdForRetry. This field shall be used as primaryRequestId part + * of the x-ms-client-request-id header in case of retry of the same API-request. + */ + if (primaryRequestId.isEmpty() && previousFailure == null) { + String[] clientRequestIdParts = clientRequestId.split("-"); + primaryRequestIdForRetry = clientRequestIdParts[ + clientRequestIdParts.length - 1]; + } + } + + /** + * Provide value to be used as primaryRequestId part of x-ms-client-request-id header. + * @param isRetry define if it's for a retry case. + * @return {@link #primaryRequestIdForRetry}:If the {@link #primaryRequestId} + * is an empty-string, and it's a retry iteration. + * {@link #primaryRequestId} for other cases. 
+ */ + private String getPrimaryRequestIdForHeader(final Boolean isRetry) { + if (!primaryRequestId.isEmpty() || !isRetry) { + return primaryRequestId; + } + return primaryRequestIdForRetry; + } + + private String addFailureReasons(final String header, + final String previousFailure) { + if (previousFailure == null) { + return header; + } + return String.format("%s_%s", header, previousFailure); + } + + /** + * Return header representing the request associated with the tracingContext + * @return Header string set into X_MS_CLIENT_REQUEST_ID + */ + public String getHeader() { + return header; + } + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java new file mode 100644 index 0000000000000..3f23ae3ed7c14 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.utils; + +public enum TracingHeaderFormat { + SINGLE_ID_FORMAT, // + + TWO_ID_FORMAT, // : + + ALL_ID_FORMAT; // :: + // :::: +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java index 1bbc1b39e1689..e27d54b443ca2 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java @@ -18,14 +18,42 @@ package org.apache.hadoop.fs.azurebfs.utils; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID; + /** * Utility class to help with Abfs url transformation to blob urls. 
*/ public final class UriUtils { private static final String ABFS_URI_REGEX = "[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net"; private static final Pattern ABFS_URI_PATTERN = Pattern.compile(ABFS_URI_REGEX); + private static final Set FULL_MASK_PARAM_KEYS = new HashSet<>( + Collections.singleton(QUERY_PARAM_SIGNATURE)); + private static final Set PARTIAL_MASK_PARAM_KEYS = new HashSet<>( + Arrays.asList(QUERY_PARAM_SKOID, QUERY_PARAM_SAOID, QUERY_PARAM_SUOID)); + private static final Character CHAR_MASK = 'X'; + private static final String FULL_MASK = "XXXXX"; + private static final int DEFAULT_QUERY_STRINGBUILDER_CAPACITY = 550; + private static final int PARTIAL_MASK_VISIBLE_LEN = 18; /** * Checks whether a string includes abfs url. @@ -73,6 +101,74 @@ public static String generateUniqueTestPath() { return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId + "/test"; } + public static String maskUrlQueryParameters(List keyValueList, + Set queryParamsForFullMask, + Set queryParamsForPartialMask) { + return maskUrlQueryParameters(keyValueList, queryParamsForFullMask, + queryParamsForPartialMask, DEFAULT_QUERY_STRINGBUILDER_CAPACITY); + } + + /** + * Generic function to mask a set of query parameters partially/fully and + * return the resultant query string + * @param keyValueList List of NameValuePair instances for query keys/values + * @param queryParamsForFullMask values for these params will appear as "XXXXX" + * @param queryParamsForPartialMask values keep their leading characters; the trailing + * PARTIAL_MASK_VISIBLE_LEN characters (the trailing half, for values no longer than that) are replaced with 'X' + * @param queryLen to initialize StringBuilder for the masked query + * @return the masked url query part + */ + public static String maskUrlQueryParameters(List keyValueList, + Set queryParamsForFullMask, + Set queryParamsForPartialMask, int queryLen) { + StringBuilder maskedUrl = new StringBuilder(queryLen); + for (NameValuePair keyValuePair : keyValueList) { + String key = keyValuePair.getName(); + if
(key.isEmpty()) { + throw new IllegalArgumentException("Query param key should not be empty"); + } + String value = keyValuePair.getValue(); + maskedUrl.append(key); + maskedUrl.append(EQUAL); + if (value != null && !value.isEmpty()) { //no mask + if (queryParamsForFullMask.contains(key)) { + maskedUrl.append(FULL_MASK); + } else if (queryParamsForPartialMask.contains(key)) { + int valueLen = value.length(); + int maskedLen = valueLen > PARTIAL_MASK_VISIBLE_LEN + ? PARTIAL_MASK_VISIBLE_LEN : valueLen / 2; + maskedUrl.append(value, 0, valueLen - maskedLen); + maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen)); + } else { + maskedUrl.append(value); + } + } + maskedUrl.append(AND_MARK); + } + maskedUrl.deleteCharAt(maskedUrl.length() - 1); + return maskedUrl.toString(); + } + + public static String encodedUrlStr(String url) { + try { + return URLEncoder.encode(url, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl"; + } + } + + public static String getMaskedUrl(URL url) { + String queryString = url.getQuery(); + if (queryString == null) { + return url.toString(); + } + List queryKeyValueList = URLEncodedUtils + .parse(queryString, StandardCharsets.UTF_8); + String maskedQueryString = maskUrlQueryParameters(queryKeyValueList, + FULL_MASK_PARAM_KEYS, PARTIAL_MASK_PARAM_KEYS, queryString.length()); + return url.toString().replace(queryString, maskedQueryString); + } + private UriUtils() { } } diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 01c1fbd03b35b..aff1e32b83f2d 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -257,7 +257,8 @@ will have the URL `abfs://container1@abfswales1.dfs.core.windows.net/` You can create a new container through the ABFS connector, by setting the option - `fs.azure.createRemoteFileSystemDuringInitialization` to `true`. 
+ `fs.azure.createRemoteFileSystemDuringInitialization` to `true`. Though the + same is not supported when AuthType is SAS. If the container does not exist, an attempt to list it with `hadoop fs -ls` will fail @@ -313,16 +314,38 @@ driven by them. 1. Using OAuth 2.0 tokens of one form or another. 1. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, "Managed Instance". +1. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. What can be changed is what secrets/credentials are used to authenticate the caller. -The authentication mechanism is set in `fs.azure.account.auth.type` (or the account specific variant), -and, for the various OAuth options `fs.azure.account.oauth.provider.type` +The authentication mechanism is set in `fs.azure.account.auth.type` (or the +account specific variant). The possible values are SharedKey, OAuth, Custom +and SAS. For the various OAuth options use the config +`fs.azure.account.oauth.provider.type`. The following implementations are supported: +ClientCredsTokenProvider, UserPasswordTokenProvider, MsiTokenProvider and +RefreshTokenBasedTokenProvider. An IllegalArgumentException is thrown if +the specified provider type is not one of the supported ones. All secrets can be stored in JCEKS files. These are encrypted and password protected —use them or a compatible Hadoop Key Management Store wherever possible +### AAD Token fetch retries + +The exponential retry policy used for the AAD token fetch retries can be tuned +with the following configurations. +* `fs.azure.oauth.token.fetch.retry.max.retries`: Sets the maximum number of + retries. Default value is 5. +* `fs.azure.oauth.token.fetch.retry.min.backoff.interval`: Minimum back-off + interval. Added to the retry interval computed from delta backoff. By + default this is set as 0. Set the interval in milliseconds.
+* `fs.azure.oauth.token.fetch.retry.max.backoff.interval`: Maximum back-off +interval. Default value is 60000 (sixty seconds). Set the interval in milli +seconds. +* `fs.azure.oauth.token.fetch.retry.delta.backoff`: Back-off interval between +retries. Multiples of this timespan are used for subsequent retry attempts + . The default value is 2. + ### Default: Shared Key This is the simplest authentication mechanism of account + password. @@ -349,6 +372,15 @@ the password, "key", retrieved from the XML/JCECKs configuration files. *Note*: The source of the account key can be changed through a custom key provider; one exists to execute a shell script to retrieve it. +A custom key provider class can be provided with the config +`fs.azure.account.keyprovider`. If a key provider class is specified the same +will be used to get account key. Otherwise the Simple key provider will be used +which will use the key specified for the config `fs.azure.account.key`. + +To retrieve using shell script, specify the path to the script for the config +`fs.azure.shellkeyprovider.script`. ShellDecryptionKeyProvider class use the +script specified to retrieve the key. + ### OAuth 2.0 Client Credentials OAuth 2.0 credentials of (client id, client secret, endpoint) are provided in the configuration/JCEKS file. @@ -464,6 +496,13 @@ With an existing Oauth 2.0 token, make a request of the Active Directory endpoin Refresh token + + fs.azure.account.oauth2.refresh.endpoint + + + Refresh token endpoint + + fs.azure.account.oauth2.client.id @@ -505,6 +544,13 @@ The Azure Portal/CLI is used to create the service identity. Optional MSI Tenant ID + + fs.azure.account.oauth2.msi.endpoint + + + MSI endpoint + + fs.azure.account.oauth2.client.id @@ -539,6 +585,46 @@ token when its `getAccessToken()` method is invoked. The declared class must implement `org.apache.hadoop.fs.azurebfs.extensions.CustomTokenProviderAdaptee` and optionally `org.apache.hadoop.fs.azurebfs.extensions.BoundDTExtension`. 
+The declared class also holds responsibility to implement retry logic while fetching access tokens. + +### Delegation Token Provider + +A delegation token provider supplies the ABFS connector with delegation tokens, +helps renew and cancel the tokens by implementing the +CustomDelegationTokenManager interface. + +```xml + + fs.azure.enable.delegation.token + true + Make this true to use delegation token provider + + + fs.azure.delegation.token.provider.type + {fully-qualified-class-name-for-implementation-of-CustomDelegationTokenManager-interface} + +``` +In case delegation token is enabled, and the config +`fs.azure.delegation.token.provider.type` is not provided then an IllegalArgumentException is thrown. + +### Shared Access Signature (SAS) Token Provider + +A Shared Access Signature (SAS) token provider supplies the ABFS connector with SAS +tokens by implementing the SASTokenProvider interface. + +```xml + + fs.azure.account.auth.type + SAS + + + fs.azure.sas.token.provider.type + {fully-qualified-class-name-for-implementation-of-SASTokenProvider-interface} + +``` + +The declared class must implement `org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. + ## Technical notes ### Proxy setup @@ -590,8 +676,6 @@ cause problems. As with all Azure storage services, the Azure Datalake Gen 2 store offers a fully consistent view of the store, with complete Create, Read, Update, and Delete consistency for data and metadata. -(Compare and contrast with S3 which only offers Create consistency; -S3Guard adds CRUD to metadata, but not the underlying data). ### Performance and Scalability @@ -643,6 +727,28 @@ Consult the javadocs for `org.apache.hadoop.fs.azurebfs.constants.ConfigurationK `org.apache.hadoop.fs.azurebfs.AbfsConfiguration` for the full list of configuration options and their default values. +### Client Correlation Options + +#### 1.
Client CorrelationId Option + +Config `fs.azure.client.correlationid` provides an option to correlate client +requests using this client-provided identifier. This Id will be visible in Azure +Storage Analytics logs in the `request-id-header` field. +Reference: [Storage Analytics log format](https://docs.microsoft.com/en-us/rest/api/storageservices/storage-analytics-log-format) + +This config accepts a string which can be a maximum of 72 characters and should +contain alphanumeric characters and/or hyphens only. Defaults to empty string if +input is invalid. + +#### 2. Correlation IDs Display Options + +Config `fs.azure.tracingcontext.format` provides an option to select the format +of IDs included in the `request-id-header`. This config accepts a String value +corresponding to the following enum options. + `SINGLE_ID_FORMAT` : clientRequestId + `ALL_ID_FORMAT` : all IDs (default) + `TWO_ID_FORMAT` : clientCorrelationId:clientRequestId + ### Flush Options #### 1. Azure Blob File System Flush Options @@ -661,10 +767,204 @@ Hflush() being the only documented API that can provide persistent data transfer, Flush() also attempting to persist buffered data will lead to performance issues. +### Hundred Continue Options + +`fs.azure.account.expect.header.enabled`: This configuration parameter is used +to specify whether you wish to send an Expect: 100-continue header with each +append request or not. It is configured to true by default. This flag configures +the client to check with the Azure store before uploading a block of data from +an output stream. This allows the client to throttle back gracefully before +actually attempting to upload the block. In experiments this provides +significant throughput improvements under heavy load.
For more information : +- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect + + +### Account level throttling Options + +`fs.azure.account.operation.idle.timeout`: This value specifies the time after which the timer for the analyzer (read or +write) should be paused until no new request is made again. The default value for the same is 60 seconds. + +### HNS Check Options +Config `fs.azure.account.hns.enabled` provides an option to specify whether + the storage account is HNS enabled or not. In case the config is not provided, + a server call is made to check the same. + ### Access Options Config `fs.azure.enable.check.access` needs to be set true to enable the AzureBlobFileSystem.access(). +### Operation Idempotency + +Requests failing due to server timeouts and network failures will be retried. +PUT/POST operations are idempotent and need no specific handling +except for Rename and Delete operations. + +Rename idempotency checks are made by ensuring the LastModifiedTime on destination +is recent if source path is found to be non-existent on retry. + +Delete is considered to be idempotent by default if the target does not exist on +retry. + +### Primary User Group Options +The group name which is part of FileStatus and AclStatus will be set the same as +the username if the following config is set to true +`fs.azure.skipUserGroupMetadataDuringInitialization`. + +### IO Options +The following configs are related to read and write operations. + +`fs.azure.io.retry.max.retries`: Sets the number of retries for IO operations. +Currently this is used only for the server call retry logic. Used within +`AbfsClient` class as part of the ExponentialRetryPolicy. The value should be +greater than or equal to 0. + +`fs.azure.io.retry.min.backoff.interval`: Sets the minimum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. 
This +value indicates the smallest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 3000 (3 seconds). + +`fs.azure.io.retry.max.backoff.interval`: Sets the maximum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value indicates the largest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 30000 (30 seconds). + +`fs.azure.io.retry.backoff.interval`: Sets the default backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value is used to compute a random delta between 80% and 120% of the specified +value. This random delta is then multiplied by an exponent of the current IO +retry number (i.e., the default is multiplied by `2^(retryNum - 1)`) and then +constrained within the range of [`fs.azure.io.retry.min.backoff.interval`, +`fs.azure.io.retry.max.backoff.interval`] to determine the amount of time to +wait before the next IO retry attempt. The default value is 3000 (3 seconds). + +`fs.azure.write.request.size`: To set the write buffer size. Specify the value +in bytes. The value should be between 16384 to 104857600 both inclusive (16 KB +to 100 MB). The default value will be 8388608 (8 MB). + +`fs.azure.read.request.size`: To set the read buffer size. Specify the value in +bytes. The value should be between 16384 to 104857600 both inclusive (16 KB to +100 MB). The default value will be 4194304 (4 MB). + +`fs.azure.read.alwaysReadBufferSize`: Read request size configured by +`fs.azure.read.request.size` will be honoured only when the reads done are in +sequential pattern. When the read pattern is detected to be random, read size +will be same as the buffer length provided by the calling process.
+This config when set to true will force random reads to also read in same +request sizes as sequential reads. This is a means to have same read patterns +as of ADLS Gen1, as it does not differentiate read patterns and always reads by +the configured read request size. The default value for this config will be +false, where reads for the provided buffer length is done when random read +pattern is detected. + +`fs.azure.readaheadqueue.depth`: Sets the readahead queue depth in +AbfsInputStream. In case the set value is negative the read ahead queue depth +will be set as Runtime.getRuntime().availableProcessors(). By default the value +will be 2. To disable readaheads, set this value to 0. If your workload is + doing only random reads (non-sequential) or you are seeing throttling, you + may try setting this value to 0. + +`fs.azure.read.readahead.blocksize`: To set the read buffer size for the read +aheads. Specify the value in bytes. The value should be between 16384 to +104857600 both inclusive (16 KB to 100 MB). The default value will be +4194304 (4 MB). + +`fs.azure.buffered.pread.disable`: By default the positional read API will do a +seek and read on input stream. This read will fill the buffer cache in +AbfsInputStream and update the cursor positions. If this optimization is true +it will skip usage of buffer and do a lock free REST call for reading from blob. +This optimization is very much helpful for HBase kind of short random read over +a shared AbfsInputStream instance. +Note: This is not a config which can be set at cluster level. It can be used as +an option on FutureDataInputStreamBuilder. +See FileSystem#openFile(Path path) + +To run under limited memory situations configure the following. Especially +when there are too many writes from the same process. + +`fs.azure.write.max.concurrent.requests`: To set the maximum concurrent + write requests from an AbfsOutputStream instance to server at any point of + time. 
Effectively this will be the threadpool size within the + AbfsOutputStream instance. Set the value in between 1 to 8 both inclusive. + +`fs.azure.write.max.requests.to.queue`: To set the maximum write requests + that can be queued. Memory consumption of AbfsOutputStream instance can be + tuned with this config considering each queued request holds a buffer. Set + the value 3 or 4 times the value set for `fs.azure.write.max.concurrent.requests`. + +`fs.azure.analysis.period`: The time after which sleep duration is recomputed after analyzing metrics. The default value +for the same is 10 seconds. + +### Security Options +`fs.azure.always.use.https`: Enforces to use HTTPS instead of HTTP when the flag +is made true. Irrespective of the flag, `AbfsClient` will use HTTPS if the secure +scheme (ABFSS) is used or OAuth is used for authentication. By default this will +be set to true. + +`fs.azure.ssl.channel.mode`: Initializing DelegatingSSLSocketFactory with the +specified SSL channel mode. Value should be of the enum +DelegatingSSLSocketFactory.SSLChannelMode. The default value will be +DelegatingSSLSocketFactory.SSLChannelMode.Default. + +### Server Options +When the config `fs.azure.io.read.tolerate.concurrent.append` is made true, the +If-Match header sent to the server for read calls will be set as * otherwise the +same will be set with ETag. This is basically a mechanism in place to handle the +reads with optimistic concurrency. +Please refer the following links for further information. +1. https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/read +2. https://azure.microsoft.com/de-de/blog/managing-concurrency-in-microsoft-azure-storage-2/ + +listStatus API fetches the FileStatus information from server in a page by page +manner. The config `fs.azure.list.max.results` is used to set the maxResults URI + param which sets the pagesize(maximum results per call). The value should + be > 0. By default this will be 5000.
Server has a maximum value for this + parameter as 5000. So even if the config is above 5000 the response will only +contain 5000 entries. Please refer the following link for further information. +https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/list + +### Throttling Options +ABFS driver has the capability to throttle read and write operations to achieve +maximum throughput by minimizing errors. The errors occur when the account +ingress or egress limits are exceeded and, the server-side throttles requests. +Server-side throttling causes the retry policy to be used, but the retry policy +sleeps for long periods of time causing the total ingress or egress throughput +to be as much as 35% lower than optimal. The retry policy is also after the +fact, in that it applies after a request fails. On the other hand, the +client-side throttling implemented here happens before requests are made and +sleeps just enough to minimize errors, allowing optimal ingress and/or egress +throughput. By default the throttling mechanism is enabled in the driver. The +same can be disabled by setting the config `fs.azure.enable.autothrottling` +to false. + +### Rename Options +`fs.azure.atomic.rename.key`: Directories for atomic rename support can be +specified comma separated in this config. The driver prints the following +warning log if the source of the rename belongs to one of the configured +directories. "The atomic rename feature is not supported by the ABFS scheme +; however, rename, create and delete operations are atomic if Namespace is +enabled for your Azure Storage account." +The directories can be specified as comma separated values. By default the value +is "/hbase" + +### Infinite Lease Options +`fs.azure.infinite-lease.directories`: Directories for infinite lease support +can be specified comma separated in this config. By default, multiple +clients will be able to write to the same file simultaneously. 
When writing +to files contained within the directories specified in this config, the +client will obtain a lease on the file that will prevent any other clients +from writing to the file. When the output stream is closed, the lease will be +released. To revoke a client's write access for a file, the +AzureBlobFilesystem breakLease method may be called. If the client dies +before the file can be closed and the lease released, breakLease will need to +be called before another client will be able to write to the file. + +`fs.azure.lease.threads`: This is the size of the thread pool that will be +used for lease operations for infinite lease directories. By default the value +is 0, so it must be set to at least 1 to support infinite lease directories. + ### Perf Options #### 1. HTTP Request Tracking Options diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/index.md b/hadoop-tools/hadoop-azure/src/site/markdown/index.md index 11d0a18b5585d..2af6b498a2743 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/index.md @@ -545,6 +545,17 @@ The maximum number of entries that that cache can hold can be customized using t ``` +### Performance optimization configurations + +`fs.azure.block.blob.buffered.pread.disable`: By default the positional read API will do a +seek and read on input stream. This read will fill the buffer cache in +BlockBlobInputStream. If this configuration is true it will skip usage of buffer and do a +lock free call for reading from blob. This optimization is very much helpful for HBase kind +of short random read over a shared InputStream instance. +Note: This is not a config which can be set at cluster level. It can be used as +an option on FutureDataInputStreamBuilder. +See FileSystem#openFile(Path path) + ## Further Reading * [Testing the Azure WASB client](testing_azure.html). 
diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md index a26da839f0605..933f86be3e896 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -448,7 +448,7 @@ use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical. -#### Subclasses Existing Shared Base Blasses +#### Subclasses Existing Shared Base Classes There are a set of base classes which should be extended for Azure tests and integration tests. @@ -592,6 +592,61 @@ with the Hadoop Distributed File System permissions model when hierarchical namespace is enabled for the storage account. Furthermore, the metadata and data produced by ADLS Gen 2 REST API can be consumed by Blob REST API, and vice versa. +## Generating test run configurations and test triggers over various config combinations + +To simplify the testing across various authentication and features combinations +that are mandatory for a PR, script `dev-support/testrun-scripts/runtests.sh` +should be used. Once the script is updated with relevant config settings for +various test combinations, it will: +1. Auto-generate configs specific to each test combinations +2. Run tests for all combinations +3. Summarize results across all the test combination runs. + +As a pre-requisite step, fill config values for test accounts and credentials +needed for authentication in `src/test/resources/azure-auth-keys.xml.template` +and rename as `src/test/resources/azure-auth-keys.xml`. + +**To add a new test combination:** Templates for mandatory test combinations +for PR validation are present in `dev-support/testrun-scripts/runtests.sh`. +If a new one needs to be added, add a combination set within +`dev-support/testrun-scripts/runtests.sh` similar to the ones already defined +and +1. 
Provide a new combination name +2. Update properties and values array which need to be effective for the test +combination +3. Call generateconfigs + +**To run PR validation:** Running command +* `dev-support/testrun-scripts/runtests.sh` will generate configurations for +each of the combinations defined and run tests for all the combinations. +* `dev-support/testrun-scripts/runtests.sh -c {combinationname}` Specific +combinations can be provided with -c option. If combinations are provided +with -c option, tests for only those combinations will be run. + +**Test logs:** Test runs will create a folder within dev-support/testlogs to +save the test logs. Folder name will be the test start timestamp. The mvn verify +command line logs for each combination will be saved into a file as +Test-Logs-$combination.txt into this folder. In case of any failures, this file +will have the failure exception stack. At the end of the test run, the +consolidated results of all the combination runs will be saved into a file as +Test-Results.log in the same folder. When run for PR validation, the +consolidated test results needs to be pasted into the PR comment section. + +**To generate config for use in IDE:** Running command with -a (activate) option +`dev-support/testrun-scripts/runtests.sh -a {combination name}` will update +the effective config relevant for the specific test combination. Hence the same +config files used by the mvn test runs can be used for IDE without any manual +updates needed within config file. + +**Other command line options:** +* -a Specify the combination name which needs to be +activated. This is to be used to generate config for use in IDE. +* -c Specify the combination name for test runs. If this +config is specified, tests for only the specified combinations will run. All +combinations of tests will be running if this config is not specified. +* -t ABFS mvn tests are run in parallel mode. Tests by default +are run with 8 thread count. 
It can be changed by providing -t {thread count} + In order to test ABFS, please add the following configuration to your `src/test/resources/azure-auth-keys.xml` file. Note that the ABFS tests include compatibility tests which require WASB credentials, in addition to the ABFS @@ -646,7 +701,7 @@ hierarchical namespace enabled, and set the following configuration settings: <property> <name>fs.azure.account.auth.type.{YOUR_ABFS_ACCOUNT_NAME}</name> <value>{AUTH TYPE}</value> - <description>The authorization type can be SharedKey, OAuth, or Custom. The + <description>The authorization type can be SharedKey, OAuth, Custom or SAS. The default is SharedKey.</description> </property> @@ -793,6 +848,136 @@ hierarchical namespace enabled, and set the following configuration settings: --> +``` +To run Delegation SAS test cases you must use a storage account with the +hierarchical namespace enabled and set the following configuration settings: + +```xml + + + + + <property> + <name>fs.azure.sas.token.provider.type</name> + <value>org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider</value> + <description>The fully qualified class name of the SAS token provider implementation.</description> + </property> + + <property> + <name>fs.azure.test.app.service.principal.tenant.id</name> + <value>{TID}</value> + <description>Tenant ID for the application's service principal.</description> + </property> + + <property> + <name>fs.azure.test.app.service.principal.object.id</name> + <value>{OID}</value> + <description>Object ID for the application's service principal.</description> + </property> + + <property> + <name>fs.azure.test.app.id</name> + <value>{app id}</value> + <description>The application's ID, also known as the client id.</description> + </property> + + <property> + <name>fs.azure.test.app.secret</name> + <value>{client secret}</value> + <description>The application's secret, also known as the client secret.</description> + </property> + + +``` + +To run CheckAccess test cases you must register an app with no RBAC and set +the following configurations. +```xml + + + + <property> + <name>fs.azure.enable.check.access</name> + <value>true</value> + <description>By default the check access will be on. Checkaccess can + be turned off by changing this flag to false.</description>
+ </property> + <property> + <name>fs.azure.account.test.oauth2.client.id</name> + <value>{client id}</value> + <description>The client id(app id) for the app created on step 1</description> + </property> + <property> + <name>fs.azure.account.test.oauth2.client.secret</name> + <value>{client secret}</value> + <description> +The client secret(application's secret) for the app created on step 1</description> + </property> + <property> + <name>fs.azure.check.access.testuser.guid</name> + <value>{guid}</value> + <description>The guid fetched on step 2</description> + </property> + <property> + <name>fs.azure.account.oauth2.client.endpoint.{account name}.dfs.core +.windows.net</name> + <value>https://login.microsoftonline.com/{TENANTID}/oauth2/token</value> + <description> +Token endpoint. This can be found through the Azure portal. As part of the CheckAccess +test cases, access will be tested for an FS instance created with the +above-mentioned client credentials, so this configuration is necessary to +create the test FS instance. + </description> + </property> + ``` If running tests against an endpoint that uses the URL format diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java index e420dabb5d0da..5d2d5d4afdc3f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java @@ -65,7 +65,7 @@ public final class AzureBlobStorageTestAccount implements AutoCloseable, public static final String ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key."; public static final String TEST_ACCOUNT_NAME_PROPERTY_NAME = "fs.azure.account.name"; public static final String WASB_TEST_ACCOUNT_NAME_WITH_DOMAIN = "fs.azure.wasb.account.name"; - public static final String MOCK_ACCOUNT_NAME = "mockAccount.blob.core.windows.net"; + public static final String MOCK_ACCOUNT_NAME = "mockAccount-c01112a3-2a23-433e-af2a-e808ea385136.blob.core.windows.net"; public static final String WASB_ACCOUNT_NAME_DOMAIN_SUFFIX = ".blob.core.windows.net"; public static final String WASB_ACCOUNT_NAME_DOMAIN_SUFFIX_REGEX = 
"\\.blob(\\.preprod)?\\.core\\.windows\\.net"; public static final String MOCK_CONTAINER_NAME = "mockContainer"; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java index 07a13df11f3ce..cea11c0380e31 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FutureDataInputStreamBuilder; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; import org.apache.hadoop.fs.azure.integration.AzureTestUtils; @@ -306,6 +307,61 @@ private void verifyConsistentReads(FSDataInputStream inputStreamV1, assertArrayEquals("Mismatch in read data", bufferV1, bufferV2); } + @Test + public void test_202_PosReadTest() throws Exception { + assumeHugeFileExists(); + FutureDataInputStreamBuilder builder = accountUsingInputStreamV2 + .getFileSystem().openFile(TEST_FILE_PATH); + builder.opt(AzureNativeFileSystemStore.FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE, true); + try ( + FSDataInputStream inputStreamV1 + = accountUsingInputStreamV1.getFileSystem().open(TEST_FILE_PATH); + FSDataInputStream inputStreamV2 + = accountUsingInputStreamV2.getFileSystem().open(TEST_FILE_PATH); + FSDataInputStream inputStreamV2NoBuffer = builder.build().get(); + ) { + final int bufferSize = 4 * KILOBYTE; + byte[] bufferV1 = new byte[bufferSize]; + byte[] bufferV2 = new byte[bufferSize]; + byte[] bufferV2NoBuffer = new byte[bufferSize]; + + verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, 0, + bufferV1, bufferV2, bufferV2NoBuffer); + + int pos = 2 * 
KILOBYTE; + verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos, + bufferV1, bufferV2, bufferV2NoBuffer); + + pos = 10 * KILOBYTE; + verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos, + bufferV1, bufferV2, bufferV2NoBuffer); + + pos = 4100 * KILOBYTE; + verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos, + bufferV1, bufferV2, bufferV2NoBuffer); + } + } + + private void verifyConsistentReads(FSDataInputStream inputStreamV1, + FSDataInputStream inputStreamV2, FSDataInputStream inputStreamV2NoBuffer, + int pos, byte[] bufferV1, byte[] bufferV2, byte[] bufferV2NoBuffer) + throws IOException { + int size = bufferV1.length; + int numBytesReadV1 = inputStreamV1.read(pos, bufferV1, 0, size); + assertEquals("Bytes read from V1 stream", size, numBytesReadV1); + + int numBytesReadV2 = inputStreamV2.read(pos, bufferV2, 0, size); + assertEquals("Bytes read from V2 stream", size, numBytesReadV2); + + int numBytesReadV2NoBuffer = inputStreamV2NoBuffer.read(pos, + bufferV2NoBuffer, 0, size); + assertEquals("Bytes read from V2 stream (buffered pread disabled)", size, + numBytesReadV2NoBuffer); + + assertArrayEquals("Mismatch in read data", bufferV1, bufferV2); + assertArrayEquals("Mismatch in read data", bufferV2, bufferV2NoBuffer); + } + /** * Validates the implementation of InputStream.markSupported. 
* @throws IOException diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java index 1c868ea0ff1e6..2c99b84394f82 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java @@ -20,6 +20,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.junit.Assert; @@ -130,15 +131,56 @@ public void testConcurrentDeleteFile() throws Exception { } } + /** + * Validate the bug fix for HADOOP-17089. Please note that we were never + * able to reproduce this except during a Spark job that ran for multiple days + * and in a hacked-up azure-storage SDK that added sleep before and after + * the call to factory.setNamespaceAware(true) as shown in the description of + * + * @see https://github.com/Azure/azure-storage-java/pull/546 + */ + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testConcurrentList() throws Exception { + final Path testDir = new Path("/tmp/data-loss/11230174258112/_temporary/0/_temporary/attempt_20200624190514_0006_m_0"); + final Path testFile = new Path(testDir, "part-00004-15ea87b1-312c-4fdf-1820-95afb3dfc1c3-a010.snappy.parquet"); + fs.create(testFile).close(); + List tasks = new ArrayList<>(THREAD_COUNT); + + for (int i = 0; i < THREAD_COUNT; i++) { + tasks.add(new ListTask(fs, testDir)); + } + + ExecutorService es = null; + try { + es = Executors.newFixedThreadPool(THREAD_COUNT); + + List> futures = es.invokeAll(tasks); + + for (Future future : futures) { + Assert.assertTrue(future.isDone()); + + // we are using Callable, so if an exception + // occurred during the 
operation, it will be thrown + // when we call get + long fileCount = future.get(); + assertEquals("The list should always contain 1 file.", 1, fileCount); + } + } finally { + if (es != null) { + es.shutdownNow(); + } + } + } + abstract class FileSystemTask implements Callable { private final FileSystem fileSystem; private final Path path; - protected FileSystem getFileSystem() { + FileSystem getFileSystem() { return this.fileSystem; } - protected Path getFilePath() { + Path getFilePath() { return this.path; } @@ -182,4 +224,17 @@ public Void call() throws Exception { return null; } } + + class ListTask extends FileSystemTask { + ListTask(FileSystem fs, Path p) { + super(fs, p); + } + + public Integer call() throws Exception { + FileSystem fs = getFileSystem(); + Path p = getFilePath(); + FileStatus[] files = fs.listStatus(p); + return files.length; + } + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java index b8edc4b7d6586..835b82c3c1924 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java @@ -27,8 +27,6 @@ import com.microsoft.azure.storage.blob.BlockListingFilter; import com.microsoft.azure.storage.blob.CloudBlockBlob; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -39,8 +37,9 @@ import org.hamcrest.core.IsNot; import org.junit.Test; -import static org.junit.Assert.*; -import static org.junit.Assume.assumeNotNull; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertCapabilities; +import static 
org.apache.hadoop.fs.contract.ContractTestUtils.assertHasStreamCapabilities; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertLacksStreamCapabilities; /** * Test semantics of functions flush, hflush, hsync, and close for block blobs, @@ -192,11 +191,14 @@ public void testPageBlobClose() throws IOException { public void testPageBlobCapabilities() throws IOException { Path path = getBlobPathWithTestName(PAGE_BLOB_DIR); try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(stream.hasCapability(StreamCapabilities.HFLUSH)); - assertTrue(stream.hasCapability(StreamCapabilities.HSYNC)); - assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); - assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); - assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + assertCapabilities(stream, + new String[]{ + StreamCapabilities.HFLUSH, + StreamCapabilities.HSYNC, + StreamCapabilities.DROPBEHIND, + StreamCapabilities.READAHEAD, + StreamCapabilities.UNBUFFER}, + null); stream.write(getRandomBytes()); } } @@ -285,11 +287,12 @@ public void testBlockBlobClose() throws IOException { public void testBlockBlobCapabilities() throws IOException { Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR); try (FSDataOutputStream stream = fs.create(path)) { - assertFalse(stream.hasCapability(StreamCapabilities.HFLUSH)); - assertFalse(stream.hasCapability(StreamCapabilities.HSYNC)); - assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); - assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); - assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + assertLacksStreamCapabilities(stream, + StreamCapabilities.HFLUSH, + StreamCapabilities.HSYNC, + StreamCapabilities.DROPBEHIND, + StreamCapabilities.READAHEAD, + StreamCapabilities.UNBUFFER); stream.write(getRandomBytes()); } } @@ -381,11 +384,12 @@ public void testBlockBlobCompactionClose() throws IOException { public void testBlockBlobCompactionCapabilities() 
throws IOException { Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR); try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(stream.hasCapability(StreamCapabilities.HFLUSH)); - assertTrue(stream.hasCapability(StreamCapabilities.HSYNC)); - assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); - assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); - assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + assertHasStreamCapabilities(stream, + StreamCapabilities.HFLUSH, + StreamCapabilities.HSYNC, + StreamCapabilities.DROPBEHIND, + StreamCapabilities.READAHEAD, + StreamCapabilities.UNBUFFER); stream.write(getRandomBytes()); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobOutputStream.java new file mode 100644 index 0000000000000..416143d3f0add --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobOutputStream.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azure; + +import java.io.IOException; +import java.util.EnumSet; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.test.LambdaTestUtils; + +public class ITestPageBlobOutputStream extends AbstractWasbTestBase { + + private static final Path TEST_FILE_PATH = new Path( + "TestPageBlobOutputStream.txt"); + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + Configuration conf = new Configuration(); + // Configure the page blob directories key so every file created is a page + // blob. + conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/"); + return AzureBlobStorageTestAccount.create("testpagebloboutputstream", + EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), + conf, true); + } + + @Test + public void testHflush() throws Exception { + Path path = fs.makeQualified(TEST_FILE_PATH); + FSDataOutputStream os = fs.create(path); + os.write(1); + os.hflush(); + // Delete the blob so that Azure call will fail. + fs.delete(path, false); + os.write(2); + LambdaTestUtils.intercept(IOException.class, + "The specified blob does not exist", () -> { + os.hflush(); + }); + LambdaTestUtils.intercept(IOException.class, + "The specified blob does not exist", () -> { + os.close(); + }); + } + + @Test + public void testHsync() throws Exception { + Path path = fs.makeQualified(TEST_FILE_PATH); + FSDataOutputStream os = fs.create(path); + os.write(1); + os.hsync(); + // Delete the blob so that Azure call will fail. 
+ fs.delete(path, false); + os.write(2); + LambdaTestUtils.intercept(IOException.class, + "The specified blob does not exist", () -> { + os.hsync(); + }); + LambdaTestUtils.intercept(IOException.class, + "The specified blob does not exist", () -> { + os.close(); + }); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java index 982e92bb31a2f..7398e521bc51b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java @@ -655,9 +655,7 @@ public void testCanonicalServiceName() throws Exception { // because the mock container does not exist, this call is expected to fail. intercept(IllegalArgumentException.class, "java.net.UnknownHostException", - () -> { - fs0.getCanonicalServiceName(); - }); + () -> fs0.getCanonicalServiceName()); conf.setBoolean(RETURN_URI_AS_CANONICAL_SERVICE_NAME_PROPERTY_NAME, true); FileSystem fs1 = FileSystem.newInstance(defaultUri, conf); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java index b8971c488c45a..7ddeabe242ef6 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java @@ -25,6 +25,8 @@ import java.util.HashMap; import java.util.Map; +import static java.util.Objects.requireNonNull; + /** * A simple memory key-value store to help mock the Windows Azure Storage * implementation for unit testing. 
@@ -163,7 +165,10 @@ public synchronized boolean exists(String key) { @SuppressWarnings("unchecked") public synchronized HashMap getMetadata(String key) { - return (HashMap) blobs.get(key).metadata.clone(); + Entry entry = requireNonNull(blobs.get(key), "entry for " + key); + return (HashMap) requireNonNull(entry.metadata, + "metadata for " + key) + .clone(); } public synchronized HashMap getContainerMetadata() { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java index 1739cff76d400..6d11207c479b5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java @@ -37,6 +37,7 @@ import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.net.URLCodec; import org.apache.commons.lang3.NotImplementedException; +import org.apache.hadoop.fs.Path; import org.apache.http.client.utils.URIBuilder; import com.microsoft.azure.storage.AccessCondition; @@ -137,9 +138,20 @@ private static String convertUriToDecodedString(URI uri) { private static URI convertKeyToEncodedUri(String key) { try { - return new URIBuilder().setPath(key).build(); + Path p = new Path(key); + URI unEncodedURI = p.toUri(); + return new URIBuilder().setPath(unEncodedURI.getPath()) + .setScheme(unEncodedURI.getScheme()).build(); } catch (URISyntaxException e) { - throw new AssertionError("Failed to encode key: " + key); + int i = e.getIndex(); + String details; + if (i >= 0) { + details = " -- \"" + e.getInput().charAt(i) + "\""; + } else { + details = ""; + } + throw new AssertionError("Failed to encode key: " + key + + ": " + e + details); } } @@ -148,8 +160,8 @@ public CloudBlobContainerWrapper getContainerReference(String name) throws URISyntaxException, StorageException { String fullUri; 
URIBuilder builder = new URIBuilder(baseUriString); - fullUri = builder.setPath(builder.getPath() + "/" + name).toString(); - + String path = builder.getPath() == null ? "" : builder.getPath() + "/"; + fullUri = builder.setPath(path + name).toString(); MockCloudBlobContainerWrapper container = new MockCloudBlobContainerWrapper( fullUri, name); // Check if we have a pre-existing container with that name, and prime diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java index 30c102839cb1e..832e7ec05a0af 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java @@ -202,8 +202,10 @@ public void testPermissionMetadata() throws Exception { Path selfishFile = new Path("/noOneElse"); fs.create(selfishFile, justMe, true, 4096, fs.getDefaultReplication(), fs.getDefaultBlockSize(), null).close(); + String mockUri = AzureBlobStorageTestAccount.toMockUri(selfishFile); + assertNotNull("converted URI", mockUri); HashMap metadata = backingStore - .getMetadata(AzureBlobStorageTestAccount.toMockUri(selfishFile)); + .getMetadata(mockUri); assertNotNull(metadata); String storedPermission = metadata.get("hdi_permission"); assertEquals(getExpectedPermissionString("rw-------"), storedPermission); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java index 4c618275e7e36..2f1c90286571b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java @@ -45,7 +45,7 
@@ import org.junit.Test; import org.junit.rules.ExpectedException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_USE_SECURE_MODE; import static org.apache.hadoop.fs.azure.CachingAuthorizer.KEY_AUTH_SERVICE_CACHING_ENABLE; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestSyncableDataOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestSyncableDataOutputStream.java new file mode 100644 index 0000000000000..c8c6d93f49d9a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestSyncableDataOutputStream.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azure; + +import java.io.IOException; +import java.io.OutputStream; + +import org.junit.Test; + +import org.apache.hadoop.test.LambdaTestUtils; + +public class TestSyncableDataOutputStream { + + @Test + public void testCloseWhenFlushThrowingIOException() throws Exception { + MockOutputStream out = new MockOutputStream(); + SyncableDataOutputStream sdos = new SyncableDataOutputStream(out); + out.flushThrowIOE = true; + LambdaTestUtils.intercept(IOException.class, "An IOE from flush", () -> sdos.close()); + MockOutputStream out2 = new MockOutputStream(); + out2.flushThrowIOE = true; + LambdaTestUtils.intercept(IOException.class, "An IOE from flush", () -> { + try (SyncableDataOutputStream sdos2 = new SyncableDataOutputStream(out2)) { + } + }); + } + + private static class MockOutputStream extends OutputStream { + + private boolean flushThrowIOE = false; + private IOException lastException = null; + + @Override + public void write(int arg0) throws IOException { + + } + + @Override + public void flush() throws IOException { + if (this.flushThrowIOE) { + this.lastException = new IOException("An IOE from flush"); + throw this.lastException; + } + } + + @Override + public void close() throws IOException { + if (this.lastException != null) { + throw this.lastException; + } + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java index 82907c5747582..231c54825f229 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java @@ -20,6 +20,8 @@ import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_WRITE_BUFFER_SIZE; + /** * Constants for the 
Azure tests. */ @@ -175,4 +177,15 @@ public interface AzureTestConstants { * Base directory for page blobs. */ Path PAGE_BLOB_DIR = new Path("/" + DEFAULT_PAGE_BLOB_DIRECTORY); + + /** + * Huge file for testing AbfsOutputStream uploads: {@value} + */ + String AZURE_SCALE_HUGE_FILE_UPLOAD = AZURE_SCALE_TEST + "huge.upload"; + + /** + * Default value for Huge file to be tested for AbfsOutputStream uploads: + * {@value} + */ + int AZURE_SCALE_HUGE_FILE_UPLOAD_DEFAULT = 2 * DEFAULT_WRITE_BUFFER_SIZE; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index a42648fc18597..74655fd573620 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.URI; import java.util.Hashtable; +import java.util.Map; import java.util.UUID; import java.util.concurrent.Callable; @@ -29,20 +30,30 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import 
org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore; import org.apache.hadoop.fs.azure.NativeAzureFileSystem; import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; import static org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.WASB_ACCOUNT_NAME_DOMAIN_SUFFIX; @@ -75,6 +86,8 @@ public abstract class AbstractAbfsIntegrationTest extends private String testUrl; private AuthType authType; private boolean useConfiguredFileSystem = false; + private boolean usingFilesystemForSASTests = false; + private static final int SHORTENED_GUID_LEN = 12; protected AbstractAbfsIntegrationTest() throws Exception { fileSystemName = TEST_CONTAINER_PREFIX + UUID.randomUUID().toString(); @@ -116,6 +129,10 @@ protected AbstractAbfsIntegrationTest() throws Exception { this.testUrl = defaultUri.toString(); abfsConfig.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, defaultUri.toString()); abfsConfig.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, true); + if ("true".equals(abfsConfig.get(FS_AZURE_TEST_APPENDBLOB_ENABLED))) { + String appendblobDirs = this.testUrl + "," + abfsConfig.get(FS_AZURE_CONTRACT_TEST_URI); + rawConfig.set(FS_AZURE_APPEND_BLOB_KEY, appendblobDirs); + } // For testing purposes, an IP address and port may be provided to override // the host specified in the FileSystem URI. 
Also note that the format of // the Azure Storage Service URI changes from @@ -129,17 +146,52 @@ protected AbstractAbfsIntegrationTest() throws Exception { } } + protected boolean getIsNamespaceEnabled(AzureBlobFileSystem fs) + throws IOException { + return fs.getIsNamespaceEnabled(getTestTracingContext(fs, false)); + } + + public static TracingContext getSampleTracingContext(AzureBlobFileSystem fs, + boolean needsPrimaryReqId) { + String correlationId, fsId; + TracingHeaderFormat format; + correlationId = "test-corr-id"; + fsId = "test-filesystem-id"; + format = TracingHeaderFormat.ALL_ID_FORMAT; + return new TracingContext(correlationId, fsId, + FSOperationType.TEST_OP, needsPrimaryReqId, format, null); + } + + public TracingContext getTestTracingContext(AzureBlobFileSystem fs, + boolean needsPrimaryReqId) { + String correlationId, fsId; + TracingHeaderFormat format; + if (fs == null) { + correlationId = "test-corr-id"; + fsId = "test-filesystem-id"; + format = TracingHeaderFormat.ALL_ID_FORMAT; + } else { + AbfsConfiguration abfsConf = fs.getAbfsStore().getAbfsConfiguration(); + correlationId = abfsConf.getClientCorrelationId(); + fsId = fs.getFileSystemId(); + format = abfsConf.getTracingHeaderFormat(); + } + return new TracingContext(correlationId, fsId, + FSOperationType.TEST_OP, needsPrimaryReqId, format, null); + } @Before public void setup() throws Exception { //Create filesystem first to make sure getWasbFileSystem() can return an existing filesystem. 
createFileSystem(); - // Only live account without namespace support can run ABFS&WASB compatibility tests - if (!isIPAddress - && (abfsConfig.getAuthType(accountName) != AuthType.SAS) - && !abfs.getIsNamespaceEnabled()) { - final URI wasbUri = new URI(abfsUrlToWasbUrl(getTestUrl())); + // Only live account without namespace support can run ABFS&WASB + // compatibility tests + if (!isIPAddress && (abfsConfig.getAuthType(accountName) != AuthType.SAS) + && !abfs.getIsNamespaceEnabled(getTestTracingContext( + getFileSystem(), false))) { + final URI wasbUri = new URI( + abfsUrlToWasbUrl(getTestUrl(), abfsConfig.isHttpsAlwaysUsed())); final AzureNativeFileSystemStore azureNativeFileSystemStore = new AzureNativeFileSystemStore(); @@ -170,17 +222,23 @@ public void teardown() throws Exception { if (abfs == null) { return; } + TracingContext tracingContext = getTestTracingContext(getFileSystem(), false); - // Delete all uniquely created filesystem from the account - if (!useConfiguredFileSystem) { + if (usingFilesystemForSASTests) { + abfsConfig.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey.name()); + AzureBlobFileSystem tempFs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig); + tempFs.getAbfsStore().deleteFilesystem(tracingContext); + } + else if (!useConfiguredFileSystem) { + // Delete all uniquely created filesystem from the account final AzureBlobFileSystemStore abfsStore = abfs.getAbfsStore(); - abfsStore.deleteFilesystem(); + abfsStore.deleteFilesystem(tracingContext); AbfsRestOperationException ex = intercept(AbfsRestOperationException.class, new Callable>() { @Override public Hashtable call() throws Exception { - return abfsStore.getFilesystemProperties(); + return abfsStore.getFilesystemProperties(tracingContext); } }); if (FILE_SYSTEM_NOT_FOUND.getStatusCode() != ex.getStatusCode()) { @@ -195,6 +253,9 @@ public Hashtable call() throws Exception { } } + public AccessTokenProvider getAccessTokenProvider(final AzureBlobFileSystem fs) { + 
return ITestAbfsClient.getAccessTokenProvider(fs.getAbfsStore().getClient()); + } public void loadConfiguredFileSystem() throws Exception { // disable auto-creation of filesystem @@ -221,6 +282,18 @@ public void loadConfiguredFileSystem() throws Exception { useConfiguredFileSystem = true; } + protected void createFilesystemForSASTests() throws Exception { + // The SAS tests do not have permission to create a filesystem + // so first create temporary instance of the filesystem using SharedKey + // then re-use the filesystem it creates with SAS auth instead of SharedKey. + try (AzureBlobFileSystem tempFs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig)){ + ContractTestUtils.assertPathExists(tempFs, "This path should exist", + new Path("/")); + abfsConfig.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SAS.name()); + usingFilesystemForSASTests = true; + } + } + public AzureBlobFileSystem getFileSystem() throws IOException { return abfs; } @@ -270,6 +343,11 @@ protected String getTestUrl() { protected void setFileSystemName(String fileSystemName) { this.fileSystemName = fileSystemName; } + + protected String getMethodName() { + return methodName.getMethodName(); + } + protected String getFileSystemName() { return fileSystemName; } @@ -325,13 +403,13 @@ protected void touch(Path path) throws IOException { protected static String wasbUrlToAbfsUrl(final String wasbUrl) { return convertTestUrls( wasbUrl, FileSystemUriSchemes.WASB_SCHEME, FileSystemUriSchemes.WASB_SECURE_SCHEME, FileSystemUriSchemes.WASB_DNS_PREFIX, - FileSystemUriSchemes.ABFS_SCHEME, FileSystemUriSchemes.ABFS_SECURE_SCHEME, FileSystemUriSchemes.ABFS_DNS_PREFIX); + FileSystemUriSchemes.ABFS_SCHEME, FileSystemUriSchemes.ABFS_SECURE_SCHEME, FileSystemUriSchemes.ABFS_DNS_PREFIX, false); } - protected static String abfsUrlToWasbUrl(final String abfsUrl) { + protected static String abfsUrlToWasbUrl(final String abfsUrl, final boolean isAlwaysHttpsUsed) { return convertTestUrls( abfsUrl, 
FileSystemUriSchemes.ABFS_SCHEME, FileSystemUriSchemes.ABFS_SECURE_SCHEME, FileSystemUriSchemes.ABFS_DNS_PREFIX, - FileSystemUriSchemes.WASB_SCHEME, FileSystemUriSchemes.WASB_SECURE_SCHEME, FileSystemUriSchemes.WASB_DNS_PREFIX); + FileSystemUriSchemes.WASB_SCHEME, FileSystemUriSchemes.WASB_SECURE_SCHEME, FileSystemUriSchemes.WASB_DNS_PREFIX, isAlwaysHttpsUsed); } private static String convertTestUrls( @@ -341,15 +419,17 @@ private static String convertTestUrls( final String fromDnsPrefix, final String toNonSecureScheme, final String toSecureScheme, - final String toDnsPrefix) { + final String toDnsPrefix, + final boolean isAlwaysHttpsUsed) { String data = null; - if (url.startsWith(fromNonSecureScheme + "://")) { + if (url.startsWith(fromNonSecureScheme + "://") && isAlwaysHttpsUsed) { + data = url.replace(fromNonSecureScheme + "://", toSecureScheme + "://"); + } else if (url.startsWith(fromNonSecureScheme + "://")) { data = url.replace(fromNonSecureScheme + "://", toNonSecureScheme + "://"); } else if (url.startsWith(fromSecureScheme + "://")) { data = url.replace(fromSecureScheme + "://", toSecureScheme + "://"); } - if (data != null) { data = data.replace("." + fromDnsPrefix + ".", "." + toDnsPrefix + "."); @@ -362,6 +442,23 @@ public Path getTestPath() { return path; } + public AzureBlobFileSystemStore getAbfsStore(final AzureBlobFileSystem fs) { + return fs.getAbfsStore(); + } + + public AbfsClient getAbfsClient(final AzureBlobFileSystemStore abfsStore) { + return abfsStore.getClient(); + } + + public void setAbfsClient(AzureBlobFileSystemStore abfsStore, + AbfsClient client) { + abfsStore.setClient(client); + } + + public Path makeQualified(Path path) throws java.io.IOException { + return getFileSystem().makeQualified(path); + } + /** * Create a path under the test path provided by * {@link #getTestPath()}. 
@@ -371,7 +468,20 @@ public Path getTestPath() { */ protected Path path(String filepath) throws IOException { return getFileSystem().makeQualified( - new Path(getTestPath(), filepath)); + new Path(getTestPath(), getUniquePath(filepath))); + } + + /** + * Generate a unique path using the given filepath. + * @param filepath path string + * @return unique path created from filepath and a GUID + */ + protected Path getUniquePath(String filepath) { + if (filepath.equals("/")) { + return new Path(filepath); + } + return new Path(filepath + StringUtils + .right(UUID.randomUUID().toString(), SHORTENED_GUID_LEN)); } /** @@ -383,4 +493,38 @@ protected AbfsDelegationTokenManager getDelegationTokenManager() throws IOException { return getFileSystem().getDelegationTokenManager(); } + + /** + * Generic create File and enabling AbfsOutputStream Flush. + * + * @param fs AzureBlobFileSystem that is initialised in the test. + * @param path Path of the file to be created. + * @return AbfsOutputStream for writing. + * @throws AzureBlobFileSystemException + */ + protected AbfsOutputStream createAbfsOutputStreamWithFlushEnabled( + AzureBlobFileSystem fs, + Path path) throws IOException { + AzureBlobFileSystemStore abfss = fs.getAbfsStore(); + abfss.getAbfsConfiguration().setDisableOutputStreamFlush(false); + + return (AbfsOutputStream) abfss.createFile(path, fs.getFsStatistics(), + true, FsPermission.getDefault(), FsPermission.getUMask(fs.getConf()), + getTestTracingContext(fs, false)); + } + + /** + * Custom assertion for AbfsStatistics which have statistics, expected + * value and map of statistics and value as its parameters. + * @param statistic the AbfsStatistics which needs to be asserted. + * @param expectedValue the expected value of the statistics. + * @param metricMap map of (String, Long) with statistics name as key and + * statistics value as map value. 
+ */ + protected long assertAbfsStatistics(AbfsStatistic statistic, + long expectedValue, Map metricMap) { + assertEquals("Mismatch in " + statistic.getStatName(), expectedValue, + (long) metricMap.get(statistic.getStatName())); + return expectedValue; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsTestWithTimeout.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsTestWithTimeout.java index fee90abeabc9e..0485422871ecc 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsTestWithTimeout.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsTestWithTimeout.java @@ -17,12 +17,19 @@ */ package org.apache.hadoop.fs.azurebfs; +import java.io.IOException; + import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.rules.TestName; import org.junit.rules.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_TIMEOUT; @@ -31,6 +38,9 @@ * This class does not attempt to bind to Azure. */ public class AbstractAbfsTestWithTimeout extends Assert { + private static final Logger LOG = + LoggerFactory.getLogger(AbstractAbfsTestWithTimeout.class); + /** * The name of the current method. */ @@ -67,4 +77,53 @@ public void nameThread() { protected int getTestTimeoutMillis() { return TEST_TIMEOUT; } + + /** + * Describe a test in the logs. + * + * @param text text to print + * @param args arguments to format in the printing + */ + protected void describe(String text, Object... args) { + LOG.info("\n\n{}: {}\n", + methodName.getMethodName(), + String.format(text, args)); + } + + /** + * Validate Contents written on a file in Abfs. 
+   *
+   * @param fs                AzureBlobFileSystem holding the file
+   * @param path              Path of the file to read back
+   * @param originalByteArray bytes the file is expected to contain
+   * @return true if the file content equals originalByteArray
+   * @throws IOException if the file cannot be opened or read
+   */
+  protected boolean validateContent(AzureBlobFileSystem fs, Path path,
+      byte[] originalByteArray)
+      throws IOException {
+    int pos = 0;
+    int lenOfOriginalByteArray = originalByteArray.length;
+
+    try (FSDataInputStream in = fs.open(path)) {
+      // Keep the read() result as an int: a 0xFF data byte cast to byte is
+      // -1 and would be mistaken for the end-of-stream marker.
+      int valueOfContentAtPos = in.read();
+      while (valueOfContentAtPos != -1 && pos < lenOfOriginalByteArray) {
+        if (originalByteArray[pos] != (byte) valueOfContentAtPos) {
+          assertEquals("Mismatch in content validation at position " + pos,
+              originalByteArray[pos], (byte) valueOfContentAtPos);
+          return false;
+        }
+        valueOfContentAtPos = in.read();
+        pos++;
+      }
+      // Both sides must be exhausted: no trailing and no missing bytes.
+      assertEquals("Expected end of file", -1, valueOfContentAtPos);
+      assertEquals("File is shorter than the expected content",
+          lenOfOriginalByteArray, pos);
+      return true;
+    }
+  }
+
 }
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java
new file mode 100644
index 0000000000000..e1b6b39521acd
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import org.junit.Test; + +import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +public class ITestABFSJceksFiltering extends AbstractAbfsIntegrationTest { + + public ITestABFSJceksFiltering() throws Exception { + } + + @Test + public void testIncompatibleCredentialProviderIsExcluded() throws Exception { + Configuration rawConfig = getRawConfiguration(); + rawConfig.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + "jceks://abfs@a@b.c.d/tmp/a.jceks,jceks://file/tmp/secret.jceks"); + try (AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.get(rawConfig)) { + assertNotNull("filesystem", fs); + String providers = fs.getConf().get(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH); + assertEquals("jceks://file/tmp/secret.jceks", providers); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java index a4d645899049f..f90d410343532 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java @@ -62,7 +62,9 @@ public void testContinuationTokenHavingEqualSign() throws Exception { AbfsClient abfsClient = fs.getAbfsClient(); try { - AbfsRestOperation op = abfsClient.listPath("/", true, LIST_MAX_RESULTS, 
"==========="); + AbfsRestOperation op = abfsClient + .listPath("/", true, LIST_MAX_RESULTS, "===========", + getTestTracingContext(fs, true)); Assert.assertTrue(false); } catch (AbfsRestOperationException ex) { Assert.assertEquals("InvalidQueryParameterValue", ex.getErrorCode().getErrorCode()); @@ -91,7 +93,7 @@ public void testUnknownHost() throws Exception { public void testListPathWithValidListMaxResultsValues() throws IOException, ExecutionException, InterruptedException { final int fileCount = 10; - final String directory = "testWithValidListMaxResultsValues"; + final Path directory = getUniquePath("testWithValidListMaxResultsValues"); createDirectoryWithNFiles(directory, fileCount); final int[] testData = {fileCount + 100, fileCount + 1, fileCount, fileCount - 1, 1}; @@ -100,7 +102,7 @@ public void testListPathWithValidListMaxResultsValues() setListMaxResults(listMaxResults); int expectedListResultsSize = listMaxResults > fileCount ? fileCount : listMaxResults; - Assertions.assertThat(listPath(directory)).describedAs( + Assertions.assertThat(listPath(directory.toString())).describedAs( "AbfsClient.listPath result should contain %d items when " + "listMaxResults is %d and directory contains %d items", expectedListResultsSize, listMaxResults, fileCount) @@ -112,9 +114,10 @@ public void testListPathWithValidListMaxResultsValues() public void testListPathWithValueGreaterThanServerMaximum() throws IOException, ExecutionException, InterruptedException { setListMaxResults(LIST_MAX_RESULTS_SERVER + 100); - final String directory = "testWithValueGreaterThanServerMaximum"; + final Path directory = getUniquePath( + "testWithValueGreaterThanServerMaximum"); createDirectoryWithNFiles(directory, LIST_MAX_RESULTS_SERVER + 200); - Assertions.assertThat(listPath(directory)).describedAs( + Assertions.assertThat(listPath(directory.toString())).describedAs( "AbfsClient.listPath result will contain a maximum of %d items " + "even if listMaxResults >= %d or directory " + 
"contains more than %d items", LIST_MAX_RESULTS_SERVER, @@ -135,7 +138,8 @@ public void testListPathWithInvalidListMaxResultsValues() throws Exception { private List listPath(String directory) throws IOException { return getFileSystem().getAbfsClient() - .listPath(directory, false, getListMaxResults(), null).getResult() + .listPath(directory, false, getListMaxResults(), null, + getTestTracingContext(getFileSystem(), true)).getResult() .getListResultSchema().paths(); } @@ -149,7 +153,7 @@ private void setListMaxResults(int listMaxResults) throws IOException { .setListMaxResults(listMaxResults); } - private void createDirectoryWithNFiles(String directory, int n) + private void createDirectoryWithNFiles(Path directory, int n) throws ExecutionException, InterruptedException { final List> tasks = new ArrayList<>(); ExecutorService es = Executors.newFixedThreadPool(10); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsDurationTrackers.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsDurationTrackers.java new file mode 100644 index 0000000000000..0997b3dbd44d4 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsDurationTrackers.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; +import org.apache.hadoop.io.IOUtils; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_DELETE_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_GET_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_HEAD_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_PUT_REQUEST; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupMeanStatistic; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; + +public class ITestAbfsDurationTrackers extends AbstractAbfsIntegrationTest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsDurationTrackers.class); + private static final AbfsStatistic[] HTTP_DURATION_TRACKER_LIST = { + HTTP_HEAD_REQUEST, + HTTP_GET_REQUEST, + HTTP_DELETE_REQUEST, + HTTP_PUT_REQUEST, + }; + + public ITestAbfsDurationTrackers() throws Exception { + } + + /** + * Test to check if DurationTrackers for Abfs HTTP calls work correctly and + * track the duration of the http calls. 
+ */ + @Test + public void testAbfsHttpCallsDurations() throws IOException { + describe("test to verify if the DurationTrackers for abfs http calls " + + "work as expected."); + + AzureBlobFileSystem fs = getFileSystem(); + Path testFilePath = path(getMethodName()); + + // Declaring output and input stream. + AbfsOutputStream out = null; + AbfsInputStream in = null; + try { + // PUT the file. + out = createAbfsOutputStreamWithFlushEnabled(fs, testFilePath); + out.write('a'); + out.hflush(); + + // GET the file. + in = fs.getAbfsStore().openFileForRead(testFilePath, + fs.getFsStatistics(), getTestTracingContext(fs, false)); + int res = in.read(); + LOG.info("Result of Read: {}", res); + + // DELETE the file. + fs.delete(testFilePath, false); + + // extract the IOStatistics from the filesystem. + IOStatistics ioStatistics = extractStatistics(fs); + LOG.info(ioStatisticsToPrettyString(ioStatistics)); + assertDurationTracker(ioStatistics); + } finally { + IOUtils.cleanupWithLogger(LOG, out, in); + } + } + + /** + * A method to assert that all the DurationTrackers for the http calls are + * working correctly. + * + * @param ioStatistics the IOStatisticsSource in use. 
+ */ + private void assertDurationTracker(IOStatistics ioStatistics) { + for (AbfsStatistic abfsStatistic : HTTP_DURATION_TRACKER_LIST) { + Assertions.assertThat(lookupMeanStatistic(ioStatistics, + abfsStatistic.getStatName() + StoreStatisticNames.SUFFIX_MEAN).mean()) + .describedAs("The DurationTracker Named " + abfsStatistic.getStatName() + + " Doesn't match the expected value.") + .isGreaterThan(0.0); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java new file mode 100644 index 0000000000000..510e0a7596b47 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.store.DataBlocks; + +import static org.apache.hadoop.fs.azure.integration.AzureTestConstants.AZURE_SCALE_HUGE_FILE_UPLOAD; +import static org.apache.hadoop.fs.azure.integration.AzureTestConstants.AZURE_SCALE_HUGE_FILE_UPLOAD_DEFAULT; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assume; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.getTestPropertyInt; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_WRITE_BUFFER_SIZE; + +/** + * Testing Huge file for AbfsOutputStream. + */ +@RunWith(Parameterized.class) +public class ITestAbfsHugeFiles extends AbstractAbfsScaleTest { + private static final int ONE_MB = 1024 * 1024; + private static final int EIGHT_MB = 8 * ONE_MB; + // Configurable huge file upload: "fs.azure.scale.test.huge.upload", + // default is 2 * DEFAULT_WRITE_BUFFER_SIZE(8M). + private static final int HUGE_FILE; + + // Set the HUGE_FILE. + static { + HUGE_FILE = getTestPropertyInt(new Configuration(), + AZURE_SCALE_HUGE_FILE_UPLOAD, AZURE_SCALE_HUGE_FILE_UPLOAD_DEFAULT); + } + + // Writing block size to be used in this test. + private int size; + // Block Factory to be used in this test. 
+ private String blockFactoryName; + + @Parameterized.Parameters(name = "size [{0}] ; blockFactoryName " + + "[{1}]") + public static Collection sizes() { + return Arrays.asList(new Object[][] { + { DEFAULT_WRITE_BUFFER_SIZE, DataBlocks.DATA_BLOCKS_BUFFER_DISK }, + { HUGE_FILE, DataBlocks.DATA_BLOCKS_BUFFER_DISK }, + { DEFAULT_WRITE_BUFFER_SIZE, DataBlocks.DATA_BLOCKS_BUFFER_ARRAY }, + { HUGE_FILE, DataBlocks.DATA_BLOCKS_BUFFER_ARRAY }, + { DEFAULT_WRITE_BUFFER_SIZE, DataBlocks.DATA_BLOCKS_BYTEBUFFER }, + { HUGE_FILE, DataBlocks.DATA_BLOCKS_BYTEBUFFER }, + }); + } + + public ITestAbfsHugeFiles(int size, String blockFactoryName) + throws Exception { + this.size = size; + this.blockFactoryName = blockFactoryName; + } + + @Before + public void setUp() throws Exception { + Configuration configuration = getRawConfiguration(); + configuration.unset(DATA_BLOCKS_BUFFER); + configuration.set(DATA_BLOCKS_BUFFER, blockFactoryName); + super.setup(); + } + + /** + * Testing Huge files written at once on AbfsOutputStream. + */ + @Test + public void testHugeFileWrite() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + Path filePath = path(getMethodName()); + final byte[] b = new byte[size]; + new Random().nextBytes(b); + try (FSDataOutputStream out = fs.create(filePath)) { + out.write(b); + } + // Verify correct length was uploaded. Don't want to verify contents + // here, as this would increase the test time significantly. + assertEquals("Mismatch in content length of file uploaded", size, + fs.getFileStatus(filePath).getLen()); + } + + /** + * Testing Huge files written in chunks of 8M in lots of writes. 
+ */ + @Test + public void testLotsOfWrites() throws IOException { + assume("If the size isn't a multiple of 8M this test would not pass, so " + + "skip", + size % EIGHT_MB == 0); + AzureBlobFileSystem fs = getFileSystem(); + Path filePath = path(getMethodName()); + final byte[] b = new byte[size]; + new Random().nextBytes(b); + try (FSDataOutputStream out = fs.create(filePath)) { + int offset = 0; + for (int i = 0; i < size / EIGHT_MB; i++) { + out.write(b, offset, EIGHT_MB); + offset += EIGHT_MB; + } + } + // Verify correct length was uploaded. Don't want to verify contents + // here, as this would increase the test time significantly. + assertEquals("Mismatch in content length of file uploaded", size, + fs.getFileStatus(filePath).getLen()); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsIdentityTransformer.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsIdentityTransformer.java index b44914e4f31dd..f0473789cf161 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsIdentityTransformer.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsIdentityTransformer.java @@ -22,7 +22,7 @@ import java.util.List; import java.util.UUID; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformer; import org.apache.hadoop.fs.permission.AclEntry; import org.junit.Test; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java new file mode 100644 index 0000000000000..d96f1a283609f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java @@ -0,0 +1,426 @@ +/** + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamContext; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamStatisticsImpl; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; +import org.apache.hadoop.io.IOUtils; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupMeanStatistic; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; + +public class ITestAbfsInputStreamStatistics + extends AbstractAbfsIntegrationTest { + private static final int OPERATIONS = 10; + private static final Logger LOG = + 
LoggerFactory.getLogger(ITestAbfsInputStreamStatistics.class); + private static final int ONE_MB = 1024 * 1024; + private static final int ONE_KB = 1024; + private static final int CUSTOM_BLOCK_BUFFER_SIZE = 4 * 1024; + private byte[] defBuffer = new byte[ONE_MB]; + + public ITestAbfsInputStreamStatistics() throws Exception { + } + + /** + * Test to check the initial values of the AbfsInputStream statistics. + */ + @Test + public void testInitValues() throws IOException { + describe("Testing the initial values of AbfsInputStream Statistics"); + + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfss = fs.getAbfsStore(); + Path initValuesPath = path(getMethodName()); + AbfsOutputStream outputStream = null; + AbfsInputStream inputStream = null; + + try { + + outputStream = createAbfsOutputStreamWithFlushEnabled(fs, initValuesPath); + inputStream = abfss.openFileForRead(initValuesPath, fs.getFsStatistics(), + getTestTracingContext(fs, false)); + + AbfsInputStreamStatisticsImpl stats = + (AbfsInputStreamStatisticsImpl) inputStream.getStreamStatistics(); + + checkInitValue(stats.getSeekOperations(), "seekOps"); + checkInitValue(stats.getForwardSeekOperations(), "forwardSeekOps"); + checkInitValue(stats.getBackwardSeekOperations(), "backwardSeekOps"); + checkInitValue(stats.getBytesRead(), "bytesRead"); + checkInitValue(stats.getBytesSkippedOnSeek(), "bytesSkippedOnSeek"); + checkInitValue(stats.getBytesBackwardsOnSeek(), "bytesBackwardsOnSeek"); + checkInitValue(stats.getSeekInBuffer(), "seekInBuffer"); + checkInitValue(stats.getReadOperations(), "readOps"); + checkInitValue(stats.getBytesReadFromBuffer(), "bytesReadFromBuffer"); + checkInitValue(stats.getRemoteReadOperations(), "remoteReadOps"); + checkInitValue(stats.getReadAheadBytesRead(), "readAheadBytesRead"); + checkInitValue(stats.getRemoteBytesRead(), "readAheadRemoteBytesRead"); + + } finally { + IOUtils.cleanupWithLogger(LOG, outputStream, inputStream); + } + } + + /** + * Test to check 
statistics from seek operation in AbfsInputStream. + */ + @Test + public void testSeekStatistics() throws IOException { + describe("Testing the values of statistics from seek operations in " + + "AbfsInputStream"); + + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfss = fs.getAbfsStore(); + Path seekStatPath = path(getMethodName()); + + AbfsOutputStream out = null; + AbfsInputStream in = null; + + try { + out = createAbfsOutputStreamWithFlushEnabled(fs, seekStatPath); + + //Writing a default buffer in a file. + out.write(defBuffer); + out.hflush(); + in = abfss.openFileForRead(seekStatPath, fs.getFsStatistics(), + getTestTracingContext(fs, false)); + + /* + * Writing 1MB buffer to the file, this would make the fCursor(Current + * position of cursor) to the end of file. + */ + int result = in.read(defBuffer, 0, ONE_MB); + LOG.info("Result of read : {}", result); + + /* + * Seeking to start of file and then back to end would result in a + * backward and a forward seek respectively 10 times. + */ + for (int i = 0; i < OPERATIONS; i++) { + in.seek(0); + in.read(); + in.seek(ONE_MB); + } + + AbfsInputStreamStatisticsImpl stats = + (AbfsInputStreamStatisticsImpl) in.getStreamStatistics(); + + LOG.info("STATISTICS: {}", stats.toString()); + + /* + * seekOps - Since we are doing backward and forward seek OPERATIONS + * times, total seeks would be 2 * OPERATIONS. + * + * backwardSeekOps - Since we are doing a backward seek inside a loop + * for OPERATION times, total backward seeks would be OPERATIONS. + * + * forwardSeekOps - Since we are doing a forward seek inside a loop + * for OPERATION times, total forward seeks would be OPERATIONS. + * + * negativeBytesBackwardsOnSeek - Since we are doing backward seeks from + * end of file in a ONE_MB file each time, this would mean the bytes from + * backward seek would be OPERATIONS * ONE_MB. 
+ * + * bytesSkippedOnSeek - Since, we move from start to end in seek, but + * our fCursor(position of cursor) always remain at end of file, this + * would mean no bytes were skipped on seek. Since, all forward seeks + * are in buffer. + * + * seekInBuffer - Since all seeks were in buffer, the seekInBuffer + * would be equal to OPERATIONS. + * + */ + assertEquals("Mismatch in seekOps value", 2 * OPERATIONS, + stats.getSeekOperations()); + assertEquals("Mismatch in backwardSeekOps value", OPERATIONS, + stats.getBackwardSeekOperations()); + assertEquals("Mismatch in forwardSeekOps value", OPERATIONS, + stats.getForwardSeekOperations()); + assertEquals("Mismatch in bytesBackwardsOnSeek value", + OPERATIONS * ONE_MB, stats.getBytesBackwardsOnSeek()); + assertEquals("Mismatch in bytesSkippedOnSeek value", + 0, stats.getBytesSkippedOnSeek()); + assertEquals("Mismatch in seekInBuffer value", OPERATIONS, + stats.getSeekInBuffer()); + + in.close(); + // Verifying whether stats are readable after stream is closed. + LOG.info("STATISTICS after closing: {}", stats.toString()); + } finally { + IOUtils.cleanupWithLogger(LOG, out, in); + } + } + + /** + * Test to check statistics value from read operation in AbfsInputStream. + */ + @Test + public void testReadStatistics() throws IOException { + describe("Testing the values of statistics from read operation in " + + "AbfsInputStream"); + + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfss = fs.getAbfsStore(); + Path readStatPath = path(getMethodName()); + + AbfsOutputStream out = null; + AbfsInputStream in = null; + + try { + out = createAbfsOutputStreamWithFlushEnabled(fs, readStatPath); + + /* + * Writing 1MB buffer to the file. + */ + out.write(defBuffer); + out.hflush(); + in = abfss.openFileForRead(readStatPath, fs.getFsStatistics(), + getTestTracingContext(fs, false)); + + /* + * Doing file read 10 times. 
+ */ + for (int i = 0; i < OPERATIONS; i++) { + in.read(); + } + + AbfsInputStreamStatisticsImpl stats = + (AbfsInputStreamStatisticsImpl) in.getStreamStatistics(); + + LOG.info("STATISTICS: {}", stats.toString()); + + /* + * bytesRead - Since each time a single byte is read, total + * bytes read would be equal to OPERATIONS. + * + * readOps - Since each time read operation is performed OPERATIONS + * times, total number of read operations would be equal to OPERATIONS. + * + * remoteReadOps - Only a single remote read operation is done. Hence, + * total remote read ops is 1. + * + */ + assertEquals("Mismatch in bytesRead value", OPERATIONS, + stats.getBytesRead()); + assertEquals("Mismatch in readOps value", OPERATIONS, + stats.getReadOperations()); + assertEquals("Mismatch in remoteReadOps value", 1, + stats.getRemoteReadOperations()); + + in.close(); + // Verifying if stats are still readable after stream is closed. + LOG.info("STATISTICS after closing: {}", stats.toString()); + } finally { + IOUtils.cleanupWithLogger(LOG, out, in); + } + } + + /** + * Testing AbfsInputStream works with null Statistics. + */ + @Test + public void testWithNullStreamStatistics() throws IOException { + describe("Testing AbfsInputStream operations with statistics as null"); + + AzureBlobFileSystem fs = getFileSystem(); + Path nullStatFilePath = path(getMethodName()); + byte[] oneKbBuff = new byte[ONE_KB]; + + // Creating an AbfsInputStreamContext instance with null StreamStatistics. 
+ AbfsInputStreamContext abfsInputStreamContext = + new AbfsInputStreamContext( + getConfiguration().getSasTokenRenewPeriodForStreamsInSeconds()) + .withReadBufferSize(getConfiguration().getReadBufferSize()) + .withReadAheadQueueDepth(getConfiguration().getReadAheadQueueDepth()) + .withStreamStatistics(null) + .withReadAheadRange(getConfiguration().getReadAheadRange()) + .build(); + + AbfsOutputStream out = null; + AbfsInputStream in = null; + + try { + out = createAbfsOutputStreamWithFlushEnabled(fs, nullStatFilePath); + + // Writing a 1KB buffer in the file. + out.write(oneKbBuff); + out.hflush(); + + // AbfsRestOperation Instance required for eTag. + AbfsRestOperation abfsRestOperation = fs.getAbfsClient() + .getPathStatus(nullStatFilePath.toUri().getPath(), false, + getTestTracingContext(fs, false)); + + // AbfsInputStream with no StreamStatistics. + in = new AbfsInputStream(fs.getAbfsClient(), null, + nullStatFilePath.toUri().getPath(), ONE_KB, abfsInputStreamContext, + abfsRestOperation.getResult().getResponseHeader("ETag"), + getTestTracingContext(fs, false)); + + // Verifying that AbfsInputStream Operations works with null statistics. + assertNotEquals("AbfsInputStream read() with null statistics should " + + "work", -1, in.read()); + in.seek(ONE_KB); + + // Verifying toString() with no StreamStatistics. + LOG.info("AbfsInputStream: {}", in.toString()); + } finally { + IOUtils.cleanupWithLogger(LOG, out, in); + } + } + + /** + * Testing readAhead counters in AbfsInputStream with 30 seconds timeout. + */ + @Test + public void testReadAheadCounters() throws IOException { + describe("Test to check correct values for readAhead counters in " + + "AbfsInputStream"); + + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfss = fs.getAbfsStore(); + Path readAheadCountersPath = path(getMethodName()); + + /* + * Setting the block size for readAhead as 4KB. 
+ */ + abfss.getAbfsConfiguration().setReadBufferSize(CUSTOM_BLOCK_BUFFER_SIZE); + + AbfsOutputStream out = null; + AbfsInputStream in = null; + + try { + + /* + * Creating a file of 1MB size. + */ + out = createAbfsOutputStreamWithFlushEnabled(fs, readAheadCountersPath); + out.write(defBuffer); + out.close(); + + in = abfss.openFileForRead(readAheadCountersPath, fs.getFsStatistics(), + getTestTracingContext(fs, false)); + + /* + * Reading 1KB after each i * KB positions. Hence the reads are from 0 + * to 1KB, 1KB to 2KB, and so on.. for 5 operations. + */ + for (int i = 0; i < 5; i++) { + in.seek(ONE_KB * i); + in.read(defBuffer, ONE_KB * i, ONE_KB); + } + AbfsInputStreamStatisticsImpl stats = + (AbfsInputStreamStatisticsImpl) in.getStreamStatistics(); + + /* + * Verifying the counter values of readAheadBytesRead and remoteBytesRead. + * + * readAheadBytesRead : Since, we read 1KBs 5 times, that means we go + * from 0 to 5KB in the file. The bufferSize is set to 4KB, and since + * we have 8 blocks of readAhead buffer. We would have 8 blocks of 4KB + * buffer. Our read is till 5KB, hence readAhead would ideally read 2 + * blocks of 4KB which is equal to 8KB. But, sometimes to get blocks + * from readAhead buffer we might have to wait for background + * threads to fill the buffer and hence we might do remote read which + * would be faster. Therefore, readAheadBytesRead would be greater than + * or equal to the value of bytesFromReadAhead at the point we measure it. + * + * remoteBytesRead : Since, the bufferSize is set to 4KB and the number + * of blocks or readAheadQueueDepth is equal to 8. We would read 8 * 4 + * KB buffer on the first read, which is equal to 32KB. But, if we are not + * able to read some bytes that were in the buffer after doing + * readAhead, we might use remote read again. Thus, the bytes read + * remotely would be greater than or equal to the bytesFromRemoteRead + * value that we measure at some point of the operation. 
+ * + */ + Assertions.assertThat(stats.getReadAheadBytesRead()).describedAs( + "Mismatch in readAheadBytesRead counter value") + .isGreaterThanOrEqualTo(in.getBytesFromReadAhead()); + + Assertions.assertThat(stats.getRemoteBytesRead()).describedAs( + "Mismatch in remoteBytesRead counter value") + .isGreaterThanOrEqualTo(in.getBytesFromRemoteRead()); + + } finally { + IOUtils.cleanupWithLogger(LOG, out, in); + } + } + + /** + * Testing time taken by AbfsInputStream to complete a GET request. + */ + @Test + public void testActionHttpGetRequest() throws IOException { + describe("Test to check the correct value of Time taken by http get " + + "request in AbfsInputStream"); + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfss = fs.getAbfsStore(); + Path actionHttpGetRequestPath = path(getMethodName()); + AbfsInputStream abfsInputStream = null; + AbfsOutputStream abfsOutputStream = null; + try { + abfsOutputStream = createAbfsOutputStreamWithFlushEnabled(fs, + actionHttpGetRequestPath); + abfsOutputStream.write('a'); + abfsOutputStream.hflush(); + + abfsInputStream = + abfss.openFileForRead(actionHttpGetRequestPath, + fs.getFsStatistics(), getTestTracingContext(fs, false)); + abfsInputStream.read(); + IOStatistics ioStatistics = extractStatistics(fs); + LOG.info("AbfsInputStreamStats info: {}", + ioStatisticsToPrettyString(ioStatistics)); + Assertions.assertThat( + lookupMeanStatistic(ioStatistics, + AbfsStatistic.HTTP_GET_REQUEST.getStatName() + + StoreStatisticNames.SUFFIX_MEAN).mean()) + .describedAs("Mismatch in time taken by a GET request") + .isGreaterThan(0.0); + } finally { + IOUtils.cleanupWithLogger(LOG, abfsInputStream, abfsOutputStream); + } + } + + /** + * Method to assert the initial values of the statistics. + * + * @param actualValue the actual value of the statistics. + * @param statistic the name of operation or statistic being asserted. 
+   */
+  private void checkInitValue(long actualValue, String statistic) {
+    assertEquals("Mismatch in " + statistic + " value", 0, actualValue);
+  }
+}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsListStatusRemoteIterator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsListStatusRemoteIterator.java
new file mode 100644
index 0000000000000..ea1d0e26facec
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsListStatusRemoteIterator.java
@@ -0,0 +1,386 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Assert;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.azurebfs.services.AbfsListStatusRemoteIterator;
+import org.apache.hadoop.fs.azurebfs.services.ListingSupport;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyBoolean;
+import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.ArgumentMatchers.nullable;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Test ListStatusRemoteIterator operation.
+ */
+public class ITestAbfsListStatusRemoteIterator extends AbstractAbfsIntegrationTest {
+
+  private static final int TEST_FILES_NUMBER = 1000;
+  private static final Logger LOG = LoggerFactory.getLogger(
+      ITestAbfsListStatusRemoteIterator.class);
+
+  public ITestAbfsListStatusRemoteIterator() throws Exception {
+  }
+
+  /**
+   * Lists a directory of TEST_FILES_NUMBER files by looping on hasNext() and
+   * checks that every created file is returned and that the paged listStatus
+   * call ran at least TEST_FILES_NUMBER / 10 times (page size is set to 10).
+   */
+  @Test
+  public void testAbfsIteratorWithHasNext() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    final List<String> fileNames = createFilesUnderDirectory(testDir);
+
+    ListingSupport listingSupport = Mockito.spy(getFileSystem().getAbfsStore());
+    RemoteIterator<FileStatus> fsItr = new AbfsListStatusRemoteIterator(testDir,
+        listingSupport, getTestTracingContext(getFileSystem(), true));
+    Assertions.assertThat(fsItr)
+        .describedAs("RemoteIterator should be instance of "
+            + "AbfsListStatusRemoteIterator by default")
+        .isInstanceOf(AbfsListStatusRemoteIterator.class);
+    int itrCount = 0;
+    while (fsItr.hasNext()) {
+      FileStatus fileStatus = fsItr.next();
+      verifyIteratorResultContent(fileStatus, fileNames);
+      itrCount++;
+    }
+    verifyIteratorResultCount(itrCount, fileNames);
+    int minNumberOfInvocations = TEST_FILES_NUMBER / 10;
+    verify(listingSupport, Mockito.atLeast(minNumberOfInvocations))
+        .listStatus(any(Path.class), nullable(String.class),
+            anyList(), anyBoolean(),
+            nullable(String.class),
+            any(TracingContext.class));
+  }
+
+  /**
+   * Same listing as {@link #testAbfsIteratorWithHasNext()} but driven by
+   * next() alone: after exactly TEST_FILES_NUMBER entries the following
+   * next() call must throw NoSuchElementException.
+   */
+  @Test
+  public void testAbfsIteratorWithoutHasNext() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    final List<String> fileNames = createFilesUnderDirectory(testDir);
+
+    ListingSupport listingSupport = Mockito.spy(getFileSystem().getAbfsStore());
+    RemoteIterator<FileStatus> fsItr = new AbfsListStatusRemoteIterator(testDir,
+        listingSupport, getTestTracingContext(getFileSystem(), true));
+    Assertions.assertThat(fsItr)
+        .describedAs("RemoteIterator should be instance of "
+            + "AbfsListStatusRemoteIterator by default")
+        .isInstanceOf(AbfsListStatusRemoteIterator.class);
+    int itrCount = 0;
+    for (int i = 0; i < TEST_FILES_NUMBER; i++) {
+      FileStatus fileStatus = fsItr.next();
+      verifyIteratorResultContent(fileStatus, fileNames);
+      itrCount++;
+    }
+    LambdaTestUtils.intercept(NoSuchElementException.class, fsItr::next);
+    verifyIteratorResultCount(itrCount, fileNames);
+    int minNumberOfInvocations = TEST_FILES_NUMBER / 10;
+    verify(listingSupport, Mockito.atLeast(minNumberOfInvocations))
+        .listStatus(any(Path.class), nullable(String.class),
+            anyList(), anyBoolean(),
+            nullable(String.class),
+            any(TracingContext.class));
+  }
+
+  /**
+   * With the ABFS list iterator disabled, listStatusIterator() must not hand
+   * back an AbfsListStatusRemoteIterator, yet must still return every file.
+   */
+  @Test
+  public void testWithAbfsIteratorDisabled() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    disableAbfsIterator();
+    final List<String> fileNames = createFilesUnderDirectory(testDir);
+
+    RemoteIterator<FileStatus> fsItr =
+        getFileSystem().listStatusIterator(testDir);
+    Assertions.assertThat(fsItr)
+        .describedAs("RemoteIterator should not be instance of "
+            + "AbfsListStatusRemoteIterator when it is disabled")
+        .isNotInstanceOf(AbfsListStatusRemoteIterator.class);
+    int itrCount = 0;
+    while (fsItr.hasNext()) {
+      FileStatus fileStatus = fsItr.next();
+      verifyIteratorResultContent(fileStatus, fileNames);
+      itrCount++;
+    }
+    verifyIteratorResultCount(itrCount, fileNames);
+  }
+
+  /**
+   * Disabled-iterator case driven by next() alone; the call after the last
+   * entry must throw NoSuchElementException.
+   */
+  @Test
+  public void testWithAbfsIteratorDisabledWithoutHasNext() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    disableAbfsIterator();
+    final List<String> fileNames = createFilesUnderDirectory(testDir);
+
+    RemoteIterator<FileStatus> fsItr = getFileSystem().listStatusIterator(
+        testDir);
+    Assertions.assertThat(fsItr).describedAs(
+        "RemoteIterator should not be instance of "
+            + "AbfsListStatusRemoteIterator when it is disabled")
+        .isNotInstanceOf(AbfsListStatusRemoteIterator.class);
+    int itrCount;
+    for (itrCount = 0; itrCount < TEST_FILES_NUMBER; itrCount++) {
+      FileStatus fileStatus = fsItr.next();
+      verifyIteratorResultContent(fileStatus, fileNames);
+    }
+    LambdaTestUtils.intercept(NoSuchElementException.class, fsItr::next);
+    verifyIteratorResultCount(itrCount, fileNames);
+  }
+
+  /**
+   * next() must throw NoSuchElementException when hasNext() reports false;
+   * hasNext() is stubbed through a Mockito spy to force that state.
+   */
+  @Test
+  public void testNextWhenNoMoreElementsPresent() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    RemoteIterator<FileStatus> fsItr = new AbfsListStatusRemoteIterator(testDir,
+        getFileSystem().getAbfsStore(),
+        getTestTracingContext(getFileSystem(), true));
+    fsItr = Mockito.spy(fsItr);
+    Mockito.doReturn(false).when(fsItr).hasNext();
+
+    LambdaTestUtils.intercept(NoSuchElementException.class, fsItr::next);
+  }
+
+  /**
+   * hasNext() must be false when the listed directory has no entries.
+   */
+  @Test
+  public void testHasNextForEmptyDir() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    RemoteIterator<FileStatus> fsItr = getFileSystem()
+        .listStatusIterator(testDir);
+    Assertions.assertThat(fsItr.hasNext())
+        .describedAs("hasNext returns false for empty directory")
+        .isFalse();
+  }
+
+  /**
+   * Listing a file path must return the file itself as the next element.
+   */
+  @Test
+  public void testHasNextForFile() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    Path testFile = path("testFile");
+    String testFileName = testFile.toString();
+    fs.create(testFile).close(); // close the stream so the test does not leak it
+    setPageSize(10);
+    RemoteIterator<FileStatus> fsItr = fs.listStatusIterator(testFile);
+    Assertions.assertThat(fsItr.hasNext())
+        .describedAs("hasNext returns true for file").isTrue();
+    Assertions.assertThat(fsItr.next().getPath().toString())
+        .describedAs("next returns the file itself")
+        .endsWith(testFileName);
+  }
+
+  /**
+   * An IOException thrown by the ListingSupport must propagate out of the
+   * AbfsListStatusRemoteIterator constructor.
+   */
+  @Test
+  public void testIOException() throws Exception {
+    Path testDir = createTestDirectory();
+    setPageSize(10);
+    getFileSystem().mkdirs(testDir);
+
+    String exceptionMessage = "test exception";
+    ListingSupport lsSupport = getMockListingSupport(exceptionMessage);
+
+    LambdaTestUtils.intercept(IOException.class,
+        () -> new AbfsListStatusRemoteIterator(testDir, lsSupport,
+            getTestTracingContext(getFileSystem(), true)));
+  }
+
+  /**
+   * listStatusIterator() on a missing path must throw FileNotFoundException.
+   */
+  @Test
+  public void testNonExistingPath() throws Exception {
+    Path nonExistingDir = new Path("nonExistingPath");
+    LambdaTestUtils.intercept(FileNotFoundException.class,
+        () -> getFileSystem().listStatusIterator(nonExistingDir));
+  }
+
+  /**
+   * Asserts that the listed path is still expected and removes it from the
+   * expected names.
+   *
+   * @param fileStatus entry returned by the iterator.
+   * @param fileNames expected path strings not yet seen; mutated here.
+   */
+  private void verifyIteratorResultContent(FileStatus fileStatus,
+      List<String> fileNames) {
+    String pathStr = fileStatus.getPath().toString();
+    Assert.assertTrue(
+        String.format("Could not remove path %s from filenames %s", pathStr,
+            fileNames), fileNames.remove(pathStr));
+  }
+
+  /**
+   * Asserts that exactly TEST_FILES_NUMBER entries were iterated and that no
+   * expected name is left over.
+   *
+   * @param itrCount number of entries the iterator returned.
+   * @param fileNames expected path strings remaining after the iteration.
+   */
+  private void verifyIteratorResultCount(int itrCount, List<String> fileNames) {
+    Assertions.assertThat(itrCount).describedAs(
+        "Number of iterations should be equal to the files created")
+        .isEqualTo(TEST_FILES_NUMBER);
+    Assertions.assertThat(fileNames)
+        .describedAs("After removing every item found from the iterator, "
+            + "there should be no more elements in the fileNames")
+        .hasSize(0);
+  }
+
+  /**
+   * Builds a ListingSupport stub whose paged listStatus always throws.
+   *
+   * @param exceptionMessage message for the IOException that is thrown.
+   * @return the stub; its other listStatus overloads return null.
+   */
+  private ListingSupport getMockListingSupport(String exceptionMessage) {
+    return new ListingSupport() {
+      @Override
+      public FileStatus[] listStatus(Path path, TracingContext tracingContext) {
+        return null;
+      }
+
+      @Override
+      public FileStatus[] listStatus(Path path, String startFrom, TracingContext tracingContext) {
+        return null;
+      }
+
+      @Override
+      public String listStatus(Path path, String startFrom,
+          List<FileStatus> fileStatuses, boolean fetchAll,
+          String continuation, TracingContext tracingContext)
+          throws IOException {
+        throw new IOException(exceptionMessage);
+      }
+    };
+  }
+
+  /**
+   * Creates the directory that path("testDirectory") resolves to.
+   *
+   * @return path of the created directory.
+   */
+  private Path createTestDirectory() throws IOException {
+    Path testDirectory = path("testDirectory");
+    getFileSystem().mkdirs(testDirectory);
+    return testDirectory;
+  }
+
+  /**
+   * Switches the ABFS list iterator off in the store configuration.
+   */
+  private void disableAbfsIterator() throws IOException {
+    AzureBlobFileSystemStore abfsStore = getAbfsStore(getFileSystem());
+    abfsStore.getAbfsConfiguration().setEnableAbfsListIterator(false);
+  }
+
+  /**
+   * Sets the store's list-max-results setting, used here as the page size.
+   *
+   * @param pageSize value handed to setListMaxResults().
+   */
+  private void setPageSize(int pageSize) throws IOException {
+    AzureBlobFileSystemStore abfsStore = getAbfsStore(getFileSystem());
+    abfsStore.getAbfsConfiguration().setListMaxResults(pageSize);
+  }
+
+  /**
+   * Touches TEST_FILES_NUMBER files under rootPath from a pool of 10 threads
+   * and waits for every task to finish.
+   *
+   * @param rootPath directory the files are created in.
+   * @return synchronized list holding the qualified path string of each file.
+   */
+  private List<String> createFilesUnderDirectory(Path rootPath)
+      throws ExecutionException, InterruptedException, IOException {
+    final List<Future<Void>> tasks = new ArrayList<>();
+    final List<String> fileNames = Collections.synchronizedList(new ArrayList<>());
+    ExecutorService es = Executors.newFixedThreadPool(10);
+    try {
+      for (int i = 0; i < ITestAbfsListStatusRemoteIterator.TEST_FILES_NUMBER; i++) {
+        Path filePath = makeQualified(new Path(rootPath, "testListPath" + i));
+        tasks.add(es.submit(() -> {
+          touch(filePath);
+          synchronized (fileNames) {
+            Assert.assertTrue(fileNames.add(filePath.toString()));
+          }
+          return null;
+        }));
+      }
+      for (Future<Void> task : tasks) {
+        task.get();
+      }
+    } finally {
+      es.shutdownNow();
+    }
+    LOG.debug(fileNames.toString());
+    Assertions.assertThat(fileNames)
+        .describedAs("File creation incorrect or fileNames not added to list")
+        .hasSize(ITestAbfsListStatusRemoteIterator.TEST_FILES_NUMBER);
+    return fileNames;
+  }
+
+}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java
new file mode 100644
index 0000000000000..66b8da89572a1
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java
@@ -0,0 +1,325 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.junit.Test; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.BYTES_RECEIVED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.GET_RESPONSES; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.SEND_REQUESTS; + +public class ITestAbfsNetworkStatistics extends AbstractAbfsIntegrationTest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsNetworkStatistics.class); + private static final int WRITE_OPERATION_LOOP_COUNT = 10; + + public ITestAbfsNetworkStatistics() throws Exception { + } + + /** + * Testing connections_made, send_request and bytes_send statistics in + * {@link AbfsRestOperation}. 
+   */
+  @Test
+  public void testAbfsHttpSendStatistics() throws IOException {
+    describe("Test to check correct values of statistics after Abfs http send "
+        + "request is done.");
+
+    AzureBlobFileSystem fs = getFileSystem();
+    Map<String, Long> metricMap;
+    Path sendRequestPath = path(getMethodName());
+    String testNetworkStatsString = "http_send";
+
+    metricMap = fs.getInstrumentationMap();
+    long expectedConnectionsMade = metricMap.get(CONNECTIONS_MADE.getStatName());
+    long expectedRequestsSent = metricMap.get(SEND_REQUESTS.getStatName());
+    long expectedBytesSent = 0;
+
+    // --------------------------------------------------------------------
+    // Operation: Creating AbfsOutputStream
+    try (AbfsOutputStream out = createAbfsOutputStreamWithFlushEnabled(fs,
+        sendRequestPath)) {
+      // Network stats calculation: For Creating AbfsOutputStream:
+      // 1 create request = 1 connection made and 1 send request
+      expectedConnectionsMade++;
+      expectedRequestsSent++;
+      // --------------------------------------------------------------------
+
+      // Operation: Write small data
+      // Network stats calculation: No additions.
+      // Data written is less than the buffer size and hence will not
+      // trigger any append request to store
+      out.write(testNetworkStatsString.getBytes());
+      // --------------------------------------------------------------------
+
+      // Operation: HFlush
+      // Flushes all outstanding data (i.e. the current unfinished packet)
+      // from the client into the service on all DataNode replicas.
+      out.hflush();
+      /*
+       * Network stats calculation:
+       * 3 possibilities here:
+       * A. As there is pending data to be written to store, this will result in:
+       *    1 append + 1 flush = 2 connections and 2 send requests
+       *
+       * B. If config "fs.azure.enable.small.write.optimization" is enabled, append
+       *    and flush call will be merged for small data in buffer in this test.
+       *    In which case it will be:
+       *    1 append+flush request = 1 connection and 1 send request
+       *
+       * C. If the path is configured for append Blob files to be used, hflush
+       *    is a no-op. So in this case:
+       *    1 append = 1 connection and 1 send request
+       */
+      if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(sendRequestPath).toString())
+          || (this.getConfiguration().isSmallWriteOptimizationEnabled())) {
+        expectedConnectionsMade++;
+        expectedRequestsSent++;
+      } else {
+        expectedConnectionsMade += 2;
+        expectedRequestsSent += 2;
+      }
+      expectedBytesSent += testNetworkStatsString.getBytes().length;
+      // --------------------------------------------------------------------
+
+      // Assertions
+      metricMap = fs.getInstrumentationMap();
+      assertAbfsStatistics(CONNECTIONS_MADE,
+          expectedConnectionsMade, metricMap);
+      assertAbfsStatistics(SEND_REQUESTS, expectedRequestsSent,
+          metricMap);
+      assertAbfsStatistics(AbfsStatistic.BYTES_SENT,
+          expectedBytesSent, metricMap);
+    }
+
+    // --------------------------------------------------------------------
+    // Operation: AbfsOutputStream close.
+    // Network Stats calculation: 1 flush (with close) is sent.
+    // 1 flush request = 1 connection and 1 send request
+    expectedConnectionsMade++;
+    expectedRequestsSent++;
+    // --------------------------------------------------------------------
+
+    // Operation: Re-create the file / create overwrite scenario
+    try (AbfsOutputStream out = createAbfsOutputStreamWithFlushEnabled(fs,
+        sendRequestPath)) {
+      /*
+       * Network Stats calculation: create overwrite
+       * There are 2 possibilities here.
+       * A. create overwrite results in 1 server call
+       *    create with overwrite=true = 1 connection and 1 send request
+       *
+       * B. If config "fs.azure.enable.conditional.create.overwrite" is enabled,
+       *    create overwrite=false (will fail in this case as file is indeed present)
+       *    + getFileStatus to fetch the file ETag
+       *    + create overwrite=true
+       *    = 3 connections and 2 send requests
+       */
+      if (this.getConfiguration().isConditionalCreateOverwriteEnabled()) {
+        expectedConnectionsMade += 3;
+        expectedRequestsSent += 2;
+      } else {
+        expectedConnectionsMade += 1;
+        expectedRequestsSent += 1;
+      }
+      // --------------------------------------------------------------------
+
+      // Operation: Multiple small appends + hflush
+      for (int i = 0; i < WRITE_OPERATION_LOOP_COUNT; i++) {
+        out.write(testNetworkStatsString.getBytes());
+        // Network stats calculation: no-op. Small write
+        out.hflush();
+        // Network stats calculation: Hflush
+        // refer to previous comments for hFlush network stats calculation
+        // possibilities
+        if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(sendRequestPath).toString())
+            || (this.getConfiguration().isSmallWriteOptimizationEnabled())) {
+          expectedConnectionsMade++;
+          expectedRequestsSent++;
+        } else {
+          expectedConnectionsMade += 2;
+          expectedRequestsSent += 2;
+        }
+        expectedBytesSent += testNetworkStatsString.getBytes().length;
+      }
+      // --------------------------------------------------------------------
+
+      // Assertions
+      metricMap = fs.getInstrumentationMap();
+      assertAbfsStatistics(CONNECTIONS_MADE, expectedConnectionsMade, metricMap);
+      assertAbfsStatistics(SEND_REQUESTS, expectedRequestsSent, metricMap);
+      assertAbfsStatistics(AbfsStatistic.BYTES_SENT, expectedBytesSent, metricMap);
+    }
+
+  }
+
+  /**
+   * Testing get_response and bytes_received in {@link AbfsRestOperation}.
+   */
+  @Test
+  public void testAbfsHttpResponseStatistics() throws IOException {
+    describe("Test to check correct values of statistics after Http "
+        + "Response is processed.");
+
+    AzureBlobFileSystem fs = getFileSystem();
+    Path getResponsePath = path(getMethodName());
+    Map<String, Long> metricMap;
+    String testResponseString = "some response";
+
+    FSDataOutputStream out = null;
+    FSDataInputStream in = null;
+    long expectedConnectionsMade;
+    long expectedGetResponses;
+    long expectedBytesReceived;
+
+    try {
+      // Creating a File and writing some bytes in it.
+      out = fs.create(getResponsePath);
+      out.write(testResponseString.getBytes());
+      out.hflush();
+
+      // Set metric baseline; each expectation starts from its own counter.
+      metricMap = fs.getInstrumentationMap();
+      long bytesWrittenToFile = testResponseString.getBytes().length;
+      expectedConnectionsMade = metricMap.get(CONNECTIONS_MADE.getStatName());
+      expectedGetResponses = metricMap.get(GET_RESPONSES.getStatName());
+      expectedBytesReceived = metricMap.get(BYTES_RECEIVED.getStatName());
+
+      // --------------------------------------------------------------------
+      // Operation: Create AbfsInputStream
+      in = fs.open(getResponsePath);
+      // Network stats calculation: For Creating AbfsInputStream:
+      // 1 GetFileStatus request to fetch file size = 1 connection and 1 get response
+      expectedConnectionsMade++;
+      expectedGetResponses++;
+      // --------------------------------------------------------------------
+
+      // Operation: Read
+      in.read();
+      // Network stats calculation: For read:
+      // 1 read request = 1 connection and 1 get response
+      expectedConnectionsMade++;
+      expectedGetResponses++;
+      expectedBytesReceived += bytesWrittenToFile;
+      // --------------------------------------------------------------------
+
+      // Assertions
+      metricMap = fs.getInstrumentationMap();
+      assertAbfsStatistics(CONNECTIONS_MADE, expectedConnectionsMade, metricMap);
+      assertAbfsStatistics(GET_RESPONSES, expectedGetResponses, metricMap);
+      assertAbfsStatistics(AbfsStatistic.BYTES_RECEIVED, expectedBytesReceived, metricMap);
+    } finally {
+      IOUtils.cleanupWithLogger(LOG, out, in);
+    }
+
+    // --------------------------------------------------------------------
+    // Operation: AbfsOutputStream close.
+    // Network Stats calculation: no op.
+    // --------------------------------------------------------------------
+
+    try {
+
+      // Recreate file with different file size
+      // [Create and append related network stats checks are done in
+      // test method testAbfsHttpSendStatistics]
+      StringBuilder largeBuffer = new StringBuilder();
+      out = fs.create(getResponsePath);
+
+      for (int i = 0; i < WRITE_OPERATION_LOOP_COUNT; i++) {
+        out.write(testResponseString.getBytes());
+        out.hflush();
+        largeBuffer.append(testResponseString);
+      }
+
+      // sync back to metric baseline
+      metricMap = fs.getInstrumentationMap();
+      expectedConnectionsMade = metricMap.get(CONNECTIONS_MADE.getStatName());
+      expectedGetResponses = metricMap.get(GET_RESPONSES.getStatName());
+      // --------------------------------------------------------------------
+      // Operation: Create AbfsInputStream
+      in = fs.open(getResponsePath);
+      // Network stats calculation: For Creating AbfsInputStream:
+      // 1 GetFileStatus for file size = 1 connection and 1 get response
+      expectedConnectionsMade++;
+      expectedGetResponses++;
+      // --------------------------------------------------------------------
+
+      // Operation: Read
+      in.read(0, largeBuffer.toString().getBytes(), 0, largeBuffer.toString().getBytes().length);
+      // Network stats calculation: Total data written is still less than
+      // a buffer size. Hence will trigger only one read to store. So result is:
+      // 1 read request = 1 connection and 1 get response
+      expectedConnectionsMade++;
+      expectedGetResponses++;
+      expectedBytesReceived += (WRITE_OPERATION_LOOP_COUNT * testResponseString.getBytes().length);
+      // --------------------------------------------------------------------
+
+      // Assertions
+      metricMap = fs.getInstrumentationMap();
+      assertAbfsStatistics(CONNECTIONS_MADE, expectedConnectionsMade, metricMap);
+      assertAbfsStatistics(GET_RESPONSES, expectedGetResponses, metricMap);
+      assertAbfsStatistics(AbfsStatistic.BYTES_RECEIVED, expectedBytesReceived, metricMap);
+    } finally {
+      IOUtils.cleanupWithLogger(LOG, out, in);
+    }
+  }
+
+  /**
+   * Testing bytes_received counter value when a response failure occurs.
+   */
+  @Test
+  public void testAbfsHttpResponseFailure() throws IOException {
+    describe("Test to check the values of bytes received counter when a "
+        + "response is failed");
+
+    AzureBlobFileSystem fs = getFileSystem();
+    Path responseFailurePath = path(getMethodName());
+    Map<String, Long> metricMap;
+    FSDataOutputStream out = null;
+
+    try {
+      //create an empty file
+      out = fs.create(responseFailurePath);
+      //Re-creating the file again on same path with false overwrite, this
+      // would cause a response failure with status code 409.
+      out = fs.create(responseFailurePath, false);
+    } catch (FileAlreadyExistsException faee) {
+      metricMap = fs.getInstrumentationMap();
+      // Assert after catching the 409 error to check the counter values.
+      assertAbfsStatistics(AbfsStatistic.BYTES_RECEIVED, 0, metricMap);
+    } finally {
+      IOUtils.cleanupWithLogger(LOG, out);
+    }
+  }
+}
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsOutputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsOutputStreamStatistics.java
new file mode 100644
index 0000000000000..8be997ce69cf3
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsOutputStreamStatistics.java
@@ -0,0 +1,273 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStreamStatisticsImpl; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupMeanStatistic; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; + +/** + * Test AbfsOutputStream statistics. + */ +public class ITestAbfsOutputStreamStatistics + extends AbstractAbfsIntegrationTest { + + private static final int OPERATIONS = 10; + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsOutputStreamStatistics.class); + + public ITestAbfsOutputStreamStatistics() throws Exception { + } + + /** + * Tests to check bytes uploaded successfully in {@link AbfsOutputStream}. + */ + @Test + public void testAbfsOutputStreamUploadingBytes() throws IOException { + describe("Testing bytes uploaded successfully by AbfsOutputSteam"); + final AzureBlobFileSystem fs = getFileSystem(); + Path uploadBytesFilePath = path(getMethodName()); + String testBytesToUpload = "bytes"; + + try ( + AbfsOutputStream outForSomeBytes = createAbfsOutputStreamWithFlushEnabled( + fs, uploadBytesFilePath) + ) { + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatisticsForUploadBytes = + getAbfsOutputStreamStatistics(outForSomeBytes); + + //Test for zero bytes to upload. 
+ assertEquals("Mismatch in bytes to upload", 0, + abfsOutputStreamStatisticsForUploadBytes.getBytesToUpload()); + + outForSomeBytes.write(testBytesToUpload.getBytes()); + outForSomeBytes.flush(); + abfsOutputStreamStatisticsForUploadBytes = + getAbfsOutputStreamStatistics(outForSomeBytes); + + //Test for bytes to upload. + assertEquals("Mismatch in bytes to upload", + testBytesToUpload.getBytes().length, + abfsOutputStreamStatisticsForUploadBytes.getBytesToUpload()); + + //Test for successful bytes uploaded. + assertEquals("Mismatch in successful bytes uploaded", + testBytesToUpload.getBytes().length, + abfsOutputStreamStatisticsForUploadBytes.getBytesUploadSuccessful()); + + } + + try ( + AbfsOutputStream outForLargeBytes = createAbfsOutputStreamWithFlushEnabled( + fs, uploadBytesFilePath)) { + + for (int i = 0; i < OPERATIONS; i++) { + outForLargeBytes.write(testBytesToUpload.getBytes()); + } + outForLargeBytes.flush(); + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + getAbfsOutputStreamStatistics(outForLargeBytes); + + //Test for bytes to upload. + assertEquals("Mismatch in bytes to upload", + OPERATIONS * (testBytesToUpload.getBytes().length), + abfsOutputStreamStatistics.getBytesToUpload()); + + //Test for successful bytes uploaded. + assertEquals("Mismatch in successful bytes uploaded", + OPERATIONS * (testBytesToUpload.getBytes().length), + abfsOutputStreamStatistics.getBytesUploadSuccessful()); + + } + } + + /** + * Tests to check correct values of queue shrunk operations in + * AbfsOutputStream. + * + * After writing data, AbfsOutputStream doesn't upload the data until + * flushed. Hence, flush() method is called after write() to test queue + * shrink operations. 
+ */ + @Test + public void testAbfsOutputStreamQueueShrink() throws IOException { + describe("Testing queue shrink operations by AbfsOutputStream"); + final AzureBlobFileSystem fs = getFileSystem(); + Path queueShrinkFilePath = path(getMethodName()); + String testQueueShrink = "testQueue"; + if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(queueShrinkFilePath).toString())) { + // writeOperationsQueue is not used for appendBlob, hence queueShrink is 0 + return; + } + + try (AbfsOutputStream outForOneOp = createAbfsOutputStreamWithFlushEnabled( + fs, queueShrinkFilePath)) { + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + getAbfsOutputStreamStatistics(outForOneOp); + + //Test for shrinking queue zero times. + assertEquals("Mismatch in queue shrunk operations", 0, + abfsOutputStreamStatistics.getQueueShrunkOps()); + + } + + /* + * After writing in the loop we flush inside the loop to ensure the write + * operation done in that loop is considered to be done which would help + * us triggering the shrinkWriteOperationQueue() method each time after + * the write operation. + * If we call flush outside the loop, then it will take all the write + * operations inside the loop as one write operation. + * + */ + try ( + AbfsOutputStream outForLargeOps = createAbfsOutputStreamWithFlushEnabled( + fs, queueShrinkFilePath)) { + for (int i = 0; i < OPERATIONS; i++) { + outForLargeOps.write(testQueueShrink.getBytes()); + outForLargeOps.flush(); + } + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + getAbfsOutputStreamStatistics(outForLargeOps); + /* + * After a write operation is done, it is in a task queue where it is + * removed. Hence, to get the correct expected value we get the size of + * the task queue from AbfsOutputStream and subtract it from the total + * write operations done to get the number of queue shrinks done. 
+ * + */ + assertEquals("Mismatch in queue shrunk operations", + OPERATIONS - outForLargeOps.getWriteOperationsSize(), + abfsOutputStreamStatistics.getQueueShrunkOps()); + } + + } + + /** + * Tests to check correct values of write current buffer operations done by + * AbfsOutputStream. + * + * After writing data, AbfsOutputStream doesn't upload data till flush() is + * called. Hence, flush() calls were made after write(). + */ + @Test + public void testAbfsOutputStreamWriteBuffer() throws IOException { + describe("Testing write current buffer operations by AbfsOutputStream"); + final AzureBlobFileSystem fs = getFileSystem(); + Path writeBufferFilePath = path(getMethodName()); + String testWriteBuffer = "Buffer"; + + try (AbfsOutputStream outForOneOp = createAbfsOutputStreamWithFlushEnabled( + fs, writeBufferFilePath)) { + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + getAbfsOutputStreamStatistics(outForOneOp); + + //Test for zero times writing buffer to service. + assertEquals("Mismatch in write current buffer operations", 0, + abfsOutputStreamStatistics.getWriteCurrentBufferOperations()); + + outForOneOp.write(testWriteBuffer.getBytes()); + outForOneOp.flush(); + + abfsOutputStreamStatistics = getAbfsOutputStreamStatistics(outForOneOp); + + //Test for one time writing buffer to service. + assertEquals("Mismatch in write current buffer operations", 1, + abfsOutputStreamStatistics.getWriteCurrentBufferOperations()); + } + + try ( + AbfsOutputStream outForLargeOps = createAbfsOutputStreamWithFlushEnabled( + fs, writeBufferFilePath)) { + + /* + * Need to flush each time after we write to actually write the data + * into the data store and thus, get the writeCurrentBufferToService() + * method triggered and increment the statistic. 
+ */ + for (int i = 0; i < OPERATIONS; i++) { + outForLargeOps.write(testWriteBuffer.getBytes()); + outForLargeOps.flush(); + } + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + getAbfsOutputStreamStatistics(outForLargeOps); + //Test for OPERATIONS times writing buffer to service. + assertEquals("Mismatch in write current buffer operations", + OPERATIONS, + abfsOutputStreamStatistics.getWriteCurrentBufferOperations()); + } + } + + /** + * Test to check correct value of time spent on a PUT request in + * AbfsOutputStream. + */ + @Test + public void testAbfsOutputStreamDurationTrackerPutRequest() throws IOException { + describe("Testing to check if DurationTracker for PUT request is working " + + "correctly."); + AzureBlobFileSystem fs = getFileSystem(); + Path pathForPutRequest = path(getMethodName()); + + try(AbfsOutputStream outputStream = + createAbfsOutputStreamWithFlushEnabled(fs, pathForPutRequest)) { + outputStream.write('a'); + outputStream.hflush(); + + IOStatistics ioStatistics = extractStatistics(fs); + LOG.info("AbfsOutputStreamStats info: {}", + ioStatisticsToPrettyString(ioStatistics)); + Assertions.assertThat( + lookupMeanStatistic(ioStatistics, + AbfsStatistic.HTTP_PUT_REQUEST.getStatName() + + StoreStatisticNames.SUFFIX_MEAN).mean()) + .describedAs("Mismatch in timeSpentOnPutRequest DurationTracker") + .isGreaterThan(0.0); + } + } + + /** + * Method to get the AbfsOutputStream statistics. + * + * @param out AbfsOutputStream whose statistics are needed. + * @return AbfsOutputStream statistics implementation class to get the + * values of the counters. 
+ */ + private static AbfsOutputStreamStatisticsImpl getAbfsOutputStreamStatistics( + AbfsOutputStream out) { + return (AbfsOutputStreamStatisticsImpl) out.getOutputStreamStatistics(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java index a270a00e9132e..beada775ae87b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java @@ -28,10 +28,18 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.APPENDBLOB_MAX_WRITE_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MAX_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; /** * Test read, write and seek. 
@@ -40,19 +48,29 @@ */ @RunWith(Parameterized.class) public class ITestAbfsReadWriteAndSeek extends AbstractAbfsScaleTest { - private static final Path TEST_PATH = new Path("/testfile"); + private static final String TEST_PATH = "/testfile"; - @Parameterized.Parameters(name = "Size={0}") + /** + * Parameterize on read buffer size and readahead. + * For test performance, a full x*y test matrix is not used. + * @return the test parameters + */ + @Parameterized.Parameters(name = "Size={0}-readahead={1}") public static Iterable sizes() { - return Arrays.asList(new Object[][]{{MIN_BUFFER_SIZE}, - {DEFAULT_READ_BUFFER_SIZE}, - {MAX_BUFFER_SIZE}}); + return Arrays.asList(new Object[][]{{MIN_BUFFER_SIZE, true}, + {DEFAULT_READ_BUFFER_SIZE, false}, + {DEFAULT_READ_BUFFER_SIZE, true}, + {APPENDBLOB_MAX_WRITE_BUFFER_SIZE, false}, + {MAX_BUFFER_SIZE, true}}); } private final int size; + private final boolean readaheadEnabled; - public ITestAbfsReadWriteAndSeek(final int size) throws Exception { + public ITestAbfsReadWriteAndSeek(final int size, + final boolean readaheadEnabled) throws Exception { this.size = size; + this.readaheadEnabled = readaheadEnabled; } @Test @@ -63,27 +81,86 @@ public void testReadAndWriteWithDifferentBufferSizesAndSeek() throws Exception { private void testReadWriteAndSeek(int bufferSize) throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); - abfsConfiguration.setWriteBufferSize(bufferSize); abfsConfiguration.setReadBufferSize(bufferSize); - + abfsConfiguration.setReadAheadEnabled(readaheadEnabled); final byte[] b = new byte[2 * bufferSize]; new Random().nextBytes(b); - try (FSDataOutputStream stream = fs.create(TEST_PATH)) { + + Path testPath = path(TEST_PATH); + FSDataOutputStream stream = fs.create(testPath); + try { stream.write(b); + } finally{ + stream.close(); } + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, stream); final byte[] 
readBuffer = new byte[2 * bufferSize]; int result; - try (FSDataInputStream inputStream = fs.open(TEST_PATH)) { + IOStatisticsSource statisticsSource = null; + try (FSDataInputStream inputStream = fs.open(testPath)) { + statisticsSource = inputStream; + ((AbfsInputStream) inputStream.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.READ, true, 0, + ((AbfsInputStream) inputStream.getWrappedStream()) + .getStreamID())); inputStream.seek(bufferSize); result = inputStream.read(readBuffer, bufferSize, bufferSize); assertNotEquals(-1, result); + + //to test tracingHeader for case with bypassReadAhead == true + inputStream.seek(0); + byte[] temp = new byte[5]; + int t = inputStream.read(temp, 0, 1); + inputStream.seek(0); result = inputStream.read(readBuffer, 0, bufferSize); } + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, statisticsSource); + assertNotEquals("data read in final read()", -1, result); assertArrayEquals(readBuffer, b); } + + @Test + public void testReadAheadRequestID() throws java.io.IOException { + final AzureBlobFileSystem fs = getFileSystem(); + final AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); + int bufferSize = MIN_BUFFER_SIZE; + abfsConfiguration.setReadBufferSize(bufferSize); + abfsConfiguration.setReadAheadEnabled(readaheadEnabled); + + final byte[] b = new byte[bufferSize * 10]; + new Random().nextBytes(b); + Path testPath = path(TEST_PATH); + try (FSDataOutputStream stream = fs.create(testPath)) { + ((AbfsOutputStream) stream.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.WRITE, false, 0, + ((AbfsOutputStream) stream.getWrappedStream()) + .getStreamID())); + stream.write(b); + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, stream); + } + + + final byte[] readBuffer = new byte[4 * 
bufferSize]; + int result; + fs.registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.OPEN, false, 0)); + try (FSDataInputStream inputStream = fs.open(testPath)) { + ((AbfsInputStream) inputStream.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.READ, false, 0, + ((AbfsInputStream) inputStream.getWrappedStream()) + .getStreamID())); + result = inputStream.read(readBuffer, 0, bufferSize*4); + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, inputStream); + } + fs.registerListener(null); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java index 6f6982652e49f..3fe3557d501dc 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java @@ -20,21 +20,31 @@ import java.io.IOException; +import org.assertj.core.api.Assertions; +import org.junit.Assert; +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.oauth2.RetryTestTokenProvider; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.junit.Assert; -import org.junit.Test; - +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Verify the AbfsRestOperationException error message format. * */ public class ITestAbfsRestOperationException extends AbstractAbfsIntegrationTest{ + private static final String RETRY_TEST_TOKEN_PROVIDER = "org.apache.hadoop.fs.azurebfs.oauth2.RetryTestTokenProvider"; + public ITestAbfsRestOperationException() throws Exception { super(); } @@ -64,8 +74,9 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { // verify its format String errorMessage = ex.getLocalizedMessage(); String[] errorFields = errorMessage.split(","); - - Assert.assertEquals(6, errorFields.length); + Assertions.assertThat(errorFields) + .describedAs("fields in exception of %s", ex) + .hasSize(6); // Check status message, status code, HTTP Request Type and URL. 
Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); Assert.assertEquals("404", errorFields[1].trim()); @@ -79,12 +90,13 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { } @Test - public void testRequestRetryConfig() throws Exception { - testRetryLogic(0); - testRetryLogic(3); + public void testCustomTokenFetchRetryCount() throws Exception { + testWithDifferentCustomTokenFetchRetry(0); + testWithDifferentCustomTokenFetchRetry(3); + testWithDifferentCustomTokenFetchRetry(5); } - public void testRetryLogic(int numOfRetries) throws Exception { + public void testWithDifferentCustomTokenFetchRetry(int numOfRetries) throws Exception { AzureBlobFileSystem fs = this.getFileSystem(); Configuration config = new Configuration(this.getRawConfiguration()); @@ -93,14 +105,17 @@ public void testRetryLogic(int numOfRetries) throws Exception { config.set("fs.azure.account.auth.type." + accountName, "Custom"); config.set("fs.azure.account.oauth.provider.type." + accountName, "org.apache.hadoop.fs" + ".azurebfs.oauth2.RetryTestTokenProvider"); - config.set("fs.azure.io.retry.max.retries", Integer.toString(numOfRetries)); + config.set("fs.azure.custom.token.fetch.retry.count", Integer.toString(numOfRetries)); // Stop filesystem creation as it will lead to calls to store. 
config.set("fs.azure.createRemoteFileSystemDuringInitialization", "false"); final AzureBlobFileSystem fs1 = (AzureBlobFileSystem) FileSystem.newInstance(fs.getUri(), config); - RetryTestTokenProvider.ResetStatusToFirstTokenFetch(); + RetryTestTokenProvider retryTestTokenProvider + = RetryTestTokenProvider.getCurrentRetryTestProviderInstance( + getAccessTokenProvider(fs1)); + retryTestTokenProvider.resetStatusToFirstTokenFetch(); intercept(Exception.class, ()-> { @@ -108,9 +123,40 @@ public void testRetryLogic(int numOfRetries) throws Exception { }); // Number of retries done should be as configured - Assert.assertTrue( - "Number of token fetch retries (" + RetryTestTokenProvider.reTryCount - + ") done, does not match with max " + "retry count configured (" + numOfRetries - + ")", RetryTestTokenProvider.reTryCount == numOfRetries); + Assert.assertEquals( + "Number of token fetch retries done does not match with fs.azure" + + ".custom.token.fetch.retry.count configured", numOfRetries, + retryTestTokenProvider.getRetryCount()); + } + + @Test + public void testAuthFailException() throws Exception { + Configuration config = new Configuration(getRawConfiguration()); + String accountName = config + .get(FS_AZURE_ABFS_ACCOUNT_NAME); + // Setup to configure custom token provider + config.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME + DOT + + accountName, "Custom"); + config.set( + FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME + DOT + accountName, + RETRY_TEST_TOKEN_PROVIDER); + // Stop filesystem creation as it will lead to calls to store. + config.set(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, "false"); + + final AzureBlobFileSystem fs = getFileSystem(config); + try { + fs.getFileStatus(new Path("/")); + fail("Should fail at auth token fetch call"); + } catch (AbfsRestOperationException e) { + String errorDesc = "Should throw RestOp exception on AAD failure"; + Assertions.assertThat(e.getStatusCode()) + .describedAs("Incorrect status code. 
" + errorDesc).isEqualTo(-1); + Assertions.assertThat(e.getErrorCode()) + .describedAs("Incorrect error code. " + errorDesc) + .isEqualTo(AzureServiceErrorCode.UNKNOWN); + Assertions.assertThat(e.getErrorMessage()) + .describedAs("Incorrect error message. " + errorDesc) + .contains("Auth failure: "); + } } } \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsStatistics.java new file mode 100644 index 0000000000000..98162fee08e9f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsStatistics.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.statistics.IOStatistics; + +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; + +/** + * Tests AzureBlobFileSystem Statistics. + */ +public class ITestAbfsStatistics extends AbstractAbfsIntegrationTest { + + private static final int NUMBER_OF_OPS = 10; + + public ITestAbfsStatistics() throws Exception { + } + + @Before + public void setUp() throws Exception { + super.setup(); + // Setting IOStats to INFO level, to see the IOStats after close(). + getFileSystem().getConf().set(IOSTATISTICS_LOGGING_LEVEL, + IOSTATISTICS_LOGGING_LEVEL_INFO); + } + + /** + * Testing the initial value of statistics. + */ + @Test + public void testInitialStatsValues() throws IOException { + describe("Testing the initial values of Abfs counters"); + + AbfsCounters abfsCounters = + new AbfsCountersImpl(getFileSystem().getUri()); + IOStatistics ioStatistics = abfsCounters.getIOStatistics(); + + //Initial value verification for counters + for (Map.Entry entry : ioStatistics.counters().entrySet()) { + checkInitialValue(entry.getKey(), entry.getValue(), 0); + } + + //Initial value verification for gauges + for (Map.Entry entry : ioStatistics.gauges().entrySet()) { + checkInitialValue(entry.getKey(), entry.getValue(), 0); + } + + //Initial value verifications for DurationTrackers + for (Map.Entry entry : ioStatistics.maximums().entrySet()) { + checkInitialValue(entry.getKey(), entry.getValue(), -1); + } + } + + /** + * Testing statistics by creating files and directories. 
+ */ + @Test + public void testCreateStatistics() throws IOException { + describe("Testing counter values got by creating directories and files in" + + " Abfs"); + + AzureBlobFileSystem fs = getFileSystem(); + Path createFilePath = path(getMethodName()); + Path createDirectoryPath = path(getMethodName() + "Dir"); + + fs.mkdirs(createDirectoryPath); + fs.createNonRecursive(createFilePath, FsPermission + .getDefault(), false, 1024, (short) 1, 1024, null).close(); + + Map metricMap = fs.getInstrumentationMap(); + /* + Test of statistic values after creating a directory and a file ; + getFileStatus is called 1 time after creating file and 1 time at time of + initialising. + */ + assertAbfsStatistics(AbfsStatistic.CALL_CREATE, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_CREATE_NON_RECURSIVE, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.FILES_CREATED, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.DIRECTORIES_CREATED, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_MKDIRS, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_GET_FILE_STATUS, 2, metricMap); + + //re-initialising Abfs to reset statistic values. + fs.initialize(fs.getUri(), fs.getConf()); + + /* + Creating 10 directories and files; Directories and files can't be created + with same name, hence + i to give unique names. + */ + for (int i = 0; i < NUMBER_OF_OPS; i++) { + fs.mkdirs(path(getMethodName() + "Dir" + i)); + fs.createNonRecursive(path(getMethodName() + i), + FsPermission.getDefault(), false, 1024, (short) 1, + 1024, null).close(); + } + + metricMap = fs.getInstrumentationMap(); + /* + Test of statistics values after creating 10 directories and files; + getFileStatus is called 1 time at initialise() plus number of times file + is created. 
+ */ + assertAbfsStatistics(AbfsStatistic.CALL_CREATE, NUMBER_OF_OPS, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_CREATE_NON_RECURSIVE, NUMBER_OF_OPS, + metricMap); + assertAbfsStatistics(AbfsStatistic.FILES_CREATED, NUMBER_OF_OPS, metricMap); + assertAbfsStatistics(AbfsStatistic.DIRECTORIES_CREATED, NUMBER_OF_OPS, + metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_MKDIRS, NUMBER_OF_OPS, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_GET_FILE_STATUS, + 1 + NUMBER_OF_OPS, metricMap); + } + + /** + * Testing statistics by deleting files and directories. + */ + @Test + public void testDeleteStatistics() throws IOException { + describe("Testing counter values got by deleting directory and files " + + "in Abfs"); + + AzureBlobFileSystem fs = getFileSystem(); + /* + This directory path needs to be root for triggering the + directories_deleted counter. + */ + Path createDirectoryPath = path("/"); + Path createFilePath = path(getMethodName()); + + /* + creating a directory and a file inside that directory. + The directory is root. Hence, no parent. This allows us to invoke + deleteRoot() method to see the population of directories_deleted and + files_deleted counters. + */ + fs.mkdirs(createDirectoryPath); + fs.create(path(createDirectoryPath + getMethodName())).close(); + fs.delete(createDirectoryPath, true); + + Map metricMap = fs.getInstrumentationMap(); + + /* + Test for op_delete, files_deleted, op_list_status. + Since directory is deleted recursively, op_delete is called 2 times. + 1 file is deleted, 1 listStatus() call is made. + */ + assertAbfsStatistics(AbfsStatistic.CALL_DELETE, 2, metricMap); + assertAbfsStatistics(AbfsStatistic.FILES_DELETED, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_LIST_STATUS, 1, metricMap); + + /* + creating a root directory and deleting it recursively to see if + directories_deleted is called or not. 
+ */ + fs.mkdirs(createDirectoryPath); + fs.create(createFilePath).close(); + fs.delete(createDirectoryPath, true); + metricMap = fs.getInstrumentationMap(); + + //Test for directories_deleted. + assertAbfsStatistics(AbfsStatistic.DIRECTORIES_DELETED, 1, metricMap); + } + + /** + * Testing statistics of open, append, rename and exists method calls. + */ + @Test + public void testOpenAppendRenameExists() throws IOException { + describe("Testing counter values on calling open, append and rename and " + + "exists methods on Abfs"); + + AzureBlobFileSystem fs = getFileSystem(); + Path createFilePath = path(getMethodName()); + Path destCreateFilePath = path(getMethodName() + "New"); + + fs.create(createFilePath).close(); + fs.open(createFilePath).close(); + fs.append(createFilePath).close(); + assertTrue(fs.rename(createFilePath, destCreateFilePath)); + + Map metricMap = fs.getInstrumentationMap(); + //Testing single method calls to open, append and rename. + assertAbfsStatistics(AbfsStatistic.CALL_OPEN, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_APPEND, 1, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_RENAME, 1, metricMap); + + //Testing if file exists at path. + assertTrue(String.format("File with name %s should exist", + destCreateFilePath), + fs.exists(destCreateFilePath)); + assertFalse(String.format("File with name %s should not exist", + createFilePath), + fs.exists(createFilePath)); + + metricMap = fs.getInstrumentationMap(); + //Testing exists() calls. + assertAbfsStatistics(AbfsStatistic.CALL_EXIST, 2, metricMap); + + //re-initialising Abfs to reset statistic values. + fs.initialize(fs.getUri(), fs.getConf()); + + fs.create(destCreateFilePath).close(); + + for (int i = 0; i < NUMBER_OF_OPS; i++) { + fs.open(destCreateFilePath).close(); + fs.append(destCreateFilePath).close(); + } + + metricMap = fs.getInstrumentationMap(); + + //Testing large number of method calls to open, append. 
+ assertAbfsStatistics(AbfsStatistic.CALL_OPEN, NUMBER_OF_OPS, metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_APPEND, NUMBER_OF_OPS, metricMap); + + for (int i = 0; i < NUMBER_OF_OPS; i++) { + // rename and then back to earlier name for no error while looping. + assertTrue(fs.rename(destCreateFilePath, createFilePath)); + assertTrue(fs.rename(createFilePath, destCreateFilePath)); + + //check if first name is existing and 2nd is not existing. + assertTrue(String.format("File with name %s should exist", + destCreateFilePath), + fs.exists(destCreateFilePath)); + assertFalse(String.format("File with name %s should not exist", + createFilePath), + fs.exists(createFilePath)); + + } + + metricMap = fs.getInstrumentationMap(); + + /* + Testing exists() calls and rename calls. Since both were called 2 + times in 1 loop. 2*numberOfOps is expectedValue. + */ + assertAbfsStatistics(AbfsStatistic.CALL_RENAME, 2 * NUMBER_OF_OPS, + metricMap); + assertAbfsStatistics(AbfsStatistic.CALL_EXIST, 2 * NUMBER_OF_OPS, + metricMap); + + } + + /** + * Method to check that a statistic has its expected initial value. + * + * @param statName name of the statistic to be checked. + * @param statValue value of the statistic. + * @param expectedInitialValue initial value expected from this statistic. 
+ */ + private void checkInitialValue(String statName, long statValue, + long expectedInitialValue) { + assertEquals("Mismatch in " + statName, expectedInitialValue, statValue); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsStreamStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsStreamStatistics.java new file mode 100644 index 0000000000000..e5f182df2a1a2 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsStreamStatistics.java @@ -0,0 +1,172 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; + +/** + * Test Abfs Stream. 
+ */ + +public class ITestAbfsStreamStatistics extends AbstractAbfsIntegrationTest { + public ITestAbfsStreamStatistics() throws Exception { + } + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsStreamStatistics.class); + + private static final int LARGE_NUMBER_OF_OPS = 99; + + /*** + * Testing {@code incrementReadOps()} in class {@code AbfsInputStream} and + * {@code incrementWriteOps()} in class {@code AbfsOutputStream}. + * + */ + @Test + public void testAbfsStreamOps() throws Exception { + describe("Test to see correct population of read and write operations in " + + "Abfs"); + + final AzureBlobFileSystem fs = getFileSystem(); + Path smallOperationsFile = path("testOneReadWriteOps"); + Path largeOperationsFile = path("testLargeReadWriteOps"); + FileSystem.Statistics statistics = fs.getFsStatistics(); + String testReadWriteOps = "test this"; + statistics.reset(); + + //Test for zero write operation + assertReadWriteOps("write", 0, statistics.getWriteOps()); + + //Test for zero read operation + assertReadWriteOps("read", 0, statistics.getReadOps()); + + FSDataOutputStream outForOneOperation = null; + FSDataInputStream inForOneOperation = null; + try { + outForOneOperation = fs.create(smallOperationsFile); + statistics.reset(); + outForOneOperation.write(testReadWriteOps.getBytes()); + + //Test for a single write operation + assertReadWriteOps("write", 1, statistics.getWriteOps()); + + //Flushing output stream to see content to read + outForOneOperation.hflush(); + inForOneOperation = fs.open(smallOperationsFile); + statistics.reset(); + int result = inForOneOperation.read(testReadWriteOps.getBytes(), 0, + testReadWriteOps.getBytes().length); + + LOG.info("Result of Read operation : {}", result); + /* + * Testing if 2 read_ops value is coming after reading full content + * from a file (3 if anything to read from Buffer too). Reason: read() + * call gives read_ops=1, reading from AbfsClient(http GET) gives + * read_ops=2. 
+ * + * In some cases ABFS-prefetch thread runs in the background which + * returns some bytes from buffer and gives an extra readOp. + * Thus, making readOps values arbitrary and giving intermittent + * failures in some cases. Hence, readOps values of 2 or 3 is seen in + * different setups. + * + */ + assertTrue(String.format("The actual value of %d was not equal to the " + + "expected value of 2 or 3", statistics.getReadOps()), + statistics.getReadOps() == 2 || statistics.getReadOps() == 3); + + } finally { + IOUtils.cleanupWithLogger(LOG, inForOneOperation, + outForOneOperation); + } + + //Validating if content is being written in the smallOperationsFile + assertTrue("Mismatch in content validation", + validateContent(fs, smallOperationsFile, + testReadWriteOps.getBytes())); + + FSDataOutputStream outForLargeOperations = null; + FSDataInputStream inForLargeOperations = null; + StringBuilder largeOperationsValidationString = new StringBuilder(); + try { + outForLargeOperations = fs.create(largeOperationsFile); + statistics.reset(); + int largeValue = LARGE_NUMBER_OF_OPS; + for (int i = 0; i < largeValue; i++) { + outForLargeOperations.write(testReadWriteOps.getBytes()); + + //Creating the String for content Validation + largeOperationsValidationString.append(testReadWriteOps); + } + LOG.info("Number of bytes of Large data written: {}", + largeOperationsValidationString.toString().getBytes().length); + + //Test for LARGE_NUMBER_OF_OPS write operations + assertReadWriteOps("write", largeValue, statistics.getWriteOps()); + + inForLargeOperations = fs.open(largeOperationsFile); + for (int i = 0; i < largeValue; i++) { + inForLargeOperations + .read(testReadWriteOps.getBytes(), 0, + testReadWriteOps.getBytes().length); + } + + if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(largeOperationsFile).toString())) { + // for appendblob data is already flushed, so there is more data to read. 
+ assertTrue(String.format("The actual value of %d was not equal to the " + + "expected value", statistics.getReadOps()), + statistics.getReadOps() == (largeValue + 3) || statistics.getReadOps() == (largeValue + 4)); + } else { + //Test for LARGE_NUMBER_OF_OPS read operations + assertReadWriteOps("read", largeValue, statistics.getReadOps()); + } + + } finally { + IOUtils.cleanupWithLogger(LOG, inForLargeOperations, + outForLargeOperations); + } + //Validating if content is being written in largeOperationsFile + assertTrue("Mismatch in content validation", + validateContent(fs, largeOperationsFile, + largeOperationsValidationString.toString().getBytes())); + + } + + /** + * Generic method to assert both read and write operations. + * + * @param operation what operation is being asserted + * @param expectedValue value which is expected + * @param actualValue value which is actual + */ + + private void assertReadWriteOps(String operation, long expectedValue, + long actualValue) { + assertEquals("Mismatch in " + operation + " operations", expectedValue, + actualValue); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java index cbe19396d1277..dbe4b42a67df3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java @@ -19,12 +19,15 @@ package org.apache.hadoop.fs.azurebfs; import java.io.FileNotFoundException; +import java.io.IOException; import java.util.Random; import org.junit.Test; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import 
org.apache.hadoop.fs.contract.ContractTestUtils; /** @@ -32,8 +35,8 @@ */ public class ITestAzureBlobFileSystemAppend extends AbstractAbfsIntegrationTest { - private static final Path TEST_FILE_PATH = new Path("testfile"); - private static final Path TEST_FOLDER_PATH = new Path("testFolder"); + private static final String TEST_FILE_PATH = "testfile"; + private static final String TEST_FOLDER_PATH = "testFolder"; public ITestAzureBlobFileSystemAppend() throws Exception { super(); @@ -42,15 +45,15 @@ public ITestAzureBlobFileSystemAppend() throws Exception { @Test(expected = FileNotFoundException.class) public void testAppendDirShouldFail() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path filePath = TEST_FILE_PATH; + final Path filePath = path(TEST_FILE_PATH); fs.mkdirs(filePath); - fs.append(filePath, 0); + fs.append(filePath, 0).close(); } @Test public void testAppendWithLength0() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - try(FSDataOutputStream stream = fs.create(TEST_FILE_PATH)) { + try(FSDataOutputStream stream = fs.create(path(TEST_FILE_PATH))) { final byte[] b = new byte[1024]; new Random().nextBytes(b); stream.write(b, 1000, 0); @@ -62,18 +65,29 @@ public void testAppendWithLength0() throws Exception { @Test(expected = FileNotFoundException.class) public void testAppendFileAfterDelete() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path filePath = TEST_FILE_PATH; + final Path filePath = path(TEST_FILE_PATH); ContractTestUtils.touch(fs, filePath); fs.delete(filePath, false); - fs.append(filePath); + fs.append(filePath).close(); } @Test(expected = FileNotFoundException.class) public void testAppendDirectory() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path folderPath = TEST_FOLDER_PATH; + final Path folderPath = path(TEST_FOLDER_PATH); fs.mkdirs(folderPath); - fs.append(folderPath); + fs.append(folderPath).close(); + } + + @Test + public void 
testTracingForAppend() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + Path testPath = path(TEST_FILE_PATH); + fs.create(testPath).close(); + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.APPEND, false, 0)); + fs.append(testPath, 10); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java index cc86923357aa5..beb7d0ebaaa8e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java @@ -21,11 +21,14 @@ import java.io.IOException; import java.util.EnumSet; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.XAttrSetFlag; import org.junit.Assume; import org.junit.Test; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; + import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -42,7 +45,8 @@ public ITestAzureBlobFileSystemAttributes() throws Exception { @Test public void testSetGetXAttr() throws Exception { AzureBlobFileSystem fs = getFileSystem(); - Assume.assumeTrue(fs.getIsNamespaceEnabled()); + AbfsConfiguration conf = fs.getAbfsStore().getAbfsConfiguration(); + Assume.assumeTrue(getIsNamespaceEnabled(fs)); byte[] attributeValue1 = fs.getAbfsStore().encodeAttribute("hi"); byte[] attributeValue2 = fs.getAbfsStore().encodeAttribute("你好"); @@ -55,8 +59,13 @@ public void testSetGetXAttr() throws Exception { assertNull(fs.getXAttr(testFile, attributeName1)); // after setting the xAttr on the file, the 
value should be retrievable + fs.registerListener( + new TracingHeaderValidator(conf.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.SET_ATTR, true, 0)); fs.setXAttr(testFile, attributeName1, attributeValue1); + fs.setListenerOperation(FSOperationType.GET_ATTR); assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1)); + fs.registerListener(null); // after setting a second xAttr on the file, the first xAttr values should not be overwritten fs.setXAttr(testFile, attributeName2, attributeValue2); @@ -67,7 +76,7 @@ public void testSetGetXAttr() throws Exception { @Test public void testSetGetXAttrCreateReplace() throws Exception { AzureBlobFileSystem fs = getFileSystem(); - Assume.assumeTrue(fs.getIsNamespaceEnabled()); + Assume.assumeTrue(getIsNamespaceEnabled(fs)); byte[] attributeValue = fs.getAbfsStore().encodeAttribute("one"); String attributeName = "user.someAttribute"; Path testFile = path("createReplaceXAttr"); @@ -84,7 +93,7 @@ public void testSetGetXAttrCreateReplace() throws Exception { @Test public void testSetGetXAttrReplace() throws Exception { AzureBlobFileSystem fs = getFileSystem(); - Assume.assumeTrue(fs.getIsNamespaceEnabled()); + Assume.assumeTrue(getIsNamespaceEnabled(fs)); byte[] attributeValue1 = fs.getAbfsStore().encodeAttribute("one"); byte[] attributeValue2 = fs.getAbfsStore().encodeAttribute("two"); String attributeName = "user.someAttribute"; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAuthorization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAuthorization.java index 94e0ce3f4826e..338cf8476afd8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAuthorization.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAuthorization.java @@ -99,7 +99,7 @@ public void 
testSASTokenProviderEmptySASToken() throws Exception { this.getConfiguration().getRawConfiguration()); intercept(SASTokenProviderException.class, () -> { - testFs.create(new org.apache.hadoop.fs.Path("/testFile")); + testFs.create(new org.apache.hadoop.fs.Path("/testFile")).close(); }); } @@ -114,7 +114,7 @@ public void testSASTokenProviderNullSASToken() throws Exception { testFs.initialize(fs.getUri(), this.getConfiguration().getRawConfiguration()); intercept(SASTokenProviderException.class, ()-> { - testFs.create(new org.apache.hadoop.fs.Path("/testFile")); + testFs.create(new org.apache.hadoop.fs.Path("/testFile")).close(); }); } @@ -209,46 +209,55 @@ public void testGetFileStatusUnauthorized() throws Exception { @Test public void testSetOwnerUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.SetOwner, true); } @Test public void testSetPermissionUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.SetPermissions, true); } @Test public void testModifyAclEntriesUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.ModifyAclEntries, true); } @Test public void testRemoveAclEntriesUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.RemoveAclEntries, true); } @Test public void testRemoveDefaultAclUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.RemoveDefaultAcl, true); } @Test public void testRemoveAclUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.RemoveAcl, true); } @Test public void testSetAclUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.SetAcl, true); } @Test 
public void testGetAclStatusAuthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.GetAcl, false); } @Test public void testGetAclStatusUnauthorized() throws Exception { + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); runTest(FileSystemOperations.GetAcl, true); } @@ -288,7 +297,7 @@ private void executeOp(Path reqPath, AzureBlobFileSystem fs, fs.listStatus(reqPath); break; case CreatePath: - fs.create(reqPath); + fs.create(reqPath).close(); break; case RenamePath: fs.rename(reqPath, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemBackCompat.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemBackCompat.java index 5ac16b45a4572..2941b96fefa2e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemBackCompat.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemBackCompat.java @@ -43,20 +43,23 @@ public ITestAzureBlobFileSystemBackCompat() throws Exception { public void testBlobBackCompat() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); Assume.assumeFalse("This test does not support namespace enabled account", - this.getFileSystem().getIsNamespaceEnabled()); + getIsNamespaceEnabled(getFileSystem())); String storageConnectionString = getBlobConnectionString(); CloudStorageAccount storageAccount = CloudStorageAccount.parse(storageConnectionString); CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); CloudBlobContainer container = blobClient.getContainerReference(this.getFileSystemName()); container.createIfNotExists(); - CloudBlockBlob blockBlob = container.getBlockBlobReference("test/10/10/10"); + Path testPath = getUniquePath("test"); + CloudBlockBlob blockBlob = container + .getBlockBlobReference(testPath + "/10/10/10"); 
blockBlob.uploadText(""); - blockBlob = container.getBlockBlobReference("test/10/123/3/2/1/3"); + blockBlob = container.getBlockBlobReference(testPath + "/10/123/3/2/1/3"); blockBlob.uploadText(""); - FileStatus[] fileStatuses = fs.listStatus(new Path("/test/10/")); + FileStatus[] fileStatuses = fs + .listStatus(new Path(String.format("/%s/10/", testPath))); assertEquals(2, fileStatuses.length); assertEquals("10", fileStatuses[0].getPath().getName()); assertTrue(fileStatuses[0].isDirectory()); @@ -74,6 +77,12 @@ private String getBlobConnectionString() { + ";AccountName=" + this.getAccountName().split("\\.")[0] + ";AccountKey=" + this.getAccountKey(); } + else if (this.getConfiguration().isHttpsAlwaysUsed()) { + connectionString = "DefaultEndpointsProtocol=https;BlobEndpoint=https://" + + this.getAccountName().replaceFirst("\\.dfs\\.", ".blob.") + + ";AccountName=" + this.getAccountName().split("\\.")[0] + + ";AccountKey=" + this.getAccountKey(); + } else { connectionString = "DefaultEndpointsProtocol=http;BlobEndpoint=http://" + this.getAccountName().replaceFirst("\\.dfs\\.", ".blob.") diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java index bc5fc59d9d97e..3b31fc728bccf 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java @@ -17,16 +17,19 @@ */ package org.apache.hadoop.fs.azurebfs; -import com.google.common.collect.Lists; - import java.io.FileNotFoundException; import java.io.IOException; +import java.lang.reflect.Field; import java.util.List; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.junit.Assume; import org.junit.Test; +import org.mockito.Mockito; import 
org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; +import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers; @@ -37,6 +40,9 @@ import org.apache.hadoop.security.AccessControlException; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CHECK_ACCESS; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET; @@ -44,9 +50,15 @@ import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CLIENT_ID; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CLIENT_SECRET; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test cases for AzureBlobFileSystem.access() + * + * Some of the tests in this class require additional configs set in the test + * config file. + * Refer testing_azure.md for how to set the configs. 
+ * */ public class ITestAzureBlobFileSystemCheckAccess extends AbstractAbfsIntegrationTest { @@ -72,25 +84,27 @@ private void setTestUserFs() throws Exception { if (this.testUserFs != null) { return; } - String orgClientId = getConfiguration().get(FS_AZURE_BLOB_FS_CLIENT_ID); - String orgClientSecret = getConfiguration() - .get(FS_AZURE_BLOB_FS_CLIENT_SECRET); - Boolean orgCreateFileSystemDurungInit = getConfiguration() - .getBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, true); - getRawConfiguration().set(FS_AZURE_BLOB_FS_CLIENT_ID, - getConfiguration().get(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID)); - getRawConfiguration().set(FS_AZURE_BLOB_FS_CLIENT_SECRET, getConfiguration() - .get(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET)); - getRawConfiguration() - .setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, - false); - FileSystem fs = FileSystem.newInstance(getRawConfiguration()); - getRawConfiguration().set(FS_AZURE_BLOB_FS_CLIENT_ID, orgClientId); - getRawConfiguration().set(FS_AZURE_BLOB_FS_CLIENT_SECRET, orgClientSecret); - getRawConfiguration() - .setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, - orgCreateFileSystemDurungInit); - this.testUserFs = fs; + checkIfConfigIsSet(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT + + "." + getAccountName()); + Configuration conf = getRawConfiguration(); + setTestFsConf(FS_AZURE_BLOB_FS_CLIENT_ID, + FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID); + setTestFsConf(FS_AZURE_BLOB_FS_CLIENT_SECRET, + FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET); + conf.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.OAuth.name()); + conf.set(FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME + "." 
+ + getAccountName(), ClientCredsTokenProvider.class.getName()); + conf.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, + false); + this.testUserFs = FileSystem.newInstance(getRawConfiguration()); + } + + private void setTestFsConf(final String fsConfKey, + final String testFsConfKey) { + final String confKeyWithAccountName = fsConfKey + "." + getAccountName(); + final String confValue = getConfiguration() + .getString(testFsConfKey, ""); + getRawConfiguration().set(confKeyWithAccountName, confValue); } @Test(expected = IllegalArgumentException.class) @@ -100,15 +114,17 @@ public void testCheckAccessWithNullPath() throws IOException { @Test(expected = NullPointerException.class) public void testCheckAccessForFileWithNullFsAction() throws Exception { - assumeHNSAndCheckAccessEnabled(); + Assume.assumeTrue(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT + " is false", + isHNSEnabled); + Assume.assumeTrue(FS_AZURE_ENABLE_CHECK_ACCESS + " is false", + isCheckAccessEnabled); // NPE when trying to convert null FsAction enum superUserFs.access(new Path("test.txt"), null); } @Test(expected = FileNotFoundException.class) public void testCheckAccessForNonExistentFile() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path nonExistentFile = setupTestDirectoryAndUserAccess( "/nonExistentFile1.txt", FsAction.ALL); superUserFs.delete(nonExistentFile, true); @@ -153,15 +169,40 @@ public void testCheckAccessForAccountWithoutNS() throws Exception { getConfiguration() .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, true)); Assume.assumeTrue(FS_AZURE_ENABLE_CHECK_ACCESS + " is false", - isCheckAccessEnabled); + isCheckAccessEnabled); + checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID); + checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET); + checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID); + setTestUserFs(); + + // When the driver does not know if the account is HNS enabled or 
not it + // makes a server call and fails + intercept(AccessControlException.class, + "\"This request is not authorized to perform this operation using " + + "this permission.\", 403", + () -> testUserFs.access(new Path("/"), FsAction.READ)); + + // When the driver has already determined if the account is HNS enabled + // or not, and as the account is non HNS the AzureBlobFileSystem#access + // acts as noop + AzureBlobFileSystemStore mockAbfsStore = + Mockito.mock(AzureBlobFileSystemStore.class); + Mockito.when(mockAbfsStore + .getIsNamespaceEnabled(getTestTracingContext(getFileSystem(), false))) + .thenReturn(true); + Field abfsStoreField = AzureBlobFileSystem.class.getDeclaredField( + "abfsStore"); + abfsStoreField.setAccessible(true); + abfsStoreField.set(testUserFs, mockAbfsStore); testUserFs.access(new Path("/"), FsAction.READ); + + superUserFs.access(new Path("/"), FsAction.READ); } @Test public void testFsActionNONE() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test2.txt", FsAction.NONE); assertInaccessible(testFilePath, FsAction.EXECUTE); @@ -175,8 +216,7 @@ public void testFsActionNONE() throws Exception { @Test public void testFsActionEXECUTE() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test3.txt", FsAction.EXECUTE); assertAccessible(testFilePath, FsAction.EXECUTE); @@ -191,8 +231,7 @@ public void testFsActionEXECUTE() throws Exception { @Test public void testFsActionREAD() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test4.txt", FsAction.READ); assertAccessible(testFilePath, FsAction.READ); @@ -207,8 +246,7 @@ public void testFsActionREAD() throws Exception { @Test public void testFsActionWRITE() throws Exception { - 
assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test5.txt", FsAction.WRITE); assertAccessible(testFilePath, FsAction.WRITE); @@ -223,8 +261,7 @@ public void testFsActionWRITE() throws Exception { @Test public void testFsActionREADEXECUTE() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test6.txt", FsAction.READ_EXECUTE); assertAccessible(testFilePath, FsAction.EXECUTE); @@ -239,8 +276,7 @@ public void testFsActionREADEXECUTE() throws Exception { @Test public void testFsActionWRITEEXECUTE() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test7.txt", FsAction.WRITE_EXECUTE); assertAccessible(testFilePath, FsAction.EXECUTE); @@ -255,8 +291,7 @@ public void testFsActionWRITEEXECUTE() throws Exception { @Test public void testFsActionALL() throws Exception { - assumeHNSAndCheckAccessEnabled(); - setTestUserFs(); + checkPrerequisites(); Path testFilePath = setupTestDirectoryAndUserAccess("/test8.txt", FsAction.ALL); assertAccessible(testFilePath, FsAction.EXECUTE); @@ -268,11 +303,22 @@ public void testFsActionALL() throws Exception { assertAccessible(testFilePath, FsAction.ALL); } - private void assumeHNSAndCheckAccessEnabled() { + private void checkPrerequisites() throws Exception { + setTestUserFs(); Assume.assumeTrue(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT + " is false", isHNSEnabled); Assume.assumeTrue(FS_AZURE_ENABLE_CHECK_ACCESS + " is false", isCheckAccessEnabled); + checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID); + checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET); + checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID); + } + + private void checkIfConfigIsSet(String configKey){ + AbfsConfiguration conf = getConfiguration(); + 
String value = conf.get(configKey); + Assume.assumeTrue(configKey + " config is mandatory for the test to run", + value != null && value.trim().length() > 1); } private void assertAccessible(Path testFilePath, FsAction fsAction) @@ -306,7 +352,8 @@ private void modifyAcl(Path file, String uid, FsAction fsAction) private Path setupTestDirectoryAndUserAccess(String testFileName, FsAction fsAction) throws Exception { - Path file = new Path(TEST_FOLDER_PATH + testFileName); + Path testPath = path(TEST_FOLDER_PATH); + Path file = new Path(testPath + testFileName); file = this.superUserFs.makeQualified(file); this.superUserFs.delete(file, true); this.superUserFs.create(file); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCopy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCopy.java index 917ee9ce1b07e..aabaf82b622a8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCopy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCopy.java @@ -53,7 +53,7 @@ public void testCopyFromLocalFileSystem() throws Exception { localFs.delete(localFilePath, true); try { writeString(localFs, localFilePath, "Testing"); - Path dstPath = new Path("copiedFromLocal"); + Path dstPath = path("copiedFromLocal"); assertTrue(FileUtil.copy(localFs, localFilePath, fs, dstPath, false, fs.getConf())); assertIsFile(fs, dstPath); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 94368a4f36955..d9a3cea089f63 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -21,18 +21,47 @@ import java.io.FileNotFoundException; import java.io.FilterOutputStream; import java.io.IOException; +import java.lang.reflect.Field; import java.util.EnumSet; +import java.util.UUID; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConcurrentWriteOperationDetectedException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; + +import static java.net.HttpURLConnection.HTTP_CONFLICT; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_OK; +import static java.net.HttpURLConnection.HTTP_PRECON_FAILED; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static 
org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; /** * Test create operation. @@ -40,7 +69,7 @@ public class ITestAzureBlobFileSystemCreate extends AbstractAbfsIntegrationTest { private static final Path TEST_FILE_PATH = new Path("testfile"); - private static final Path TEST_FOLDER_PATH = new Path("testFolder"); + private static final String TEST_FOLDER_PATH = "testFolder"; private static final String TEST_CHILD_FILE = "childFile"; public ITestAzureBlobFileSystemCreate() throws Exception { @@ -63,13 +92,19 @@ public void testEnsureFileCreatedImmediately() throws Exception { @SuppressWarnings("deprecation") public void testCreateNonRecursive() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path testFile = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); + Path testFolderPath = path(TEST_FOLDER_PATH); + Path testFile = new Path(testFolderPath, TEST_CHILD_FILE); try { fs.createNonRecursive(testFile, true, 1024, (short) 1, 1024, null); fail("Should've thrown"); } catch (FileNotFoundException expected) { } - fs.mkdirs(TEST_FOLDER_PATH); + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.MKDIR, false, 0)); + fs.mkdirs(testFolderPath); + fs.registerListener(null); + fs.createNonRecursive(testFile, true, 1024, (short) 1, 1024, null) .close(); assertIsFile(fs, testFile); @@ -79,13 +114,14 @@ public void testCreateNonRecursive() throws Exception { @SuppressWarnings("deprecation") public void testCreateNonRecursive1() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path testFile = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); + Path testFolderPath = path(TEST_FOLDER_PATH); + Path testFile = new Path(testFolderPath, TEST_CHILD_FILE); try { fs.createNonRecursive(testFile, FsPermission.getDefault(), EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 1024, 
(short) 1, 1024, null); fail("Should've thrown"); } catch (FileNotFoundException expected) { } - fs.mkdirs(TEST_FOLDER_PATH); + fs.mkdirs(testFolderPath); fs.createNonRecursive(testFile, true, 1024, (short) 1, 1024, null) .close(); assertIsFile(fs, testFile); @@ -97,13 +133,14 @@ public void testCreateNonRecursive1() throws Exception { public void testCreateNonRecursive2() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path testFile = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); + Path testFolderPath = path(TEST_FOLDER_PATH); + Path testFile = new Path(testFolderPath, TEST_CHILD_FILE); try { fs.createNonRecursive(testFile, FsPermission.getDefault(), false, 1024, (short) 1, 1024, null); fail("Should've thrown"); } catch (FileNotFoundException e) { } - fs.mkdirs(TEST_FOLDER_PATH); + fs.mkdirs(testFolderPath); fs.createNonRecursive(testFile, true, 1024, (short) 1, 1024, null) .close(); assertIsFile(fs, testFile); @@ -115,7 +152,8 @@ public void testCreateNonRecursive2() throws Exception { @Test public void testWriteAfterClose() throws Throwable { final AzureBlobFileSystem fs = getFileSystem(); - Path testPath = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); + Path testFolderPath = path(TEST_FOLDER_PATH); + Path testPath = new Path(testFolderPath, TEST_CHILD_FILE); FSDataOutputStream out = fs.create(testPath); out.close(); intercept(IOException.class, () -> out.write('a')); @@ -135,7 +173,8 @@ public void testWriteAfterClose() throws Throwable { @Test public void testTryWithResources() throws Throwable { final AzureBlobFileSystem fs = getFileSystem(); - Path testPath = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); + Path testFolderPath = path(TEST_FOLDER_PATH); + Path testPath = new Path(testFolderPath, TEST_CHILD_FILE); try (FSDataOutputStream out = fs.create(testPath)) { out.write('1'); out.hsync(); @@ -145,15 +184,19 @@ public void testTryWithResources() throws Throwable { out.hsync(); fail("Expected a failure"); } catch (FileNotFoundException 
fnfe) { - // the exception raised in close() must be in the caught exception's - // suppressed list - Throwable[] suppressed = fnfe.getSuppressed(); - assertEquals("suppressed count", 1, suppressed.length); - Throwable inner = suppressed[0]; - if (!(inner instanceof IOException)) { - throw inner; + //appendblob outputStream does not generate suppressed exception on close as it is + //single threaded code + if (!fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testPath).toString())) { + // the exception raised in close() must be in the caught exception's + // suppressed list + Throwable[] suppressed = fnfe.getSuppressed(); + assertEquals("suppressed count", 1, suppressed.length); + Throwable inner = suppressed[0]; + if (!(inner instanceof IOException)) { + throw inner; + } + GenericTestUtils.assertExceptionContains(fnfe.getMessage(), inner); } - GenericTestUtils.assertExceptionContains(fnfe.getMessage(), inner); } } @@ -164,7 +207,8 @@ public void testTryWithResources() throws Throwable { @Test public void testFilterFSWriteAfterClose() throws Throwable { final AzureBlobFileSystem fs = getFileSystem(); - Path testPath = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); + Path testFolderPath = path(TEST_FOLDER_PATH); + Path testPath = new Path(testFolderPath, TEST_CHILD_FILE); FSDataOutputStream out = fs.create(testPath); intercept(FileNotFoundException.class, () -> { @@ -184,4 +228,267 @@ public void testFilterFSWriteAfterClose() throws Throwable { }); } + /** + * Tests if the number of connections made for: + * 1. create overwrite=false of a file that doesnt pre-exist + * 2. create overwrite=false of a file that pre-exists + * 3. create overwrite=true of a file that doesnt pre-exist + * 4. 
create overwrite=true of a file that pre-exists + * matches the expectation when run against both combinations of + * fs.azure.enable.conditional.create.overwrite=true and + * fs.azure.enable.conditional.create.overwrite=false + * @throws Throwable + */ + @Test + public void testDefaultCreateOverwriteFileTest() throws Throwable { + testCreateFileOverwrite(true); + testCreateFileOverwrite(false); + } + + public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) + throws Throwable { + final AzureBlobFileSystem currentFs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + config.set("fs.azure.enable.conditional.create.overwrite", + Boolean.toString(enableConditionalCreateOverwrite)); + + final AzureBlobFileSystem fs = + (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + + long totalConnectionMadeBeforeTest = fs.getInstrumentationMap() + .get(CONNECTIONS_MADE.getStatName()); + + int createRequestCount = 0; + final Path nonOverwriteFile = new Path("/NonOverwriteTest_FileName_" + + UUID.randomUUID().toString()); + + // Case 1: Not Overwrite - File does not pre-exist + // create should be successful + fs.create(nonOverwriteFile, false); + + // One request to server to create path should be issued + createRequestCount++; + + assertAbfsStatistics( + CONNECTIONS_MADE, + totalConnectionMadeBeforeTest + createRequestCount, + fs.getInstrumentationMap()); + + // Case 2: Not Overwrite - File pre-exists + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE, false, 0)); + intercept(FileAlreadyExistsException.class, + () -> fs.create(nonOverwriteFile, false)); + fs.registerListener(null); + + // One request to server to create path should be issued + createRequestCount++; + + assertAbfsStatistics( + CONNECTIONS_MADE, + totalConnectionMadeBeforeTest + createRequestCount, + 
fs.getInstrumentationMap()); + + final Path overwriteFilePath = new Path("/OverwriteTest_FileName_" + + UUID.randomUUID().toString()); + + // Case 3: Overwrite - File does not pre-exist + // create should be successful + fs.create(overwriteFilePath, true); + + // One request to server to create path should be issued + createRequestCount++; + + assertAbfsStatistics( + CONNECTIONS_MADE, + totalConnectionMadeBeforeTest + createRequestCount, + fs.getInstrumentationMap()); + + // Case 4: Overwrite - File pre-exists + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE, true, 0)); + fs.create(overwriteFilePath, true); + fs.registerListener(null); + + if (enableConditionalCreateOverwrite) { + // Three requests will be sent to server to create path, + // 1. create without overwrite + // 2. GetFileStatus to get eTag + // 3. create with overwrite + createRequestCount += 3; + } else { + createRequestCount++; + } + + assertAbfsStatistics( + CONNECTIONS_MADE, + totalConnectionMadeBeforeTest + createRequestCount, + fs.getInstrumentationMap()); + } + + /** + * Test negative scenarios with Create overwrite=false as default + * With create overwrite=true ending in 3 calls: + * A. Create overwrite=false + * B. GFS + * C. 
Create overwrite=true + * + * Scn1: A fails with HTTP409, leading to B which fails with HTTP404, + * detect parallel access + * Scn2: A fails with HTTP409, leading to B which fails with HTTP500, + * fail create with HTTP500 + * Scn3: A fails with HTTP409, leading to B and then C, + * which fails with HTTP412, detect parallel access + * Scn4: A fails with HTTP409, leading to B and then C, + * which fails with HTTP500, fail create with HTTP500 + * Scn5: A fails with HTTP500, fail create with HTTP500 + */ + @Test + public void testNegativeScenariosForCreateOverwriteDisabled() + throws Throwable { + + final AzureBlobFileSystem currentFs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + config.set("fs.azure.enable.conditional.create.overwrite", + Boolean.toString(true)); + + final AzureBlobFileSystem fs = + (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + + // Get mock AbfsClient with current config + AbfsClient + mockClient + = ITestAbfsClient.getMockAbfsClient( + fs.getAbfsStore().getClient(), + fs.getAbfsStore().getAbfsConfiguration()); + + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + abfsStore = setAzureBlobSystemStoreField(abfsStore, "client", mockClient); + boolean isNamespaceEnabled = abfsStore + .getIsNamespaceEnabled(getTestTracingContext(fs, false)); + + AbfsRestOperation successOp = mock( + AbfsRestOperation.class); + AbfsHttpOperation http200Op = mock( + AbfsHttpOperation.class); + when(http200Op.getStatusCode()).thenReturn(HTTP_OK); + when(successOp.getResult()).thenReturn(http200Op); + + AbfsRestOperationException conflictResponseEx + = getMockAbfsRestOperationException(HTTP_CONFLICT); + AbfsRestOperationException serverErrorResponseEx + = getMockAbfsRestOperationException(HTTP_INTERNAL_ERROR); + AbfsRestOperationException fileNotFoundResponseEx + = getMockAbfsRestOperationException(HTTP_NOT_FOUND); + AbfsRestOperationException preConditionResponseEx + = 
getMockAbfsRestOperationException(HTTP_PRECON_FAILED); + + // mock for overwrite=false + doThrow(conflictResponseEx) // Scn1: GFS fails with Http404 + .doThrow(conflictResponseEx) // Scn2: GFS fails with Http500 + .doThrow( + conflictResponseEx) // Scn3: create overwrite=true fails with Http412 + .doThrow( + conflictResponseEx) // Scn4: create overwrite=true fails with Http500 + .doThrow( + serverErrorResponseEx) // Scn5: create overwrite=false fails with Http500 + .when(mockClient) + .createPath(any(String.class), eq(true), eq(false), + isNamespaceEnabled ? any(String.class) : eq(null), + isNamespaceEnabled ? any(String.class) : eq(null), + any(boolean.class), eq(null), any(TracingContext.class)); + + doThrow(fileNotFoundResponseEx) // Scn1: GFS fails with Http404 + .doThrow(serverErrorResponseEx) // Scn2: GFS fails with Http500 + .doReturn(successOp) // Scn3: create overwrite=true fails with Http412 + .doReturn(successOp) // Scn4: create overwrite=true fails with Http500 + .when(mockClient) + .getPathStatus(any(String.class), eq(false), any(TracingContext.class)); + + // mock for overwrite=true + doThrow( + preConditionResponseEx) // Scn3: create overwrite=true fails with Http412 + .doThrow( + serverErrorResponseEx) // Scn4: create overwrite=true fails with Http500 + .when(mockClient) + .createPath(any(String.class), eq(true), eq(true), + isNamespaceEnabled ? any(String.class) : eq(null), + isNamespaceEnabled ? any(String.class) : eq(null), + any(boolean.class), eq(null), any(TracingContext.class)); + + // Scn1: GFS fails with Http404 + // Sequence of events expected: + // 1. create overwrite=false - fail with conflict + // 2. GFS - fail with File Not found + // Create will fail with ConcurrentWriteOperationDetectedException + validateCreateFileException(ConcurrentWriteOperationDetectedException.class, + abfsStore); + + // Scn2: GFS fails with Http500 + // Sequence of events expected: + // 1. create overwrite=false - fail with conflict + // 2. 
GFS - fail with Server error + // Create will fail with 500 + validateCreateFileException(AbfsRestOperationException.class, abfsStore); + + // Scn3: create overwrite=true fails with Http412 + // Sequence of events expected: + // 1. create overwrite=false - fail with conflict + // 2. GFS - pass + // 3. create overwrite=true - fail with Pre-Condition + // Create will fail with ConcurrentWriteOperationDetectedException + validateCreateFileException(ConcurrentWriteOperationDetectedException.class, + abfsStore); + + // Scn4: create overwrite=true fails with Http500 + // Sequence of events expected: + // 1. create overwrite=false - fail with conflict + // 2. GFS - pass + // 3. create overwrite=true - fail with Server error + // Create will fail with 500 + validateCreateFileException(AbfsRestOperationException.class, abfsStore); + + // Scn5: create overwrite=false fails with Http500 + // Sequence of events expected: + // 1. create overwrite=false - fail with server error + // Create will fail with 500 + validateCreateFileException(AbfsRestOperationException.class, abfsStore); + } + + private AzureBlobFileSystemStore setAzureBlobSystemStoreField( + final AzureBlobFileSystemStore abfsStore, + final String fieldName, + Object fieldObject) throws Exception { + + Field abfsClientField = AzureBlobFileSystemStore.class.getDeclaredField( + fieldName); + abfsClientField.setAccessible(true); + Field modifiersField = Field.class.getDeclaredField("modifiers"); + modifiersField.setAccessible(true); + modifiersField.setInt(abfsClientField, + abfsClientField.getModifiers() & ~java.lang.reflect.Modifier.FINAL); + abfsClientField.set(abfsStore, fieldObject); + return abfsStore; + } + + private void validateCreateFileException(final Class exceptionClass, final AzureBlobFileSystemStore abfsStore) + throws Exception { + FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, + FsAction.ALL); + FsPermission umask = new FsPermission(FsAction.NONE, FsAction.NONE, + 
FsAction.NONE); + Path testPath = new Path("testFile"); + intercept( + exceptionClass, + () -> abfsStore.createFile(testPath, null, true, permission, umask, + getTestTracingContext(getFileSystem(), true))); + } + + private AbfsRestOperationException getMockAbfsRestOperationException(int status) { + return new AbfsRestOperationException(status, "", "", new Exception()); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java new file mode 100644 index 0000000000000..5735423aaf928 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java @@ -0,0 +1,497 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.AccessDeniedException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.UUID; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.AUTHORIZATION_PERMISSION_MISS_MATCH; +import static org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers.aclEntry; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; +import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS; +import static org.apache.hadoop.fs.permission.AclEntryScope.DEFAULT; +import static 
org.apache.hadoop.fs.permission.AclEntryType.GROUP; +import static org.apache.hadoop.fs.permission.AclEntryType.USER; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test Perform Authorization Check operation + */ +public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrationTest { + private static final String TEST_GROUP = UUID.randomUUID().toString(); + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAzureBlobFileSystemDelegationSAS.class); + + private boolean isHNSEnabled; + + public ITestAzureBlobFileSystemDelegationSAS() throws Exception { + // These tests rely on specific settings in azure-auth-keys.xml: + String sasProvider = getRawConfiguration().get(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE); + Assume.assumeTrue(MockDelegationSASTokenProvider.class.getCanonicalName().equals(sasProvider)); + Assume.assumeNotNull(getRawConfiguration().get(TestConfigurationKeys.FS_AZURE_TEST_APP_ID)); + Assume.assumeNotNull(getRawConfiguration().get(TestConfigurationKeys.FS_AZURE_TEST_APP_SECRET)); + Assume.assumeNotNull(getRawConfiguration().get(TestConfigurationKeys.FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_TENANT_ID)); + Assume.assumeNotNull(getRawConfiguration().get(TestConfigurationKeys.FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_OBJECT_ID)); + // The test uses shared key to create a random filesystem and then creates another + // instance of this filesystem using SAS authorization. 
+ Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); + } + + @Override + public void setup() throws Exception { + isHNSEnabled = this.getConfiguration().getBoolean( + TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + Assume.assumeTrue(isHNSEnabled); + createFilesystemForSASTests(); + super.setup(); + } + + @Test + // Test filesystem operations access, create, mkdirs, setOwner, getFileStatus + public void testCheckAccess() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + + Path rootPath = new Path("/"); + fs.setOwner(rootPath, MockDelegationSASTokenProvider.TEST_OWNER, null); + fs.setPermission(rootPath, new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.EXECUTE)); + FileStatus rootStatus = fs.getFileStatus(rootPath); + assertEquals("The directory permissions are not expected.", "rwxr-x--x", rootStatus.getPermission().toString()); + assertEquals("The directory owner is not expected.", + MockDelegationSASTokenProvider.TEST_OWNER, + rootStatus.getOwner()); + + Path dirPath = new Path(UUID.randomUUID().toString()); + fs.mkdirs(dirPath); + + Path filePath = new Path(dirPath, "file1"); + fs.create(filePath).close(); + fs.setPermission(filePath, new FsPermission(FsAction.READ, FsAction.READ, FsAction.NONE)); + + FileStatus dirStatus = fs.getFileStatus(dirPath); + FileStatus fileStatus = fs.getFileStatus(filePath); + + assertEquals("The owner is not expected.", MockDelegationSASTokenProvider.TEST_OWNER, dirStatus.getOwner()); + assertEquals("The owner is not expected.", MockDelegationSASTokenProvider.TEST_OWNER, fileStatus.getOwner()); + assertEquals("The directory permissions are not expected.", "rwxr-xr-x", dirStatus.getPermission().toString()); + assertEquals("The file permissions are not expected.", "r--r-----", fileStatus.getPermission().toString()); + + assertTrue(isAccessible(fs, dirPath, FsAction.READ_WRITE)); + assertFalse(isAccessible(fs, filePath, FsAction.READ_WRITE)); + + 
fs.setPermission(filePath, new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE)); + fileStatus = fs.getFileStatus(filePath); + assertEquals("The file permissions are not expected.", "rw-r-----", fileStatus.getPermission().toString()); + assertTrue(isAccessible(fs, filePath, FsAction.READ_WRITE)); + + fs.setPermission(dirPath, new FsPermission(FsAction.EXECUTE, FsAction.NONE, FsAction.NONE)); + dirStatus = fs.getFileStatus(dirPath); + assertEquals("The file permissions are not expected.", "--x------", dirStatus.getPermission().toString()); + assertFalse(isAccessible(fs, dirPath, FsAction.READ_WRITE)); + assertTrue(isAccessible(fs, dirPath, FsAction.EXECUTE)); + + fs.setPermission(dirPath, new FsPermission(FsAction.NONE, FsAction.NONE, FsAction.NONE)); + dirStatus = fs.getFileStatus(dirPath); + assertEquals("The file permissions are not expected.", "---------", dirStatus.getPermission().toString()); + assertFalse(isAccessible(fs, filePath, FsAction.READ_WRITE)); + } + + private boolean isAccessible(FileSystem fs, Path path, FsAction fsAction) + throws IOException { + try { + fs.access(path, fsAction); + } catch (AccessControlException ace) { + return false; + } + return true; + } + + @Test + // Test filesystem operations create, create with overwrite, append and open. 
+ // Test output stream operation write, flush and close + // Test input stream operation, read + public void testReadAndWrite() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path reqPath = new Path(UUID.randomUUID().toString()); + + final String msg1 = "purple"; + final String msg2 = "yellow"; + int expectedFileLength = msg1.length() * 2; + + byte[] readBuffer = new byte[1024]; + + // create file with content "purplepurple" + try (FSDataOutputStream stream = fs.create(reqPath)) { + stream.writeBytes(msg1); + stream.hflush(); + stream.writeBytes(msg1); + } + + // open file and verify content is "purplepurple" + try (FSDataInputStream stream = fs.open(reqPath)) { + int bytesRead = stream.read(readBuffer, 0, readBuffer.length); + assertEquals(expectedFileLength, bytesRead); + String fileContent = new String(readBuffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals(msg1 + msg1, fileContent); + } + + // overwrite file with content "yellowyellow" + try (FSDataOutputStream stream = fs.create(reqPath)) { + stream.writeBytes(msg2); + stream.hflush(); + stream.writeBytes(msg2); + } + + // open file and verify content is "yellowyellow" + try (FSDataInputStream stream = fs.open(reqPath)) { + int bytesRead = stream.read(readBuffer, 0, readBuffer.length); + assertEquals(expectedFileLength, bytesRead); + String fileContent = new String(readBuffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals(msg2 + msg2, fileContent); + } + + // append to file so final content is "yellowyellowpurplepurple" + try (FSDataOutputStream stream = fs.append(reqPath)) { + stream.writeBytes(msg1); + stream.hflush(); + stream.writeBytes(msg1); + } + + // open file and verify content is "yellowyellowpurplepurple" + try (FSDataInputStream stream = fs.open(reqPath)) { + int bytesRead = stream.read(readBuffer, 0, readBuffer.length); + assertEquals(2 * expectedFileLength, bytesRead); + String fileContent = new String(readBuffer, 0, bytesRead, StandardCharsets.UTF_8); + 
assertEquals(msg2 + msg2 + msg1 + msg1, fileContent); + } + } + + @Test + // Test rename file and rename folder + public void testRename() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path sourceDir = new Path(UUID.randomUUID().toString()); + Path sourcePath = new Path(sourceDir, UUID.randomUUID().toString()); + Path destinationPath = new Path(sourceDir, UUID.randomUUID().toString()); + Path destinationDir = new Path(UUID.randomUUID().toString()); + + // create file with content "hello" + try (FSDataOutputStream stream = fs.create(sourcePath)) { + stream.writeBytes("hello"); + } + + assertPathDoesNotExist(fs, "This path should not exist", destinationPath); + fs.rename(sourcePath, destinationPath); + assertPathDoesNotExist(fs, "This path should not exist", sourcePath); + assertPathExists(fs, "This path should exist", destinationPath); + + assertPathDoesNotExist(fs, "This path should not exist", destinationDir); + fs.rename(sourceDir, destinationDir); + assertPathDoesNotExist(fs, "This path should not exist", sourceDir); + assertPathExists(fs, "This path should exist", destinationDir); + } + + @Test + // Test delete file and delete folder + public void testDelete() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path dirPath = new Path(UUID.randomUUID().toString()); + Path filePath = new Path(dirPath, UUID.randomUUID().toString()); + + // create file with content "hello" + try (FSDataOutputStream stream = fs.create(filePath)) { + stream.writeBytes("hello"); + } + + assertPathExists(fs, "This path should exist", filePath); + fs.delete(filePath, false); + assertPathDoesNotExist(fs, "This path should not exist", filePath); + + assertPathExists(fs, "This path should exist", dirPath); + fs.delete(dirPath, false); + assertPathDoesNotExist(fs, "This path should not exist", dirPath); + } + + @Test + // Test delete folder recursive + public void testDeleteRecursive() throws Exception { + final AzureBlobFileSystem fs = 
getFileSystem(); + Path dirPath = new Path(UUID.randomUUID().toString()); + Path filePath = new Path(dirPath, UUID.randomUUID().toString()); + + // create file with content "hello" + try (FSDataOutputStream stream = fs.create(filePath)) { + stream.writeBytes("hello"); + } + + assertPathExists(fs, "This path should exist", dirPath); + assertPathExists(fs, "This path should exist", filePath); + fs.delete(dirPath, true); + assertPathDoesNotExist(fs, "This path should not exist", filePath); + assertPathDoesNotExist(fs, "This path should not exist", dirPath); + } + + @Test + // Test list on file, directory and root path + public void testList() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path dirPath = new Path(UUID.randomUUID().toString()); + Path filePath = new Path(dirPath, UUID.randomUUID().toString()); + + fs.mkdirs(dirPath); + + // create file with content "hello" + try (FSDataOutputStream stream = fs.create(filePath)) { + stream.writeBytes("hello"); + } + + fs.listStatus(filePath); + fs.listStatus(dirPath); + fs.listStatus(new Path("/")); + } + + @Test + // Test filesystem operations setAcl, getAclStatus, removeAcl + // setPermissions and getFileStatus + public void testAcl() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path reqPath = new Path(UUID.randomUUID().toString()); + + fs.create(reqPath).close(); + + fs.setAcl(reqPath, Arrays + .asList(aclEntry(ACCESS, GROUP, TEST_GROUP, FsAction.ALL))); + + AclStatus acl = fs.getAclStatus(reqPath); + assertEquals(MockDelegationSASTokenProvider.TEST_OWNER, acl.getOwner()); + assertEquals("[group::r--, group:" + TEST_GROUP + ":rwx]", acl.getEntries().toString()); + + fs.removeAcl(reqPath); + acl = fs.getAclStatus(reqPath); + assertEquals("[]", acl.getEntries().toString()); + + fs.setPermission(reqPath, + new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE)); + + FileStatus status = fs.getFileStatus(reqPath); + assertEquals("rwx------", 
status.getPermission().toString()); + + acl = fs.getAclStatus(reqPath); + assertEquals("rwx------", acl.getPermission().toString()); + } + + @Test + // Test getFileStatus and getAclStatus operations on root path + public void testRootPath() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path rootPath = new Path(AbfsHttpConstants.ROOT_PATH); + + fs.setOwner(rootPath, MockDelegationSASTokenProvider.TEST_OWNER, null); + FileStatus status = fs.getFileStatus(rootPath); + assertEquals("rwxr-x---", status.getPermission().toString()); + assertEquals(MockDelegationSASTokenProvider.TEST_OWNER, status.getOwner()); + assertTrue(status.isDirectory()); + + AclStatus acl = fs.getAclStatus(rootPath); + assertEquals("rwxr-x---", acl.getPermission().toString()); + + List aclSpec = new ArrayList<>(); + int count = 0; + for (AclEntry entry: acl.getEntries()) { + aclSpec.add(entry); + if (entry.getScope() == AclEntryScope.DEFAULT) { + count++; + } + } + assertEquals(0, count); + + aclSpec.add(aclEntry(DEFAULT, USER, "cd548981-afec-4ab9-9d39-f6f2add2fd9b", FsAction.EXECUTE)); + + fs.modifyAclEntries(rootPath, aclSpec); + + acl = fs.getAclStatus(rootPath); + + count = 0; + for (AclEntry entry: acl.getEntries()) { + aclSpec.add(entry); + if (entry.getScope() == AclEntryScope.DEFAULT) { + count++; + } + } + assertEquals(5, count); + + fs.removeDefaultAcl(rootPath); + + acl = fs.getAclStatus(rootPath); + + count = 0; + for (AclEntry entry: acl.getEntries()) { + aclSpec.add(entry); + if (entry.getScope() == AclEntryScope.DEFAULT) { + count++; + } + } + assertEquals(0, count); + } + + @Test + // Test filesystem operations getXAttr and setXAttr + public void testProperties() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path reqPath = new Path(UUID.randomUUID().toString()); + + fs.create(reqPath).close(); + + final String propertyName = "user.mime_type"; + final byte[] propertyValue = "text/plain".getBytes("utf-8"); + fs.setXAttr(reqPath, 
propertyName, propertyValue); + + assertArrayEquals(propertyValue, fs.getXAttr(reqPath, propertyName)); + } + + @Test + public void testSignatureMask() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + String src = String.format("/testABC/test%s.xt", UUID.randomUUID()); + fs.create(new Path(src)).close(); + AbfsRestOperation abfsHttpRestOperation = fs.getAbfsClient() + .renamePath(src, "/testABC" + "/abc.txt", null, + getTestTracingContext(fs, false), null, false, isHNSEnabled) + .getOp(); + AbfsHttpOperation result = abfsHttpRestOperation.getResult(); + String url = result.getMaskedUrl(); + String encodedUrl = result.getMaskedEncodedUrl(); + Assertions.assertThat(url.substring(url.indexOf("sig="))) + .describedAs("Signature query param should be masked") + .startsWith("sig=XXXXX"); + Assertions.assertThat(encodedUrl.substring(encodedUrl.indexOf("sig%3D"))) + .describedAs("Signature query param should be masked") + .startsWith("sig%3DXXXXX"); + } + + @Test + public void testSignatureMaskOnExceptionMessage() throws Exception { + intercept(IOException.class, "sig=XXXX", + () -> getFileSystem().getAbfsClient() + .renamePath("testABC/test.xt", "testABC/abc.txt", null, + getTestTracingContext(getFileSystem(), false), null, false, isHNSEnabled)); + } + + @Test + // SetPermission should fail when saoid is not the owner and succeed when it is. + public void testSetPermissionForNonOwner() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + + Path rootPath = new Path("/"); + FileStatus rootStatus = fs.getFileStatus(rootPath); + assertEquals("The permissions are not expected.", + "rwxr-x---", + rootStatus.getPermission().toString()); + assertNotEquals("The owner is not expected.", + MockDelegationSASTokenProvider.TEST_OWNER, + rootStatus.getOwner()); + + // Attempt to set permission without being the owner. 
+ intercept(AccessDeniedException.class, + AUTHORIZATION_PERMISSION_MISS_MATCH.getErrorCode(), () -> { + fs.setPermission(rootPath, new FsPermission(FsAction.ALL, + FsAction.READ_EXECUTE, FsAction.EXECUTE)); + return "Set permission should fail because saoid is not the owner."; + }); + + // Attempt to set permission as the owner. + fs.setOwner(rootPath, MockDelegationSASTokenProvider.TEST_OWNER, null); + fs.setPermission(rootPath, new FsPermission(FsAction.ALL, + FsAction.READ_EXECUTE, FsAction.EXECUTE)); + rootStatus = fs.getFileStatus(rootPath); + assertEquals("The permissions are not expected.", + "rwxr-x--x", + rootStatus.getPermission().toString()); + assertEquals("The directory owner is not expected.", + MockDelegationSASTokenProvider.TEST_OWNER, + rootStatus.getOwner()); + } + + @Test + // Without saoid or suoid, setPermission should succeed with sp=p for a non-owner. + public void testSetPermissionWithoutAgentForNonOwner() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path path = new Path(MockDelegationSASTokenProvider.NO_AGENT_PATH); + fs.create(path).close(); + + FileStatus status = fs.getFileStatus(path); + assertEquals("The permissions are not expected.", + "rw-r--r--", + status.getPermission().toString()); + assertNotEquals("The owner is not expected.", + TestConfigurationKeys.FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_OBJECT_ID, + status.getOwner()); + + fs.setPermission(path, new FsPermission(FsAction.READ, FsAction.READ, FsAction.NONE)); + + FileStatus fileStatus = fs.getFileStatus(path); + assertEquals("The permissions are not expected.", + "r--r-----", + fileStatus.getPermission().toString()); + } + + @Test + public void testSASQuesMarkPrefix() throws Exception { + AbfsConfiguration testConfig = this.getConfiguration(); + // the SAS Token Provider is changed + testConfig.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, "org.apache.hadoop.fs.azurebfs.extensions.MockWithPrefixSASTokenProvider"); + + AzureBlobFileSystem testFs = 
(AzureBlobFileSystem) FileSystem.newInstance(getRawConfiguration()); + Path testFile = new Path("/testSASPrefixQuesMark"); + + // the creation of this filesystem should work correctly even when a SAS Token is generated with a ? prefix + testFs.create(testFile).close(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index 486daca4f1120..1f0ff667522da 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -26,22 +26,51 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; +import org.assertj.core.api.Assertions; +import org.junit.Assume; import org.junit.Test; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.TestAbfsPerfTracker; +import org.apache.hadoop.fs.azurebfs.utils.TestMockHelpers; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_OK; + +import static org.mockito.ArgumentMatchers.any; +import static 
org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT; +import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.DeletePath; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertDeleted; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist; import static org.apache.hadoop.test.LambdaTestUtils.intercept; + /** * Test delete operation. */ public class ITestAzureBlobFileSystemDelete extends AbstractAbfsIntegrationTest { + private static final int REDUCED_RETRY_COUNT = 1; + private static final int REDUCED_MAX_BACKOFF_INTERVALS_MS = 5000; + public ITestAzureBlobFileSystemDelete() throws Exception { super(); } @@ -50,12 +79,13 @@ public ITestAzureBlobFileSystemDelete() throws Exception { public void testDeleteRoot() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - fs.mkdirs(new Path("/testFolder0")); - fs.mkdirs(new Path("/testFolder1")); - fs.mkdirs(new Path("/testFolder2")); - touch(new Path("/testFolder1/testfile")); - touch(new Path("/testFolder1/testfile2")); - touch(new Path("/testFolder1/testfile3")); + Path testPath = path("/testFolder"); + fs.mkdirs(new Path(testPath + "_0")); + fs.mkdirs(new Path(testPath + "_1")); + fs.mkdirs(new Path(testPath + "_2")); + touch(new Path(testPath + "_1/testfile")); + touch(new Path(testPath + "_1/testfile2")); + touch(new Path(testPath + "_1/testfile3")); Path root = new Path("/"); FileStatus[] ls = fs.listStatus(root); @@ -69,7 +99,7 @@ public void testDeleteRoot() throws Exception { @Test() public void testOpenFileAfterDelete() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path testfile = new 
Path("/testFile"); + Path testfile = path("/testFile"); touch(testfile); assertDeleted(fs, testfile, false); @@ -80,7 +110,7 @@ public void testOpenFileAfterDelete() throws Exception { @Test public void testEnsureFileIsDeleted() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path testfile = new Path("testfile"); + Path testfile = path("testfile"); touch(testfile); assertDeleted(fs, testfile, false); assertPathDoesNotExist(fs, "deleted", testfile); @@ -89,10 +119,10 @@ public void testEnsureFileIsDeleted() throws Exception { @Test public void testDeleteDirectory() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path dir = new Path("testfile"); + Path dir = path("testfile"); fs.mkdirs(dir); - fs.mkdirs(new Path("testfile/test1")); - fs.mkdirs(new Path("testfile/test1/test2")); + fs.mkdirs(new Path(dir + "/test1")); + fs.mkdirs(new Path(dir + "/test1/test2")); assertDeleted(fs, dir, true); assertPathDoesNotExist(fs, "deleted", dir); @@ -104,8 +134,9 @@ public void testDeleteFirstLevelDirectory() throws Exception { final List> tasks = new ArrayList<>(); ExecutorService es = Executors.newFixedThreadPool(10); + Path dir = path("/test"); for (int i = 0; i < 1000; i++) { - final Path fileName = new Path("/test/" + i); + final Path fileName = new Path(dir + "/" + i); Callable callable = new Callable() { @Override public Void call() throws Exception { @@ -122,12 +153,134 @@ public Void call() throws Exception { } es.shutdownNow(); - Path dir = new Path("/test"); + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.DELETE, false, 0)); // first try a non-recursive delete, expect failure intercept(FileAlreadyExistsException.class, () -> fs.delete(dir, false)); + fs.registerListener(null); assertDeleted(fs, dir, true); assertPathDoesNotExist(fs, "deleted", dir); } + + @Test + public void testDeleteIdempotency() throws Exception { + 
Assume.assumeTrue(DEFAULT_DELETE_CONSIDERED_IDEMPOTENT); + // Config to reduce the retry and maxBackoff time for test run + AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + getConfiguration(), + REDUCED_RETRY_COUNT, REDUCED_MAX_BACKOFF_INTERVALS_MS); + + final AzureBlobFileSystem fs = getFileSystem(); + AbfsClient abfsClient = fs.getAbfsStore().getClient(); + AbfsClient testClient = ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig); + + // Mock instance of AbfsRestOperation + AbfsRestOperation op = mock(AbfsRestOperation.class); + // Set retryCount to non-zero + when(op.isARetriedRequest()).thenReturn(true); + + // Case 1: Mock instance of Http Operation response. This will return + // HTTP:Not Found + AbfsHttpOperation http404Op = mock(AbfsHttpOperation.class); + when(http404Op.getStatusCode()).thenReturn(HTTP_NOT_FOUND); + + // Mock delete response to 404 + when(op.getResult()).thenReturn(http404Op); + when(op.hasResult()).thenReturn(true); + + Assertions.assertThat(testClient.deleteIdempotencyCheckOp(op) + .getResult() + .getStatusCode()) + .describedAs( + "Delete is considered idempotent by default and should return success.") + .isEqualTo(HTTP_OK); + + // Case 2: Mock instance of Http Operation response. 
This will return + // HTTP:Bad Request + AbfsHttpOperation http400Op = mock(AbfsHttpOperation.class); + when(http400Op.getStatusCode()).thenReturn(HTTP_BAD_REQUEST); + + // Mock delete response to 400 + when(op.getResult()).thenReturn(http400Op); + when(op.hasResult()).thenReturn(true); + + Assertions.assertThat(testClient.deleteIdempotencyCheckOp(op) + .getResult() + .getStatusCode()) + .describedAs( + "Idempotency check to happen only for HTTP 404 response.") + .isEqualTo(HTTP_BAD_REQUEST); + + } + + @Test + public void testDeleteIdempotencyTriggerHttp404() throws Exception { + + final AzureBlobFileSystem fs = getFileSystem(); + AbfsClient client = ITestAbfsClient.createTestClientFromCurrentContext( + fs.getAbfsStore().getClient(), + this.getConfiguration()); + + // Case 1: Not a retried case should throw error back + // Add asserts at AzureBlobFileSystemStore and AbfsClient levels + intercept(AbfsRestOperationException.class, + () -> fs.getAbfsStore().delete( + new Path("/NonExistingPath"), + false, getTestTracingContext(fs, false))); + + intercept(AbfsRestOperationException.class, + () -> client.deletePath( + "/NonExistingPath", + false, + null, + getTestTracingContext(fs, true))); + + // mock idempotency check to mimic retried case + AbfsClient mockClient = ITestAbfsClient.getMockAbfsClient( + fs.getAbfsStore().getClient(), + this.getConfiguration()); + AzureBlobFileSystemStore mockStore = mock(AzureBlobFileSystemStore.class); + mockStore = TestMockHelpers.setClassField(AzureBlobFileSystemStore.class, mockStore, + "client", mockClient); + mockStore = TestMockHelpers.setClassField(AzureBlobFileSystemStore.class, + mockStore, + "abfsPerfTracker", + TestAbfsPerfTracker.getAPerfTrackerInstance(this.getConfiguration())); + doCallRealMethod().when(mockStore).delete(new Path("/NonExistingPath"), + false, getTestTracingContext(fs, false)); + + // Case 2: Mimic retried case + // Idempotency check on Delete always returns success + AbfsRestOperation idempotencyRetOp = 
ITestAbfsClient.getRestOp( + DeletePath, mockClient, HTTP_METHOD_DELETE, + ITestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"), + ITestAbfsClient.getTestRequestHeaders(mockClient)); + idempotencyRetOp.hardSetResult(HTTP_OK); + + doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any()); + TracingContext tracingContext = getTestTracingContext(fs, false); + when(mockClient.deletePath("/NonExistingPath", false, null, tracingContext)) + .thenCallRealMethod(); + + Assertions.assertThat(mockClient.deletePath( + "/NonExistingPath", + false, + null, + tracingContext) + .getResult() + .getStatusCode()) + .describedAs("Idempotency check reports successful " + + "delete. 200OK should be returned") + .isEqualTo(idempotencyRetOp.getResult().getStatusCode()); + + // Call from AzureBlobFileSystemStore should not fail either + mockStore.delete(new Path("/NonExistingPath"), false, getTestTracingContext(fs, false)); + } + } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java index ebc9c07e53e59..56016a39470e4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java @@ -33,6 +33,8 @@ import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_TOLERATE_CONCURRENT_APPEND; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -52,14 +54,14 @@ public ITestAzureBlobFileSystemE2E() throws Exception { @Test public void testWriteOneByteToFile() throws Exception { - final 
Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); testWriteOneByteToFile(testFilePath); } @Test public void testReadWriteBytesToFile() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); testWriteOneByteToFile(testFilePath); try(FSDataInputStream inputStream = fs.open(testFilePath, TEST_DEFAULT_BUFFER_SIZE)) { @@ -78,7 +80,7 @@ public void testOOBWritesAndReadFail() throws Exception { final byte[] b = new byte[2 * readBufferSize]; new Random().nextBytes(b); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); try(FSDataOutputStream writeStream = fs.create(testFilePath)) { writeStream.write(b); writeStream.flush(); @@ -107,7 +109,7 @@ public void testOOBWritesAndReadSucceed() throws Exception { byte[] bytesToRead = new byte[readBufferSize]; final byte[] b = new byte[2 * readBufferSize]; new Random().nextBytes(b); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); try (FSDataOutputStream writeStream = fs.create(testFilePath)) { writeStream.write(b); @@ -130,7 +132,7 @@ public void testOOBWritesAndReadSucceed() throws Exception { @Test public void testWriteWithBufferOffset() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); final byte[] b = new byte[1024 * 1000]; new Random().nextBytes(b); @@ -151,7 +153,7 @@ public void testWriteWithBufferOffset() throws Exception { @Test public void testReadWriteHeavyBytesToFileWithSmallerChunks() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path testFilePath = new Path(methodName.getMethodName()); + 
final Path testFilePath = path(methodName.getMethodName()); final byte[] writeBuffer = new byte[5 * 1000 * 1024]; new Random().nextBytes(writeBuffer); @@ -171,47 +173,51 @@ public void testReadWriteHeavyBytesToFileWithSmallerChunks() throws Exception { @Test public void testReadWithFileNotFoundException() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); testWriteOneByteToFile(testFilePath); - FSDataInputStream inputStream = fs.open(testFilePath, TEST_DEFAULT_BUFFER_SIZE); - fs.delete(testFilePath, true); - assertFalse(fs.exists(testFilePath)); + try (FSDataInputStream inputStream = fs.open(testFilePath, + TEST_DEFAULT_BUFFER_SIZE)) { + fs.delete(testFilePath, true); + assertPathDoesNotExist(fs, "This path should not exist", testFilePath); - intercept(FileNotFoundException.class, - () -> inputStream.read(new byte[1])); + intercept(FileNotFoundException.class, () -> inputStream.read(new byte[1])); + } } @Test public void testWriteWithFileNotFoundException() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); - FSDataOutputStream stream = fs.create(testFilePath); - assertTrue(fs.exists(testFilePath)); - stream.write(TEST_BYTE); + try (FSDataOutputStream stream = fs.create(testFilePath)) { + assertPathExists(fs, "Path should exist", testFilePath); + stream.write(TEST_BYTE); - fs.delete(testFilePath, true); - assertFalse(fs.exists(testFilePath)); + fs.delete(testFilePath, true); + assertPathDoesNotExist(fs, "This path should not exist", testFilePath); - // trigger append call - intercept(FileNotFoundException.class, - () -> stream.close()); + // trigger append call + intercept(FileNotFoundException.class, () -> stream.close()); + } } @Test public void 
testFlushWithFileNotFoundException() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - final Path testFilePath = new Path(methodName.getMethodName()); + final Path testFilePath = path(methodName.getMethodName()); + if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testFilePath).toString())) { + return; + } - FSDataOutputStream stream = fs.create(testFilePath); - assertTrue(fs.exists(testFilePath)); + try (FSDataOutputStream stream = fs.create(testFilePath)) { + assertPathExists(fs, "This path should exist", testFilePath); - fs.delete(testFilePath, true); - assertFalse(fs.exists(testFilePath)); + fs.delete(testFilePath, true); + assertPathDoesNotExist(fs, "This path should not exist", testFilePath); - intercept(FileNotFoundException.class, - () -> stream.close()); + intercept(FileNotFoundException.class, () -> stream.close()); + } } private void testWriteOneByteToFile(Path testFilePath) throws Exception { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java index 421fa9a65cc05..4fa7a0fca68ae 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java @@ -27,6 +27,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; + /** * Test FileStatus. 
*/ @@ -37,8 +39,8 @@ public class ITestAzureBlobFileSystemFileStatus extends private static final String DEFAULT_UMASK_VALUE = "027"; private static final String FULL_PERMISSION = "777"; - private static final Path TEST_FILE = new Path("testFile"); - private static final Path TEST_FOLDER = new Path("testDir"); + private static final String TEST_FILE = "testFile"; + private static final String TEST_FOLDER = "testDir"; public ITestAzureBlobFileSystemFileStatus() throws Exception { super(); @@ -57,8 +59,9 @@ public void testEnsureStatusWorksForRoot() throws Exception { public void testFileStatusPermissionsAndOwnerAndGroup() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); fs.getConf().set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, DEFAULT_UMASK_VALUE); - touch(TEST_FILE); - validateStatus(fs, TEST_FILE, false); + Path testFile = path(TEST_FILE); + touch(testFile); + validateStatus(fs, testFile, false); } private FileStatus validateStatus(final AzureBlobFileSystem fs, final Path name, final boolean isDir) @@ -67,7 +70,7 @@ private FileStatus validateStatus(final AzureBlobFileSystem fs, final Path name, String errorInStatus = "error in " + fileStatus + " from " + fs; - if (!fs.getIsNamespaceEnabled()) { + if (!getIsNamespaceEnabled(fs)) { assertEquals(errorInStatus + ": owner", fs.getOwnerUser(), fileStatus.getOwner()); assertEquals(errorInStatus + ": group", @@ -93,9 +96,10 @@ private FileStatus validateStatus(final AzureBlobFileSystem fs, final Path name, public void testFolderStatusPermissionsAndOwnerAndGroup() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); fs.getConf().set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, DEFAULT_UMASK_VALUE); - fs.mkdirs(TEST_FOLDER); + Path testFolder = path(TEST_FOLDER); + fs.mkdirs(testFolder); - validateStatus(fs, TEST_FOLDER, true); + validateStatus(fs, testFolder, true); } @Test @@ -108,11 +112,11 @@ public void testAbfsPathWithHost() throws IOException { Path 
pathwithouthost2 = new Path("/abfs/file2.txt"); // verify compatibility of this path format - fs.create(pathWithHost1); - assertTrue(fs.exists(pathwithouthost1)); + fs.create(pathWithHost1).close(); + assertPathExists(fs, "This path should exist", pathwithouthost1); - fs.create(pathwithouthost2); - assertTrue(fs.exists(pathWithHost2)); + fs.create(pathwithouthost2).close(); + assertPathExists(fs, "This path should exist", pathWithHost2); // verify get FileStatus fileStatus1 = fs.getFileStatus(pathWithHost1); @@ -125,13 +129,13 @@ public void testAbfsPathWithHost() throws IOException { @Test public void testLastModifiedTime() throws IOException { AzureBlobFileSystem fs = this.getFileSystem(); - Path testFilePath = new Path("childfile1.txt"); + Path testFilePath = path("childfile1.txt"); long createStartTime = System.currentTimeMillis(); long minCreateStartTime = (createStartTime / 1000) * 1000 - 1; // Dividing and multiplying by 1000 to make last 3 digits 0. // It is observed that modification time is returned with last 3 // digits 0 always. 
- fs.create(testFilePath); + fs.create(testFilePath).close(); long createEndTime = System.currentTimeMillis(); FileStatus fStat = fs.getFileStatus(testFilePath); long lastModifiedTime = fStat.getModificationTime(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java index 60f7f7d23f02a..d27f9fa62194d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFlush.java @@ -29,8 +29,12 @@ import java.util.concurrent.Future; import java.io.IOException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.hamcrest.core.IsEqual; import org.hamcrest.core.IsNot; import org.junit.Test; @@ -41,6 +45,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_APPEND_BLOB_KEY; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertHasStreamCapabilities; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertLacksStreamCapabilities; + /** * Test flush operation. 
* This class cannot be run in parallel test mode--check comments in @@ -49,7 +57,8 @@ public class ITestAzureBlobFileSystemFlush extends AbstractAbfsScaleTest { private static final int BASE_SIZE = 1024; private static final int ONE_THOUSAND = 1000; - private static final int TEST_BUFFER_SIZE = 5 * ONE_THOUSAND * BASE_SIZE; + //3000 KB to support appenblob too + private static final int TEST_BUFFER_SIZE = 3 * ONE_THOUSAND * BASE_SIZE; private static final int ONE_MB = 1024 * 1024; private static final int FLUSH_TIMES = 200; private static final int THREAD_SLEEP_TIME = 1000; @@ -226,11 +235,15 @@ private void testFlush(boolean disableOutputStreamFlush) throws Exception { final Path testFilePath = path(methodName.getMethodName()); byte[] buffer = getRandomBytesArray(); - // The test case must write "fs.azure.write.request.size" bytes // to the stream in order for the data to be uploaded to storage. - assertEquals(fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize(), - buffer.length); + assertTrue(fs.getAbfsStore().getAbfsConfiguration().getWriteBufferSize() + <= buffer.length); + + boolean isAppendBlob = true; + if (!fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testFilePath).toString())) { + isAppendBlob = false; + } try (FSDataOutputStream stream = fs.create(testFilePath)) { stream.write(buffer); @@ -245,7 +258,8 @@ private void testFlush(boolean disableOutputStreamFlush) throws Exception { // Verify that the data can be read if disableOutputStreamFlush is // false; and otherwise cannot be read. 
- validate(fs.open(testFilePath), buffer, !disableOutputStreamFlush); + /* For Appendlob flush is not needed to update data on server */ + validate(fs.open(testFilePath), buffer, !disableOutputStreamFlush || isAppendBlob); } } @@ -267,10 +281,15 @@ public void testHflushWithFlushDisabled() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); byte[] buffer = getRandomBytesArray(); final Path testFilePath = path(methodName.getMethodName()); + boolean isAppendBlob = false; + if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testFilePath).toString())) { + isAppendBlob = true; + } try (FSDataOutputStream stream = getStreamAfterWrite(fs, testFilePath, buffer, false)) { stream.hflush(); - validate(fs, testFilePath, buffer, false); + /* For Appendlob flush is not needed to update data on server */ + validate(fs, testFilePath, buffer, isAppendBlob); } } @@ -287,6 +306,25 @@ public void testHsyncWithFlushEnabled() throws Exception { } } + @Test + public void testTracingHeaderForAppendBlob() throws Exception { + Configuration config = new Configuration(this.getRawConfiguration()); + config.set(FS_AZURE_APPEND_BLOB_KEY, "abfss:/"); + config.set(TestConfigurationKeys.FS_AZURE_TEST_APPENDBLOB_ENABLED, "true"); + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem + .newInstance(config); + + byte[] buf = new byte[10]; + new Random().nextBytes(buf); + try (FSDataOutputStream out = fs.create(new Path("/testFile"))) { + ((AbfsOutputStream) out.getWrappedStream()).registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), fs.getFileSystemId(), FSOperationType.WRITE, false, 0, + ((AbfsOutputStream) out.getWrappedStream()).getStreamID())); + out.write(buf); + out.hsync(); + } + } + @Test public void testStreamCapabilitiesWithFlushDisabled() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); @@ -295,11 +333,12 @@ public void testStreamCapabilitiesWithFlushDisabled() throws Exception 
{ final Path testFilePath = path(methodName.getMethodName()); try (FSDataOutputStream stream = getStreamAfterWrite(fs, testFilePath, buffer, false)) { - assertFalse(stream.hasCapability(StreamCapabilities.HFLUSH)); - assertFalse(stream.hasCapability(StreamCapabilities.HSYNC)); - assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); - assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); - assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + assertLacksStreamCapabilities(stream, + StreamCapabilities.HFLUSH, + StreamCapabilities.HSYNC, + StreamCapabilities.DROPBEHIND, + StreamCapabilities.READAHEAD, + StreamCapabilities.UNBUFFER); } } @@ -309,11 +348,13 @@ public void testStreamCapabilitiesWithFlushEnabled() throws Exception { byte[] buffer = getRandomBytesArray(); final Path testFilePath = path(methodName.getMethodName()); try (FSDataOutputStream stream = getStreamAfterWrite(fs, testFilePath, buffer, true)) { - assertTrue(stream.hasCapability(StreamCapabilities.HFLUSH)); - assertTrue(stream.hasCapability(StreamCapabilities.HSYNC)); - assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); - assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); - assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + assertHasStreamCapabilities(stream, + StreamCapabilities.HFLUSH, + StreamCapabilities.HSYNC); + assertLacksStreamCapabilities(stream, + StreamCapabilities.DROPBEHIND, + StreamCapabilities.READAHEAD, + StreamCapabilities.UNBUFFER); } } @@ -322,9 +363,14 @@ public void testHsyncWithFlushDisabled() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); byte[] buffer = getRandomBytesArray(); final Path testFilePath = path(methodName.getMethodName()); + boolean isAppendBlob = false; + if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testFilePath).toString())) { + isAppendBlob = true; + } try (FSDataOutputStream stream = getStreamAfterWrite(fs, testFilePath, buffer, false)) { 
stream.hsync(); - validate(fs, testFilePath, buffer, false); + /* For Appendlob flush is not needed to update data on server */ + validate(fs, testFilePath, buffer, isAppendBlob); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java new file mode 100644 index 0000000000000..07b8a6f2bb203 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java @@ -0,0 +1,349 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.util.concurrent.RejectedExecutionException; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsLease; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.utils.Listener; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.spy; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_INFINITE_LEASE_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_ACQUIRING_LEASE; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_LEASE_EXPIRED; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_NO_LEASE_ID_SPECIFIED; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_NO_LEASE_THREADS; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_PARALLEL_ACCESS_DETECTED; + +/** + * Test lease operations. 
+ */ +public class ITestAzureBlobFileSystemLease extends AbstractAbfsIntegrationTest { + private static final int TEST_EXECUTION_TIMEOUT = 30 * 1000; + private static final int LONG_TEST_EXECUTION_TIMEOUT = 90 * 1000; + private static final String TEST_FILE = "testfile"; + private final boolean isHNSEnabled; + + public ITestAzureBlobFileSystemLease() throws Exception { + super(); + + this.isHNSEnabled = getConfiguration() + .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + } + + private AzureBlobFileSystem getCustomFileSystem(Path infiniteLeaseDirs, int numLeaseThreads) throws Exception { + Configuration conf = getRawConfiguration(); + conf.setBoolean(String.format("fs.%s.impl.disable.cache", getAbfsScheme()), true); + conf.set(FS_AZURE_INFINITE_LEASE_KEY, infiniteLeaseDirs.toUri().getPath()); + conf.setInt(FS_AZURE_LEASE_THREADS, numLeaseThreads); + return getFileSystem(conf); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testNoInfiniteLease() throws IOException { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(testFilePath.getParent()); + try (FSDataOutputStream out = fs.create(testFilePath)) { + Assert.assertFalse("Output stream should not have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + } + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testNoLeaseThreads() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 0); + fs.mkdirs(testFilePath.getParent()); + LambdaTestUtils.intercept(IOException.class, ERR_NO_LEASE_THREADS, () -> { + try (FSDataOutputStream out = fs.create(testFilePath)) { + } + return "No failure when lease requested with 0 lease threads"; + }); + } + + @Test(timeout = 
TEST_EXECUTION_TIMEOUT) + public void testOneWriter() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + FSDataOutputStream out = fs.create(testFilePath); + Assert.assertTrue("Output stream should have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + out.close(); + Assert.assertFalse("Output stream should not have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testSubDir() throws Exception { + final Path testFilePath = new Path(new Path(path(methodName.getMethodName()), "subdir"), + TEST_FILE); + final AzureBlobFileSystem fs = + getCustomFileSystem(testFilePath.getParent().getParent(), 1); + fs.mkdirs(testFilePath.getParent().getParent()); + + FSDataOutputStream out = fs.create(testFilePath); + Assert.assertTrue("Output stream should have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + out.close(); + Assert.assertFalse("Output stream should not have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testTwoCreate() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + try (FSDataOutputStream out = fs.create(testFilePath)) { + LambdaTestUtils.intercept(IOException.class, isHNSEnabled ? 
ERR_PARALLEL_ACCESS_DETECTED + : ERR_NO_LEASE_ID_SPECIFIED, () -> { + try (FSDataOutputStream out2 = fs.create(testFilePath)) { + } + return "Expected second create on infinite lease dir to fail"; + }); + } + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + private void twoWriters(AzureBlobFileSystem fs, Path testFilePath, boolean expectException) throws Exception { + try (FSDataOutputStream out = fs.create(testFilePath)) { + try (FSDataOutputStream out2 = fs.append(testFilePath)) { + out2.writeInt(2); + out2.hsync(); + } catch (IOException e) { + if (expectException) { + GenericTestUtils.assertExceptionContains(ERR_ACQUIRING_LEASE, e); + } else { + throw e; + } + } + out.writeInt(1); + out.hsync(); + } + + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testTwoWritersCreateAppendNoInfiniteLease() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(testFilePath.getParent()); + + twoWriters(fs, testFilePath, false); + } + + @Test(timeout = LONG_TEST_EXECUTION_TIMEOUT) + public void testTwoWritersCreateAppendWithInfiniteLeaseEnabled() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + twoWriters(fs, testFilePath, true); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testLeaseFreedOnClose() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + FSDataOutputStream out; + out = fs.create(testFilePath); + out.write(0); + Assert.assertTrue("Output stream should 
have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + out.close(); + Assert.assertFalse("Output stream should not have lease after close", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testWriteAfterBreakLease() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + FSDataOutputStream out; + out = fs.create(testFilePath); + out.write(0); + out.hsync(); + + fs.registerListener(new TracingHeaderValidator( + getConfiguration().getClientCorrelationId(), fs.getFileSystemId(), + FSOperationType.BREAK_LEASE, false, 0)); + fs.breakLease(testFilePath); + fs.registerListener(null); + + LambdaTestUtils.intercept(IOException.class, ERR_LEASE_EXPIRED, () -> { + out.write(1); + out.hsync(); + return "Expected exception on write after lease break but got " + out; + }); + + LambdaTestUtils.intercept(IOException.class, ERR_LEASE_EXPIRED, () -> { + out.close(); + return "Expected exception on close after lease break but got " + out; + }); + + Assert.assertTrue("Output stream lease should be freed", + ((AbfsOutputStream) out.getWrappedStream()).isLeaseFreed()); + + try (FSDataOutputStream out2 = fs.append(testFilePath)) { + out2.write(2); + out2.hsync(); + } + + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = LONG_TEST_EXECUTION_TIMEOUT) + public void testLeaseFreedAfterBreak() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + FSDataOutputStream out = fs.create(testFilePath); + out.write(0); + + 
fs.breakLease(testFilePath); + + LambdaTestUtils.intercept(IOException.class, ERR_LEASE_EXPIRED, () -> { + out.close(); + return "Expected exception on close after lease break but got " + out; + }); + + Assert.assertTrue("Output stream lease should be freed", + ((AbfsOutputStream) out.getWrappedStream()).isLeaseFreed()); + + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testInfiniteLease() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + try (FSDataOutputStream out = fs.create(testFilePath)) { + Assert.assertTrue("Output stream should have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + out.write(0); + } + Assert.assertTrue(fs.getAbfsStore().areLeasesFreed()); + + try (FSDataOutputStream out = fs.append(testFilePath)) { + Assert.assertTrue("Output stream should have lease", + ((AbfsOutputStream) out.getWrappedStream()).hasLease()); + out.write(1); + } + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testFileSystemClose() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + + try (FSDataOutputStream out = fs.create(testFilePath)) { + out.write(0); + Assert.assertFalse("Store leases should exist", + fs.getAbfsStore().areLeasesFreed()); + } + fs.close(); + Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); + + LambdaTestUtils.intercept(RejectedExecutionException.class, () -> { + try (FSDataOutputStream out2 = fs.append(testFilePath)) { + } + return "Expected exception 
on new append after closed FS"; + }); + } + + @Test(timeout = TEST_EXECUTION_TIMEOUT) + public void testAcquireRetry() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); + final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + fs.mkdirs(testFilePath.getParent()); + fs.createNewFile(testFilePath); + TracingContext tracingContext = getTestTracingContext(fs, true); + Listener listener = new TracingHeaderValidator( + getConfiguration().getClientCorrelationId(), fs.getFileSystemId(), + FSOperationType.TEST_OP, true, 0); + tracingContext.setListener(listener); + + AbfsLease lease = new AbfsLease(fs.getAbfsClient(), + testFilePath.toUri().getPath(), tracingContext); + Assert.assertNotNull("Did not successfully lease file", lease.getLeaseID()); + listener.setOperation(FSOperationType.RELEASE_LEASE); + lease.free(); + lease.getTracingContext().setListener(null); + Assert.assertEquals("Unexpected acquire retry count", 0, lease.getAcquireRetryCount()); + + AbfsClient mockClient = spy(fs.getAbfsClient()); + + doThrow(new AbfsLease.LeaseException("failed to acquire 1")) + .doThrow(new AbfsLease.LeaseException("failed to acquire 2")) + .doCallRealMethod().when(mockClient) + .acquireLease(anyString(), anyInt(), any(TracingContext.class)); + + lease = new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, tracingContext); + Assert.assertNotNull("Acquire lease should have retried", lease.getLeaseID()); + lease.free(); + Assert.assertEquals("Unexpected acquire retry count", 2, lease.getAcquireRetryCount()); + + doThrow(new AbfsLease.LeaseException("failed to acquire")).when(mockClient) + .acquireLease(anyString(), anyInt(), any(TracingContext.class)); + + LambdaTestUtils.intercept(AzureBlobFileSystemException.class, () -> { + new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, + tracingContext); + }); + } +} diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 25a15679263b3..8d1330b5ea7dd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -29,12 +29,17 @@ import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.contract.ContractTestUtils; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; @@ -55,7 +60,10 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { - final AzureBlobFileSystem fs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + config.set(AZURE_LIST_MAX_RESULTS, "5000"); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem + .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); ExecutorService es = Executors.newFixedThreadPool(10); @@ -77,6 +85,9 @@ public Void call() throws Exception { } es.shutdownNow(); + fs.registerListener( + new TracingHeaderValidator(getConfiguration().getClientCorrelationId(), + 
fs.getFileSystemId(), FSOperationType.LISTSTATUS, true, 0)); FileStatus[] files = fs.listStatus(new Path("/")); assertEquals(TEST_FILES_NUMBER, files.length /* user directory */); } @@ -88,7 +99,7 @@ public Void call() throws Exception { @Test public void testListFileVsListDir() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path path = new Path("/testFile"); + Path path = path("/testFile"); try(FSDataOutputStream ignored = fs.create(path)) { FileStatus[] testFiles = fs.listStatus(path); assertEquals("length of test files", 1, testFiles.length); @@ -100,19 +111,20 @@ public void testListFileVsListDir() throws Exception { @Test public void testListFileVsListDir2() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - fs.mkdirs(new Path("/testFolder")); - fs.mkdirs(new Path("/testFolder/testFolder2")); - fs.mkdirs(new Path("/testFolder/testFolder2/testFolder3")); - Path testFile0Path = new Path("/testFolder/testFolder2/testFolder3/testFile"); + Path testFolder = path("/testFolder"); + fs.mkdirs(testFolder); + fs.mkdirs(new Path(testFolder + "/testFolder2")); + fs.mkdirs(new Path(testFolder + "/testFolder2/testFolder3")); + Path testFile0Path = new Path( + testFolder + "/testFolder2/testFolder3/testFile"); ContractTestUtils.touch(fs, testFile0Path); FileStatus[] testFiles = fs.listStatus(testFile0Path); assertEquals("Wrong listing size of file " + testFile0Path, 1, testFiles.length); FileStatus file0 = testFiles[0]; - assertEquals("Wrong path for " + file0, - new Path(getTestUrl(), "/testFolder/testFolder2/testFolder3/testFile"), - file0.getPath()); + assertEquals("Wrong path for " + file0, new Path(getTestUrl(), + testFolder + "/testFolder2/testFolder3/testFile"), file0.getPath()); assertIsFileReference(file0); } @@ -125,18 +137,18 @@ public void testListNonExistentDir() throws Exception { @Test public void testListFiles() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path testDir = new Path("/test"); + Path 
testDir = path("/test"); fs.mkdirs(testDir); FileStatus[] fileStatuses = fs.listStatus(new Path("/")); assertEquals(1, fileStatuses.length); - fs.mkdirs(new Path("/test/sub")); + fs.mkdirs(new Path(testDir + "/sub")); fileStatuses = fs.listStatus(testDir); assertEquals(1, fileStatuses.length); assertEquals("sub", fileStatuses[0].getPath().getName()); assertIsDirectoryReference(fileStatuses[0]); - Path childF = fs.makeQualified(new Path("/test/f")); + Path childF = fs.makeQualified(new Path(testDir + "/f")); touch(childF); fileStatuses = fs.listStatus(testDir); assertEquals(2, fileStatuses.length); @@ -182,7 +194,7 @@ public void testMkdirTrailingPeriodDirName() throws IOException { final AzureBlobFileSystem fs = getFileSystem(); Path nontrailingPeriodDir = path("testTrailingDir/dir"); - Path trailingPeriodDir = path("testTrailingDir/dir."); + Path trailingPeriodDir = new Path("testMkdirTrailingDir/dir."); assertMkdirs(fs, nontrailingPeriodDir); @@ -201,8 +213,8 @@ public void testCreateTrailingPeriodFileName() throws IOException { boolean exceptionThrown = false; final AzureBlobFileSystem fs = getFileSystem(); - Path trailingPeriodFile = path("testTrailingDir/file."); - Path nontrailingPeriodFile = path("testTrailingDir/file"); + Path trailingPeriodFile = new Path("testTrailingDir/file."); + Path nontrailingPeriodFile = path("testCreateTrailingDir/file"); createFile(fs, nontrailingPeriodFile, false, new byte[0]); assertPathExists(fs, "Trailing period file does not exist", @@ -224,7 +236,7 @@ public void testRenameTrailingPeriodFile() throws IOException { final AzureBlobFileSystem fs = getFileSystem(); Path nonTrailingPeriodFile = path("testTrailingDir/file"); - Path trailingPeriodFile = path("testTrailingDir/file."); + Path trailingPeriodFile = new Path("testRenameTrailingDir/file."); createFile(fs, nonTrailingPeriodFile, false, new byte[0]); try { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index 382d3966485f1..bc6f35c66bc53 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -18,11 +18,21 @@ package org.apache.hadoop.fs.azurebfs; +import java.util.UUID; + +import org.junit.Assume; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test mkdir operation. 
@@ -35,14 +45,101 @@ public ITestAzureBlobFileSystemMkDir() throws Exception { @Test public void testCreateDirWithExistingDir() throws Exception { + Assume.assumeTrue( + DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE || !getIsNamespaceEnabled( + getFileSystem())); final AzureBlobFileSystem fs = getFileSystem(); - Path path = new Path("testFolder"); + Path path = path("testFolder"); assertMkdirs(fs, path); assertMkdirs(fs, path); } + @Test + public void testMkdirExistingDirOverwriteFalse() throws Exception { + Assume.assumeFalse("Ignore test until default overwrite is set to false", + DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE); + Assume.assumeTrue("Ignore test for Non-HNS accounts", + getIsNamespaceEnabled(getFileSystem())); + //execute test only for HNS account with default overwrite=false + Configuration config = new Configuration(this.getRawConfiguration()); + config.set(FS_AZURE_ENABLE_MKDIR_OVERWRITE, Boolean.toString(false)); + AzureBlobFileSystem fs = getFileSystem(config); + Path path = path("testFolder"); + assertMkdirs(fs, path); //checks that mkdirs returns true + long timeCreated = fs.getFileStatus(path).getModificationTime(); + assertMkdirs(fs, path); //call to existing dir should return success + assertEquals("LMT should not be updated for existing dir", timeCreated, + fs.getFileStatus(path).getModificationTime()); + } + + @Test + public void createDirWithExistingFilename() throws Exception { + Assume.assumeFalse("Ignore test until default overwrite is set to false", + DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE && getIsNamespaceEnabled( + getFileSystem())); + final AzureBlobFileSystem fs = getFileSystem(); + Path path = path("testFilePath"); + fs.create(path).close(); + assertTrue(fs.getFileStatus(path).isFile()); + intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(path)); + } + @Test public void testCreateRoot() throws Exception { assertMkdirs(getFileSystem(), new Path("/")); } + + /** + * Test mkdir for possible values of 
fs.azure.disable.default.create.overwrite + * @throws Exception + */ + @Test + public void testDefaultCreateOverwriteDirTest() throws Throwable { + // the config fs.azure.disable.default.create.overwrite should have no + // effect on mkdirs + testCreateDirOverwrite(true); + testCreateDirOverwrite(false); + } + + public void testCreateDirOverwrite(boolean enableConditionalCreateOverwrite) + throws Throwable { + final AzureBlobFileSystem currentFs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + config.set("fs.azure.enable.conditional.create.overwrite", + Boolean.toString(enableConditionalCreateOverwrite)); + + final AzureBlobFileSystem fs = + (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + + long totalConnectionMadeBeforeTest = fs.getInstrumentationMap() + .get(CONNECTIONS_MADE.getStatName()); + + int mkdirRequestCount = 0; + final Path dirPath = new Path("/DirPath_" + + UUID.randomUUID().toString()); + + // Case 1: Dir does not pre-exist + fs.mkdirs(dirPath); + + // One request to server + mkdirRequestCount++; + + assertAbfsStatistics( + CONNECTIONS_MADE, + totalConnectionMadeBeforeTest + mkdirRequestCount, + fs.getInstrumentationMap()); + + // Case 2: Dir pre-exists + // Mkdir on existing Dir path will not lead to failure + fs.mkdirs(dirPath); + + // One request to server + mkdirRequestCount++; + + assertAbfsStatistics( + CONNECTIONS_MADE, + totalConnectionMadeBeforeTest + mkdirRequestCount, + fs.getInstrumentationMap()); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemOauth.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemOauth.java index 533f47125654e..f27e75839b73f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemOauth.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemOauth.java @@ -22,18 +22,22 @@ import java.io.InputStream; import java.util.Map; -import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; import org.junit.Assume; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.io.IOUtils; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET; @@ -41,6 +45,8 @@ import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_DATA_CONTRIBUTOR_CLIENT_SECRET; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_DATA_READER_CLIENT_ID; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_DATA_READER_CLIENT_SECRET; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; /** * Test Azure Oauth with Blob Data contributor role and Blob Data Reader role. 
@@ -50,8 +56,10 @@ public class ITestAzureBlobFileSystemOauth extends AbstractAbfsIntegrationTest{ private static final Path FILE_PATH = new Path("/testFile"); - private static final Path EXISTED_FILE_PATH = new Path("/existedFile"); - private static final Path EXISTED_FOLDER_PATH = new Path("/existedFolder"); + private static final String EXISTED_FILE_PATH = "/existedFile"; + private static final String EXISTED_FOLDER_PATH = "/existedFolder"; + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsStreamStatistics.class); public ITestAzureBlobFileSystemOauth() throws Exception { Assume.assumeTrue(this.getAuthType() == AuthType.OAuth); @@ -66,7 +74,9 @@ public void testBlobDataContributor() throws Exception { String secret = this.getConfiguration().get(TestConfigurationKeys.FS_AZURE_BLOB_DATA_CONTRIBUTOR_CLIENT_SECRET); Assume.assumeTrue("Contributor client secret not provided", secret != null); - prepareFiles(); + Path existedFilePath = path(EXISTED_FILE_PATH); + Path existedFolderPath = path(EXISTED_FOLDER_PATH); + prepareFiles(existedFilePath, existedFolderPath); final AzureBlobFileSystem fs = getBlobConributor(); @@ -74,39 +84,39 @@ public void testBlobDataContributor() throws Exception { try(FSDataOutputStream stream = fs.create(FILE_PATH)) { stream.write(0); } - assertTrue(fs.exists(FILE_PATH)); + assertPathExists(fs, "This path should exist", FILE_PATH); FileStatus fileStatus = fs.getFileStatus(FILE_PATH); assertEquals(1, fileStatus.getLen()); // delete file assertTrue(fs.delete(FILE_PATH, true)); - assertFalse(fs.exists(FILE_PATH)); + assertPathDoesNotExist(fs, "This path should not exist", FILE_PATH); // Verify Blob Data Contributor has full access to existed folder, file // READ FOLDER - assertTrue(fs.exists(EXISTED_FOLDER_PATH)); + assertPathExists(fs, "This path should exist", existedFolderPath); //DELETE FOLDER - fs.delete(EXISTED_FOLDER_PATH, true); - assertFalse(fs.exists(EXISTED_FOLDER_PATH)); + fs.delete(existedFolderPath, true); + 
assertPathDoesNotExist(fs, "This path should not exist", existedFolderPath); // READ FILE - try (FSDataInputStream stream = fs.open(EXISTED_FILE_PATH)) { + try (FSDataInputStream stream = fs.open(existedFilePath)) { assertTrue(stream.read() != 0); } - assertEquals(0, fs.getFileStatus(EXISTED_FILE_PATH).getLen()); + assertEquals(0, fs.getFileStatus(existedFilePath).getLen()); // WRITE FILE - try (FSDataOutputStream stream = fs.append(EXISTED_FILE_PATH)) { + try (FSDataOutputStream stream = fs.append(existedFilePath)) { stream.write(0); } - assertEquals(1, fs.getFileStatus(EXISTED_FILE_PATH).getLen()); + assertEquals(1, fs.getFileStatus(existedFilePath).getLen()); // REMOVE FILE - fs.delete(EXISTED_FILE_PATH, true); - assertFalse(fs.exists(EXISTED_FILE_PATH)); + fs.delete(existedFilePath, true); + assertPathDoesNotExist(fs, "This path should not exist", existedFilePath); } /* @@ -119,45 +129,52 @@ public void testBlobDataReader() throws Exception { String secret = this.getConfiguration().get(TestConfigurationKeys.FS_AZURE_BLOB_DATA_READER_CLIENT_SECRET); Assume.assumeTrue("Reader client secret not provided", secret != null); - prepareFiles(); + Path existedFilePath = path(EXISTED_FILE_PATH); + Path existedFolderPath = path(EXISTED_FOLDER_PATH); + prepareFiles(existedFilePath, existedFolderPath); final AzureBlobFileSystem fs = getBlobReader(); // Use abfsStore in this test to verify the ERROR code in AbfsRestOperationException AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext tracingContext = getTestTracingContext(fs, true); // TEST READ FS - Map properties = abfsStore.getFilesystemProperties(); + Map properties = abfsStore.getFilesystemProperties(tracingContext); // TEST READ FOLDER - assertTrue(fs.exists(EXISTED_FOLDER_PATH)); + assertPathExists(fs, "This path should exist", existedFolderPath); // TEST DELETE FOLDER try { - abfsStore.delete(EXISTED_FOLDER_PATH, true); + abfsStore.delete(existedFolderPath, true, tracingContext); } catch 
(AbfsRestOperationException e) { assertEquals(AzureServiceErrorCode.AUTHORIZATION_PERMISSION_MISS_MATCH, e.getErrorCode()); } // TEST READ FILE - try (InputStream inputStream = abfsStore.openFileForRead(EXISTED_FILE_PATH, null)) { + try (InputStream inputStream = abfsStore + .openFileForRead(existedFilePath, null, tracingContext)) { assertTrue(inputStream.read() != 0); } // TEST WRITE FILE try { - abfsStore.openFileForWrite(EXISTED_FILE_PATH, true); + abfsStore.openFileForWrite(existedFilePath, fs.getFsStatistics(), true, + tracingContext); } catch (AbfsRestOperationException e) { assertEquals(AzureServiceErrorCode.AUTHORIZATION_PERMISSION_MISS_MATCH, e.getErrorCode()); + } finally { + IOUtils.cleanupWithLogger(LOG, abfsStore); } } - private void prepareFiles() throws IOException { + private void prepareFiles(Path existedFilePath, Path existedFolderPath) throws IOException { // create test files/folders to verify access control diff between // Blob data contributor and Blob data reader final AzureBlobFileSystem fs = this.getFileSystem(); - fs.create(EXISTED_FILE_PATH); - assertTrue(fs.exists(EXISTED_FILE_PATH)); - fs.mkdirs(EXISTED_FOLDER_PATH); - assertTrue(fs.exists(EXISTED_FOLDER_PATH)); + fs.create(existedFilePath).close(); + assertPathExists(fs, "This path should exist", existedFilePath); + fs.mkdirs(existedFolderPath); + assertPathExists(fs, "This path should exist", existedFolderPath); } private AzureBlobFileSystem getBlobConributor() throws Exception { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemPermission.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemPermission.java index 257fb4fdbd2ab..0d644b6c743d0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemPermission.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemPermission.java @@ 
-76,7 +76,7 @@ public static Collection abfsCreateNonRecursiveTestData() public void testFilePermission() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(fs.getIsNamespaceEnabled()); + Assume.assumeTrue(getIsNamespaceEnabled(fs)); fs.getConf().set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, DEFAULT_UMASK_VALUE); path = new Path(testRoot, UUID.randomUUID().toString()); @@ -84,7 +84,8 @@ public void testFilePermission() throws Exception { new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE)); fs.removeDefaultAcl(path.getParent()); - fs.create(path, permission, true, KILOBYTE, (short) 1, KILOBYTE - 1, null); + fs.create(path, permission, true, KILOBYTE, (short) 1, KILOBYTE - 1, + null).close(); FileStatus status = fs.getFileStatus(path); Assert.assertEquals(permission.applyUMask(DEFAULT_UMASK_PERMISSION), status.getPermission()); } @@ -92,7 +93,7 @@ public void testFilePermission() throws Exception { @Test public void testFolderPermission() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(fs.getIsNamespaceEnabled()); + Assume.assumeTrue(getIsNamespaceEnabled(fs)); fs.getConf().set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "027"); path = new Path(testRoot, UUID.randomUUID().toString()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java index e5f64b5f2c0a9..c1f0e06439950 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java @@ -21,12 +21,15 @@ import java.io.IOException; import java.util.Random; import java.util.concurrent.Callable; +import java.util.UUID; import org.junit.Assume; +import 
org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSExceptionMessages; @@ -36,30 +39,43 @@ import org.apache.hadoop.fs.azure.NativeAzureFileSystem; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.azurebfs.services.TestAbfsInputStream; + import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.BYTES_RECEIVED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.GET_RESPONSES; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.ETAG; /** * Test random read operation. */ public class ITestAzureBlobFileSystemRandomRead extends AbstractAbfsScaleTest { + private static final int BYTE = 1; + private static final int THREE_BYTES = 3; + private static final int FIVE_BYTES = 5; + private static final int TWENTY_BYTES = 20; + private static final int THIRTY_BYTES = 30; private static final int KILOBYTE = 1024; private static final int MEGABYTE = KILOBYTE * KILOBYTE; + private static final int FOUR_MB = 4 * MEGABYTE; + private static final int NINE_MB = 9 * MEGABYTE; private static final long TEST_FILE_SIZE = 8 * MEGABYTE; private static final int MAX_ELAPSEDTIMEMS = 20; private static final int SEQUENTIAL_READ_BUFFER_SIZE = 16 * KILOBYTE; - private static final int CREATE_BUFFER_SIZE = 26 * KILOBYTE; private static final int SEEK_POSITION_ONE = 2* KILOBYTE; private static final int SEEK_POSITION_TWO = 5 * KILOBYTE; private static final int SEEK_POSITION_THREE = 10 * KILOBYTE; private static final int SEEK_POSITION_FOUR = 4100 * KILOBYTE; - private static final Path TEST_FILE_PATH = new Path( - "/TestRandomRead.txt"); + private static final int 
ALWAYS_READ_BUFFER_SIZE_TEST_FILE_SIZE = 16 * MEGABYTE; + private static final int DISABLED_READAHEAD_DEPTH = 0; + + private static final String TEST_FILE_PREFIX = "/TestRandomRead"; private static final String WASB = "WASB"; private static final String ABFS = "ABFS"; - private static long testFileLength = 0; private static final Logger LOG = LoggerFactory.getLogger(ITestAzureBlobFileSystemRandomRead.class); @@ -70,9 +86,10 @@ public ITestAzureBlobFileSystemRandomRead() throws Exception { @Test public void testBasicRead() throws Exception { - assumeHugeFileExists(); + Path testPath = path(TEST_FILE_PREFIX + "_testBasicRead"); + assumeHugeFileExists(testPath); - try (FSDataInputStream inputStream = this.getFileSystem().open(TEST_FILE_PATH)) { + try (FSDataInputStream inputStream = this.getFileSystem().open(testPath)) { byte[] buffer = new byte[3 * MEGABYTE]; // forward seek and read a kilobyte into first kilobyte of bufferV2 @@ -97,13 +114,15 @@ public void testBasicRead() throws Exception { @Test public void testRandomRead() throws Exception { Assume.assumeFalse("This test does not support namespace enabled account", - this.getFileSystem().getIsNamespaceEnabled()); - assumeHugeFileExists(); + getIsNamespaceEnabled(getFileSystem())); + Path testPath = path(TEST_FILE_PREFIX + "_testRandomRead"); + assumeHugeFileExists(testPath); + try ( FSDataInputStream inputStreamV1 - = this.getFileSystem().open(TEST_FILE_PATH); + = this.getFileSystem().open(testPath); FSDataInputStream inputStreamV2 - = this.getWasbFileSystem().open(TEST_FILE_PATH); + = this.getWasbFileSystem().open(testPath); ) { final int bufferSize = 4 * KILOBYTE; byte[] bufferV1 = new byte[bufferSize]; @@ -155,8 +174,10 @@ public void testRandomRead() throws Exception { */ @Test public void testSeekToNewSource() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = this.getFileSystem().open(TEST_FILE_PATH)) { + Path testPath = path(TEST_FILE_PREFIX + "_testSeekToNewSource"); + 
assumeHugeFileExists(testPath); + + try (FSDataInputStream inputStream = this.getFileSystem().open(testPath)) { assertFalse(inputStream.seekToNewSource(0)); } } @@ -168,8 +189,10 @@ public void testSeekToNewSource() throws Exception { */ @Test public void testSkipBounds() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = this.getFileSystem().open(TEST_FILE_PATH)) { + Path testPath = path(TEST_FILE_PREFIX + "_testSkipBounds"); + long testFileLength = assumeHugeFileExists(testPath); + + try (FSDataInputStream inputStream = this.getFileSystem().open(testPath)) { ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); long skipped = inputStream.skip(-1); @@ -207,8 +230,10 @@ public Long call() throws Exception { */ @Test public void testValidateSeekBounds() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = this.getFileSystem().open(TEST_FILE_PATH)) { + Path testPath = path(TEST_FILE_PREFIX + "_testValidateSeekBounds"); + long testFileLength = assumeHugeFileExists(testPath); + + try (FSDataInputStream inputStream = this.getFileSystem().open(testPath)) { ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); inputStream.seek(0); @@ -256,8 +281,10 @@ public FSDataInputStream call() throws Exception { */ @Test public void testSeekAndAvailableAndPosition() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = this.getFileSystem().open(TEST_FILE_PATH)) { + Path testPath = path(TEST_FILE_PREFIX + "_testSeekAndAvailableAndPosition"); + long testFileLength = assumeHugeFileExists(testPath); + + try (FSDataInputStream inputStream = this.getFileSystem().open(testPath)) { byte[] expected1 = {(byte) 'a', (byte) 'b', (byte) 'c'}; byte[] expected2 = {(byte) 'd', (byte) 'e', (byte) 'f'}; byte[] expected3 = {(byte) 'b', (byte) 'c', (byte) 'd'}; @@ -320,8 +347,10 @@ public void testSeekAndAvailableAndPosition() throws Exception { */ @Test public void 
testSkipAndAvailableAndPosition() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = this.getFileSystem().open(TEST_FILE_PATH)) { + Path testPath = path(TEST_FILE_PREFIX + "_testSkipAndAvailableAndPosition"); + long testFileLength = assumeHugeFileExists(testPath); + + try (FSDataInputStream inputStream = this.getFileSystem().open(testPath)) { byte[] expected1 = {(byte) 'a', (byte) 'b', (byte) 'c'}; byte[] expected2 = {(byte) 'd', (byte) 'e', (byte) 'f'}; byte[] expected3 = {(byte) 'b', (byte) 'c', (byte) 'd'}; @@ -384,15 +413,17 @@ public void testSkipAndAvailableAndPosition() throws Exception { @Test public void testSequentialReadAfterReverseSeekPerformance() throws Exception { - assumeHugeFileExists(); + Path testPath = path( + TEST_FILE_PREFIX + "_testSequentialReadAfterReverseSeekPerformance"); + assumeHugeFileExists(testPath); final int maxAttempts = 10; final double maxAcceptableRatio = 1.01; double beforeSeekElapsedMs = 0, afterSeekElapsedMs = 0; double ratio = Double.MAX_VALUE; for (int i = 0; i < maxAttempts && ratio >= maxAcceptableRatio; i++) { - beforeSeekElapsedMs = sequentialRead(ABFS, + beforeSeekElapsedMs = sequentialRead(ABFS, testPath, this.getFileSystem(), false); - afterSeekElapsedMs = sequentialRead(ABFS, + afterSeekElapsedMs = sequentialRead(ABFS, testPath, this.getFileSystem(), true); ratio = afterSeekElapsedMs / beforeSeekElapsedMs; LOG.info((String.format( @@ -412,11 +443,12 @@ public void testSequentialReadAfterReverseSeekPerformance() } @Test + @Ignore("HADOOP-16915") public void testRandomReadPerformance() throws Exception { Assume.assumeFalse("This test does not support namespace enabled account", - this.getFileSystem().getIsNamespaceEnabled()); - createTestFile(); - assumeHugeFileExists(); + getIsNamespaceEnabled(getFileSystem())); + Path testPath = path(TEST_FILE_PREFIX + "_testRandomReadPerformance"); + assumeHugeFileExists(testPath); final AzureBlobFileSystem abFs = this.getFileSystem(); final 
NativeAzureFileSystem wasbFs = this.getWasbFileSystem(); @@ -426,8 +458,8 @@ public void testRandomReadPerformance() throws Exception { double v1ElapsedMs = 0, v2ElapsedMs = 0; double ratio = Double.MAX_VALUE; for (int i = 0; i < maxAttempts && ratio >= maxAcceptableRatio; i++) { - v1ElapsedMs = randomRead(1, wasbFs); - v2ElapsedMs = randomRead(2, abFs); + v1ElapsedMs = randomRead(1, testPath, wasbFs); + v2ElapsedMs = randomRead(2, testPath, abFs); ratio = v2ElapsedMs / v1ElapsedMs; @@ -446,15 +478,113 @@ public void testRandomReadPerformance() throws Exception { ratio < maxAcceptableRatio); } + /** + * With this test we should see a full buffer read being triggered in case + * alwaysReadBufferSize is on, else only the requested buffer size. + * Hence a seek done few bytes away from last read position will trigger + * a network read when alwaysReadBufferSize is off, whereas it will return + * from the internal buffer when it is on. + * Reading a full buffer size is the Gen1 behaviour. + * @throws Throwable + */ + @Test + public void testAlwaysReadBufferSizeConfig() throws Throwable { + testAlwaysReadBufferSizeConfig(false); + testAlwaysReadBufferSizeConfig(true); + } + + public void testAlwaysReadBufferSizeConfig(boolean alwaysReadBufferSizeConfigValue) + throws Throwable { + final AzureBlobFileSystem currentFs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + config.set("fs.azure.readaheadqueue.depth", "0"); + config.set("fs.azure.read.alwaysReadBufferSize", + Boolean.toString(alwaysReadBufferSizeConfigValue)); + + final Path testFile = new Path("/FileName_" + + UUID.randomUUID().toString()); + + final AzureBlobFileSystem fs = createTestFile(testFile, 16 * MEGABYTE, + 1 * MEGABYTE, config); + String eTag = fs.getAbfsClient() + .getPathStatus(testFile.toUri().getPath(), false, + getTestTracingContext(fs, false)) + .getResult() + .getResponseHeader(ETAG); + + TestAbfsInputStream testInputStream = new TestAbfsInputStream(); 
+ + AbfsInputStream inputStream = testInputStream.getAbfsInputStream( + fs.getAbfsClient(), + testFile.getName(), ALWAYS_READ_BUFFER_SIZE_TEST_FILE_SIZE, eTag, + DISABLED_READAHEAD_DEPTH, FOUR_MB, + alwaysReadBufferSizeConfigValue, FOUR_MB); + + long connectionsAtStart = fs.getInstrumentationMap() + .get(GET_RESPONSES.getStatName()); + + long dateSizeReadStatAtStart = fs.getInstrumentationMap() + .get(BYTES_RECEIVED.getStatName()); + + long newReqCount = 0; + long newDataSizeRead = 0; + + byte[] buffer20b = new byte[TWENTY_BYTES]; + byte[] buffer30b = new byte[THIRTY_BYTES]; + byte[] byteBuffer5 = new byte[FIVE_BYTES]; + + // first read + // if alwaysReadBufferSize is off, this is a sequential read + inputStream.read(byteBuffer5, 0, FIVE_BYTES); + newReqCount++; + newDataSizeRead += FOUR_MB; + + assertAbfsStatistics(GET_RESPONSES, connectionsAtStart + newReqCount, + fs.getInstrumentationMap()); + assertAbfsStatistics(BYTES_RECEIVED, + dateSizeReadStatAtStart + newDataSizeRead, fs.getInstrumentationMap()); + + // second read beyond that the buffer holds + // if alwaysReadBufferSize is off, this is a random read. Reads only + // incoming buffer size + // else, reads a buffer size + inputStream.seek(NINE_MB); + inputStream.read(buffer20b, 0, BYTE); + newReqCount++; + if (alwaysReadBufferSizeConfigValue) { + newDataSizeRead += FOUR_MB; + } else { + newDataSizeRead += TWENTY_BYTES; + } + + assertAbfsStatistics(GET_RESPONSES, connectionsAtStart + newReqCount, fs.getInstrumentationMap()); + assertAbfsStatistics(BYTES_RECEIVED, + dateSizeReadStatAtStart + newDataSizeRead, fs.getInstrumentationMap()); + + // third read adjacent to second but not exactly sequential. + // if alwaysReadBufferSize is off, this is another random read + // else second read would have read this too. 
+ inputStream.seek(NINE_MB + TWENTY_BYTES + THREE_BYTES); + inputStream.read(buffer30b, 0, THREE_BYTES); + if (!alwaysReadBufferSizeConfigValue) { + newReqCount++; + newDataSizeRead += THIRTY_BYTES; + } + + assertAbfsStatistics(GET_RESPONSES, connectionsAtStart + newReqCount, fs.getInstrumentationMap()); + assertAbfsStatistics(BYTES_RECEIVED, dateSizeReadStatAtStart + newDataSizeRead, fs.getInstrumentationMap()); + } private long sequentialRead(String version, + Path testPath, FileSystem fs, boolean afterReverseSeek) throws IOException { byte[] buffer = new byte[SEQUENTIAL_READ_BUFFER_SIZE]; long totalBytesRead = 0; long bytesRead = 0; - try(FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { + long testFileLength = fs.getFileStatus(testPath).getLen(); + try(FSDataInputStream inputStream = fs.open(testPath)) { if (afterReverseSeek) { while (bytesRead > 0 && totalBytesRead < 4 * MEGABYTE) { bytesRead = inputStream.read(buffer); @@ -485,14 +615,14 @@ private long sequentialRead(String version, } } - private long randomRead(int version, FileSystem fs) throws Exception { - assumeHugeFileExists(); + private long randomRead(int version, Path testPath, FileSystem fs) throws Exception { + assumeHugeFileExists(testPath); final long minBytesToRead = 2 * MEGABYTE; Random random = new Random(); byte[] buffer = new byte[8 * KILOBYTE]; long totalBytesRead = 0; long bytesRead = 0; - try(FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { + try(FSDataInputStream inputStream = fs.open(testPath)) { ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); do { bytesRead = inputStream.read(buffer); @@ -524,28 +654,48 @@ private static double toMbps(long bytes, long milliseconds) { return bytes / 1000.0 * 8 / milliseconds; } - private void createTestFile() throws Exception { - final AzureBlobFileSystem fs = this.getFileSystem(); - if (fs.exists(TEST_FILE_PATH)) { - FileStatus status = fs.getFileStatus(TEST_FILE_PATH); - if (status.getLen() >= TEST_FILE_SIZE) 
{ - return; + private long createTestFile(Path testPath) throws Exception { + createTestFile(testPath, + TEST_FILE_SIZE, + MEGABYTE, + null); + + return TEST_FILE_SIZE; + } + + private AzureBlobFileSystem createTestFile(Path testFilePath, long testFileSize, + int createBufferSize, Configuration config) throws Exception { + AzureBlobFileSystem fs; + + if (config == null) { + config = this.getRawConfiguration(); + } + + final AzureBlobFileSystem currentFs = getFileSystem(); + fs = (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + + if (fs.exists(testFilePath)) { + FileStatus status = fs.getFileStatus(testFilePath); + if (status.getLen() == testFileSize) { + return fs; } } - byte[] buffer = new byte[CREATE_BUFFER_SIZE]; + byte[] buffer = new byte[createBufferSize]; char character = 'a'; for (int i = 0; i < buffer.length; i++) { buffer[i] = (byte) character; character = (character == 'z') ? 'a' : (char) ((int) character + 1); } - LOG.info(String.format("Creating test file %s of size: %d ", TEST_FILE_PATH, TEST_FILE_SIZE)); + LOG.info(String.format("Creating test file %s of size: %d ", testFilePath, testFileSize)); ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - try (FSDataOutputStream outputStream = fs.create(TEST_FILE_PATH)) { + try (FSDataOutputStream outputStream = fs.create(testFilePath)) { + String bufferContents = new String(buffer); int bytesWritten = 0; - while (bytesWritten < TEST_FILE_SIZE) { + while (bytesWritten < testFileSize) { outputStream.write(buffer); bytesWritten += buffer.length; } @@ -555,18 +705,18 @@ private void createTestFile() throws Exception { outputStream.close(); closeTimer.end("time to close() output stream"); } - timer.end("time to write %d KB", TEST_FILE_SIZE / 1024); - testFileLength = fs.getFileStatus(TEST_FILE_PATH).getLen(); - + timer.end("time to write %d KB", testFileSize / 1024); + return fs; } - private void assumeHugeFileExists() throws Exception{ - createTestFile(); + 
private long assumeHugeFileExists(Path testPath) throws Exception{ + long fileSize = createTestFile(testPath); FileSystem fs = this.getFileSystem(); - ContractTestUtils.assertPathExists(this.getFileSystem(), "huge file not created", TEST_FILE_PATH); - FileStatus status = fs.getFileStatus(TEST_FILE_PATH); - ContractTestUtils.assertIsFile(TEST_FILE_PATH, status); - assertTrue("File " + TEST_FILE_PATH + " is empty", status.getLen() > 0); + ContractTestUtils.assertPathExists(this.getFileSystem(), "huge file not created", testPath); + FileStatus status = fs.getFileStatus(testPath); + ContractTestUtils.assertIsFile(testPath, status); + assertTrue("File " + testPath + " is not of expected size " + fileSize + ":actual=" + status.getLen(), status.getLen() == fileSize); + return fileSize; } private void verifyConsistentReads(FSDataInputStream inputStreamV1, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index e0e1d899a2184..ea07650e90110 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -30,11 +30,17 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.statistics.IOStatisticAssertions; +import org.apache.hadoop.fs.statistics.IOStatistics; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; import static 
org.apache.hadoop.fs.contract.ContractTestUtils.assertRenameOutcome; -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; /** * Test rename operation. @@ -59,16 +65,26 @@ public void testEnsureFileIsRenamed() throws Exception { assertPathDoesNotExist(fs, "expected renamed", src); } + @Test + public void testRenameWithPreExistingDestination() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path src = path("renameSrc"); + touch(src); + Path dest = path("renameDest"); + touch(dest); + assertRenameOutcome(fs, src, dest, false); + } + @Test public void testRenameFileUnderDir() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path sourceDir = new Path("/testSrc"); + Path sourceDir = path("/testSrc"); assertMkdirs(fs, sourceDir); String filename = "file1"; Path file1 = new Path(sourceDir, filename); touch(file1); - Path destDir = new Path("/testDst"); + Path destDir = path("/testDst"); assertRenameOutcome(fs, sourceDir, destDir, true); FileStatus[] fileStatus = fs.listStatus(destDir); assertNotNull("Null file status", fileStatus); @@ -80,14 +96,15 @@ public void testRenameFileUnderDir() throws Exception { @Test public void testRenameDirectory() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - fs.mkdirs(new Path("testDir")); - Path test1 = new Path("testDir/test1"); + Path testDir = path("testDir"); + fs.mkdirs(testDir); + Path test1 = new Path(testDir + "/test1"); fs.mkdirs(test1); - fs.mkdirs(new Path("testDir/test1/test2")); - fs.mkdirs(new Path("testDir/test1/test2/test3")); + fs.mkdirs(new Path(testDir + "/test1/test2")); + fs.mkdirs(new Path(testDir + "/test1/test2/test3")); assertRenameOutcome(fs, test1, - new Path("testDir/test10"), true); + new Path(testDir + "/test10"), true); assertPathDoesNotExist(fs, "rename source dir", test1); 
} @@ -97,8 +114,9 @@ public void testRenameFirstLevelDirectory() throws Exception { final List> tasks = new ArrayList<>(); ExecutorService es = Executors.newFixedThreadPool(10); + Path source = path("/test"); for (int i = 0; i < 1000; i++) { - final Path fileName = new Path("/test/" + i); + final Path fileName = new Path(source + "/" + i); Callable callable = new Callable() { @Override public Void call() throws Exception { @@ -115,8 +133,7 @@ public Void call() throws Exception { } es.shutdownNow(); - Path source = new Path("/test"); - Path dest = new Path("/renamedDir"); + Path dest = path("/renamedDir"); assertRenameOutcome(fs, source, dest, true); FileStatus[] files = fs.listStatus(dest); @@ -140,13 +157,45 @@ public void testRenameRoot() throws Exception { @Test public void testPosixRenameDirectory() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - fs.mkdirs(new Path("testDir2/test1/test2/test3")); - fs.mkdirs(new Path("testDir2/test4")); - Assert.assertTrue(fs.rename(new Path("testDir2/test1/test2/test3"), new Path("testDir2/test4"))); - assertTrue(fs.exists(new Path("testDir2"))); - assertTrue(fs.exists(new Path("testDir2/test1/test2"))); - assertTrue(fs.exists(new Path("testDir2/test4"))); - assertTrue(fs.exists(new Path("testDir2/test4/test3"))); - assertFalse(fs.exists(new Path("testDir2/test1/test2/test3"))); + Path testDir2 = path("testDir2"); + fs.mkdirs(new Path(testDir2 + "/test1/test2/test3")); + fs.mkdirs(new Path(testDir2 + "/test4")); + Assert.assertTrue(fs.rename(new Path(testDir2 + "/test1/test2/test3"), new Path(testDir2 + "/test4"))); + assertPathExists(fs, "This path should exist", testDir2); + assertPathExists(fs, "This path should exist", + new Path(testDir2 + "/test1/test2")); + assertPathExists(fs, "This path should exist", + new Path(testDir2 + "/test4")); + assertPathExists(fs, "This path should exist", + new Path(testDir2 + "/test4/test3")); + assertPathDoesNotExist(fs, "This path should not exist", + new 
Path(testDir2 + "/test1/test2/test3")); + } + + @Test + public void testRenameWithNoDestinationParentDir() throws Exception { + describe("Verifying the expected behaviour of ABFS rename when " + + "destination parent Dir doesn't exist."); + + final AzureBlobFileSystem fs = getFileSystem(); + Path sourcePath = path(getMethodName()); + Path destPath = new Path("falseParent", "someChildFile"); + + byte[] data = dataset(1024, 'a', 'z'); + writeDataset(fs, sourcePath, data, data.length, 1024, true); + + // Verify that renaming on a destination with no parent dir wasn't + // successful. + assertFalse("Rename result expected to be false with no Parent dir", + fs.rename(sourcePath, destPath)); + + // Verify that metadata was in an incomplete state after the rename + // failure, and we retired the rename once more. + IOStatistics ioStatistics = fs.getIOStatistics(); + IOStatisticAssertions.assertThatStatisticCounter(ioStatistics, + RENAME_PATH_ATTEMPTS.getStatName()) + .describedAs("There should be 2 rename attempts if metadata " + + "incomplete state failure is hit") + .isEqualTo(2); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java index 044c325c8c8dc..f913da7b15ed0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java @@ -76,7 +76,7 @@ public ITestAzureBlobFileSystemRenameUnicode() throws Exception { @Test public void testRenameFileUsingUnicode() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - Path folderPath1 = new Path(srcDir); + Path folderPath1 = path(srcDir); assertMkdirs(fs, folderPath1); assertIsDirectory(fs, folderPath1); Path filePath = new Path(folderPath1 + "/" 
+ filename); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemStoreListStatusWithRange.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemStoreListStatusWithRange.java index 849bb6ba0987b..ef7f1565df73f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemStoreListStatusWithRange.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemStoreListStatusWithRange.java @@ -107,7 +107,8 @@ public ITestAzureBlobFileSystemStoreListStatusWithRange() throws Exception { @Test public void testListWithRange() throws IOException { try { - FileStatus[] listResult = store.listStatus(new Path(path), startFrom); + FileStatus[] listResult = store.listStatus(new Path(path), startFrom, + getTestTracingContext(fs, true)); if (!expectedResult) { Assert.fail("Excepting failure with IllegalArgumentException"); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFilesystemAcl.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFilesystemAcl.java index 397d1a6d9a19b..6b83fa8b9ebfa 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFilesystemAcl.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFilesystemAcl.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.azurebfs; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.io.FileNotFoundException; import java.util.List; @@ -32,12 +32,15 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers; +import 
org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; import static org.junit.Assume.assumeTrue; import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS; @@ -88,7 +91,7 @@ public ITestAzureBlobFilesystemAcl() throws Exception { @Test public void testModifyAclEntries() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.mkdirs(path, FsPermission.createImmutable((short) RWX_RX)); @@ -121,7 +124,7 @@ public void testModifyAclEntries() throws Exception { @Test public void testModifyAclEntriesOnlyAccess() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -145,7 +148,7 @@ public void testModifyAclEntriesOnlyAccess() throws Exception { @Test public void testModifyAclEntriesOnlyDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -168,7 +171,7 @@ public void testModifyAclEntriesOnlyDefault() throws Exception { @Test public void testModifyAclEntriesMinimal() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + 
assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -186,7 +189,7 @@ public void testModifyAclEntriesMinimal() throws Exception { @Test public void testModifyAclEntriesMinimalDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -206,7 +209,7 @@ public void testModifyAclEntriesMinimalDefault() throws Exception { @Test public void testModifyAclEntriesCustomMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -225,7 +228,7 @@ public void testModifyAclEntriesCustomMask() throws Exception { @Test public void testModifyAclEntriesStickyBit() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) 01750)); List aclSpec = Lists.newArrayList( @@ -255,7 +258,7 @@ public void testModifyAclEntriesStickyBit() throws Exception { @Test(expected=FileNotFoundException.class) public void testModifyAclEntriesPathNotFound() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); // Path has not been created. 
List aclSpec = Lists.newArrayList( @@ -269,7 +272,7 @@ public void testModifyAclEntriesPathNotFound() throws Exception { @Test (expected=Exception.class) public void testModifyAclEntriesDefaultOnFile() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -281,7 +284,7 @@ public void testModifyAclEntriesDefaultOnFile() throws Exception { @Test public void testModifyAclEntriesWithDefaultMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -305,7 +308,7 @@ public void testModifyAclEntriesWithDefaultMask() throws Exception { @Test public void testModifyAclEntriesWithAccessMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -326,7 +329,7 @@ public void testModifyAclEntriesWithAccessMask() throws Exception { @Test(expected=PathIOException.class) public void testModifyAclEntriesWithDuplicateEntries() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -342,7 +345,7 @@ public void 
testModifyAclEntriesWithDuplicateEntries() throws Exception { @Test public void testRemoveAclEntries() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -370,7 +373,7 @@ public void testRemoveAclEntries() throws Exception { @Test public void testRemoveAclEntriesOnlyAccess() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -395,7 +398,7 @@ public void testRemoveAclEntriesOnlyAccess() throws Exception { @Test public void testRemoveAclEntriesOnlyDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -422,7 +425,7 @@ public void testRemoveAclEntriesOnlyDefault() throws Exception { @Test public void testRemoveAclEntriesMinimal() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RWX_RW)); @@ -445,7 +448,7 @@ public void testRemoveAclEntriesMinimal() throws Exception { @Test public void testRemoveAclEntriesMinimalDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - 
assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -473,7 +476,7 @@ public void testRemoveAclEntriesMinimalDefault() throws Exception { @Test public void testRemoveAclEntriesStickyBit() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) 01750)); List aclSpec = Lists.newArrayList( @@ -501,7 +504,7 @@ public void testRemoveAclEntriesStickyBit() throws Exception { @Test(expected=FileNotFoundException.class) public void testRemoveAclEntriesPathNotFound() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); // Path has not been created. 
List aclSpec = Lists.newArrayList( @@ -512,7 +515,7 @@ public void testRemoveAclEntriesPathNotFound() throws Exception { @Test(expected=PathIOException.class) public void testRemoveAclEntriesAccessMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -526,7 +529,7 @@ public void testRemoveAclEntriesAccessMask() throws Exception { @Test(expected=PathIOException.class) public void testRemoveAclEntriesDefaultMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -540,7 +543,7 @@ public void testRemoveAclEntriesDefaultMask() throws Exception { @Test(expected=PathIOException.class) public void testRemoveAclEntriesWithDuplicateEntries() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -556,7 +559,7 @@ public void testRemoveAclEntriesWithDuplicateEntries() throws Exception { @Test public void testRemoveDefaultAcl() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -578,7 +581,7 @@ public 
void testRemoveDefaultAcl() throws Exception { @Test public void testRemoveDefaultAclOnlyAccess() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -600,7 +603,7 @@ public void testRemoveDefaultAclOnlyAccess() throws Exception { @Test public void testRemoveDefaultAclOnlyDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -616,7 +619,7 @@ public void testRemoveDefaultAclOnlyDefault() throws Exception { @Test public void testRemoveDefaultAclMinimal() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); fs.removeDefaultAcl(path); @@ -629,7 +632,7 @@ public void testRemoveDefaultAclMinimal() throws Exception { @Test public void testRemoveDefaultAclStickyBit() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) 01750)); List aclSpec = Lists.newArrayList( @@ -651,7 +654,7 @@ public void testRemoveDefaultAclStickyBit() throws Exception { @Test(expected=FileNotFoundException.class) public void testRemoveDefaultAclPathNotFound() throws Exception { final AzureBlobFileSystem fs = 
this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); // Path has not been created. fs.removeDefaultAcl(path); @@ -660,7 +663,7 @@ public void testRemoveDefaultAclPathNotFound() throws Exception { @Test public void testRemoveAcl() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -682,7 +685,7 @@ public void testRemoveAcl() throws Exception { @Test public void testRemoveAclMinimalAcl() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -696,7 +699,7 @@ public void testRemoveAclMinimalAcl() throws Exception { @Test public void testRemoveAclStickyBit() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) 01750)); List aclSpec = Lists.newArrayList( @@ -716,7 +719,7 @@ public void testRemoveAclStickyBit() throws Exception { @Test public void testRemoveAclOnlyDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -735,7 +738,7 @@ public void 
testRemoveAclOnlyDefault() throws Exception { @Test(expected=FileNotFoundException.class) public void testRemoveAclPathNotFound() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); // Path has not been created. fs.removeAcl(path); @@ -744,7 +747,7 @@ public void testRemoveAclPathNotFound() throws Exception { @Test public void testSetAcl() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -770,7 +773,7 @@ public void testSetAcl() throws Exception { @Test public void testSetAclOnlyAccess() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -791,7 +794,7 @@ public void testSetAclOnlyAccess() throws Exception { @Test public void testSetAclOnlyDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -811,7 +814,7 @@ public void testSetAclOnlyDefault() throws Exception { @Test public void testSetAclMinimal() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); 
fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R_R)); @@ -835,7 +838,7 @@ public void testSetAclMinimal() throws Exception { @Test public void testSetAclMinimalDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -855,7 +858,7 @@ public void testSetAclMinimalDefault() throws Exception { @Test public void testSetAclCustomMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -877,7 +880,7 @@ public void testSetAclCustomMask() throws Exception { @Test public void testSetAclStickyBit() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) 01750)); List aclSpec = Lists.newArrayList( @@ -903,7 +906,7 @@ public void testSetAclStickyBit() throws Exception { @Test(expected=FileNotFoundException.class) public void testSetAclPathNotFound() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); // Path has not been created. 
List aclSpec = Lists.newArrayList( @@ -917,7 +920,7 @@ public void testSetAclPathNotFound() throws Exception { @Test(expected=Exception.class) public void testSetAclDefaultOnFile() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -929,7 +932,7 @@ public void testSetAclDefaultOnFile() throws Exception { @Test public void testSetAclDoesNotChangeDefaultMask() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -953,7 +956,7 @@ public void testSetAclDoesNotChangeDefaultMask() throws Exception { @Test(expected=PathIOException.class) public void testSetAclWithDuplicateEntries() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -965,7 +968,7 @@ public void testSetAclWithDuplicateEntries() throws Exception { @Test public void testSetPermission() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -992,7 +995,7 @@ public void testSetPermission() throws Exception { @Test public void testSetPermissionOnlyAccess() 
throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); fs.create(path).close(); fs.setPermission(path, FsPermission.createImmutable((short) RW_R)); @@ -1014,7 +1017,7 @@ public void testSetPermissionOnlyAccess() throws Exception { @Test public void testSetPermissionOnlyDefault() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1038,7 +1041,7 @@ public void testSetPermissionOnlyDefault() throws Exception { @Test public void testDefaultAclNewFile() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1058,7 +1061,7 @@ public void testDefaultAclNewFile() throws Exception { @Ignore // wait umask fix to be deployed public void testOnlyAccessAclNewFile() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1075,7 +1078,7 @@ public void testOnlyAccessAclNewFile() throws Exception { @Test public void testDefaultMinimalAclNewFile() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, 
UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1094,7 +1097,7 @@ public void testDefaultMinimalAclNewFile() throws Exception { @Test public void testDefaultAclNewDir() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1120,7 +1123,7 @@ public void testDefaultAclNewDir() throws Exception { @Test public void testOnlyAccessAclNewDir() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1137,7 +1140,7 @@ public void testOnlyAccessAclNewDir() throws Exception { @Test public void testDefaultMinimalAclNewDir() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX)); List aclSpec = Lists.newArrayList( @@ -1159,7 +1162,7 @@ public void testDefaultMinimalAclNewDir() throws Exception { @Test public void testDefaultAclNewFileWithMode() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX_RX)); List aclSpec = Lists.newArrayList( @@ -1181,7 +1184,7 @@ public void 
testDefaultAclNewFileWithMode() throws Exception { @Test public void testDefaultAclNewDirWithMode() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) RWX_RX_RX)); List aclSpec = Lists.newArrayList( @@ -1205,7 +1208,7 @@ public void testDefaultAclNewDirWithMode() throws Exception { @Test public void testDefaultAclRenamedFile() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); Path dirPath = new Path(path, "dir"); FileSystem.mkdirs(fs, dirPath, FsPermission.createImmutable((short) RWX_RX)); @@ -1216,7 +1219,12 @@ public void testDefaultAclRenamedFile() throws Exception { fs.create(filePath).close(); fs.setPermission(filePath, FsPermission.createImmutable((short) RW_R)); Path renamedFilePath = new Path(dirPath, "file1"); + + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.RENAME, true, 0)); fs.rename(filePath, renamedFilePath); + fs.registerListener(null); AclEntry[] expected = new AclEntry[] { }; AclStatus s = fs.getAclStatus(renamedFilePath); AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]); @@ -1227,7 +1235,7 @@ public void testDefaultAclRenamedFile() throws Exception { @Test public void testDefaultAclRenamedDir() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); path = new Path(testRoot, UUID.randomUUID().toString()); Path dirPath = new Path(path, "dir"); FileSystem.mkdirs(fs, dirPath, FsPermission.createImmutable((short) RWX_RX)); @@ -1248,39 +1256,58 @@ 
public void testDefaultAclRenamedDir() throws Exception { @Test public void testEnsureAclOperationWorksForRoot() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - assumeTrue(fs.getIsNamespaceEnabled()); + assumeTrue(getIsNamespaceEnabled(fs)); Path rootPath = new Path("/"); List aclSpec1 = Lists.newArrayList( aclEntry(DEFAULT, GROUP, FOO, ALL), aclEntry(ACCESS, GROUP, BAR, ALL)); + + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.SET_ACL, true, 0)); fs.setAcl(rootPath, aclSpec1); + + fs.setListenerOperation(FSOperationType.GET_ACL_STATUS); fs.getAclStatus(rootPath); + fs.setListenerOperation(FSOperationType.SET_OWNER); fs.setOwner(rootPath, TEST_OWNER, TEST_GROUP); + fs.setListenerOperation(FSOperationType.SET_PERMISSION); fs.setPermission(rootPath, new FsPermission("777")); List aclSpec2 = Lists.newArrayList( aclEntry(DEFAULT, USER, FOO, ALL), aclEntry(ACCESS, USER, BAR, ALL)); + fs.setListenerOperation(FSOperationType.MODIFY_ACL); fs.modifyAclEntries(rootPath, aclSpec2); + fs.setListenerOperation(FSOperationType.REMOVE_ACL_ENTRIES); fs.removeAclEntries(rootPath, aclSpec2); + fs.setListenerOperation(FSOperationType.REMOVE_DEFAULT_ACL); fs.removeDefaultAcl(rootPath); + fs.setListenerOperation(FSOperationType.REMOVE_ACL); fs.removeAcl(rootPath); } @Test public void testSetOwnerForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + AbfsConfiguration conf = fs.getAbfsStore().getAbfsConfiguration(); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); - assertTrue(fs.exists(filePath)); + assertPathExists(fs, "This path should exist", filePath); + TracingHeaderValidator tracingHeaderValidator = new TracingHeaderValidator( + conf.getClientCorrelationId(), 
fs.getFileSystemId(), + FSOperationType.GET_FILESTATUS, false, 0); + fs.registerListener(tracingHeaderValidator); FileStatus oldFileStatus = fs.getFileStatus(filePath); + tracingHeaderValidator.setOperation(FSOperationType.SET_OWNER); fs.setOwner(filePath, TEST_OWNER, TEST_GROUP); + fs.registerListener(null); FileStatus newFileStatus = fs.getFileStatus(filePath); assertEquals(oldFileStatus.getOwner(), newFileStatus.getOwner()); @@ -1290,11 +1317,11 @@ public void testSetOwnerForNonNamespaceEnabledAccount() throws Exception { @Test public void testSetPermissionForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); - assertTrue(fs.exists(filePath)); + assertPathExists(fs, "This path should exist", filePath); FsPermission oldPermission = fs.getFileStatus(filePath).getPermission(); // default permission for non-namespace enabled account is "777" FsPermission newPermission = new FsPermission("557"); @@ -1309,7 +1336,7 @@ public void testSetPermissionForNonNamespaceEnabledAccount() throws Exception { @Test public void testModifyAclEntriesForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); try { @@ -1326,7 +1353,7 @@ public void testModifyAclEntriesForNonNamespaceEnabledAccount() throws Exception @Test public void testRemoveAclEntriesEntriesForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); 
try { @@ -1343,7 +1370,7 @@ public void testRemoveAclEntriesEntriesForNonNamespaceEnabledAccount() throws Ex @Test public void testRemoveDefaultAclForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); try { @@ -1357,7 +1384,7 @@ public void testRemoveDefaultAclForNonNamespaceEnabledAccount() throws Exception @Test public void testRemoveAclForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); try { @@ -1371,7 +1398,7 @@ public void testRemoveAclForNonNamespaceEnabledAccount() throws Exception { @Test public void testSetAclForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); try { @@ -1388,7 +1415,7 @@ public void testSetAclForNonNamespaceEnabledAccount() throws Exception { @Test public void testGetAclStatusForNonNamespaceEnabledAccount() throws Exception { final AzureBlobFileSystem fs = this.getFileSystem(); - Assume.assumeTrue(!fs.getIsNamespaceEnabled()); + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); final Path filePath = new Path(methodName.getMethodName()); fs.create(filePath); try { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java new file mode 100644 index 0000000000000..c2d7f80d37348 --- /dev/null +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java @@ -0,0 +1,982 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 ("License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.EnumSet; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.UUID; + +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters.Mode; +import 
org.apache.hadoop.fs.azurebfs.services.AbfsAclHelper; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.utils.Base64; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.test.LambdaTestUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_ALGORITHM; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_KEY_SHA256; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_REQUEST_SERVER_ENCRYPTED; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_SERVER_ENCRYPTED; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ACCOUNT_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_CPK_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_CPK_ENABLED_SECONDARY_ACCOUNT; +import static 
org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_CPK_ENABLED_SECONDARY_ACCOUNT_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; +import static org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers.aclEntry; +import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS; +import static org.apache.hadoop.fs.permission.AclEntryType.USER; +import static org.apache.hadoop.fs.permission.FsAction.ALL; + +public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest { + private static final Logger LOG = LoggerFactory + .getLogger(ITestCustomerProvidedKey.class); + + private static final String XMS_PROPERTIES_ENCODING = "ISO-8859-1"; + private static final int INT_512 = 512; + private static final int INT_50 = 50; + private static final int ENCRYPTION_KEY_LEN = 32; + private static final int FILE_SIZE = 10 * ONE_MB; + private static final int FILE_SIZE_FOR_COPY_BETWEEN_ACCOUNTS = 24 * ONE_MB; + + private boolean isNamespaceEnabled; + + public ITestCustomerProvidedKey() throws Exception { + boolean isCPKTestsEnabled = getConfiguration() + .getBoolean(FS_AZURE_TEST_CPK_ENABLED, false); + Assume.assumeTrue(isCPKTestsEnabled); + isNamespaceEnabled = getConfiguration() + .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + } + + @Test + public void testReadWithCPK() throws Exception { + final AzureBlobFileSystem fs = getAbfs(true); + String fileName = path("/" + methodName.getMethodName()).toString(); + createFileAndGetContent(fs, fileName, FILE_SIZE); + + AbfsClient abfsClient = fs.getAbfsClient(); + int length = FILE_SIZE; + byte[] buffer = new byte[length]; + TracingContext tracingContext = getTestTracingContext(fs, false); + final AbfsRestOperation op = abfsClient.getPathStatus(fileName, false, + tracingContext); + final String eTag = 
op.getResult() + .getResponseHeader(HttpHeaderConfigurations.ETAG); + AbfsRestOperation abfsRestOperation = abfsClient + .read(fileName, 0, buffer, 0, length, eTag, null, tracingContext); + assertCPKHeaders(abfsRestOperation, true); + assertResponseHeader(abfsRestOperation, true, X_MS_ENCRYPTION_KEY_SHA256, + getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, true, X_MS_SERVER_ENCRYPTED, + "true"); + assertResponseHeader(abfsRestOperation, false, + X_MS_REQUEST_SERVER_ENCRYPTED, ""); + + // Trying to read with different CPK headers + Configuration conf = fs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName, + "different-1234567890123456789012"); + try (AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + FSDataInputStream iStream = fs2.open(new Path(fileName))) { + int len = 8 * ONE_MB; + byte[] b = new byte[len]; + LambdaTestUtils.intercept(IOException.class, () -> { + iStream.read(b, 0, len); + }); + } + + // Trying to read with no CPK headers + conf.unset(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName); + try (AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem + .get(conf); FSDataInputStream iStream = fs3.open(new Path(fileName))) { + int len = 8 * ONE_MB; + byte[] b = new byte[len]; + LambdaTestUtils.intercept(IOException.class, () -> { + iStream.read(b, 0, len); + }); + } + } + + @Test + public void testReadWithoutCPK() throws Exception { + final AzureBlobFileSystem fs = getAbfs(false); + String fileName = path("/" + methodName.getMethodName()).toString(); + createFileAndGetContent(fs, fileName, FILE_SIZE); + + AbfsClient abfsClient = fs.getAbfsClient(); + int length = INT_512; + byte[] buffer = new byte[length * 4]; + TracingContext tracingContext = getTestTracingContext(fs, false); + final AbfsRestOperation op = abfsClient + .getPathStatus(fileName, false, tracingContext); + final String eTag = op.getResult() + .getResponseHeader(HttpHeaderConfigurations.ETAG); + AbfsRestOperation abfsRestOperation = abfsClient + .read(fileName, 0, buffer, 0, length, eTag, null, tracingContext); + assertCPKHeaders(abfsRestOperation, false); + assertResponseHeader(abfsRestOperation, false, X_MS_ENCRYPTION_KEY_SHA256, + getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, true, X_MS_SERVER_ENCRYPTED, + "true"); + assertResponseHeader(abfsRestOperation, false, + X_MS_REQUEST_SERVER_ENCRYPTED, ""); + + // Trying to read with CPK headers + Configuration conf = fs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName, + "12345678901234567890123456789012"); + + try (AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + AbfsClient abfsClient2 = fs2.getAbfsClient()) { + LambdaTestUtils.intercept(IOException.class, () -> { + abfsClient2.read(fileName, 0, buffer, 0, length, eTag, null, + getTestTracingContext(fs, false)); + }); + } + } + + @Test + public void testAppendWithCPK() throws Exception { + final AzureBlobFileSystem fs = getAbfs(true); + final String fileName = path("/" + methodName.getMethodName()).toString(); + createFileAndGetContent(fs, fileName, FILE_SIZE); + + // Trying to append with correct CPK headers + AppendRequestParameters appendRequestParameters = + new AppendRequestParameters( + 0, 0, 5, Mode.APPEND_MODE, false, null, true); + byte[] buffer = getRandomBytesArray(5); + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation abfsRestOperation = abfsClient + .append(fileName, buffer, appendRequestParameters, null, getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, true); + assertResponseHeader(abfsRestOperation, true, X_MS_ENCRYPTION_KEY_SHA256, + getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, false, X_MS_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, true, X_MS_REQUEST_SERVER_ENCRYPTED, + "true"); + + // Trying to append with different CPK headers + Configuration conf = fs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName, + "different-1234567890123456789012"); + try (AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + AbfsClient abfsClient2 = fs2.getAbfsClient()) { + LambdaTestUtils.intercept(IOException.class, () -> { + abfsClient2.append(fileName, buffer, appendRequestParameters, null, + getTestTracingContext(fs, false)); + }); + } + + // Trying to append with no CPK headers + conf.unset(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName); + try (AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem + .get(conf); AbfsClient abfsClient3 = fs3.getAbfsClient()) { + LambdaTestUtils.intercept(IOException.class, () -> { + abfsClient3.append(fileName, buffer, appendRequestParameters, null, + getTestTracingContext(fs, false)); + }); + } + } + + @Test + public void testAppendWithoutCPK() throws Exception { + final AzureBlobFileSystem fs = getAbfs(false); + final String fileName = path("/" + methodName.getMethodName()).toString(); + createFileAndGetContent(fs, fileName, FILE_SIZE); + + // Trying to append without CPK headers + AppendRequestParameters appendRequestParameters = + new AppendRequestParameters( + 0, 0, 5, Mode.APPEND_MODE, false, null, true); + byte[] buffer = getRandomBytesArray(5); + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation abfsRestOperation = abfsClient + .append(fileName, buffer, appendRequestParameters, null, + getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, false); + assertResponseHeader(abfsRestOperation, false, X_MS_ENCRYPTION_KEY_SHA256, + ""); + assertResponseHeader(abfsRestOperation, false, X_MS_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, true, X_MS_REQUEST_SERVER_ENCRYPTED, + "true"); + + // Trying to append with CPK headers + Configuration conf = fs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName, + "12345678901234567890123456789012"); + try (AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + AbfsClient abfsClient2 = fs2.getAbfsClient()) { + LambdaTestUtils.intercept(IOException.class, () -> { + abfsClient2.append(fileName, buffer, appendRequestParameters, null, + getTestTracingContext(fs, false)); + }); + } + } + + @Test + public void testSetGetXAttr() throws Exception { + final AzureBlobFileSystem fs = getAbfs(true); + final String fileName = path(methodName.getMethodName()).toString(); + createFileAndGetContent(fs, fileName, FILE_SIZE); + + String valSent = "testValue"; + String attrName = "testXAttr"; + + // set get and verify + fs.setXAttr(new Path(fileName), attrName, + valSent.getBytes(StandardCharsets.UTF_8), + EnumSet.of(XAttrSetFlag.CREATE)); + byte[] valBytes = fs.getXAttr(new Path(fileName), attrName); + String valRecieved = new String(valBytes, StandardCharsets.UTF_8); + assertEquals(valSent, valRecieved); + + // set new value get and verify + valSent = "new value"; + fs.setXAttr(new Path(fileName), attrName, + valSent.getBytes(StandardCharsets.UTF_8), + EnumSet.of(XAttrSetFlag.REPLACE)); + valBytes = fs.getXAttr(new Path(fileName), attrName); + valRecieved = new String(valBytes, StandardCharsets.UTF_8); + assertEquals(valSent, valRecieved); + + // Read without CPK header + LambdaTestUtils.intercept(IOException.class, () -> { + getAbfs(false).getXAttr(new Path(fileName), attrName); + }); + + // Wrong CPK + LambdaTestUtils.intercept(IOException.class, () -> { + getSameFSWithWrongCPK(fs).getXAttr(new Path(fileName), attrName); + }); + } + + @Test + public void testCopyBetweenAccounts() throws Exception { + String accountName = getRawConfiguration() + .get(FS_AZURE_TEST_CPK_ENABLED_SECONDARY_ACCOUNT); + String accountKey = getRawConfiguration() + .get(FS_AZURE_TEST_CPK_ENABLED_SECONDARY_ACCOUNT_KEY); + Assume.assumeTrue(accountName != null && !accountName.isEmpty()); + Assume.assumeTrue(accountKey != null && !accountKey.isEmpty()); + String 
fileSystemName = "cpkfs"; + + // Create fs1 and a file with CPK + AzureBlobFileSystem fs1 = getAbfs(true); + int fileSize = FILE_SIZE_FOR_COPY_BETWEEN_ACCOUNTS; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs1, + String.format("fs1-file%s.txt", UUID.randomUUID()), fileContent); + + // Create fs2 with different CPK + Configuration conf = new Configuration(); + conf.addResource(TEST_CONFIGURATION_FILE_NAME); + conf.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, true); + conf.unset(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_ABFS_ACCOUNT_NAME, accountName); + conf.set(FS_AZURE_ACCOUNT_KEY + "." + accountName, accountKey); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName, + "123456789012345678901234567890ab"); + conf.set("fs.defaultFS", "abfs://" + fileSystemName + "@" + accountName); + AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + + // Read from fs1 and write to fs2, fs1 and fs2 are having different CPK + Path fs2DestFilePath = new Path( + String.format("fs2-dest-file%s.txt", UUID.randomUUID())); + FSDataOutputStream ops = fs2.create(fs2DestFilePath); + try (FSDataInputStream iStream = fs1.open(testFilePath)) { + long totalBytesRead = 0; + do { + int length = 8 * ONE_MB; + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + totalBytesRead += bytesRead; + ops.write(buffer, 0, bytesRead); + } while (totalBytesRead < fileContent.length); + ops.close(); + } + + // Trying to read fs2DestFilePath with different CPK headers + conf.unset(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName, + "different-1234567890123456789012"); + try (AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem + .get(conf); FSDataInputStream iStream = fs3.open(fs2DestFilePath)) { + int length = 8 * ONE_MB; + byte[] buffer = new byte[length]; + LambdaTestUtils.intercept(IOException.class, () -> { + iStream.read(buffer, 0, length); + }); + } + + // Trying to read fs2DestFilePath with no CPK headers + conf.unset(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName); + try (AzureBlobFileSystem fs4 = (AzureBlobFileSystem) FileSystem + .get(conf); FSDataInputStream iStream = fs4.open(fs2DestFilePath)) { + int length = 8 * ONE_MB; + byte[] buffer = new byte[length]; + LambdaTestUtils.intercept(IOException.class, () -> { + iStream.read(buffer, 0, length); + }); + } + + // Read fs2DestFilePath and verify the content with the initial random + // bytes created and wrote into the source file at fs1 + try (FSDataInputStream iStream = fs2.open(fs2DestFilePath)) { + long totalBytesRead = 0; + int pos = 0; + do { + int length = 8 * ONE_MB; + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + totalBytesRead += bytesRead; + for (int i = 0; i < bytesRead; i++) { + assertEquals(fileContent[pos + i], buffer[i]); + } + pos = pos + bytesRead; + } while (totalBytesRead < fileContent.length); + } + } + + @Test + public void testListPathWithCPK() throws Exception { + testListPath(true); + } + + @Test + public void testListPathWithoutCPK() throws Exception { + testListPath(false); + } + + private void testListPath(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final Path testPath = path("/" + methodName.getMethodName()); + String testDirName = testPath.toString(); + fs.mkdirs(testPath); + createFileAndGetContent(fs, testDirName + "/aaa", FILE_SIZE); + createFileAndGetContent(fs, testDirName + "/bbb", FILE_SIZE); + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation 
abfsRestOperation = abfsClient + .listPath(testDirName, false, INT_50, null, + getTestTracingContext(fs, false)); + assertListstatus(fs, abfsRestOperation, testPath); + + // Trying with different CPK headers + Configuration conf = fs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName, + "different-1234567890123456789012"); + AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + AbfsClient abfsClient2 = fs2.getAbfsClient(); + TracingContext tracingContext = getTestTracingContext(fs, false); + abfsRestOperation = abfsClient2.listPath(testDirName, false, INT_50, + null, tracingContext); + assertListstatus(fs, abfsRestOperation, testPath); + + if (isWithCPK) { + // Trying with no CPK headers + conf.unset(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName); + AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem.get(conf); + AbfsClient abfsClient3 = fs3.getAbfsClient(); + abfsRestOperation = abfsClient3 + .listPath(testDirName, false, INT_50, null, tracingContext); + assertListstatus(fs, abfsRestOperation, testPath); + } + } + + private void assertListstatus(AzureBlobFileSystem fs, + AbfsRestOperation abfsRestOperation, Path testPath) throws IOException { + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + + FileStatus[] listStatuses = fs.listStatus(testPath); + Assertions.assertThat(listStatuses.length) + .describedAs("listStatuses should have 2 entries").isEqualTo(2); + + listStatuses = getSameFSWithWrongCPK(fs).listStatus(testPath); + Assertions.assertThat(listStatuses.length) + .describedAs("listStatuses should have 2 entries").isEqualTo(2); + } + + @Test + public void testCreatePathWithCPK() throws Exception { + testCreatePath(true); + } + + @Test + public void testCreatePathWithoutCPK() throws Exception { + testCreatePath(false); + } + + private void testCreatePath(final boolean 
isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + + AbfsClient abfsClient = fs.getAbfsClient(); + FsPermission permission = new FsPermission(FsAction.EXECUTE, + FsAction.EXECUTE, FsAction.EXECUTE); + FsPermission umask = new FsPermission(FsAction.NONE, FsAction.NONE, + FsAction.NONE); + TracingContext tracingContext = getTestTracingContext(fs, false); + boolean isNamespaceEnabled = fs.getIsNamespaceEnabled(tracingContext); + AbfsRestOperation abfsRestOperation = abfsClient + .createPath(testFileName, true, true, + isNamespaceEnabled ? getOctalNotation(permission) : null, + isNamespaceEnabled ? getOctalNotation(umask) : null, false, null, + tracingContext); + assertCPKHeaders(abfsRestOperation, isWithCPK); + assertResponseHeader(abfsRestOperation, isWithCPK, + X_MS_ENCRYPTION_KEY_SHA256, getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, false, X_MS_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, true, X_MS_REQUEST_SERVER_ENCRYPTED, + "true"); + + FileStatus[] listStatuses = fs.listStatus(new Path(testFileName)); + Assertions.assertThat(listStatuses.length) + .describedAs("listStatuses should have 1 entry").isEqualTo(1); + + listStatuses = getSameFSWithWrongCPK(fs).listStatus(new Path(testFileName)); + Assertions.assertThat(listStatuses.length) + .describedAs("listStatuses should have 1 entry").isEqualTo(1); + } + + @Test + public void testRenamePathWithCPK() throws Exception { + testRenamePath(true); + } + + @Test + public void testRenamePathWithoutCPK() throws Exception { + testRenamePath(false); + } + + private void testRenamePath(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + + 
FileStatus fileStatusBeforeRename = fs + .getFileStatus(new Path(testFileName)); + + String newName = "/newName"; + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation abfsRestOperation = abfsClient + .renamePath(testFileName, newName, null, + getTestTracingContext(fs, false), null, false, isNamespaceEnabled) + .getOp(); + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + + LambdaTestUtils.intercept(FileNotFoundException.class, + (() -> fs.getFileStatus(new Path(testFileName)))); + + FileStatus fileStatusAfterRename = fs.getFileStatus(new Path(newName)); + Assertions.assertThat(fileStatusAfterRename.getLen()) + .describedAs("File size has to be same before and after rename") + .isEqualTo(fileStatusBeforeRename.getLen()); + } + + @Test + public void testFlushWithCPK() throws Exception { + testFlush(true); + } + + @Test + public void testFlushWithoutCPK() throws Exception { + testFlush(false); + } + + private void testFlush(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + fs.create(new Path(testFileName)).close(); + AbfsClient abfsClient = fs.getAbfsClient(); + String expectedCPKSha = getCPKSha(fs); + + byte[] fileContent = getRandomBytesArray(FILE_SIZE); + Path testFilePath = new Path(testFileName + "1"); + try (FSDataOutputStream oStream = fs.create(testFilePath)) { + oStream.write(fileContent); + } + + // Trying to read with different CPK headers + Configuration conf = fs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName, + "different-1234567890123456789012"); + try (AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(conf); + AbfsClient abfsClient2 = fs2.getAbfsClient()) { + LambdaTestUtils.intercept(IOException.class, () -> { + abfsClient2.flush(testFileName, 0, false, false, null, null, + getTestTracingContext(fs, false)); + }); + } + + // Trying to read with no CPK headers + if (isWithCPK) { + conf.unset(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName); + try (AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem + .get(conf); AbfsClient abfsClient3 = fs3.getAbfsClient()) { + LambdaTestUtils.intercept(IOException.class, () -> { + abfsClient3.flush(testFileName, 0, false, false, null, null, + getTestTracingContext(fs, false)); + }); + } + } + + // With correct CPK + AbfsRestOperation abfsRestOperation = abfsClient + .flush(testFileName, 0, false, false, null, null, + getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, isWithCPK); + assertResponseHeader(abfsRestOperation, isWithCPK, + X_MS_ENCRYPTION_KEY_SHA256, expectedCPKSha); + assertResponseHeader(abfsRestOperation, false, X_MS_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, true, X_MS_REQUEST_SERVER_ENCRYPTED, + isWithCPK + ""); + } + + @Test + public void testSetPathPropertiesWithCPK() throws Exception { + testSetPathProperties(true); + } + + @Test + public void testSetPathPropertiesWithoutCPK() throws Exception { + testSetPathProperties(false); + } + + private void testSetPathProperties(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + + AbfsClient abfsClient = fs.getAbfsClient(); + final Hashtable properties = new Hashtable<>(); + properties.put("key", "val"); + AbfsRestOperation abfsRestOperation = abfsClient + .setPathProperties(testFileName, + 
convertXmsPropertiesToCommaSeparatedString(properties), + getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, isWithCPK); + assertResponseHeader(abfsRestOperation, isWithCPK, + X_MS_ENCRYPTION_KEY_SHA256, getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, false, X_MS_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, true, X_MS_REQUEST_SERVER_ENCRYPTED, + "true"); + } + + @Test + public void testGetPathStatusFileWithCPK() throws Exception { + testGetPathStatusFile(true); + } + + @Test + public void testGetPathStatusFileWithoutCPK() throws Exception { + testGetPathStatusFile(false); + } + + private void testGetPathStatusFile(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + + AbfsClient abfsClient = fs.getAbfsClient(); + TracingContext tracingContext = getTestTracingContext(fs, false); + AbfsRestOperation abfsRestOperation = abfsClient + .getPathStatus(testFileName, false, tracingContext); + assertCPKHeaders(abfsRestOperation, false); + assertResponseHeader(abfsRestOperation, isWithCPK, + X_MS_ENCRYPTION_KEY_SHA256, getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, true, X_MS_SERVER_ENCRYPTED, + "true"); + assertResponseHeader(abfsRestOperation, false, + X_MS_REQUEST_SERVER_ENCRYPTED, ""); + + abfsRestOperation = abfsClient.getPathStatus(testFileName, true, tracingContext); + assertCPKHeaders(abfsRestOperation, isWithCPK); + assertResponseHeader(abfsRestOperation, isWithCPK, + X_MS_ENCRYPTION_KEY_SHA256, getCPKSha(fs)); + assertResponseHeader(abfsRestOperation, true, X_MS_SERVER_ENCRYPTED, + "true"); + assertResponseHeader(abfsRestOperation, false, + X_MS_REQUEST_SERVER_ENCRYPTED, ""); + } + + @Test + public void testDeletePathWithCPK() throws Exception { + testDeletePath(true); + } + + @Test + public void 
testDeletePathWithoutCPK() throws Exception { + testDeletePath(false); + } + + private void testDeletePath(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + + FileStatus[] listStatuses = fs.listStatus(new Path(testFileName)); + Assertions.assertThat(listStatuses.length) + .describedAs("listStatuses should have 1 entry").isEqualTo(1); + + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation abfsRestOperation = abfsClient + .deletePath(testFileName, false, null, + getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + + Assertions.assertThatThrownBy(() -> fs.listStatus(new Path(testFileName))) + .isInstanceOf(FileNotFoundException.class); + } + + @Test + public void testSetPermissionWithCPK() throws Exception { + testSetPermission(true); + } + + @Test + public void testSetPermissionWithoutCPK() throws Exception { + testSetPermission(false); + } + + private void testSetPermission(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + Assume.assumeTrue(fs.getIsNamespaceEnabled(getTestTracingContext(fs, false))); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + AbfsClient abfsClient = fs.getAbfsClient(); + FsPermission permission = new FsPermission(FsAction.EXECUTE, + FsAction.EXECUTE, FsAction.EXECUTE); + AbfsRestOperation abfsRestOperation = abfsClient + .setPermission(testFileName, permission.toString(), + getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + } + + @Test + public void testSetAclWithCPK() throws Exception { + testSetAcl(true); + } + + @Test + public void 
testSetAclWithoutCPK() throws Exception { + testSetAcl(false); + } + + private void testSetAcl(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + TracingContext tracingContext = getTestTracingContext(fs, false); + Assume.assumeTrue(fs.getIsNamespaceEnabled(tracingContext)); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + AbfsClient abfsClient = fs.getAbfsClient(); + + List aclSpec = Lists.newArrayList(aclEntry(ACCESS, USER, ALL)); + final Map aclEntries = AbfsAclHelper + .deserializeAclSpec(AclEntry.aclSpecToString(aclSpec)); + + AbfsRestOperation abfsRestOperation = abfsClient + .setAcl(testFileName, AbfsAclHelper.serializeAclSpec(aclEntries), + tracingContext); + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + } + + @Test + public void testGetAclWithCPK() throws Exception { + testGetAcl(true); + } + + @Test + public void testGetAclWithoutCPK() throws Exception { + testGetAcl(false); + } + + private void testGetAcl(final boolean isWithCPK) throws Exception { + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + TracingContext tracingContext = getTestTracingContext(fs, false); + Assume.assumeTrue(fs.getIsNamespaceEnabled(tracingContext)); + createFileAndGetContent(fs, testFileName, FILE_SIZE); + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation abfsRestOperation = + abfsClient.getAclStatus(testFileName, tracingContext); + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + } + + @Test + public void testCheckAccessWithCPK() throws Exception { + testCheckAccess(true); + } + + @Test + public void testCheckAccessWithoutCPK() throws Exception { + testCheckAccess(false); + } + + private void testCheckAccess(final boolean isWithCPK) 
throws Exception { + boolean isHNSEnabled = getConfiguration() + .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + Assume.assumeTrue(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT + " is false", + isHNSEnabled); + Assume.assumeTrue("AuthType has to be OAuth", + getAuthType() == AuthType.OAuth); + + final AzureBlobFileSystem fs = getAbfs(isWithCPK); + final String testFileName = path("/" + methodName.getMethodName()) + .toString(); + fs.create(new Path(testFileName)).close(); + AbfsClient abfsClient = fs.getAbfsClient(); + AbfsRestOperation abfsRestOperation = abfsClient + .checkAccess(testFileName, "rwx", getTestTracingContext(fs, false)); + assertCPKHeaders(abfsRestOperation, false); + assertNoCPKResponseHeadersPresent(abfsRestOperation); + } + + private byte[] createFileAndGetContent(AzureBlobFileSystem fs, + String fileName, int fileSize) throws IOException { + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + ContractTestUtils.verifyFileContents(fs, testFilePath, fileContent); + return fileContent; + } + + private void assertCPKHeaders(AbfsRestOperation abfsRestOperation, + boolean isCPKHeaderExpected) { + assertHeader(abfsRestOperation, X_MS_ENCRYPTION_KEY, isCPKHeaderExpected); + assertHeader(abfsRestOperation, X_MS_ENCRYPTION_KEY_SHA256, + isCPKHeaderExpected); + assertHeader(abfsRestOperation, X_MS_ENCRYPTION_ALGORITHM, + isCPKHeaderExpected); + } + + private void assertNoCPKResponseHeadersPresent( + AbfsRestOperation abfsRestOperation) { + assertResponseHeader(abfsRestOperation, false, X_MS_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, false, + X_MS_REQUEST_SERVER_ENCRYPTED, ""); + assertResponseHeader(abfsRestOperation, false, X_MS_ENCRYPTION_KEY_SHA256, + ""); + } + + private void assertResponseHeader(AbfsRestOperation abfsRestOperation, + boolean isHeaderExpected, String headerName, String expectedValue) { + final AbfsHttpOperation result = 
abfsRestOperation.getResult(); + final String value = result.getResponseHeader(headerName); + if (isHeaderExpected) { + Assertions.assertThat(value).isEqualTo(expectedValue); + } else { + Assertions.assertThat(value).isNull(); + } + } + + private void assertHeader(AbfsRestOperation abfsRestOperation, + String headerName, boolean isCPKHeaderExpected) { + assertTrue(abfsRestOperation != null); + Optional header = abfsRestOperation.getRequestHeaders() + .stream().filter(abfsHttpHeader -> abfsHttpHeader.getName() + .equalsIgnoreCase(headerName)).findFirst(); + String desc; + if (isCPKHeaderExpected) { + desc = + "CPK header " + headerName + " is expected, but the same is absent."; + } else { + desc = "CPK header " + headerName + + " is not expected, but the same is present."; + } + Assertions.assertThat(header.isPresent()).describedAs(desc) + .isEqualTo(isCPKHeaderExpected); + } + + private byte[] getSHA256Hash(String key) throws IOException { + try { + final MessageDigest digester = MessageDigest.getInstance("SHA-256"); + return digester.digest(key.getBytes(StandardCharsets.UTF_8)); + } catch (NoSuchAlgorithmException e) { + throw new IOException(e); + } + } + + private String getCPKSha(final AzureBlobFileSystem abfs) throws IOException { + Configuration conf = abfs.getConf(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + String encryptionKey = conf + .get(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName); + if (encryptionKey == null || encryptionKey.isEmpty()) { + return ""; + } + return getBase64EncodedString(getSHA256Hash(encryptionKey)); + } + + private String getBase64EncodedString(byte[] bytes) { + return java.util.Base64.getEncoder().encodeToString(bytes); + } + + private Path createFileWithContent(FileSystem fs, String fileName, + byte[] fileContent) throws IOException { + Path testFilePath = new Path(fileName); + try (FSDataOutputStream oStream = fs.create(testFilePath)) { + oStream.write(fileContent); + oStream.flush(); + } + return testFilePath; + } + + private String convertXmsPropertiesToCommaSeparatedString( + final Hashtable properties) + throws CharacterCodingException { + StringBuilder commaSeparatedProperties = new StringBuilder(); + final CharsetEncoder encoder = Charset.forName(XMS_PROPERTIES_ENCODING) + .newEncoder(); + for (Map.Entry propertyEntry : properties.entrySet()) { + String key = propertyEntry.getKey(); + String value = propertyEntry.getValue(); + Boolean canEncodeValue = encoder.canEncode(value); + if (!canEncodeValue) { + throw new CharacterCodingException(); + } + String encodedPropertyValue = Base64 + .encode(encoder.encode(CharBuffer.wrap(value)).array()); + commaSeparatedProperties.append(key).append(AbfsHttpConstants.EQUAL) + .append(encodedPropertyValue); + commaSeparatedProperties.append(AbfsHttpConstants.COMMA); + } + if (commaSeparatedProperties.length() != 0) { + commaSeparatedProperties + .deleteCharAt(commaSeparatedProperties.length() - 1); + } + return commaSeparatedProperties.toString(); + } + + private String getOctalNotation(FsPermission fsPermission) { + Preconditions.checkNotNull(fsPermission, "fsPermission"); + return String + .format(AbfsHttpConstants.PERMISSION_FORMAT, fsPermission.toOctal()); + } + + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + private AzureBlobFileSystem getAbfs(boolean withCPK) throws 
IOException { + return getAbfs(withCPK, "12345678901234567890123456789012"); + } + + private AzureBlobFileSystem getAbfs(boolean withCPK, String cpk) + throws IOException { + Configuration conf = getRawConfiguration(); + if (withCPK) { + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + getAccountName(), + cpk); + } else { + conf.unset( + FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + getAccountName()); + } + return (AzureBlobFileSystem) FileSystem.newInstance(conf); + } + + private AzureBlobFileSystem getSameFSWithWrongCPK( + final AzureBlobFileSystem fs) throws IOException { + AbfsConfiguration abfsConf = fs.getAbfsStore().getAbfsConfiguration(); + Configuration conf = abfsConf.getRawConfiguration(); + String accountName = conf.get(FS_AZURE_ABFS_ACCOUNT_NAME); + String cpk = conf + .get(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." + accountName); + if (cpk == null || cpk.isEmpty()) { + cpk = "01234567890123456789012345678912"; + } + cpk = "different-" + cpk; + String differentCpk = cpk.substring(0, ENCRYPTION_KEY_LEN - 1); + conf.set(FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY + "." 
+ accountName, + differentCpk); + conf.set("fs.defaultFS", + "abfs://" + getFileSystemName() + "@" + accountName); + AzureBlobFileSystem sameFSWithDifferentCPK = + (AzureBlobFileSystem) FileSystem.newInstance(conf); + return sameFSWithDifferentCPK; + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemInitialization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemInitialization.java index 8b60dd801cb30..f7d4a5b7a83e7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemInitialization.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemInitialization.java @@ -20,14 +20,22 @@ import java.net.URI; +import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import static org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_AVAILABLE; +import static org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME; +import static org.apache.hadoop.fs.CommonPathCapabilities.FS_ACLS; +import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD; +import static org.junit.Assume.assumeTrue; + /** * Test AzureBlobFileSystem initialization. 
*/ @@ -74,4 +82,28 @@ public void ensureSecureAzureBlobFileSystemIsInitialized() throws Exception { assertNotNull("working directory", fs.getWorkingDirectory()); } } + + @Test + public void testFileSystemCapabilities() throws Throwable { + final AzureBlobFileSystem fs = getFileSystem(); + + final Path p = new Path("}"); + // etags always present + Assertions.assertThat(fs.hasPathCapability(p, ETAGS_AVAILABLE)) + .describedAs("path capability %s in %s", ETAGS_AVAILABLE, fs) + .isTrue(); + // readahead always correct + Assertions.assertThat(fs.hasPathCapability(p, CAPABILITY_SAFE_READAHEAD)) + .describedAs("path capability %s in %s", CAPABILITY_SAFE_READAHEAD, fs) + .isTrue(); + + // etags-over-rename and ACLs are either both true or both false. + final boolean etagsAcrossRename = fs.hasPathCapability(p, ETAGS_PRESERVED_IN_RENAME); + final boolean acls = fs.hasPathCapability(p, FS_ACLS); + Assertions.assertThat(etagsAcrossRename) + .describedAs("capabilities %s=%s and %s=%s in %s", + ETAGS_PRESERVED_IN_RENAME, etagsAcrossRename, + FS_ACLS, acls, fs) + .isEqualTo(acls); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemProperties.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemProperties.java index ba9b639adb602..0ccef2e6ccb34 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemProperties.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestFileSystemProperties.java @@ -26,22 +26,29 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; /** * Test FileSystemProperties. 
*/ public class ITestFileSystemProperties extends AbstractAbfsIntegrationTest { private static final int TEST_DATA = 100; - private static final Path TEST_PATH = new Path("/testfile"); + private static final String TEST_PATH = "/testfile"; public ITestFileSystemProperties() throws Exception { } @Test public void testReadWriteBytesToFileAndEnsureThreadPoolCleanup() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - testWriteOneByteToFileAndEnsureThreadPoolCleanup(); + Path testPath = path(TEST_PATH); + try(FSDataOutputStream stream = fs.create(testPath)) { + stream.write(TEST_DATA); + } + + FileStatus fileStatus = fs.getFileStatus(testPath); + assertEquals(1, fileStatus.getLen()); - try(FSDataInputStream inputStream = fs.open(TEST_PATH, 4 * 1024 * 1024)) { + try(FSDataInputStream inputStream = fs.open(testPath, 4 * 1024 * 1024)) { int i = inputStream.read(); assertEquals(TEST_DATA, i); } @@ -50,11 +57,12 @@ public void testReadWriteBytesToFileAndEnsureThreadPoolCleanup() throws Exceptio @Test public void testWriteOneByteToFileAndEnsureThreadPoolCleanup() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - try(FSDataOutputStream stream = fs.create(TEST_PATH)) { + Path testPath = path(TEST_PATH); + try(FSDataOutputStream stream = fs.create(testPath)) { stream.write(TEST_DATA); } - FileStatus fileStatus = fs.getFileStatus(TEST_PATH); + FileStatus fileStatus = fs.getFileStatus(testPath); assertEquals(1, fileStatus.getLen()); } @@ -64,8 +72,10 @@ public void testBase64FileSystemProperties() throws Exception { final Hashtable properties = new Hashtable<>(); properties.put("key", "{ value: value }"); - fs.getAbfsStore().setFilesystemProperties(properties); - Hashtable fetchedProperties = fs.getAbfsStore().getFilesystemProperties(); + TracingContext tracingContext = getTestTracingContext(fs, true); + fs.getAbfsStore().setFilesystemProperties(properties, tracingContext); + Hashtable fetchedProperties = fs.getAbfsStore() + 
.getFilesystemProperties(tracingContext); assertEquals(properties, fetchedProperties); } @@ -75,10 +85,12 @@ public void testBase64PathProperties() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final Hashtable properties = new Hashtable<>(); properties.put("key", "{ value: valueTest }"); - touch(TEST_PATH); - fs.getAbfsStore().setPathProperties(TEST_PATH, properties); - Hashtable fetchedProperties = - fs.getAbfsStore().getPathStatus(TEST_PATH); + Path testPath = path(TEST_PATH); + touch(testPath); + TracingContext tracingContext = getTestTracingContext(fs, true); + fs.getAbfsStore().setPathProperties(testPath, properties, tracingContext); + Hashtable fetchedProperties = fs.getAbfsStore() + .getPathStatus(testPath, tracingContext); assertEquals(properties, fetchedProperties); } @@ -88,8 +100,10 @@ public void testBase64InvalidFileSystemProperties() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final Hashtable properties = new Hashtable<>(); properties.put("key", "{ value: value歲 }"); - fs.getAbfsStore().setFilesystemProperties(properties); - Hashtable fetchedProperties = fs.getAbfsStore().getFilesystemProperties(); + TracingContext tracingContext = getTestTracingContext(fs, true); + fs.getAbfsStore().setFilesystemProperties(properties, tracingContext); + Hashtable fetchedProperties = fs.getAbfsStore() + .getFilesystemProperties(tracingContext); assertEquals(properties, fetchedProperties); } @@ -99,9 +113,12 @@ public void testBase64InvalidPathProperties() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final Hashtable properties = new Hashtable<>(); properties.put("key", "{ value: valueTest兩 }"); - touch(TEST_PATH); - fs.getAbfsStore().setPathProperties(TEST_PATH, properties); - Hashtable fetchedProperties = fs.getAbfsStore().getPathStatus(TEST_PATH); + Path testPath = path(TEST_PATH); + touch(testPath); + TracingContext tracingContext = getTestTracingContext(fs, true); + 
fs.getAbfsStore().setPathProperties(testPath, properties, tracingContext); + Hashtable fetchedProperties = fs.getAbfsStore() + .getPathStatus(testPath, tracingContext); assertEquals(properties, fetchedProperties); } @@ -111,8 +128,10 @@ public void testSetFileSystemProperties() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final Hashtable properties = new Hashtable<>(); properties.put("containerForDevTest", "true"); - fs.getAbfsStore().setFilesystemProperties(properties); - Hashtable fetchedProperties = fs.getAbfsStore().getFilesystemProperties(); + TracingContext tracingContext = getTestTracingContext(fs, true); + fs.getAbfsStore().setFilesystemProperties(properties, tracingContext); + Hashtable fetchedProperties = fs.getAbfsStore() + .getFilesystemProperties(tracingContext); assertEquals(properties, fetchedProperties); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java index 74c8803e4f6a8..01227691c3139 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java @@ -23,14 +23,29 @@ import org.junit.Assume; import org.junit.Test; +import org.assertj.core.api.Assertions; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; 
+import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_HNS_ENABLED; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -39,6 +54,9 @@ */ public class ITestGetNameSpaceEnabled extends AbstractAbfsIntegrationTest { + private static final String TRUE_STR = "true"; + private static final String FALSE_STR = "false"; + private boolean isUsingXNSAccount; public ITestGetNameSpaceEnabled() throws Exception { isUsingXNSAccount = getConfiguration().getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); @@ -49,7 +67,7 @@ public void testXNSAccount() throws IOException { Assume.assumeTrue("Skip this test because the account being used for test is a non XNS account", isUsingXNSAccount); assertTrue("Expecting getIsNamespaceEnabled() return true", - getFileSystem().getIsNamespaceEnabled()); + getIsNamespaceEnabled(getFileSystem())); } @Test @@ -57,7 +75,57 @@ public void testNonXNSAccount() throws IOException { Assume.assumeFalse("Skip this test because the account being used for test is a XNS account", isUsingXNSAccount); 
assertFalse("Expecting getIsNamespaceEnabled() return false", - getFileSystem().getIsNamespaceEnabled()); + getIsNamespaceEnabled(getFileSystem())); + } + + @Test + public void testGetIsNamespaceEnabledWhenConfigIsTrue() throws Exception { + AzureBlobFileSystem fs = getNewFSWithHnsConf(TRUE_STR); + Assertions.assertThat(getIsNamespaceEnabled(fs)).describedAs( + "getIsNamespaceEnabled should return true when the " + + "config is set as true").isTrue(); + fs.getAbfsStore().deleteFilesystem(getTestTracingContext(fs, false)); + unsetAndAssert(); + } + + @Test + public void testGetIsNamespaceEnabledWhenConfigIsFalse() throws Exception { + AzureBlobFileSystem fs = getNewFSWithHnsConf(FALSE_STR); + Assertions.assertThat(getIsNamespaceEnabled(fs)).describedAs( + "getIsNamespaceEnabled should return false when the " + + "config is set as false").isFalse(); + fs.getAbfsStore().deleteFilesystem(getTestTracingContext(fs, false)); + unsetAndAssert(); + } + + private void unsetAndAssert() throws Exception { + AzureBlobFileSystem fs = getNewFSWithHnsConf( + DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED); + boolean expectedValue = this.getConfiguration() + .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + Assertions.assertThat(getIsNamespaceEnabled(fs)).describedAs( + "getIsNamespaceEnabled should return the value " + + "configured for fs.azure.test.namespace.enabled") + .isEqualTo(expectedValue); + fs.getAbfsStore().deleteFilesystem(getTestTracingContext(fs, false)); + } + + private AzureBlobFileSystem getNewFSWithHnsConf( + String isNamespaceEnabledAccount) throws Exception { + Configuration rawConfig = new Configuration(); + rawConfig.addResource(TEST_CONFIGURATION_FILE_NAME); + rawConfig.set(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, isNamespaceEnabledAccount); + rawConfig + .setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, true); + rawConfig.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, + getNonExistingUrl()); + return (AzureBlobFileSystem) 
FileSystem.get(rawConfig); + } + + private String getNonExistingUrl() { + String testUri = this.getTestUrl(); + return getAbfsScheme() + "://" + UUID.randomUUID() + testUri + .substring(testUri.indexOf("@")); } @Test @@ -77,22 +145,73 @@ public void testFailedRequestWhenFSNotExist() throws Exception { } @Test - public void testFailedRequestWhenCredentialsNotCorrect() throws Exception { - Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); - Configuration config = this.getRawConfiguration(); - config.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, false); - String accountName = this.getAccountName(); - String configkKey = FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME + "." + accountName; - // Provide a wrong sharedKey - String secret = config.get(configkKey); - secret = (char) (secret.charAt(0) + 1) + secret.substring(1); - config.set(configkKey, secret); - - AzureBlobFileSystem fs = this.getFileSystem(config); - intercept(AbfsRestOperationException.class, - "\"Server failed to authenticate the request. 
Make sure the value of Authorization header is formed correctly including the signature.\", 403", - ()-> { - fs.getIsNamespaceEnabled(); - }); + public void testEnsureGetAclCallIsMadeOnceWhenConfigIsInvalid() + throws Exception { + unsetConfAndEnsureGetAclCallIsMadeOnce(); + ensureGetAclCallIsMadeOnceForInvalidConf(" "); + unsetConfAndEnsureGetAclCallIsMadeOnce(); + ensureGetAclCallIsMadeOnceForInvalidConf("Invalid conf"); + unsetConfAndEnsureGetAclCallIsMadeOnce(); + } + + @Test + public void testEnsureGetAclCallIsNeverMadeWhenConfigIsValid() + throws Exception { + unsetConfAndEnsureGetAclCallIsMadeOnce(); + ensureGetAclCallIsNeverMadeForValidConf(FALSE_STR.toLowerCase()); + unsetConfAndEnsureGetAclCallIsMadeOnce(); + ensureGetAclCallIsNeverMadeForValidConf(FALSE_STR.toUpperCase()); + unsetConfAndEnsureGetAclCallIsMadeOnce(); + ensureGetAclCallIsNeverMadeForValidConf(TRUE_STR.toLowerCase()); + unsetConfAndEnsureGetAclCallIsMadeOnce(); + ensureGetAclCallIsNeverMadeForValidConf(TRUE_STR.toUpperCase()); + unsetConfAndEnsureGetAclCallIsMadeOnce(); + } + + @Test + public void testEnsureGetAclCallIsMadeOnceWhenConfigIsNotPresent() + throws IOException { + unsetConfAndEnsureGetAclCallIsMadeOnce(); + } + + private void ensureGetAclCallIsMadeOnceForInvalidConf(String invalidConf) + throws Exception { + this.getFileSystem().getAbfsStore() + .setNamespaceEnabled(Trilean.getTrilean(invalidConf)); + AbfsClient mockClient = + callAbfsGetIsNamespaceEnabledAndReturnMockAbfsClient(); + verify(mockClient, times(1)) + .getAclStatus(anyString(), any(TracingContext.class)); + } + + private void ensureGetAclCallIsNeverMadeForValidConf(String validConf) + throws Exception { + this.getFileSystem().getAbfsStore() + .setNamespaceEnabled(Trilean.getTrilean(validConf)); + AbfsClient mockClient = + callAbfsGetIsNamespaceEnabledAndReturnMockAbfsClient(); + verify(mockClient, never()) + .getAclStatus(anyString(), any(TracingContext.class)); + } + + private void 
unsetConfAndEnsureGetAclCallIsMadeOnce() throws IOException { + this.getFileSystem().getAbfsStore().setNamespaceEnabled(Trilean.UNKNOWN); + AbfsClient mockClient = + callAbfsGetIsNamespaceEnabledAndReturnMockAbfsClient(); + verify(mockClient, times(1)) + .getAclStatus(anyString(), any(TracingContext.class)); + } + + private AbfsClient callAbfsGetIsNamespaceEnabledAndReturnMockAbfsClient() + throws IOException { + final AzureBlobFileSystem abfs = this.getFileSystem(); + final AzureBlobFileSystemStore abfsStore = abfs.getAbfsStore(); + final AbfsClient mockClient = mock(AbfsClient.class); + doReturn(mock(AbfsRestOperation.class)).when(mockClient) + .getAclStatus(anyString(), any(TracingContext.class)); + abfsStore.setClient(mockClient); + getIsNamespaceEnabled(abfs); + return mockClient; } -} \ No newline at end of file + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java new file mode 100644 index 0000000000000..fedddcc4b16fb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs; + +import org.junit.Assume; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AuthType; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class ITestSharedKeyAuth extends AbstractAbfsIntegrationTest { + + public ITestSharedKeyAuth() throws Exception { + super(); + } + + @Test + public void testWithWrongSharedKey() throws Exception { + Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); + Configuration config = this.getRawConfiguration(); + config.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, + false); + String accountName = this.getAccountName(); + String configkKey = FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME + "." + accountName; + // a wrong sharedKey + String secret = "XjUjsGherkDpljuyThd7RpljhR6uhsFjhlxRpmhgD12lnj7lhfRn8kgPt5" + + "+MJHS7UJNDER+jn6KP6Jnm2ONQlm=="; + config.set(configkKey, secret); + + AbfsClient abfsClient = this.getFileSystem(config).getAbfsClient(); + intercept(AbfsRestOperationException.class, + "\"Server failed to authenticate the request. 
Make sure the value of " + + "Authorization header is formed correctly including the " + + "signature.\", 403", + () -> { + abfsClient + .getAclStatus("/", getTestTracingContext(getFileSystem(), false)); + }); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java new file mode 100644 index 0000000000000..fce2b682f580a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java @@ -0,0 +1,523 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.util.Arrays; +import java.util.Random; +import java.util.UUID; +import java.util.Map; +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.runners.Parameterized; +import org.junit.runner.RunWith; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.BYTES_SENT; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.SEND_REQUESTS; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_APPENDBLOB_ENABLED; + +/** + * Test combination for small writes with flush and close operations. + * This test class formulates an append test flow to assert on various scenarios. + * Test stages: + * 1. Pre-create test file of required size. This is determined by + * startingFileSize parameter. If it is 0, then pre-creation is skipped. + * + * 2. Formulate an append loop or iteration. An iteration, will do N writes + * (determined by numOfClientWrites parameter) with each writing X bytes + * (determined by recurringClientWriteSize parameter). + * + * 3. Determine total number of append iterations needed by a test. + * If intention is to close the outputStream right after append, setting + * directCloseTest parameter will determine 1 append test iteration with an + * ending close. + * Else, it will execute TEST_FLUSH_ITERATION number of test iterations, with + * each doing appends, hflush/hsync and then close. + * + * 4. 
Execute test iterations with asserts on number of store requests made and + * validating file content. + */ +@RunWith(Parameterized.class) +public class ITestSmallWriteOptimization extends AbstractAbfsScaleTest { + private static final int ONE_MB = 1024 * 1024; + private static final int TWO_MB = 2 * ONE_MB; + private static final int TEST_BUFFER_SIZE = TWO_MB; + private static final int HALF_TEST_BUFFER_SIZE = TWO_MB / 2; + private static final int QUARTER_TEST_BUFFER_SIZE = TWO_MB / 4; + private static final int TEST_FLUSH_ITERATION = 2; + + @Parameterized.Parameter + public String testScenario; + + @Parameterized.Parameter(1) + public boolean enableSmallWriteOptimization; + + /** + * If true, will initiate close after appends. (That is, no explicit hflush or + * hsync calls will be made from client app.) + */ + @Parameterized.Parameter(2) + public boolean directCloseTest; + + /** + * If non-zero, test file should be created as pre-requisite with this size. + */ + @Parameterized.Parameter(3) + public Integer startingFileSize; + + /** + * Determines the write sizes to be issued by client app. + */ + @Parameterized.Parameter(4) + public Integer recurringClientWriteSize; + + /** + * Determines the number of Client writes to make. + */ + @Parameterized.Parameter(5) + public Integer numOfClientWrites; + + /** + * True, if the small write optimization is supposed to be effective in + * the scenario. 
+ */ + @Parameterized.Parameter(6) + public boolean flushExpectedToBeMergedWithAppend; + + @Parameterized.Parameters(name = "{0}") + public static Iterable params() { + return Arrays.asList( + // Parameter Order : + // testScenario, + // enableSmallWriteOptimization, directCloseTest, startingFileSize, + // recurringClientWriteSize, numOfClientWrites, flushExpectedToBeMergedWithAppend + new Object[][]{ + // Buffer Size Write tests + { "OptmON_FlushCloseTest_EmptyFile_BufferSizeWrite", + true, false, 0, TEST_BUFFER_SIZE, 1, false + }, + { "OptmON_FlushCloseTest_NonEmptyFile_BufferSizeWrite", + true, false, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 1, false + }, + { "OptmON_CloseTest_EmptyFile_BufferSizeWrite", + true, true, 0, TEST_BUFFER_SIZE, 1, false + }, + { "OptmON_CloseTest_NonEmptyFile_BufferSizeWrite", + true, true, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 1, false + }, + { "OptmOFF_FlushCloseTest_EmptyFile_BufferSizeWrite", + false, false, 0, TEST_BUFFER_SIZE, 1, false + }, + { "OptmOFF_FlushCloseTest_NonEmptyFile_BufferSizeWrite", + false, false, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 1, false + }, + { "OptmOFF_CloseTest_EmptyFile_BufferSizeWrite", + false, true, 0, TEST_BUFFER_SIZE, 1, false + }, + { "OptmOFF_CloseTest_NonEmptyFile_BufferSizeWrite", + false, true, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 1, false + }, + // Less than buffer size write tests + { "OptmON_FlushCloseTest_EmptyFile_LessThanBufferSizeWrite", + true, false, 0, Math.abs(HALF_TEST_BUFFER_SIZE), 1, true + }, + { "OptmON_FlushCloseTest_NonEmptyFile_LessThanBufferSizeWrite", + true, false, 2 * TEST_BUFFER_SIZE, + Math.abs(HALF_TEST_BUFFER_SIZE), 1, true + }, + { "OptmON_CloseTest_EmptyFile_LessThanBufferSizeWrite", + true, true, 0, Math.abs(HALF_TEST_BUFFER_SIZE), 1, true + }, + { "OptmON_CloseTest_NonEmptyFile_LessThanBufferSizeWrite", + true, true, 2 * TEST_BUFFER_SIZE, + Math.abs(HALF_TEST_BUFFER_SIZE), 1, true + }, + { "OptmOFF_FlushCloseTest_EmptyFile_LessThanBufferSizeWrite", + 
false, false, 0, Math.abs(HALF_TEST_BUFFER_SIZE), 1, false + }, + { "OptmOFF_FlushCloseTest_NonEmptyFile_LessThanBufferSizeWrite", + false, false, 2 * TEST_BUFFER_SIZE, + Math.abs(HALF_TEST_BUFFER_SIZE), 1, false + }, + { "OptmOFF_CloseTest_EmptyFile_LessThanBufferSizeWrite", + false, true, 0, Math.abs(HALF_TEST_BUFFER_SIZE), 1, false + }, + { "OptmOFF_CloseTest_NonEmptyFile_LessThanBufferSizeWrite", + false, true, 2 * TEST_BUFFER_SIZE, + Math.abs(HALF_TEST_BUFFER_SIZE), 1, false + }, + // Multiple small writes still less than buffer size + { "OptmON_FlushCloseTest_EmptyFile_MultiSmallWritesStillLessThanBufferSize", + true, false, 0, Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, true + }, + { "OptmON_FlushCloseTest_NonEmptyFile_MultiSmallWritesStillLessThanBufferSize", + true, false, 2 * TEST_BUFFER_SIZE, + Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, true + }, + { "OptmON_CloseTest_EmptyFile_MultiSmallWritesStillLessThanBufferSize", + true, true, 0, Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, true + }, + { "OptmON_CloseTest_NonEmptyFile_MultiSmallWritesStillLessThanBufferSize", + true, true, 2 * TEST_BUFFER_SIZE, + Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, true + }, + { "OptmOFF_FlushCloseTest_EmptyFile_MultiSmallWritesStillLessThanBufferSize", + false, false, 0, Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, false + }, + { "OptmOFF_FlushCloseTest_NonEmptyFile_MultiSmallWritesStillLessThanBufferSize", + false, false, 2 * TEST_BUFFER_SIZE, + Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, false + }, + { "OptmOFF_CloseTest_EmptyFile_MultiSmallWritesStillLessThanBufferSize", + false, true, 0, Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, false + }, + { "OptmOFF_CloseTest_NonEmptyFile_MultiSmallWritesStillLessThanBufferSize", + false, true, 2 * TEST_BUFFER_SIZE, + Math.abs(QUARTER_TEST_BUFFER_SIZE), 3, false + }, + // Multiple full buffer writes + { "OptmON_FlushCloseTest_EmptyFile_MultiBufferSizeWrite", + true, false, 0, TEST_BUFFER_SIZE, 3, false + }, + { 
"OptmON_FlushCloseTest_NonEmptyFile_MultiBufferSizeWrite", + true, false, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 3, false + }, + { "OptmON_CloseTest_EmptyFile_MultiBufferSizeWrite", + true, true, 0, TEST_BUFFER_SIZE, 3, false + }, + { "OptmON_CloseTest_NonEmptyFile_MultiBufferSizeWrite", + true, true, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 3, false + }, + { "OptmOFF_FlushCloseTest_EmptyFile_MultiBufferSizeWrite", + false, false, 0, TEST_BUFFER_SIZE, 3, false + }, + { "OptmOFF_FlushCloseTest_NonEmptyFile_MultiBufferSizeWrite", + false, false, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 3, false + }, + { "OptmOFF_CloseTest_EmptyFile_MultiBufferSizeWrite", + false, true, 0, TEST_BUFFER_SIZE, 3, false + }, + { "OptmOFF_CloseTest_NonEmptyFile_MultiBufferSizeWrite", + false, true, 2 * TEST_BUFFER_SIZE, TEST_BUFFER_SIZE, 3, false + }, + // Multiple full buffers triggered and data less than buffer size pending + { "OptmON_FlushCloseTest_EmptyFile_BufferAndExtraWrite", + true, false, 0, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmON_FlushCloseTest_NonEmptyFile_BufferAndExtraWrite", + true, false, 2 * TEST_BUFFER_SIZE, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmON_CloseTest_EmptyFile__BufferAndExtraWrite", + true, true, 0, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmON_CloseTest_NonEmptyFile_BufferAndExtraWrite", + true, true, 2 * TEST_BUFFER_SIZE, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmOFF_FlushCloseTest_EmptyFile_BufferAndExtraWrite", + false, false, 0, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmOFF_FlushCloseTest_NonEmptyFile_BufferAndExtraWrite", + false, false, 2 * TEST_BUFFER_SIZE, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmOFF_CloseTest_EmptyFile_BufferAndExtraWrite", + false, true, 0, + TEST_BUFFER_SIZE + 
Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + { "OptmOFF_CloseTest_NonEmptyFile_BufferAndExtraWrite", + false, true, 2 * TEST_BUFFER_SIZE, + TEST_BUFFER_SIZE + Math.abs(QUARTER_TEST_BUFFER_SIZE), + 3, false + }, + // 0 byte tests + { "OptmON_FlushCloseTest_EmptyFile_0ByteWrite", + true, false, 0, 0, 1, false + }, + { "OptmON_FlushCloseTest_NonEmptyFile_0ByteWrite", + true, false, 2 * TEST_BUFFER_SIZE, 0, 1, false + }, + { "OptmON_CloseTest_EmptyFile_0ByteWrite", + true, true, 0, 0, 1, false + }, + { "OptmON_CloseTest_NonEmptyFile_0ByteWrite", + true, true, 2 * TEST_BUFFER_SIZE, 0, 1, false + }, + { "OptmOFF_FlushCloseTest_EmptyFile_0ByteWrite", + false, false, 0, 0, 1, false + }, + { "OptmOFF_FlushCloseTest_NonEmptyFile_0ByteWrite", + false, false, 2 * TEST_BUFFER_SIZE, 0, 1, false + }, + { "OptmOFF_CloseTest_EmptyFile_0ByteWrite", + false, true, 0, 0, 1, false + }, + { "OptmOFF_CloseTest_NonEmptyFile_0ByteWrite", + false, true, 2 * TEST_BUFFER_SIZE, 0, 1, false + }, + }); + } + public ITestSmallWriteOptimization() throws Exception { + super(); + } + + @Test + public void testSmallWriteOptimization() + throws IOException { + boolean serviceDefaultOptmSettings = DEFAULT_AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION; + // Tests with Optimization should only run if service has the feature on by + // default. Default settings will be turned on when server support is + // available on all store prod regions. + if (enableSmallWriteOptimization) { + Assume.assumeTrue(serviceDefaultOptmSettings); + } + + final AzureBlobFileSystem currentfs = this.getFileSystem(); + Configuration config = currentfs.getConf(); + boolean isAppendBlobTestSettingEnabled = (config.get(FS_AZURE_TEST_APPENDBLOB_ENABLED) == "true"); + + // This optimization doesnt take effect when append blob is on. 
+ Assume.assumeFalse(isAppendBlobTestSettingEnabled); + + config.set(ConfigurationKeys.AZURE_WRITE_BUFFER_SIZE, Integer.toString(TEST_BUFFER_SIZE)); + config.set(ConfigurationKeys.AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION, Boolean.toString(enableSmallWriteOptimization)); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.get( + currentfs.getUri(), config); + + formulateSmallWriteTestAppendPattern(fs, startingFileSize, + recurringClientWriteSize, numOfClientWrites, + directCloseTest, flushExpectedToBeMergedWithAppend); + } + + /** + * if isDirectCloseTest == true, append + close is triggered + * if isDirectCloseTest == false, append + flush runs are repeated over + * iterations followed by close + * @param fs + * @param startingFileSize + * @param recurringWriteSize + * @param numOfWrites + * @param isDirectCloseTest + * @throws IOException + */ + private void formulateSmallWriteTestAppendPattern(final AzureBlobFileSystem fs, + int startingFileSize, + int recurringWriteSize, + int numOfWrites, + boolean isDirectCloseTest, + boolean flushExpectedToBeMergedWithAppend) throws IOException { + + int totalDataToBeAppended = 0; + int testIteration = 0; + int dataWrittenPerIteration = (numOfWrites * recurringWriteSize); + + if (isDirectCloseTest) { + totalDataToBeAppended = dataWrittenPerIteration; + testIteration = 1; + } else { + testIteration = TEST_FLUSH_ITERATION; + totalDataToBeAppended = testIteration * dataWrittenPerIteration; + } + + int totalFileSize = totalDataToBeAppended + startingFileSize; + // write buffer of file size created. 
This will be used as write + // source and for file content validation + final byte[] writeBuffer = new byte[totalFileSize]; + new Random().nextBytes(writeBuffer); + int writeBufferCursor = 0; + + Path testPath = new Path(getMethodName() + UUID.randomUUID().toString()); + FSDataOutputStream opStream; + + if (startingFileSize > 0) { + writeBufferCursor += createFileWithStartingTestSize(fs, writeBuffer, writeBufferCursor, testPath, + startingFileSize); + opStream = fs.append(testPath); + } else { + opStream = fs.create(testPath); + } + + final int writeBufferSize = fs.getAbfsStore() + .getAbfsConfiguration() + .getWriteBufferSize(); + long expectedTotalRequestsMade = fs.getInstrumentationMap() + .get(CONNECTIONS_MADE.getStatName()); + long expectedRequestsMadeWithData = fs.getInstrumentationMap() + .get(SEND_REQUESTS.getStatName()); + long expectedBytesSent = fs.getInstrumentationMap() + .get(BYTES_SENT.getStatName()); + + while (testIteration > 0) { + // trigger recurringWriteSize appends over numOfWrites + writeBufferCursor += executeWritePattern(opStream, writeBuffer, + writeBufferCursor, numOfWrites, recurringWriteSize); + + int numOfBuffersWrittenToStore = (int) Math.floor( + dataWrittenPerIteration / writeBufferSize); + int dataSizeWrittenToStore = numOfBuffersWrittenToStore * writeBufferSize; + int pendingDataToStore = dataWrittenPerIteration - dataSizeWrittenToStore; + + expectedTotalRequestsMade += numOfBuffersWrittenToStore; + expectedRequestsMadeWithData += numOfBuffersWrittenToStore; + expectedBytesSent += dataSizeWrittenToStore; + + if (isDirectCloseTest) { + opStream.close(); + } else { + opStream.hflush(); + } + + boolean wasDataPendingToBeWrittenToServer = (pendingDataToStore > 0); + // Small write optimization will only work if + // a. config for small write optimization is on + // b. no buffer writes have been triggered since last flush + // c. 
there is some pending data in buffer to write to store + final boolean smallWriteOptimizationEnabled = fs.getAbfsStore() + .getAbfsConfiguration() + .isSmallWriteOptimizationEnabled(); + boolean flushWillBeMergedWithAppend = smallWriteOptimizationEnabled + && (numOfBuffersWrittenToStore == 0) + && (wasDataPendingToBeWrittenToServer); + + Assertions.assertThat(flushWillBeMergedWithAppend) + .describedAs(flushExpectedToBeMergedWithAppend + ? "Flush was to be merged with Append" + : "Flush should not have been merged with Append") + .isEqualTo(flushExpectedToBeMergedWithAppend); + + int totalAppendFlushCalls = (flushWillBeMergedWithAppend + ? 1 // 1 append (with flush and close param) + : (wasDataPendingToBeWrittenToServer) + ? 2 // 1 append + 1 flush (with close) + : 1); // 1 flush (with close) + + expectedTotalRequestsMade += totalAppendFlushCalls; + expectedRequestsMadeWithData += totalAppendFlushCalls; + expectedBytesSent += wasDataPendingToBeWrittenToServer + ? pendingDataToStore + : 0; + + assertOpStats(fs.getInstrumentationMap(), expectedTotalRequestsMade, + expectedRequestsMadeWithData, expectedBytesSent); + + if (isDirectCloseTest) { + // stream already closed + validateStoreAppends(fs, testPath, totalFileSize, writeBuffer); + return; + } + + testIteration--; + } + + opStream.close(); + expectedTotalRequestsMade += 1; + expectedRequestsMadeWithData += 1; + // no change in expectedBytesSent + assertOpStats(fs.getInstrumentationMap(), expectedTotalRequestsMade, expectedRequestsMadeWithData, expectedBytesSent); + + validateStoreAppends(fs, testPath, totalFileSize, writeBuffer); + } + + private int createFileWithStartingTestSize(AzureBlobFileSystem fs, byte[] writeBuffer, + int writeBufferCursor, Path testPath, int startingFileSize) + throws IOException { + FSDataOutputStream opStream = fs.create(testPath); + writeBufferCursor += executeWritePattern(opStream, + writeBuffer, + writeBufferCursor, + 1, + startingFileSize); + + opStream.close(); + 
Assertions.assertThat(fs.getFileStatus(testPath).getLen()) + .describedAs("File should be of size %d at the start of test.", + startingFileSize) + .isEqualTo(startingFileSize); + + return writeBufferCursor; + } + + private void validateStoreAppends(AzureBlobFileSystem fs, + Path testPath, + int totalFileSize, + byte[] bufferWritten) + throws IOException { + // Final validation + Assertions.assertThat(fs.getFileStatus(testPath).getLen()) + .describedAs("File should be of size %d at the end of test.", + totalFileSize) + .isEqualTo(totalFileSize); + + byte[] fileReadFromStore = new byte[totalFileSize]; + fs.open(testPath).read(fileReadFromStore, 0, totalFileSize); + + assertArrayEquals("Test file content incorrect", bufferWritten, + fileReadFromStore); + } + + private void assertOpStats(Map metricMap, + long expectedTotalRequestsMade, + long expectedRequestsMadeWithData, + long expectedBytesSent) { + assertAbfsStatistics(CONNECTIONS_MADE, expectedTotalRequestsMade, + metricMap); + assertAbfsStatistics(SEND_REQUESTS, expectedRequestsMadeWithData, + metricMap); + assertAbfsStatistics(BYTES_SENT, expectedBytesSent, metricMap); + } + + private int executeWritePattern(FSDataOutputStream opStream, + byte[] buffer, + int startOffset, + int writeLoopCount, + int writeSize) + throws IOException { + int dataSizeWritten = startOffset; + + while (writeLoopCount > 0) { + opStream.write(buffer, startOffset, writeSize); + startOffset += writeSize; + writeLoopCount--; + } + + dataSizeWritten = startOffset - dataSizeWritten; + return dataSizeWritten; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java index e8f845c9a39bc..0534cdda99fc8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java @@ -58,11 +58,12 @@ public void testListFileStatus() throws Exception { AzureBlobFileSystem fs = getFileSystem(); // test only valid for non-namespace enabled account Assume.assumeFalse("Namespace enabled account does not support this test,", - fs.getIsNamespaceEnabled()); + getIsNamespaceEnabled(fs)); NativeAzureFileSystem wasb = getWasbFileSystem(); - Path path1 = new Path("/testfiles/~12/!008/3/abFsTestfile"); + Path testFiles = path("/testfiles"); + Path path1 = new Path(testFiles + "/~12/!008/3/abFsTestfile"); try(FSDataOutputStream abfsStream = fs.create(path1, true)) { abfsStream.write(ABFS_TEST_CONTEXT.getBytes()); abfsStream.flush(); @@ -70,7 +71,7 @@ public void testListFileStatus() throws Exception { } // create file using wasb - Path path2 = new Path("/testfiles/~12/!008/3/nativeFsTestfile"); + Path path2 = new Path(testFiles + "/~12/!008/3/nativeFsTestfile"); LOG.info("{}", wasb.getUri()); try(FSDataOutputStream nativeFsStream = wasb.create(path2, true)) { nativeFsStream.write(WASB_TEST_CONTEXT.getBytes()); @@ -78,8 +79,8 @@ public void testListFileStatus() throws Exception { nativeFsStream.hsync(); } // list file using abfs and wasb - FileStatus[] abfsFileStatus = fs.listStatus(new Path("/testfiles/~12/!008/3/")); - FileStatus[] nativeFsFileStatus = wasb.listStatus(new Path("/testfiles/~12/!008/3/")); + FileStatus[] abfsFileStatus = fs.listStatus(new Path(testFiles + "/~12/!008/3/")); + FileStatus[] nativeFsFileStatus = wasb.listStatus(new Path(testFiles + "/~12/!008/3/")); assertEquals(2, abfsFileStatus.length); assertEquals(2, nativeFsFileStatus.length); @@ -93,12 +94,13 @@ public void testReadFile() throws Exception { AzureBlobFileSystem abfs = getFileSystem(); // test only valid for non-namespace enabled account Assume.assumeFalse("Namespace enabled account does not support this test", - abfs.getIsNamespaceEnabled()); + 
getIsNamespaceEnabled(abfs)); NativeAzureFileSystem wasb = getWasbFileSystem(); + Path testFile = path("/testReadFile"); for (int i = 0; i< 4; i++) { - Path path = new Path("/testReadFile/~12/!008/testfile" + i); + Path path = new Path(testFile + "/~12/!008/testfile" + i); final FileSystem createFs = createFileWithAbfs[i] ? abfs : wasb; // Write @@ -133,12 +135,13 @@ public void testDir() throws Exception { AzureBlobFileSystem abfs = getFileSystem(); // test only valid for non-namespace enabled account Assume.assumeFalse("Namespace enabled account does not support this test", - abfs.getIsNamespaceEnabled()); + getIsNamespaceEnabled(abfs)); NativeAzureFileSystem wasb = getWasbFileSystem(); + Path testDir = path("/testDir"); for (int i = 0; i < 4; i++) { - Path path = new Path("/testDir/t" + i); + Path path = new Path(testDir + "/t" + i); //create final FileSystem createFs = createDirWithAbfs[i] ? abfs : wasb; assertTrue(createFs.mkdirs(path)); @@ -159,7 +162,7 @@ public void testUrlConversion(){ String abfsUrl = "abfs://abcde-1111-1111-1111-1111@xxxx.dfs.xxx.xxx.xxxx.xxxx"; String wabsUrl = "wasb://abcde-1111-1111-1111-1111@xxxx.blob.xxx.xxx.xxxx.xxxx"; assertEquals(abfsUrl, wasbUrlToAbfsUrl(wabsUrl)); - assertEquals(wabsUrl, abfsUrlToWasbUrl(abfsUrl)); + assertEquals(wabsUrl, abfsUrlToWasbUrl(abfsUrl, false)); } @Test @@ -168,15 +171,16 @@ public void testSetWorkingDirectory() throws Exception { AzureBlobFileSystem abfs = getFileSystem(); // test only valid for non-namespace enabled account Assume.assumeFalse("Namespace enabled account does not support this test", - abfs.getIsNamespaceEnabled()); + getIsNamespaceEnabled(abfs)); NativeAzureFileSystem wasb = getWasbFileSystem(); - Path d1d4 = new Path("/d1/d2/d3/d4"); + Path d1 = path("/d1"); + Path d1d4 = new Path(d1 + "/d2/d3/d4"); assertMkdirs(abfs, d1d4); //set working directory to path1 - Path path1 = new Path("/d1/d2"); + Path path1 = new Path(d1 + "/d2"); wasb.setWorkingDirectory(path1); 
abfs.setWorkingDirectory(path1); assertEquals(path1, wasb.getWorkingDirectory()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java index 0f550d825e101..fe25477beb61e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java @@ -30,10 +30,11 @@ import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.StringConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.LongConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.Base64StringConfigurationValidatorAnnotation; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConfigurationPropertyNotFoundException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.KeyProviderException; import org.apache.hadoop.fs.azurebfs.utils.Base64; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SSL_CHANNEL_MODE_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_READ_AHEAD_RANGE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_WRITE_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_MAX_RETRY_ATTEMPTS; @@ -141,6 +142,7 @@ public void testConfigServiceImplAnnotatedFieldsInitialized() throws Exception { assertEquals(DEFAULT_MAX_RETRY_ATTEMPTS, abfsConfiguration.getMaxIoRetries()); 
assertEquals(MAX_AZURE_BLOCK_SIZE, abfsConfiguration.getAzureBlockSize()); assertEquals(AZURE_BLOCK_LOCATION_HOST_DEFAULT, abfsConfiguration.getAzureBlockLocationHost()); + assertEquals(DEFAULT_READ_AHEAD_RANGE, abfsConfiguration.getReadAheadRange()); } @Test @@ -155,7 +157,7 @@ public void testGetAccountKey() throws Exception { assertEquals(this.encodedAccountKey, accountKey); } - @Test(expected = ConfigurationPropertyNotFoundException.class) + @Test(expected = KeyProviderException.class) public void testGetAccountKeyWithNonExistingAccountName() throws Exception { Configuration configuration = new Configuration(); configuration.addResource(TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME); @@ -182,4 +184,11 @@ public void testSSLSocketFactoryConfiguration() assertEquals(DelegatingSSLSocketFactory.SSLChannelMode.OpenSSL, localAbfsConfiguration.getPreferredSSLFactoryOption()); } -} \ No newline at end of file + public static AbfsConfiguration updateRetryConfigs(AbfsConfiguration abfsConfig, + int retryCount, + int backoffTime) { + abfsConfig.setMaxIoRetries(retryCount); + abfsConfig.setMaxBackoffIntervalMilliseconds(backoffTime); + return abfsConfig; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsErrorTranslation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsErrorTranslation.java new file mode 100644 index 0000000000000..2c14b7af2821a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsErrorTranslation.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.FileNotFoundException; +import java.net.HttpURLConnection; +import java.nio.file.AccessDeniedException; + +import org.junit.Test; + +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.checkException; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.AUTHORIZATION_PERMISSION_MISS_MATCH; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.PATH_ALREADY_EXISTS; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.PATH_NOT_FOUND; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test suite to verify exception conversion, filtering etc. 
+ */ +public class TestAbfsErrorTranslation extends AbstractHadoopTestBase { + + public static final Path PATH = new Path("abfs//store/path"); + + @Test + public void testConvert403ToAccessDenied() throws Throwable { + assertTranslated(HttpURLConnection.HTTP_FORBIDDEN, + AUTHORIZATION_PERMISSION_MISS_MATCH, + AccessDeniedException.class, + AUTHORIZATION_PERMISSION_MISS_MATCH.getErrorCode()); + } + + @Test + public void testConvert404ToFNFE() throws Throwable { + assertTranslated(HttpURLConnection.HTTP_NOT_FOUND, + PATH_NOT_FOUND, + FileNotFoundException.class, + PATH_NOT_FOUND.getErrorCode()); + } + + @Test + public void testConvert409ToFileAlreadyExistsException() throws Throwable { + assertTranslated(HttpURLConnection.HTTP_CONFLICT, + PATH_ALREADY_EXISTS, + FileAlreadyExistsException.class, + PATH_ALREADY_EXISTS.getErrorCode()); + } + + /** + * Assert that for a given status code and AzureServiceErrorCode, a specific + * exception class is raised. + * @param type of exception + * @param httpStatus http status code + * @param exitCode AzureServiceErrorCode + * @param clazz class of raised exception + * @param expectedText text to expect in the exception + * @throws Exception any other exception than the one expected + */ + private void assertTranslated( + int httpStatus, AzureServiceErrorCode exitCode, + Class clazz, String expectedText) throws Exception { + AbfsRestOperationException ex = + new AbfsRestOperationException(httpStatus, exitCode.getErrorCode(), + "", null); + intercept(clazz, expectedText, () -> { + checkException(PATH, ex); + return "expected exception translation from " + ex; + }); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsInputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsInputStreamStatistics.java new file mode 100644 index 0000000000000..22c247f98af63 --- /dev/null +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsInputStreamStatistics.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamStatisticsImpl; + +public class TestAbfsInputStreamStatistics extends AbstractAbfsIntegrationTest { + + private static final int OPERATIONS = 100; + + public TestAbfsInputStreamStatistics() throws Exception { + } + + /** + * Test to check the bytesReadFromBuffer statistic value from AbfsInputStream. + */ + @Test + public void testBytesReadFromBufferStatistic() { + describe("Testing bytesReadFromBuffer statistics value in AbfsInputStream"); + + AbfsInputStreamStatisticsImpl abfsInputStreamStatistics = + new AbfsInputStreamStatisticsImpl(); + + //Increment the bytesReadFromBuffer value. + for (int i = 0; i < OPERATIONS; i++) { + abfsInputStreamStatistics.bytesReadFromBuffer(1); + } + + /* + * Since we incremented the bytesReadFromBuffer OPERATIONS times, this + * should be the expected value. 
+ */ + assertEquals("Mismatch in bytesReadFromBuffer value", OPERATIONS, + abfsInputStreamStatistics.getBytesReadFromBuffer()); + + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsNetworkStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsNetworkStatistics.java new file mode 100644 index 0000000000000..628ad30863c9a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsNetworkStatistics.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.util.Map; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; +import org.apache.hadoop.fs.statistics.DurationTracker; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StoreStatisticNames; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_PATCH_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_POST_REQUEST; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupMeanStatistic; + +public class TestAbfsNetworkStatistics extends AbstractAbfsIntegrationTest { + + private static final Logger LOG = + LoggerFactory.getLogger(TestAbfsNetworkStatistics.class); + private static final int LARGE_OPERATIONS = 1000; + private static final AbfsStatistic[] HTTP_DURATION_TRACKER_LIST = { + HTTP_POST_REQUEST, + HTTP_PATCH_REQUEST + }; + + public TestAbfsNetworkStatistics() throws Exception { + } + + /** + * Test to check correct values of read and write throttling statistics in + * {@code AbfsClientThrottlingAnalyzer}. + */ + @Test + public void testAbfsThrottlingStatistics() throws IOException { + describe("Test to check correct values of read throttle and write " + + "throttle statistics in Abfs"); + + AbfsCounters statistics = + new AbfsCountersImpl(getFileSystem().getUri()); + + /* + * Calling the throttle methods to check correct summation and values of + * the counters. 
+ */ + for (int i = 0; i < LARGE_OPERATIONS; i++) { + statistics.incrementCounter(AbfsStatistic.READ_THROTTLES, 1); + statistics.incrementCounter(AbfsStatistic.WRITE_THROTTLES, 1); + } + + Map metricMap = statistics.toMap(); + + /* + * Test to check read and write throttle statistics gave correct values for + * 1000 calls. + */ + assertAbfsStatistics(AbfsStatistic.READ_THROTTLES, LARGE_OPERATIONS, + metricMap); + assertAbfsStatistics(AbfsStatistic.WRITE_THROTTLES, LARGE_OPERATIONS, + metricMap); + } + + /** + * Test to check if the DurationTrackers are tracking as expected whilst + * doing some work. + */ + @Test + public void testAbfsNetworkDurationTrackers() + throws IOException, InterruptedException { + describe("Test to verify the actual values of DurationTrackers are " + + "greater than 0.0 while tracking some work."); + + AbfsCounters abfsCounters = new AbfsCountersImpl(getFileSystem().getUri()); + // Start dummy work for the DurationTrackers and start tracking. + try (DurationTracker ignoredPatch = + abfsCounters.trackDuration(AbfsStatistic.getStatNameFromHttpCall(AbfsHttpConstants.HTTP_METHOD_PATCH)); + DurationTracker ignoredPost = + abfsCounters.trackDuration(AbfsStatistic.getStatNameFromHttpCall(AbfsHttpConstants.HTTP_METHOD_POST)) + ) { + // Emulates doing some work. + Thread.sleep(10); + LOG.info("Execute some Http requests..."); + } + + // Extract the iostats from the abfsCounters instance. + IOStatistics ioStatistics = extractStatistics(abfsCounters); + // Asserting that the durationTrackers have mean > 0.0. + for (AbfsStatistic abfsStatistic : HTTP_DURATION_TRACKER_LIST) { + Assertions.assertThat(lookupMeanStatistic(ioStatistics, + abfsStatistic.getStatName() + StoreStatisticNames.SUFFIX_MEAN).mean()) + .describedAs("The DurationTracker Named " + abfsStatistic.getStatName() + + " Doesn't match the expected value") + .isGreaterThan(0.0); + } + } + + /** + * Test to check if abfs counter for HTTP 503 statusCode works correctly + * when incremented. 
+ */ + @Test + public void testAbfsHTTP503ErrorCounter() throws IOException { + describe("tests to verify the expected value of the HTTP 503 error " + + "counter is equal to number of times incremented."); + + AbfsCounters abfsCounters = new AbfsCountersImpl(getFileSystem().getUri()); + // Incrementing the server_unavailable counter. + for (int i = 0; i < LARGE_OPERATIONS; i++) { + abfsCounters.incrementCounter(AbfsStatistic.SERVER_UNAVAILABLE, 1); + } + // Getting the IOStatistics counter map from abfsCounters. + Map metricsMap = abfsCounters.toMap(); + assertAbfsStatistics(AbfsStatistic.SERVER_UNAVAILABLE, LARGE_OPERATIONS, + metricsMap); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsOutputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsOutputStreamStatistics.java new file mode 100644 index 0000000000000..5f9404302bd2c --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsOutputStreamStatistics.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.util.Random; + +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStreamStatisticsImpl; + +/** + * Unit tests for AbfsOutputStream statistics. + */ +public class TestAbfsOutputStreamStatistics + extends AbstractAbfsIntegrationTest { + + private static final int LOW_RANGE_FOR_RANDOM_VALUE = 49; + private static final int HIGH_RANGE_FOR_RANDOM_VALUE = 9999; + private static final int OPERATIONS = 10; + + public TestAbfsOutputStreamStatistics() throws Exception { + } + + /** + * Tests to check number of bytes failed to upload in + * {@link AbfsOutputStream}. + */ + @Test + public void testAbfsOutputStreamBytesFailed() { + describe("Testing number of bytes failed during upload in AbfsOutputSteam"); + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + new AbfsOutputStreamStatisticsImpl(); + + //Test for zero bytes uploaded. + assertEquals("Mismatch in number of bytes failed to upload", 0, + abfsOutputStreamStatistics.getBytesUploadFailed()); + + //Populating small random value for bytesFailed. + int randomBytesFailed = new Random().nextInt(LOW_RANGE_FOR_RANDOM_VALUE); + abfsOutputStreamStatistics.uploadFailed(randomBytesFailed); + //Test for bytes failed to upload. + assertEquals("Mismatch in number of bytes failed to upload", + randomBytesFailed, abfsOutputStreamStatistics.getBytesUploadFailed()); + + //Reset statistics for the next test. + abfsOutputStreamStatistics = new AbfsOutputStreamStatisticsImpl(); + + /* + * Entering multiple random values for bytesFailed to check correct + * summation of values. + */ + int expectedBytesFailed = 0; + for (int i = 0; i < OPERATIONS; i++) { + randomBytesFailed = new Random().nextInt(HIGH_RANGE_FOR_RANDOM_VALUE); + abfsOutputStreamStatistics.uploadFailed(randomBytesFailed); + expectedBytesFailed += randomBytesFailed; + } + //Test for bytes failed to upload. 
+ assertEquals("Mismatch in number of bytes failed to upload", + expectedBytesFailed, abfsOutputStreamStatistics.getBytesUploadFailed()); + } + + /** + * Tests to check time spent on waiting for tasks to be complete on a + * blocking queue in {@link AbfsOutputStream}. + */ + @Test + public void testAbfsOutputStreamTimeSpentOnWaitTask() { + describe("Testing time Spent on waiting for task to be completed in " + + "AbfsOutputStream"); + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + new AbfsOutputStreamStatisticsImpl(); + + //Test for initial value of timeSpentWaitTask. + assertEquals("Mismatch in time spent on waiting for tasks to complete", 0, + abfsOutputStreamStatistics.getTimeSpentOnTaskWait()); + + abfsOutputStreamStatistics + .timeSpentTaskWait(); + //Test for one op call value of timeSpentWaitTask. + assertEquals("Mismatch in time spent on waiting for tasks to complete", + 1, abfsOutputStreamStatistics.getTimeSpentOnTaskWait()); + + //Reset statistics for the next test. + abfsOutputStreamStatistics = new AbfsOutputStreamStatisticsImpl(); + + /* + * Entering multiple values for timeSpentTaskWait() to check the + * summation is happening correctly. Also calculating the expected result. + */ + for (int i = 0; i < OPERATIONS; i++) { + abfsOutputStreamStatistics.timeSpentTaskWait(); + } + + /* + * Test to check correct value of timeSpentTaskWait after OPERATIONS + * number of op calls. + */ + assertEquals("Mismatch in time spent on waiting for tasks to complete", + OPERATIONS, + abfsOutputStreamStatistics.getTimeSpentOnTaskWait()); + } + + /** + * Unit Tests to check correct values of queue shrunk operations in + * AbfsOutputStream. + * + */ + @Test + public void testAbfsOutputStreamQueueShrink() { + describe("Testing queue shrink operations by AbfsOutputStream"); + + AbfsOutputStreamStatisticsImpl abfsOutputStreamStatistics = + new AbfsOutputStreamStatisticsImpl(); + + //Test for shrinking queue zero time. 
+ assertEquals("Mismatch in queue shrunk operations", 0, + abfsOutputStreamStatistics.getQueueShrunkOps()); + + abfsOutputStreamStatistics.queueShrunk(); + + //Test for shrinking queue 1 time. + assertEquals("Mismatch in queue shrunk operations", 1, + abfsOutputStreamStatistics.getQueueShrunkOps()); + + //Reset statistics for the next test. + abfsOutputStreamStatistics = new AbfsOutputStreamStatisticsImpl(); + + /* + * Entering random values for queueShrunkOps and checking the correctness + * of summation for the statistic. + */ + int randomQueueValues = new Random().nextInt(HIGH_RANGE_FOR_RANDOM_VALUE); + for (int i = 0; i < randomQueueValues * OPERATIONS; i++) { + abfsOutputStreamStatistics.queueShrunk(); + } + /* + * Test for random times incrementing queue shrunk operations. + */ + assertEquals("Mismatch in queue shrunk operations", + randomQueueValues * OPERATIONS, + abfsOutputStreamStatistics.getQueueShrunkOps()); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsStatistics.java new file mode 100644 index 0000000000000..f831d2d4cd26b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsStatistics.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.util.Map; + +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; + +/** + * Unit tests for Abfs common counters. + */ +public class TestAbfsStatistics extends AbstractAbfsIntegrationTest { + + private static final int LARGE_OPS = 100; + + public TestAbfsStatistics() throws Exception { + } + + /** + * Tests for op_get_delegation_token and error_ignore counter values. + */ + @Test + public void testInitializeStats() throws IOException { + describe("Testing the counter values after Abfs is initialised"); + + AbfsCounters instrumentation = + new AbfsCountersImpl(getFileSystem().getUri()); + + //Testing summation of the counter values. 
+ for (int i = 0; i < LARGE_OPS; i++) { + instrumentation.incrementCounter(AbfsStatistic.CALL_GET_DELEGATION_TOKEN, 1); + instrumentation.incrementCounter(AbfsStatistic.ERROR_IGNORED, 1); + } + + Map metricMap = instrumentation.toMap(); + + assertAbfsStatistics(AbfsStatistic.CALL_GET_DELEGATION_TOKEN, LARGE_OPS, + metricMap); + assertAbfsStatistics(AbfsStatistic.ERROR_IGNORED, LARGE_OPS, metricMap); + + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java index a790cf214872b..86bb2adbe56ed 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java @@ -19,15 +19,32 @@ package org.apache.hadoop.fs.azurebfs; import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConfigurationPropertyNotFoundException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TokenAccessProviderException; +import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.CustomTokenProviderAdapter; +import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT; +import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; -import org.junit.Test; - /** * Tests correct precedence of various configurations that might be returned. * Configuration can be specified with the account name as a suffix to the @@ -40,6 +57,20 @@ * that do allow default values (all others) follow another form. */ public class TestAccountConfiguration { + private static final String TEST_OAUTH_PROVIDER_CLASS_CONFIG = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"; + private static final String TEST_CUSTOM_PROVIDER_CLASS_CONFIG = "org.apache.hadoop.fs.azurebfs.oauth2.RetryTestTokenProvider"; + private static final String TEST_SAS_PROVIDER_CLASS_CONFIG_1 = "org.apache.hadoop.fs.azurebfs.extensions.MockErrorSASTokenProvider"; + private static final String TEST_SAS_PROVIDER_CLASS_CONFIG_2 = "org.apache.hadoop.fs.azurebfs.extensions.MockSASTokenProvider"; + + private static final String TEST_OAUTH_ENDPOINT = "oauthEndpoint"; + private static final String TEST_CLIENT_ID = "clientId"; + private static final String TEST_CLIENT_SECRET = "clientSecret"; + + private static final List CONFIG_KEYS = + Collections.unmodifiableList(Arrays.asList( + FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT, + FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID, + FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET)); @Test public void testStringPrecedence() @@ -248,7 +279,7 @@ private class GetClassImpl1 implements GetClassInterface { } @Test - public void testClassPrecedence() + public void testClass() throws IllegalAccessException, IOException, 
InvalidConfigurationValueException { final String accountName = "account"; @@ -264,22 +295,209 @@ public void testClassPrecedence() conf.setClass(globalKey, class0, xface); assertEquals("Default value returned even though account-agnostic config was set", - abfsConf.getClass(globalKey, class1, xface), class0); + abfsConf.getAccountAgnosticClass(globalKey, class1, xface), class0); conf.unset(globalKey); assertEquals("Default value not returned even though config was unset", - abfsConf.getClass(globalKey, class1, xface), class1); + abfsConf.getAccountAgnosticClass(globalKey, class1, xface), class1); conf.setClass(accountKey, class0, xface); assertEquals("Default value returned even though account-specific config was set", - abfsConf.getClass(globalKey, class1, xface), class0); + abfsConf.getAccountSpecificClass(globalKey, class1, xface), class0); conf.unset(accountKey); assertEquals("Default value not returned even though config was unset", - abfsConf.getClass(globalKey, class1, xface), class1); + abfsConf.getAccountSpecificClass(globalKey, class1, xface), class1); conf.setClass(accountKey, class1, xface); conf.setClass(globalKey, class0, xface); assertEquals("Account-agnostic or default value returned even though account-specific config was set", - abfsConf.getClass(globalKey, class0, xface), class1); + abfsConf.getAccountSpecificClass(globalKey, class0, xface), class1); + } + + @Test + public void testSASProviderPrecedence() + throws IOException, IllegalAccessException { + final String accountName = "account"; + + final Configuration conf = new Configuration(); + final AbfsConfiguration abfsConf = new AbfsConfiguration(conf, accountName); + + // AccountSpecific: SAS with provider set as SAS_Provider_1 + abfsConf.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME + "." + accountName, + "SAS"); + abfsConf.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE + "." 
+ accountName, + TEST_SAS_PROVIDER_CLASS_CONFIG_1); + + // Global: SAS with provider set as SAS_Provider_2 + abfsConf.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, + AuthType.SAS.toString()); + abfsConf.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, + TEST_SAS_PROVIDER_CLASS_CONFIG_2); + + Assertions.assertThat( + abfsConf.getSASTokenProvider().getClass().getName()) + .describedAs( + "Account-specific SAS token provider should be in effect.") + .isEqualTo(TEST_SAS_PROVIDER_CLASS_CONFIG_1); + } + + @Test + public void testAccessTokenProviderPrecedence() + throws IllegalAccessException, IOException { + final String accountName = "account"; + + final Configuration conf = new Configuration(); + final AbfsConfiguration abfsConf = new AbfsConfiguration(conf, accountName); + + // Global: Custom , AccountSpecific: OAuth + testGlobalAndAccountOAuthPrecedence(abfsConf, AuthType.Custom, + AuthType.OAuth); + + // Global: OAuth , AccountSpecific: Custom + testGlobalAndAccountOAuthPrecedence(abfsConf, AuthType.OAuth, + AuthType.Custom); + + // Global: (non-oAuth) SAS , AccountSpecific: Custom + testGlobalAndAccountOAuthPrecedence(abfsConf, AuthType.SAS, + AuthType.Custom); + + // Global: Custom , AccountSpecific: - + testGlobalAndAccountOAuthPrecedence(abfsConf, AuthType.Custom, null); + + // Global: OAuth , AccountSpecific: - + testGlobalAndAccountOAuthPrecedence(abfsConf, AuthType.OAuth, null); + + // Global: - , AccountSpecific: Custom + testGlobalAndAccountOAuthPrecedence(abfsConf, null, AuthType.Custom); + + // Global: - , AccountSpecific: OAuth + testGlobalAndAccountOAuthPrecedence(abfsConf, null, AuthType.OAuth); + } + + @Test + public void testConfigPropNotFound() throws Throwable { + final String accountName = "account"; + + final Configuration conf = new Configuration(); + final AbfsConfiguration abfsConf = new AbfsConfiguration(conf, accountName); + + for (String key : CONFIG_KEYS) { + setAuthConfig(abfsConf, true, AuthType.OAuth); + abfsConf.unset(key + "." 
+ accountName); + testMissingConfigKey(abfsConf, key); + } + + unsetAuthConfig(abfsConf, false); + unsetAuthConfig(abfsConf, true); + } + + private static void testMissingConfigKey(final AbfsConfiguration abfsConf, + final String confKey) throws Throwable { + GenericTestUtils.assertExceptionContains("Configuration property " + + confKey + " not found.", + LambdaTestUtils.verifyCause( + ConfigurationPropertyNotFoundException.class, + LambdaTestUtils.intercept(TokenAccessProviderException.class, + () -> abfsConf.getTokenProvider().getClass().getTypeName()))); + } + + public void testGlobalAndAccountOAuthPrecedence(AbfsConfiguration abfsConf, + AuthType globalAuthType, + AuthType accountSpecificAuthType) + throws IOException { + if (globalAuthType == null) { + unsetAuthConfig(abfsConf, false); + } else { + setAuthConfig(abfsConf, false, globalAuthType); + } + + if (accountSpecificAuthType == null) { + unsetAuthConfig(abfsConf, true); + } else { + setAuthConfig(abfsConf, true, accountSpecificAuthType); + } + + // If account specific AuthType is present, precedence is always for it. + AuthType expectedEffectiveAuthType; + if (accountSpecificAuthType != null) { + expectedEffectiveAuthType = accountSpecificAuthType; + } else { + expectedEffectiveAuthType = globalAuthType; + } + + Class expectedEffectiveTokenProviderClassType = + (expectedEffectiveAuthType == AuthType.OAuth) + ? ClientCredsTokenProvider.class + : CustomTokenProviderAdapter.class; + + Assertions.assertThat( + abfsConf.getTokenProvider().getClass().getTypeName()) + .describedAs( + "Account-specific settings takes precendence to global" + + " settings. 
In absence of Account settings, global settings " + + "should take effect.") + .isEqualTo(expectedEffectiveTokenProviderClassType.getTypeName()); + + + unsetAuthConfig(abfsConf, false); + unsetAuthConfig(abfsConf, true); + } + + public void setAuthConfig(AbfsConfiguration abfsConf, + boolean isAccountSetting, + AuthType authType) { + final String accountNameSuffix = "." + abfsConf.getAccountName(); + String authKey = FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME + + (isAccountSetting ? accountNameSuffix : ""); + String providerClassKey = ""; + String providerClassValue = ""; + + switch (authType) { + case OAuth: + providerClassKey = FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME + + (isAccountSetting ? accountNameSuffix : ""); + providerClassValue = TEST_OAUTH_PROVIDER_CLASS_CONFIG; + + abfsConf.set(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT + + ((isAccountSetting) ? accountNameSuffix : ""), + TEST_OAUTH_ENDPOINT); + abfsConf.set(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID + + ((isAccountSetting) ? accountNameSuffix : ""), + TEST_CLIENT_ID); + abfsConf.set(FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET + + ((isAccountSetting) ? accountNameSuffix : ""), + TEST_CLIENT_SECRET); + break; + + case Custom: + providerClassKey = FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME + + (isAccountSetting ? accountNameSuffix : ""); + providerClassValue = TEST_CUSTOM_PROVIDER_CLASS_CONFIG; + break; + + case SAS: + providerClassKey = FS_AZURE_SAS_TOKEN_PROVIDER_TYPE + + (isAccountSetting ? accountNameSuffix : ""); + providerClassValue = TEST_SAS_PROVIDER_CLASS_CONFIG_1; + break; + + default: // set nothing + } + + abfsConf.set(authKey, authType.toString()); + abfsConf.set(providerClassKey, providerClassValue); + } + + private void unsetAuthConfig(AbfsConfiguration abfsConf, boolean isAccountSettings) { + String accountNameSuffix = + isAccountSettings ? ("." 
+ abfsConf.getAccountName()) : ""; + + abfsConf.unset(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME + accountNameSuffix); + abfsConf.unset(FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME + accountNameSuffix); + abfsConf.unset(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE + accountNameSuffix); + + abfsConf.unset(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT + accountNameSuffix); + abfsConf.unset(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID + accountNameSuffix); + abfsConf.unset(FS_AZURE_ACCOUNT_OAUTH_CLIENT_SECRET + accountNameSuffix); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java new file mode 100644 index 0000000000000..7b4421c2c44eb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java @@ -0,0 +1,273 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.AssumptionViolatedException; +import org.junit.Ignore; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.hadoop.fs.CommonPathCapabilities; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLIENT_CORRELATIONID; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; + +public class TestTracingContext extends AbstractAbfsIntegrationTest { + private static final String[] CLIENT_CORRELATIONID_LIST = { + "valid-corr-id-123", "inval!d", ""}; + private static final int HTTP_CREATED = 201; + + public TestTracingContext() throws Exception { + super(); + } + + @Test + public void testClientCorrelationId() throws Exception { + 
checkCorrelationConfigValidation(CLIENT_CORRELATIONID_LIST[0], true); + checkCorrelationConfigValidation(CLIENT_CORRELATIONID_LIST[1], false); + checkCorrelationConfigValidation(CLIENT_CORRELATIONID_LIST[2], false); + } + + private String getOctalNotation(FsPermission fsPermission) { + Preconditions.checkNotNull(fsPermission, "fsPermission"); + return String + .format(AbfsHttpConstants.PERMISSION_FORMAT, fsPermission.toOctal()); + } + + private String getRelativePath(final Path path) { + Preconditions.checkNotNull(path, "path"); + return path.toUri().getPath(); + } + + public void checkCorrelationConfigValidation(String clientCorrelationId, + boolean includeInHeader) throws Exception { + Configuration conf = getRawConfiguration(); + conf.set(FS_AZURE_CLIENT_CORRELATIONID, clientCorrelationId); + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(conf); + + String correlationID = fs.getClientCorrelationId(); + if (includeInHeader) { + Assertions.assertThat(correlationID) + .describedAs("Correlation ID should match config when valid") + .isEqualTo(clientCorrelationId); + } else { + Assertions.assertThat(correlationID) + .describedAs("Invalid ID should be replaced with empty string") + .isEqualTo(EMPTY_STRING); + } + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fs.getFileSystemId(), FSOperationType.TEST_OP, + TracingHeaderFormat.ALL_ID_FORMAT, null); + boolean isNamespaceEnabled = fs.getIsNamespaceEnabled(tracingContext); + String path = getRelativePath(new Path("/testDir")); + String permission = isNamespaceEnabled + ? getOctalNotation(FsPermission.getDirDefault()) + : null; + String umask = isNamespaceEnabled + ? 
getOctalNotation(FsPermission.getUMask(fs.getConf())) + : null; + + //request should not fail for invalid clientCorrelationID + AbfsRestOperation op = fs.getAbfsClient() + .createPath(path, false, true, permission, umask, false, null, + tracingContext); + + int statusCode = op.getResult().getStatusCode(); + Assertions.assertThat(statusCode).describedAs("Request should not fail") + .isEqualTo(HTTP_CREATED); + + String requestHeader = op.getResult().getClientRequestId().replace("[", "") + .replace("]", ""); + Assertions.assertThat(requestHeader) + .describedAs("Client Request Header should match TracingContext") + .isEqualTo(tracingContext.getHeader()); + } + + @Ignore + @Test + //call test methods from the respective test classes + //can be ignored when running all tests as these get covered + public void runCorrelationTestForAllMethods() throws Exception { + Map testClasses = new HashMap<>(); + + testClasses.put(new ITestAzureBlobFileSystemListStatus(), //liststatus + ITestAzureBlobFileSystemListStatus.class.getMethod("testListPath")); + testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, true), //open, + // read, write + ITestAbfsReadWriteAndSeek.class.getMethod("testReadAheadRequestID")); + testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, false), //read (bypassreadahead) + ITestAbfsReadWriteAndSeek.class + .getMethod("testReadAndWriteWithDifferentBufferSizesAndSeek")); + testClasses.put(new ITestAzureBlobFileSystemAppend(), //append + ITestAzureBlobFileSystemAppend.class.getMethod("testTracingForAppend")); + testClasses.put(new ITestAzureBlobFileSystemFlush(), + ITestAzureBlobFileSystemFlush.class.getMethod( + "testTracingHeaderForAppendBlob")); //outputstream (appendblob) + testClasses.put(new ITestAzureBlobFileSystemCreate(), + ITestAzureBlobFileSystemCreate.class + .getMethod("testDefaultCreateOverwriteFileTest")); //create + testClasses.put(new ITestAzureBlobFilesystemAcl(), + ITestAzureBlobFilesystemAcl.class + 
.getMethod("testDefaultAclRenamedFile")); //rename + testClasses.put(new ITestAzureBlobFileSystemDelete(), + ITestAzureBlobFileSystemDelete.class + .getMethod("testDeleteFirstLevelDirectory")); //delete + testClasses.put(new ITestAzureBlobFileSystemCreate(), + ITestAzureBlobFileSystemCreate.class + .getMethod("testCreateNonRecursive")); //mkdirs + testClasses.put(new ITestAzureBlobFileSystemAttributes(), + ITestAzureBlobFileSystemAttributes.class + .getMethod("testSetGetXAttr")); //setxattr, getxattr + testClasses.put(new ITestAzureBlobFilesystemAcl(), + ITestAzureBlobFilesystemAcl.class.getMethod( + "testEnsureAclOperationWorksForRoot")); // setacl, getaclstatus, + // setowner, setpermission, modifyaclentries, + // removeaclentries, removedefaultacl, removeacl + + for (AbstractAbfsIntegrationTest testClass : testClasses.keySet()) { + try { + testClass.setup(); + testClasses.get(testClass).invoke(testClass); + testClass.teardown(); + } catch (InvocationTargetException e) { + if (!(e.getCause() instanceof AssumptionViolatedException)) { + throw new IOException(testClasses.get(testClass).getName() + + " failed tracing context validation test"); + } + } + } + } + + @Test + public void testExternalOps() throws Exception { + //validate tracing header for access, hasPathCapability + AzureBlobFileSystem fs = getFileSystem(); + + fs.registerListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.HAS_PATH_CAPABILITY, false, + 0)); + + // unset namespaceEnabled to call getAcl -> trigger tracing header validator + fs.getAbfsStore().setNamespaceEnabled(Trilean.UNKNOWN); + fs.hasPathCapability(new Path("/"), CommonPathCapabilities.FS_ACLS); + + Assume.assumeTrue(getIsNamespaceEnabled(getFileSystem())); + Assume.assumeTrue(getConfiguration().isCheckAccessEnabled()); + Assume.assumeTrue(getAuthType() == AuthType.OAuth); + + fs.setListenerOperation(FSOperationType.ACCESS); + 
fs.getAbfsStore().setNamespaceEnabled(Trilean.TRUE); + fs.access(new Path("/"), FsAction.READ); + } + + @Test + public void testRetryPrimaryRequestIdWhenInitiallySuppliedEmpty() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + final String fileSystemId = fs.getFileSystemId(); + final String clientCorrelationId = fs.getClientCorrelationId(); + final TracingHeaderFormat tracingHeaderFormat = TracingHeaderFormat.ALL_ID_FORMAT; + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 0)); + AbfsHttpOperation abfsHttpOperation = Mockito.mock(AbfsHttpOperation.class); + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito.anyString(), Mockito.anyString()); + tracingContext.constructHeader(abfsHttpOperation, null); + String header = tracingContext.getHeader(); + String clientRequestIdUsed = header.split(":")[1]; + String[] clientRequestIdUsedParts = clientRequestIdUsed.split("-"); + String assertionPrimaryId = clientRequestIdUsedParts[clientRequestIdUsedParts.length - 1]; + + tracingContext.setRetryCount(1); + tracingContext.setListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 1)); + + tracingContext.constructHeader(abfsHttpOperation, "RT"); + header = tracingContext.getHeader(); + String primaryRequestId = header.split(":")[3]; + + Assertions.assertThat(primaryRequestId) + .describedAs("PrimaryRequestId in a retried request's " + + "tracingContext should be equal to last part of original " + + "request's clientRequestId UUID") + .isEqualTo(assertionPrimaryId); + } + + @Test + public void testRetryPrimaryRequestIdWhenInitiallySuppliedNonEmpty() throws Exception 
{ + final AzureBlobFileSystem fs = getFileSystem(); + final String fileSystemId = fs.getFileSystemId(); + final String clientCorrelationId = fs.getClientCorrelationId(); + final TracingHeaderFormat tracingHeaderFormat = TracingHeaderFormat.ALL_ID_FORMAT; + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 0)); + tracingContext.setPrimaryRequestID(); + AbfsHttpOperation abfsHttpOperation = Mockito.mock(AbfsHttpOperation.class); + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito.anyString(), Mockito.anyString()); + tracingContext.constructHeader(abfsHttpOperation, null); + String header = tracingContext.getHeader(); + String assertionPrimaryId = header.split(":")[3]; + + tracingContext.setRetryCount(1); + tracingContext.setListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 1)); + + tracingContext.constructHeader(abfsHttpOperation, "RT"); + header = tracingContext.getHeader(); + String primaryRequestId = header.split(":")[3]; + + Assertions.assertThat(primaryRequestId) + .describedAs("PrimaryRequestId in a retried request's tracingContext " + + "should be equal to PrimaryRequestId in the original request.") + .isEqualTo(assertionPrimaryId); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TrileanTests.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TrileanTests.java new file mode 100644 index 0000000000000..45467d4140132 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TrileanTests.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.catchThrowable; + +/** + * Tests for the enum Trilean. + */ +public class TrileanTests { + + private static final String TRUE_STR = "true"; + private static final String FALSE_STR = "false"; + + @Test + public void testGetTrileanForBoolean() { + assertThat(Trilean.getTrilean(true)).describedAs( + "getTrilean should return Trilean.TRUE when true is passed") + .isEqualTo(Trilean.TRUE); + assertThat(Trilean.getTrilean(false)).describedAs( + "getTrilean should return Trilean.FALSE when false is passed") + .isEqualTo(Trilean.FALSE); + } + + @Test + public void testGetTrileanForString() { + assertThat(Trilean.getTrilean(TRUE_STR.toLowerCase())).describedAs( + "getTrilean should return Trilean.TRUE when true is passed") + .isEqualTo(Trilean.TRUE); + assertThat(Trilean.getTrilean(TRUE_STR.toUpperCase())).describedAs( + "getTrilean should return Trilean.TRUE when TRUE is passed") + .isEqualTo(Trilean.TRUE); + + assertThat(Trilean.getTrilean(FALSE_STR.toLowerCase())).describedAs( + "getTrilean should return Trilean.FALSE when false is passed") + .isEqualTo(Trilean.FALSE); + assertThat(Trilean.getTrilean(FALSE_STR.toUpperCase())).describedAs( + "getTrilean should return Trilean.FALSE when FALSE is passed") + .isEqualTo(Trilean.FALSE); + + testInvalidString(null); + testInvalidString(" "); + testInvalidString("invalid"); + 
testInvalidString("truee"); + testInvalidString("falsee"); + } + + private void testInvalidString(String invalidString) { + assertThat(Trilean.getTrilean(invalidString)).describedAs( + "getTrilean should return Trilean.UNKNOWN for anything not true/false") + .isEqualTo(Trilean.UNKNOWN); + } + + @Test + public void testToBoolean() throws TrileanConversionException { + assertThat(Trilean.TRUE.toBoolean()) + .describedAs("toBoolean should return true for Trilean.TRUE").isTrue(); + assertThat(Trilean.FALSE.toBoolean()) + .describedAs("toBoolean should return false for Trilean.FALSE") + .isFalse(); + + assertThat(catchThrowable(() -> Trilean.UNKNOWN.toBoolean())).describedAs( + "toBoolean on Trilean.UNKNOWN results in TrileanConversionException") + .isInstanceOf(TrileanConversionException.class).describedAs( + "Exception message should be: catchThrowable(()->Trilean.UNKNOWN" + + ".toBoolean())") + .hasMessage("Cannot convert Trilean.UNKNOWN to boolean"); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbfsCommitTestHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbfsCommitTestHelper.java new file mode 100644 index 0000000000000..da2a650489077 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbfsCommitTestHelper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterConstants; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_SMALL_FILES_COMPLETELY; +import static org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterConstants.OPT_STORE_OPERATIONS_CLASS; + +/** + * Helper methods for committer tests on ABFS. + */ +final class AbfsCommitTestHelper { + private AbfsCommitTestHelper() { + } + + /** + * Prepare the test configuration. + * @param contractTestBinding test binding + * @return an extracted and patched configuration. + */ + static Configuration prepareTestConfiguration( + ABFSContractTestBinding contractTestBinding) { + final Configuration conf = + contractTestBinding.getRawConfiguration(); + + // use ABFS Store operations + conf.set(OPT_STORE_OPERATIONS_CLASS, + AbfsManifestStoreOperations.NAME); + // turn on small file read if not explicitly set to a value. 
+ conf.setBooleanIfUnset(AZURE_READ_SMALL_FILES_COMPLETELY, true); + // use a larger thread pool to compensate for latencies + final String size = Integer.toString(192); + conf.setIfUnset(ManifestCommitterConstants.OPT_IO_PROCESSORS, size); + conf.setIfUnset(ManifestCommitterConstants.OPT_WRITER_QUEUE_CAPACITY, size); + // no need for parallel delete here as we aren't at the scale where unified delete + // is going to time out + conf.setBooleanIfUnset(ManifestCommitterConstants.OPT_CLEANUP_PARALLEL_DELETE, false); + + return conf; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java new file mode 100644 index 0000000000000..55752055f0c31 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +import org.junit.AfterClass; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.AbstractManifestCommitterTest; +import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.util.DurationInfo; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; +import static org.apache.hadoop.io.IOUtils.closeStream; + +/** + * Tests which create a yarn minicluster. + * These are all considered scale tests; the probe for + * scale tests being enabled is executed before the cluster + * is set up to avoid wasting time on non-scale runs. + */ +public abstract class AbstractAbfsClusterITest extends + AbstractManifestCommitterTest { + + public static final int NO_OF_NODEMANAGERS = 2; + + private final ABFSContractTestBinding binding; + + + /** + * The static cluster binding with the lifecycle of this test; served + * through instance-level methods for sharing across methods in the + * suite. 
+ */ + @SuppressWarnings("StaticNonFinalField") + private static ClusterBinding clusterBinding; + + protected AbstractAbfsClusterITest() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + requireScaleTestsEnabled(); + if (getClusterBinding() == null) { + clusterBinding = demandCreateClusterBinding(); + } + assertNotNull("cluster is not bound", getClusterBinding()); + } + + @AfterClass + public static void teardownClusters() throws IOException { + terminateCluster(clusterBinding); + clusterBinding = null; + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + /** + * This is the cluster binding which every subclass must create. + */ + protected static final class ClusterBinding { + + private String clusterName; + + private final MiniMRYarnCluster yarn; + + public ClusterBinding( + final String clusterName, + final MiniMRYarnCluster yarn) { + this.clusterName = clusterName; + this.yarn = requireNonNull(yarn); + } + + + /** + * Get the cluster FS, which will either be HDFS or the local FS. + * @return a filesystem. + * @throws IOException failure + */ + public FileSystem getClusterFS() throws IOException { + return FileSystem.getLocal(yarn.getConfig()); + } + + public MiniMRYarnCluster getYarn() { + return yarn; + } + + public Configuration getConf() { + return getYarn().getConfig(); + } + + public String getClusterName() { + return clusterName; + } + + public void terminate() { + closeStream(getYarn()); + } + } + + /** + * Create the cluster binding. 
+ * The configuration will be patched by propagating down options + * from the maven build (S3Guard binding etc) and turning off unwanted + * YARN features. + * + * If an HDFS cluster is requested, + * the HDFS and YARN clusters will share the same configuration, so + * the HDFS cluster binding is implicitly propagated to YARN. + * If one is not requested, the local filesystem is used as the cluster FS. + * @param conf configuration to start with. + * @return the cluster binding. + * @throws IOException failure. + */ + protected static ClusterBinding createCluster( + final JobConf conf) throws IOException { + try (DurationInfo d = new DurationInfo(LOG, "Creating YARN MiniCluster")) { + conf.setBoolean(JHAdminConfig.MR_HISTORY_CLEANER_ENABLE, false); + // create a unique cluster name based on the current time in millis. + String timestamp = LocalDateTime.now().format( + DateTimeFormatter.ofPattern("yyyy-MM-dd-HH.mm.ss.SS")); + String clusterName = "yarn-" + timestamp; + MiniMRYarnCluster yarnCluster = + new MiniMRYarnCluster(clusterName, NO_OF_NODEMANAGERS); + yarnCluster.init(conf); + yarnCluster.start(); + return new ClusterBinding(clusterName, yarnCluster); + } + } + + /** + * Terminate the cluster if it is not null. + * @param cluster the cluster + */ + protected static void terminateCluster(ClusterBinding cluster) { + if (cluster != null) { + cluster.terminate(); + } + } + + /** + * Get the cluster binding for this subclass. + * @return the cluster binding + */ + protected ClusterBinding getClusterBinding() { + return clusterBinding; + } + + protected MiniMRYarnCluster getYarn() { + return getClusterBinding().getYarn(); + } + + + /** + * We stage work into a temporary directory rather than directly under + * the user's home directory, as that is often rejected by CI test + * runners. + */ + @Rule + public final TemporaryFolder stagingFilesDir = new TemporaryFolder(); + + + /** + * binding on demand rather than in a BeforeClass static method. 
+ * Subclasses can override this to change the binding options. + * This implementation calls createCluster() with a new JobConf. + * @return the cluster binding + * @throws Exception failure to create the cluster. + */ + protected ClusterBinding demandCreateClusterBinding() throws Exception { + return createCluster(new JobConf()); + } + + /** + * Create a job configuration. + * This creates a new job conf from the yarn + * cluster configuration, adds the test configuration as a resource, + * then calls + * {@link #applyCustomConfigOptions(JobConf)} to allow it to be customized. + * @return the new job configuration. + * @throws IOException failure + */ + protected JobConf newJobConf() throws IOException { + JobConf jobConf = new JobConf(getYarn().getConfig()); + jobConf.addResource(getConfiguration()); + applyCustomConfigOptions(jobConf); + return jobConf; + } + + /** + * Patch the (job) configuration for this committer by enabling the + * manifest committer on it. + * @param jobConf configuration to patch; it is modified in place. + * @return the patched configuration: the same instance as was passed in. + */ + protected Configuration patchConfigurationForCommitter( + final Configuration jobConf) { + enableManifestCommitter(jobConf); + return jobConf; + } + + /** + * Override point to let implementations tune the MR Job conf. + * This implementation does nothing. + * @param jobConf configuration + * @throws IOException declared so that overriding methods may raise it. + */ + protected void applyCustomConfigOptions(JobConf jobConf) throws IOException { + + } + + + /** + * Assume that scale tests are enabled, as determined from the + * test configuration. + */ + protected void requireScaleTestsEnabled() { + assumeScaleTestsEnabled(getConfiguration()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCleanupStage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCleanupStage.java new file mode 100644 index 0000000000000..a597c35376a71 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCleanupStage.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestCleanupStage; + +/** + * Cleanup logic on ABFS: runs the {@link TestCleanupStage} suite with + * the ABFS filesystem contract. + * The {@link ABFSContractTestBinding} is set up before the superclass; + * it supplies the test configuration (via AbfsCommitTestHelper) and + * the secure-mode flag passed to the contract.
+ */ +public class ITestAbfsCleanupStage extends TestCleanupStage { + + private final ABFSContractTestBinding binding; + + public ITestAbfsCleanupStage() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCommitTaskStage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCommitTaskStage.java new file mode 100644 index 0000000000000..a0aaec8532850 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCommitTaskStage.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestCommitTaskStage; + +/** + * ABFS storage test of task committer: runs the + * {@link TestCommitTaskStage} suite with the ABFS filesystem contract. + * The {@link ABFSContractTestBinding} is set up before the superclass; + * it supplies the test configuration (via AbfsCommitTestHelper) and + * the secure-mode flag passed to the contract. + */ +public class ITestAbfsCommitTaskStage extends TestCommitTaskStage { + + private final ABFSContractTestBinding binding; + + public ITestAbfsCommitTaskStage() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCreateOutputDirectoriesStage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCreateOutputDirectoriesStage.java new file mode 100644 index 0000000000000..6621b80da00c1 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsCreateOutputDirectoriesStage.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestCreateOutputDirectoriesStage; + +/** + * ABFS storage test of directory creation: runs the + * {@link TestCreateOutputDirectoriesStage} suite with the ABFS + * filesystem contract. + * The {@link ABFSContractTestBinding} is set up before the superclass; + * it supplies the test configuration (via AbfsCommitTestHelper) and + * the secure-mode flag passed to the contract. + */ +public class ITestAbfsCreateOutputDirectoriesStage extends TestCreateOutputDirectoriesStage { + + private final ABFSContractTestBinding binding; + + public ITestAbfsCreateOutputDirectoriesStage() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsJobThroughManifestCommitter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsJobThroughManifestCommitter.java new file mode 100644 index 0000000000000..4e4c4f5996bc2 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsJobThroughManifestCommitter.java @@ -0,0 +1,101
@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.io.IOException; +import java.util.List; + +import org.assertj.core.api.Assertions; +import org.junit.FixMethodOrder; +import org.junit.runners.MethodSorters; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestJobThroughManifestCommitter; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.files.FileEntry; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.files.TaskManifest; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.impl.ManifestCommitterSupport; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.impl.ManifestStoreOperations; + +import static org.apache.hadoop.fs.azurebfs.commit.AbfsCommitTestHelper.prepareTestConfiguration; + +/** + * Test the Manifest committer stages against ABFS. 
+ */ +@FixMethodOrder(MethodSorters.NAME_ASCENDING) +public class ITestAbfsJobThroughManifestCommitter + extends TestJobThroughManifestCommitter { + + private final ABFSContractTestBinding binding; + + public ITestAbfsJobThroughManifestCommitter() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return enableManifestCommitter(prepareTestConfiguration(binding)); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + + @Override + protected boolean shouldDeleteTestRootAtEndOfTestRun() { + return true; + } + + /** + * Add read of manifest and validate of output's etags: + * after the superclass validation, every file to commit must have a + * non-empty etag, and that etag must equal the etag of the file status + * which the store operations return for the entry's source path. + * @param attemptId attempt ID + * @param files files which were created. The element type is not + * declared here (presumably Path; confirm against the superclass). + * @param manifest manifest + * @throws IOException failure + */ + @Override + protected void validateTaskAttemptManifest(String attemptId, + List files, + TaskManifest manifest) throws IOException { + super.validateTaskAttemptManifest(attemptId, files, manifest); + final List<FileEntry> commit = manifest.getFilesToCommit(); + final ManifestStoreOperations operations = getStoreOperations(); + for (FileEntry entry : commit) { + Assertions.assertThat(entry.getEtag()) + .describedAs("Etag of %s", entry) + .isNotEmpty(); + final FileStatus sourceStatus = operations.getFileStatus(entry.getSourcePath()); + final String etag = ManifestCommitterSupport.getEtag(sourceStatus); + Assertions.assertThat(etag) + .describedAs("Etag of %s", sourceStatus) + .isEqualTo(entry.getEtag()); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java new file mode 100644 index
0000000000000..367692fca5d22 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterTestSupport; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestLoadManifestsStage; + +/** + * ABFS storage test of saving and loading a large number + * of manifests. 
+ */ +public class ITestAbfsLoadManifestsStage extends TestLoadManifestsStage { + + private final ABFSContractTestBinding binding; + + public ITestAbfsLoadManifestsStage() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } + + /** + * Number of task attempts (TAs) for this suite. + * @return {@link ManifestCommitterTestSupport#NUMBER_OF_TASK_ATTEMPTS_SMALL}: + * a smaller number of TAs than the base test suite does. + */ + @Override + protected int numberOfTaskAttempts() { + return ManifestCommitterTestSupport.NUMBER_OF_TASK_ATTEMPTS_SMALL; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsManifestCommitProtocol.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsManifestCommitProtocol.java new file mode 100644 index 0000000000000..aac06f952dab2 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsManifestCommitProtocol.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestManifestCommitProtocol; + +import static org.apache.hadoop.fs.azurebfs.commit.AbfsCommitTestHelper.prepareTestConfiguration; + +/** + * Test the Manifest protocol against ABFS: runs the + * {@link TestManifestCommitProtocol} suite with the ABFS filesystem + * contract and a configuration which has the manifest committer enabled. + * The {@link ABFSContractTestBinding} is set up before the superclass. + * The suite name is the fixed string "ITestAbfsManifestCommitProtocol". + */ +public class ITestAbfsManifestCommitProtocol extends + TestManifestCommitProtocol { + + private final ABFSContractTestBinding binding; + + public ITestAbfsManifestCommitProtocol() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return enableManifestCommitter(prepareTestConfiguration(binding)); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + + + @Override + protected String suitename() { + return "ITestAbfsManifestCommitProtocol"; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsManifestStoreOperations.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsManifestStoreOperations.java new file mode 100644 index
0000000000000..922782da29c5f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsManifestStoreOperations.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.nio.charset.StandardCharsets; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.AbstractManifestCommitterTest; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.files.FileEntry; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.impl.ManifestStoreOperations; + +import static 
org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME; +import static org.apache.hadoop.fs.azurebfs.commit.AbfsCommitTestHelper.prepareTestConfiguration; +import static org.junit.Assume.assumeTrue; + +/** + * Test {@link AbfsManifestStoreOperations}. + * As this looks at etag handling through FS operations, it's actually testing how etags work + * in ABFS (preservation across renames) and in the client (are they consistent + * in LIST and HEAD calls). + * + * Skipped when tested against wasb-compatible stores: setup() requires + * the test path to have the ETAGS_PRESERVED_IN_RENAME path capability. + */ +public class ITestAbfsManifestStoreOperations extends AbstractManifestCommitterTest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsManifestStoreOperations.class); + + private final ABFSContractTestBinding binding; + + public ITestAbfsManifestStoreOperations() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + + // skip tests on non-HNS stores + assumeTrue("Resilient rename not available", + getFileSystem().hasPathCapability(getContract().getTestPath(), + ETAGS_PRESERVED_IN_RENAME)); + + } + + @Override + protected Configuration createConfiguration() { + return enableManifestCommitter(prepareTestConfiguration(binding)); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + + /** + * Basic consistency of etags across operations, as well as being non-empty.
+ */ + @Test + public void testEtagConsistencyAcrossListAndHead() throws Throwable { + describe("Etag values must be non-empty and consistent across LIST and HEAD Calls."); + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + ContractTestUtils.touch(fs, path); + final ManifestStoreOperations operations = createManifestStoreOperations(); + Assertions.assertThat(operations) + .describedAs("Store operations class loaded via Configuration") + .isInstanceOf(AbfsManifestStoreOperations.class); + + final FileStatus st = operations.getFileStatus(path); + final String etag = operations.getEtag(st); + Assertions.assertThat(etag) + .describedAs("Etag of %s", st) + .isNotBlank(); + LOG.info("etag of empty file is \"{}\"", etag); + + final FileStatus[] statuses = fs.listStatus(path); + Assertions.assertThat(statuses) + .describedAs("List(%s)", path) + .hasSize(1); + final FileStatus lsStatus = statuses[0]; + Assertions.assertThat(operations.getEtag(lsStatus)) + .describedAs("etag of list status (%s) compared to HEAD value of %s", lsStatus, st) + .isEqualTo(etag); + } + + @Test + public void testEtagsOfDifferentDataDifferent() throws Throwable { + describe("Verify that two different blocks of data written have different tags"); + + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + Path src = new Path(path, "src"); + + ContractTestUtils.createFile(fs, src, true, + "data1234".getBytes(StandardCharsets.UTF_8)); + final ManifestStoreOperations operations = createManifestStoreOperations(); + final FileStatus srcStatus = operations.getFileStatus(src); + final String srcTag = operations.getEtag(srcStatus); + LOG.info("etag of file 1 is \"{}\"", srcTag); + + // now overwrite with data of same length + // (ensure that path or length aren't used exclusively as tag) + ContractTestUtils.createFile(fs, src, true, + "1234data".getBytes(StandardCharsets.UTF_8)); + + // validate + final String tag2 =
operations.getEtag(operations.getFileStatus(src)); + LOG.info("etag of file 2 is \"{}\"", tag2); + + Assertions.assertThat(tag2) + .describedAs("etag of updated file") + .isNotEqualTo(srcTag); + } + + /** + * Create a file, rename it through the store operations' commitFile() + * call, and require the etag of the destination to equal that of the source. + */ + @Test + public void testEtagConsistencyAcrossRename() throws Throwable { + describe("Verify that when a file is renamed, the etag remains unchanged"); + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + Path src = new Path(path, "src"); + Path dest = new Path(path, "dest"); + + ContractTestUtils.createFile(fs, src, true, + "sample data".getBytes(StandardCharsets.UTF_8)); + final ManifestStoreOperations operations = createManifestStoreOperations(); + final FileStatus srcStatus = operations.getFileStatus(src); + final String srcTag = operations.getEtag(srcStatus); + LOG.info("etag of short file is \"{}\"", srcTag); + + Assertions.assertThat(srcTag) + .describedAs("Etag of %s", srcStatus) + .isNotBlank(); + + // rename + operations.commitFile(new FileEntry(src, dest, 0, srcTag)); + + // validate + FileStatus destStatus = operations.getFileStatus(dest); + final String destTag = operations.getEtag(destStatus); + Assertions.assertThat(destTag) + .describedAs("etag of renamed file (%s) compared to etag of source %s", destStatus, srcStatus) + .isEqualTo(srcTag); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsRenameStageFailure.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsRenameStageFailure.java new file mode 100644 index 0000000000000..6b9830e8f33fc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsRenameStageFailure.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestRenameStageFailure; +/** + * Rename failure logic on ABFS. + * This will go through the resilient rename operation. + * Rename resilience is only required when the ABFS store reports + * that its namespace is enabled. + */ +public class ITestAbfsRenameStageFailure extends TestRenameStageFailure { + + /** + * How many files to create; this is the value returned by filesToCreate().
+ */ + private static final int FILES_TO_CREATE = 20; + + private final ABFSContractTestBinding binding; + + public ITestAbfsRenameStageFailure() throws Exception { + binding = new ABFSContractTestBinding(); + } + + protected boolean isNamespaceEnabled() throws AzureBlobFileSystemException { + AzureBlobFileSystem fs = (AzureBlobFileSystem) getFileSystem(); + return fs.getAbfsStore().getIsNamespaceEnabled(AbstractAbfsIntegrationTest.getSampleTracingContext(fs, false)); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + + @Override + protected boolean requireRenameResilience() throws AzureBlobFileSystemException { + return isNamespaceEnabled(); + } + + @Override + protected int filesToCreate() { + return FILES_TO_CREATE; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsTaskManifestFileIO.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsTaskManifestFileIO.java new file mode 100644 index 0000000000000..d2fe9de115c3b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsTaskManifestFileIO.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; +import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestTaskManifestFileIO; + +/** + * Test Reading/writing manifest file through ABFS: runs the + * {@link TestTaskManifestFileIO} suite with the ABFS filesystem contract. + * The {@link ABFSContractTestBinding} is set up before the superclass; + * it supplies the test configuration (via AbfsCommitTestHelper) and + * the secure-mode flag passed to the contract. + */ +public class ITestAbfsTaskManifestFileIO extends TestTaskManifestFileIO { + + private final ABFSContractTestBinding binding; + + public ITestAbfsTaskManifestFileIO() throws Exception { + binding = new ABFSContractTestBinding(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return AbfsCommitTestHelper.prepareTestConfiguration(binding); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, binding.isSecureMode()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsTerasort.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsTerasort.java new file mode 100644 index 0000000000000..4b21b838decc5 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsTerasort.java @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.commit; + +import java.io.File; +import java.io.FileNotFoundException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.function.Consumer; + +import org.junit.Assume; +import org.junit.FixMethodOrder; +import org.junit.Test; +import org.junit.runners.MethodSorters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.examples.terasort.TeraGen; +import org.apache.hadoop.examples.terasort.TeraSort; +import org.apache.hadoop.examples.terasort.TeraSortConfigKeys; +import org.apache.hadoop.examples.terasort.TeraValidate; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.statistics.IOStatisticsLogging; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.lib.output.committer.manifest.files.ManifestSuccessData; +import org.apache.hadoop.util.DurationInfo; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.util.functional.RemoteIterators; + +import 
static java.util.Optional.empty; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.snapshotIOStatistics; +import static org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterTestSupport.loadSuccessFile; +import static org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterTestSupport.validateSuccessFile; + +/** + * Runs Terasort against ABFS using the manifest committer. + * The tests run in sequence, so each operation is isolated. + * Scale test only (it is big and slow) + */ +@FixMethodOrder(MethodSorters.NAME_ASCENDING) +@SuppressWarnings({"StaticNonFinalField", "OptionalUsedAsFieldOrParameterType"}) +public class ITestAbfsTerasort extends AbstractAbfsClusterITest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAbfsTerasort.class); + + public static final int EXPECTED_PARTITION_COUNT = 10; + + public static final int PARTITION_SAMPLE_SIZE = 1000; + + public static final int ROW_COUNT = 1000; + + /** + * This has to be common across all test methods. + */ + private static final Path TERASORT_PATH = new Path("/ITestAbfsTerasort"); + + /** + * Duration tracker created in the first of the test cases and closed + * in {@link #test_140_teracomplete()}. + */ + private static Optional<DurationInfo> terasortDuration = empty(); + + /** + * Tracker of which stages are completed and how long they took. + */ + private static final Map<String, DurationInfo> COMPLETED_STAGES = new HashMap<>(); + + /** + * FileSystem statistics are collected from the _SUCCESS markers. + */ + protected static final IOStatisticsSnapshot JOB_IOSTATS = + snapshotIOStatistics(); + + /** Base path for all the terasort input and output paths. */ + private Path terasortPath; + + /** Input (teragen) path. */ + private Path sortInput; + + /** Path where sorted data goes. */ + private Path sortOutput; + + /** Path for validated job's output. 
*/ + private Path sortValidate; + + public ITestAbfsTerasort() throws Exception { + } + + + @Override + public void setup() throws Exception { + // superclass calls requireScaleTestsEnabled(); + super.setup(); + prepareToTerasort(); + } + + /** + * Set up the job conf with the options for terasort chosen by the scale + * options. + * @param conf configuration + */ + @Override + protected void applyCustomConfigOptions(JobConf conf) { + // small sample size for faster runs + conf.setInt(TeraSortConfigKeys.SAMPLE_SIZE.key(), + getSampleSizeForEachPartition()); + conf.setInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), + getExpectedPartitionCount()); + conf.setBoolean( + TeraSortConfigKeys.USE_SIMPLE_PARTITIONER.key(), + false); + } + + private int getExpectedPartitionCount() { + return EXPECTED_PARTITION_COUNT; + } + + private int getSampleSizeForEachPartition() { + return PARTITION_SAMPLE_SIZE; + } + + protected int getRowCount() { + return ROW_COUNT; + } + + /** + * Set up the terasort by initializing paths variables + * The paths used must be unique across parameterized runs but + * common across all test cases in a single parameterized run. + */ + private void prepareToTerasort() { + terasortPath = getFileSystem().makeQualified(TERASORT_PATH); + sortInput = new Path(terasortPath, "sortin"); + sortOutput = new Path(terasortPath, "sortout"); + sortValidate = new Path(terasortPath, "validate"); + } + + /** + * Declare that a stage has completed. + * @param stage stage name/key in the map + * @param d duration. + */ + private static void completedStage(final String stage, + final DurationInfo d) { + COMPLETED_STAGES.put(stage, d); + } + + /** + * Declare a stage which is required for this test case. + * @param stage stage name + */ + private static void requireStage(final String stage) { + Assume.assumeTrue( + "Required stage was not completed: " + stage, + COMPLETED_STAGES.get(stage) != null); + } + + /** + * Execute a single stage in the terasort. 
+ * Updates the completed stages map with the stage duration -if successful. + * @param stage Stage name for the stages map. + * @param jobConf job conf + * @param dest destination directory -the _SUCCESS file will be expected here. + * @param tool tool to run. + * @param args args for the tool. + * @param minimumFileCount minimum number of files to have been created + * @throws Exception any failure + */ + private void executeStage( + final String stage, + final JobConf jobConf, + final Path dest, + final Tool tool, + final String[] args, + final int minimumFileCount) throws Exception { + int result; + + // the duration info is created outside a try-with-resources + // clause as it is used later. + DurationInfo d = new DurationInfo(LOG, stage); + try { + result = ToolRunner.run(jobConf, tool, args); + } finally { + d.close(); + } + dumpOutputTree(dest); + assertEquals(stage + + "(" + StringUtils.join(", ", args) + ")" + + " failed", 0, result); + final ManifestSuccessData successFile = validateSuccessFile(getFileSystem(), dest, + minimumFileCount, ""); + JOB_IOSTATS.aggregate(successFile.getIOStatistics()); + + completedStage(stage, d); + } + + /** + * Set up terasort by cleaning out the destination, and note the initial + * time before any of the jobs are executed. + * + * This is executed first for each parameterized run. + * It is where all variables which need to be reset for each run need + * to be reset. 
+ */ + @Test + public void test_100_terasort_setup() throws Throwable { + describe("Setting up for a terasort"); + + getFileSystem().delete(terasortPath, true); + terasortDuration = Optional.of(new DurationInfo(LOG, false, "Terasort")); + } + + @Test + public void test_110_teragen() throws Throwable { + describe("Teragen to %s", sortInput); + getFileSystem().delete(sortInput, true); + + JobConf jobConf = newJobConf(); + patchConfigurationForCommitter(jobConf); + executeStage("teragen", + jobConf, + sortInput, + new TeraGen(), + new String[]{Integer.toString(getRowCount()), sortInput.toString()}, + 1); + } + + + @Test + public void test_120_terasort() throws Throwable { + describe("Terasort from %s to %s", sortInput, sortOutput); + requireStage("teragen"); + getFileSystem().delete(sortOutput, true); + + loadSuccessFile(getFileSystem(), sortInput); + JobConf jobConf = newJobConf(); + patchConfigurationForCommitter(jobConf); + executeStage("terasort", + jobConf, + sortOutput, + new TeraSort(), + new String[]{sortInput.toString(), sortOutput.toString()}, + 1); + } + + @Test + public void test_130_teravalidate() throws Throwable { + describe("TeraValidate from %s to %s", sortOutput, sortValidate); + requireStage("terasort"); + getFileSystem().delete(sortValidate, true); + loadSuccessFile(getFileSystem(), sortOutput); + JobConf jobConf = newJobConf(); + patchConfigurationForCommitter(jobConf); + executeStage("teravalidate", + jobConf, + sortValidate, + new TeraValidate(), + new String[]{sortOutput.toString(), sortValidate.toString()}, + 1); + } + + /** + * Print the results, and save to the base dir as a CSV file. + * Why there? Makes it easy to list and compare. 
+ */ + @Test + public void test_140_teracomplete() throws Throwable { + terasortDuration.ifPresent(d -> { + d.close(); + completedStage("overall", d); + }); + + // IO Statistics + IOStatisticsLogging.logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, JOB_IOSTATS); + + // and the summary + final StringBuilder results = new StringBuilder(); + results.append("\"Operation\"\t\"Duration\"\n"); + + // this is how you dynamically create a function in a method + // for use afterwards. + // Works because no IOExceptions are raised in this sequence. + Consumer<String> stage = (s) -> { + DurationInfo duration = COMPLETED_STAGES.get(s); + results.append(String.format("\"%s\"\t\"%s\"\n", + s, + duration == null ? "" : duration)); + }; + + stage.accept("teragen"); + stage.accept("terasort"); + stage.accept("teravalidate"); + stage.accept("overall"); + String text = results.toString(); + File resultsFile = File.createTempFile("results", ".csv"); + FileUtils.write(resultsFile, text, StandardCharsets.UTF_8); + LOG.info("Results are in {}\n{}", resultsFile, text); + } + + /** + * Reset the duration so that two committer test suites can be run sequentially. + * Without this the total execution time is reported as from the start of + * the first test suite to the end of the second. + */ + @Test + public void test_150_teracleanup() throws Throwable { + terasortDuration = Optional.empty(); + } + + @Test + public void test_200_directory_deletion() throws Throwable { + getFileSystem().delete(terasortPath, true); + } + + /** + * Dump the files under a path, but do not fail if the path is not present. + * @param path path to dump + * @throws Exception any failure. 
+ */ + protected void dumpOutputTree(Path path) throws Exception { + LOG.info("Files under output directory {}", path); + try { + RemoteIterators.foreach(getFileSystem().listFiles(path, true), + (status) -> LOG.info("{}", status)); + } catch (FileNotFoundException e) { + LOG.info("Output directory {} not found", path); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/package-info.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/package-info.java new file mode 100644 index 0000000000000..3d49d62eaa865 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Unit and integration tests for the manifest committer. 
+ * JSON job reports will be saved to + * {@code target/reports} + */ +package org.apache.hadoop.fs.azurebfs.commit; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java index c8dcef3ef205e..9e40f22d231b0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java @@ -24,9 +24,14 @@ public final class TestConfigurationKeys { public static final String FS_AZURE_ACCOUNT_NAME = "fs.azure.account.name"; public static final String FS_AZURE_ABFS_ACCOUNT_NAME = "fs.azure.abfs.account.name"; + public static final String FS_AZURE_ABFS_ACCOUNT1_NAME = "fs.azure.abfs.account1.name"; + public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling"; + public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period"; public static final String FS_AZURE_ACCOUNT_KEY = "fs.azure.account.key"; public static final String FS_AZURE_CONTRACT_TEST_URI = "fs.contract.test.fs.abfs"; public static final String FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT = "fs.azure.test.namespace.enabled"; + public static final String FS_AZURE_TEST_APPENDBLOB_ENABLED = "fs.azure.test.appendblob.enabled"; + public static final String FS_AZURE_TEST_CPK_ENABLED = "fs.azure.test.cpk.enabled"; public static final String FS_AZURE_BLOB_DATA_CONTRIBUTOR_CLIENT_ID = "fs.azure.account.oauth2.contributor.client.id"; public static final String FS_AZURE_BLOB_DATA_CONTRIBUTOR_CLIENT_SECRET = "fs.azure.account.oauth2.contributor.client.secret"; @@ -45,6 +50,17 @@ public final class TestConfigurationKeys { public static final String MOCK_SASTOKENPROVIDER_FAIL_INIT = "mock.sastokenprovider.fail.init"; public static final String 
MOCK_SASTOKENPROVIDER_RETURN_EMPTY_SAS_TOKEN = "mock.sastokenprovider.return.empty.sasToken"; + public static final String FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_TENANT_ID = "fs.azure.test.app.service.principal.tenant.id"; + + public static final String FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_OBJECT_ID = "fs.azure.test.app.service.principal.object.id"; + + public static final String FS_AZURE_TEST_APP_ID = "fs.azure.test.app.id"; + + public static final String FS_AZURE_TEST_APP_SECRET = "fs.azure.test.app.secret"; + + public static final String FS_AZURE_TEST_CPK_ENABLED_SECONDARY_ACCOUNT = "fs.azure.test.cpk-enabled-secondary-account"; + public static final String FS_AZURE_TEST_CPK_ENABLED_SECONDARY_ACCOUNT_KEY = "fs.azure.test.cpk-enabled-secondary-account.key"; + public static final String TEST_CONFIGURATION_FILE_NAME = "azure-test.xml"; public static final String TEST_CONTAINER_PREFIX = "abfs-testcontainer-"; public static final int TEST_TIMEOUT = 15 * 60 * 1000; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/AbfsFileSystemContract.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/AbfsFileSystemContract.java index 62bcca174ef8d..1319ea44c7c07 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/AbfsFileSystemContract.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/AbfsFileSystemContract.java @@ -34,7 +34,7 @@ public class AbfsFileSystemContract extends AbstractBondedFSContract { public static final String CONTRACT_XML = "abfs.xml"; private final boolean isSecure; - protected AbfsFileSystemContract(final Configuration conf, boolean secure) { + public AbfsFileSystemContract(final Configuration conf, boolean secure) { super(conf); //insert the base features addConfResource(CONTRACT_XML); diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java index 0c7db73cf79eb..3f06509241f74 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java @@ -19,16 +19,24 @@ package org.apache.hadoop.fs.azurebfs.contract; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; import org.junit.Assume; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; + /** * Contract test for distCp operation. */ public class ITestAbfsFileSystemContractDistCp extends AbstractContractDistCpTest { private final ABFSContractTestBinding binding; + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } + public ITestAbfsFileSystemContractDistCp() throws Exception { binding = new ABFSContractTestBinding(); Assume.assumeTrue(binding.getAuthType() != AuthType.OAuth); @@ -38,6 +46,7 @@ public ITestAbfsFileSystemContractDistCp() throws Exception { public void setup() throws Exception { binding.setup(); super.setup(); + assumeScaleTestsEnabled(binding.getRawConfiguration()); } @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractEtag.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractEtag.java new file mode 100644 index 0000000000000..d498ae71a4b6f --- /dev/null +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractEtag.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractEtagTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Contract test for etag support. + */ +public class ITestAbfsFileSystemContractEtag extends AbstractContractEtagTest { + private final boolean isSecure; + private final ABFSContractTestBinding binding; + + public ITestAbfsFileSystemContractEtag() throws Exception { + binding = new ABFSContractTestBinding(); + this.isSecure = binding.isSecureMode(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + // Base rename contract test class re-uses the test folder + // This leads to failures when the test is re-run as same ABFS test + // containers are re-used for test run and creation of source and + // destination test paths fail, as they are already present. 
+ binding.getFileSystem().delete(binding.getTestPath(), true); + } + + @Override + protected Configuration createConfiguration() { + return binding.getRawConfiguration(); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, isSecure); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractRename.java index b92bef68a0908..cd60e6d5ae498 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractRename.java @@ -38,6 +38,11 @@ public ITestAbfsFileSystemContractRename() throws Exception { public void setup() throws Exception { binding.setup(); super.setup(); + // Base rename contract test class re-uses the test folder + // This leads to failures when the test is re-run as same ABFS test + // containers are re-used for test run and creation of source and + // destination test paths fail, as they are already present. + binding.getFileSystem().delete(binding.getTestPath(), true); } @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java deleted file mode 100644 index fa77c2e649ce1..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azurebfs.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; - -/** - * Contract test for secure distCP operation. - */ -public class ITestAbfsFileSystemContractSecureDistCp extends AbstractContractDistCpTest { - private final ABFSContractTestBinding binding; - - public ITestAbfsFileSystemContractSecureDistCp() throws Exception { - binding = new ABFSContractTestBinding(); - } - - @Override - public void setup() throws Exception { - binding.setup(); - super.setup(); - } - - @Override - protected Configuration createConfiguration() { - return binding.getRawConfiguration(); - } - - @Override - protected AbfsFileSystemContract createContract(Configuration conf) { - return new AbfsFileSystemContract(conf, true); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java index 35a5e1733d0e6..f7fe5039799d7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java @@ -18,10 +18,27 @@ package org.apache.hadoop.fs.azurebfs.contract; +import java.io.IOException; +import java.util.concurrent.CompletableFuture; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamStatisticsImpl; import org.apache.hadoop.fs.contract.AbstractContractSeekTest; import org.apache.hadoop.fs.contract.AbstractFSContract; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_AHEAD_RANGE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; + /** * Contract test for seek operation. 
*/ @@ -29,6 +46,8 @@ public class ITestAbfsFileSystemContractSeek extends AbstractContractSeekTest{ private final boolean isSecure; private final ABFSContractTestBinding binding; + private static final byte[] BLOCK = dataset(100 * 1024, 0, 255); + public ITestAbfsFileSystemContractSeek() throws Exception { binding = new ABFSContractTestBinding(); this.isSecure = binding.isSecureMode(); @@ -47,6 +66,225 @@ protected Configuration createConfiguration() { @Override protected AbstractFSContract createContract(final Configuration conf) { + conf.setInt(AZURE_READ_AHEAD_RANGE, MIN_BUFFER_SIZE); + conf.setInt(AZURE_READ_BUFFER_SIZE, MIN_BUFFER_SIZE); return new AbfsFileSystemContract(conf, isSecure); } + + /** + * Test verifies if the data is read correctly + * when {@code ConfigurationKeys#AZURE_READ_AHEAD_RANGE} is set. + * Reason for not breaking this test into smaller parts is we + * really want to simulate lot of forward and backward seeks + * similar to real production use case. + */ + @Test + public void testSeekAndReadWithReadAhead() throws IOException { + describe(" Testing seek and read with read ahead " + + "enabled for random reads"); + + Path testSeekFile = path(getMethodName() + "bigseekfile.txt"); + createDataSet(testSeekFile); + try (FSDataInputStream in = getFileSystem().open(testSeekFile)) { + AbfsInputStream inStream = ((AbfsInputStream) in.getWrappedStream()); + AbfsInputStreamStatisticsImpl streamStatistics = + (AbfsInputStreamStatisticsImpl) inStream.getStreamStatistics(); + assertEquals(String.format("Value of %s is not set correctly", AZURE_READ_AHEAD_RANGE), + MIN_BUFFER_SIZE, inStream.getReadAheadRange()); + + long remoteReadOperationsOldVal = streamStatistics.getRemoteReadOperations(); + Assertions.assertThat(remoteReadOperationsOldVal) + .describedAs("Number of remote read ops should be 0 " + + "before any read call is made") + .isEqualTo(0); + + // Test read at first position. Remote read. 
+ Assertions.assertThat(inStream.getPos()) + .describedAs("First call to getPos() should return 0") + .isEqualTo(0); + assertDataAtPos(0, (byte) in.read()); + assertSeekBufferStats(0, streamStatistics.getSeekInBuffer()); + long remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Seeking just before read ahead range. Read from buffer. + int newSeek = inStream.getReadAheadRange() - 1; + in.seek(newSeek); + assertGetPosition(newSeek, in.getPos()); + assertDataAtPos(newSeek, (byte) in.read()); + assertSeekBufferStats(1, streamStatistics.getSeekInBuffer()); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertNoIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Seeking boundary of read ahead range. Read from buffer manager. + newSeek = inStream.getReadAheadRange(); + inStream.seek(newSeek); + assertGetPosition(newSeek, in.getPos()); + assertDataAtPos(newSeek, (byte) in.read()); + assertSeekBufferStats(1, streamStatistics.getSeekInBuffer()); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertNoIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Seeking just after read ahead range. Read from buffer. 
+ newSeek = inStream.getReadAheadRange() + 1; + in.seek(newSeek); + assertGetPosition(newSeek, in.getPos()); + assertDataAtPos(newSeek, (byte) in.read()); + assertSeekBufferStats(2, streamStatistics.getSeekInBuffer()); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertNoIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Seeking just 10 more bytes such that data is read from buffer. + newSeek += 10; + in.seek(newSeek); + assertGetPosition(newSeek, in.getPos()); + assertDataAtPos(newSeek, (byte) in.read()); + assertSeekBufferStats(3, streamStatistics.getSeekInBuffer()); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertNoIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Seek backward such that data is read from remote. + newSeek -= 106; + in.seek(newSeek); + assertGetPosition(newSeek, in.getPos()); + assertDataAtPos(newSeek, (byte) in.read()); + assertSeekBufferStats(3, streamStatistics.getSeekInBuffer()); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Seeking just 10 more bytes such that data is read from buffer. + newSeek += 10; + in.seek(newSeek); + assertGetPosition(newSeek, in.getPos()); + assertDataAtPos(newSeek, (byte) in.read()); + assertSeekBufferStats(4, streamStatistics.getSeekInBuffer()); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertNoIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + remoteReadOperationsOldVal = remoteReadOperationsNewVal; + + // Read multiple bytes across read ahead range. Remote read. 
+ long oldSeek = newSeek; + newSeek = 2*inStream.getReadAheadRange() -1; + byte[] bytes = new byte[5]; + in.readFully(newSeek, bytes); + // With readFully getPos should return oldSeek pos. + // Adding one as one byte is already read + // after the last seek is done. + assertGetPosition(oldSeek + 1, in.getPos()); + assertSeekBufferStats(4, streamStatistics.getSeekInBuffer()); + assertDatasetEquals(newSeek, "Read across read ahead ", + bytes, bytes.length); + remoteReadOperationsNewVal = streamStatistics.getRemoteReadOperations(); + assertIncrementInRemoteReadOps(remoteReadOperationsOldVal, + remoteReadOperationsNewVal); + } + } + + /** + * Validates getPos() when a seek is done + * after an {@code AbfsInputStream#unbuffer} call has been made. + * Also uses the optimised builder API to open the file. + */ + @Test + public void testSeekAfterUnbuffer() throws IOException { + describe("Test to make sure that seeking in AbfsInputStream after " + + "unbuffer() call is not doing anyIO."); + Path testFile = path(getMethodName() + ".txt"); + createDataSet(testFile); + final CompletableFuture<FSDataInputStream> future = + getFileSystem().openFile(testFile) + .build(); + try (FSDataInputStream inputStream = awaitFuture(future)) { + AbfsInputStream abfsInputStream = (AbfsInputStream) inputStream.getWrappedStream(); + AbfsInputStreamStatisticsImpl streamStatistics = + (AbfsInputStreamStatisticsImpl) abfsInputStream.getStreamStatistics(); + int readAheadRange = abfsInputStream.getReadAheadRange(); + long seekPos = readAheadRange; + inputStream.seek(seekPos); + assertDataAtPos(readAheadRange, (byte) inputStream.read()); + long currentRemoteReadOps = streamStatistics.getRemoteReadOperations(); + assertIncrementInRemoteReadOps(0, currentRemoteReadOps); + inputStream.unbuffer(); + seekPos -= 10; + inputStream.seek(seekPos); + // Seek backwards shouldn't do any IO + assertNoIncrementInRemoteReadOps(currentRemoteReadOps, streamStatistics.getRemoteReadOperations()); + assertGetPosition(seekPos, 
inputStream.getPos()); + } + } + + private void createDataSet(Path path) throws IOException { + createFile(getFileSystem(), path, true, BLOCK); + } + + private void assertGetPosition(long expected, long actual) { + final String seekPosErrorMsg = "getPos() should return %s"; + Assertions.assertThat(actual) + .describedAs(seekPosErrorMsg, expected) + .isEqualTo(expected); + } + + private void assertDataAtPos(int pos, byte actualData) { + final String dataErrorMsg = "Mismatch in data@%s"; + Assertions.assertThat(actualData) + .describedAs(dataErrorMsg, pos) + .isEqualTo(BLOCK[pos]); + } + + private void assertSeekBufferStats(long expected, long actual) { + final String statsErrorMsg = "Mismatch in seekInBuffer counts"; + Assertions.assertThat(actual) + .describedAs(statsErrorMsg) + .isEqualTo(expected); + } + + private void assertNoIncrementInRemoteReadOps(long oldVal, long newVal) { + final String incrementErrorMsg = "Number of remote read ops shouldn't increase"; + Assertions.assertThat(newVal) + .describedAs(incrementErrorMsg) + .isEqualTo(oldVal); + } + + private void assertIncrementInRemoteReadOps(long oldVal, long newVal) { + final String incrementErrorMsg = "Number of remote read ops should increase"; + Assertions.assertThat(newVal) + .describedAs(incrementErrorMsg) + .isGreaterThan(oldVal); + } + + /** + * Assert that the data read matches the dataset at the given offset. + * This helps verify that the seek process is moving the read pointer + * to the correct location in the file. + * @param readOffset the offset in the file where the read began. + * @param operation operation name for the assertion. + * @param data data read in. + * @param length length of data to check. 
+ */ + private void assertDatasetEquals( + final int readOffset, + final String operation, + final byte[] data, + int length) { + for (int i = 0; i < length; i++) { + int o = readOffset + i; + Assertions.assertThat(data[i]) + .describedAs(operation + "with read offset " + readOffset + + ": data[" + i + "] != actualData[" + o + "]") + .isEqualTo(BLOCK[o]); + } + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAzureBlobFileSystemBasics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAzureBlobFileSystemBasics.java index a9fa2d77194aa..e99d0895d11ee 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAzureBlobFileSystemBasics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAzureBlobFileSystemBasics.java @@ -27,7 +27,9 @@ import org.junit.Before; import org.junit.Ignore; import org.junit.Test; +import org.junit.rules.Timeout; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_TIMEOUT; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -41,6 +43,7 @@ public ITestAzureBlobFileSystemBasics() throws Exception { // If all contract tests are running in parallel, some root level tests in FileSystemContractBaseTest will fail // due to the race condition. 
Hence for this contract test it should be tested in different container binding = new ABFSContractTestBinding(false); + globalTimeout = Timeout.millis(TEST_TIMEOUT); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ListResultSchemaTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ListResultSchemaTest.java new file mode 100644 index 0000000000000..8a33ea5de0641 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ListResultSchemaTest.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contract; + +import java.io.IOException; + +import org.codehaus.jackson.map.ObjectMapper; +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests the JSON parsing for the listfilestatus response to ListResultSchema + */ +public class ListResultSchemaTest { + + /** + * Test parsing a JSON which matches the properties in the ListResultSchema + * and ListResultEntrySchema + *

    + * { + * "paths": [ + * { + * "contentLength": "0", + * "etag": "0x8D8186452785ADA", + * "group": "$superuser", + * "lastModified": "Wed, 24 Jun 2020 17:30:43 GMT", + * "name": "dest/filename", + * "owner": "$superuser", + * "permissions": "rw-r--r--" + * } + * ] + * } + */ + @Test + public void testMatchingJSON() throws IOException { + + String matchingJson = + "{ \"paths\": [ { \"contentLength\": \"0\", \"etag\": " + + "\"0x8D8186452785ADA\", \"group\": \"$superuser\", " + + "\"lastModified\": \"Wed, 24 Jun 2020 17:30:43 GMT\", \"name\": " + + "\"dest/filename\", \"owner\": \"$superuser\", \"permissions\": " + + "\"rw-r--r--\" } ] } "; + + final ObjectMapper objectMapper = new ObjectMapper(); + final ListResultSchema listResultSchema = objectMapper + .readValue(matchingJson, ListResultSchema.class); + + assertThat(listResultSchema.paths().size()) + .describedAs("Only one path is expected as present in the input JSON") + .isEqualTo(1); + + ListResultEntrySchema path = listResultSchema.paths().get(0); + assertThat(path.contentLength()) + .describedAs("contentLength should match the value in the input JSON") + .isEqualTo(0L); + assertThat(path.eTag()) + .describedAs("eTag should match the value in the input JSON") + .isEqualTo("0x8D8186452785ADA"); + assertThat(path.group()) + .describedAs("group should match the value in the input JSON") + .isEqualTo("$superuser"); + assertThat(path.lastModified()) + .describedAs("lastModified should match the value in the input JSON") + .isEqualTo("Wed, 24 Jun 2020 17:30:43 GMT"); + assertThat(path.name()) + .describedAs("name should match the value in the input JSON") + .isEqualTo("dest/filename"); + assertThat(path.owner()) + .describedAs("owner should match the value in the input JSON") + .isEqualTo("$superuser"); + assertThat(path.permissions()) + .describedAs("permissions should match the value in the input JSON") + .isEqualTo("rw-r--r--"); + } + + /** + * Test parsing a JSON which matches the properties 
in the ListResultSchema + * and ListResultEntrySchema along with an unknown property + *

    + * { + * "paths": [ + * { + * "contentLength": "0", + * "unknownProperty": "132374934429527192", + * "etag": "0x8D8186452785ADA", + * "group": "$superuser", + * "lastModified": "Wed, 24 Jun 2020 17:30:43 GMT", + * "name": "dest/filename", + * "owner": "$superuser", + * "permissions": "rw-r--r--" + * } + * ] + * } + */ + @Test + public void testJSONWithUnknownFields() throws IOException { + + String matchingJson = "{ \"paths\": [ { \"contentLength\": \"0\", " + + "\"unknownProperty\": \"132374934429527192\", \"etag\": " + + "\"0x8D8186452785ADA\", \"group\": \"$superuser\", " + + "\"lastModified\": \"Wed, 24 Jun 2020 17:30:43 GMT\", \"name\": " + + "\"dest/filename\", \"owner\": \"$superuser\", \"permissions\": " + + "\"rw-r--r--\" } ] } "; + + final ObjectMapper objectMapper = new ObjectMapper(); + final ListResultSchema listResultSchema = objectMapper + .readValue(matchingJson, ListResultSchema.class); + + assertThat(listResultSchema.paths().size()) + .describedAs("Only one path is expected as present in the input JSON") + .isEqualTo(1); + + ListResultEntrySchema path = listResultSchema.paths().get(0); + assertThat(path.contentLength()) + .describedAs("contentLength should match the value in the input JSON") + .isEqualTo(0L); + assertThat(path.eTag()) + .describedAs("eTag should match the value in the input JSON") + .isEqualTo("0x8D8186452785ADA"); + assertThat(path.group()) + .describedAs("group should match the value in the input JSON") + .isEqualTo("$superuser"); + assertThat(path.lastModified()) + .describedAs("lastModified should match the value in the input JSON") + .isEqualTo("Wed, 24 Jun 2020 17:30:43 GMT"); + assertThat(path.name()) + .describedAs("name should match the value in the input JSON") + .isEqualTo("dest/filename"); + assertThat(path.owner()) + .describedAs("owner should match the value in the input JSON") + .isEqualTo("$superuser"); + assertThat(path.permissions()) + .describedAs("permissions should match the value in the 
input JSON") + .isEqualTo("rw-r--r--"); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java index f02eadc9a0491..6a02435fc6e5e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java @@ -24,11 +24,14 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.utils.Base64; -import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; -import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MAX_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_LEASE_DURATION; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_WRITE_BUFFER_SIZE; - +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.INFINITE_LEASE_DURATION; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MAX_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MAX_LEASE_DURATION; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_LEASE_DURATION; /** * Test configuration validators. 
@@ -58,6 +61,26 @@ public void testIntegerConfigValidatorThrowsIfMissingValidValue() throws Excepti integerConfigurationValidator.validate("3072"); } + @Test + public void testIntegerWithOutlierConfigValidator() throws Exception { + IntegerConfigurationBasicValidator integerConfigurationValidator = new IntegerConfigurationBasicValidator( + INFINITE_LEASE_DURATION, MIN_LEASE_DURATION, MAX_LEASE_DURATION, DEFAULT_LEASE_DURATION, FAKE_KEY, + false); + + assertEquals(INFINITE_LEASE_DURATION, (int) integerConfigurationValidator.validate("-1")); + assertEquals(DEFAULT_LEASE_DURATION, (int) integerConfigurationValidator.validate(null)); + assertEquals(MIN_LEASE_DURATION, (int) integerConfigurationValidator.validate("15")); + assertEquals(MAX_LEASE_DURATION, (int) integerConfigurationValidator.validate("60")); + } + + @Test(expected = InvalidConfigurationValueException.class) + public void testIntegerWithOutlierConfigValidatorThrowsIfMissingValidValue() throws Exception { + IntegerConfigurationBasicValidator integerConfigurationValidator = new IntegerConfigurationBasicValidator( + INFINITE_LEASE_DURATION, MIN_LEASE_DURATION, MAX_LEASE_DURATION, DEFAULT_LEASE_DURATION, FAKE_KEY, + true); + integerConfigurationValidator.validate("14"); + } + @Test public void testLongConfigValidator() throws Exception { LongConfigurationBasicValidator longConfigurationValidator = new LongConfigurationBasicValidator( diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/ClassicDelegationTokenManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/ClassicDelegationTokenManager.java index f87fc654f0908..f74e3e3b3e9a7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/ClassicDelegationTokenManager.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/ClassicDelegationTokenManager.java @@ -22,7 +22,7 @@ import java.net.URI; import 
java.nio.charset.Charset; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java new file mode 100644 index 0000000000000..cf7d51da4c44a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.extensions; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; +import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.utils.Base64; +import org.apache.hadoop.fs.azurebfs.utils.DelegationSASGenerator; +import org.apache.hadoop.fs.azurebfs.utils.SASGenerator; +import org.apache.hadoop.security.AccessControlException; + +/** + * A mock SAS token provider implementation + */ +public class MockDelegationSASTokenProvider implements SASTokenProvider { + + private DelegationSASGenerator generator; + + public static final String TEST_OWNER = "325f1619-4205-432f-9fce-3fd594325ce5"; + public static final String CORRELATION_ID = "66ff4ffc-ff17-417e-a2a9-45db8c5b0b5c"; + public static final String NO_AGENT_PATH = "NoAgentPath"; + + @Override + public void initialize(Configuration configuration, String accountName) throws IOException { + String appID = configuration.get(TestConfigurationKeys.FS_AZURE_TEST_APP_ID); + String appSecret = configuration.get(TestConfigurationKeys.FS_AZURE_TEST_APP_SECRET); + String sktid = configuration.get(TestConfigurationKeys.FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_TENANT_ID); + String skoid = configuration.get(TestConfigurationKeys.FS_AZURE_TEST_APP_SERVICE_PRINCIPAL_OBJECT_ID); + String skt = 
SASGenerator.ISO_8601_FORMATTER.format(Instant.now().minus(SASGenerator.FIVE_MINUTES)); + String ske = SASGenerator.ISO_8601_FORMATTER.format(Instant.now().plus(SASGenerator.ONE_DAY)); + String skv = SASGenerator.AuthenticationVersion.Dec19.toString(); + + byte[] key = getUserDelegationKey(accountName, appID, appSecret, sktid, skt, ske, skv); + + generator = new DelegationSASGenerator(key, skoid, sktid, skt, ske, skv); + } + + // Invokes the AAD v2.0 authentication endpoint with a client credentials grant to get an + // access token. See https://docs.microsoft.com/en-us/azure/active-directory/develop/v2-oauth2-client-creds-grant-flow. + private String getAuthorizationHeader(String accountName, String appID, String appSecret, String sktid) throws IOException { + String authEndPoint = String.format("https://login.microsoftonline.com/%s/oauth2/v2.0/token", sktid); + ClientCredsTokenProvider provider = new ClientCredsTokenProvider(authEndPoint, appID, appSecret); + return "Bearer " + provider.getToken().getAccessToken(); + } + + private byte[] getUserDelegationKey(String accountName, String appID, String appSecret, + String sktid, String skt, String ske, String skv) throws IOException { + + String method = "POST"; + String account = accountName.substring(0, accountName.indexOf(AbfsHttpConstants.DOT)); + + final StringBuilder sb = new StringBuilder(128); + sb.append("https://"); + sb.append(account); + sb.append(".blob.core.windows.net/?restype=service&comp=userdelegationkey"); + + URL url; + try { + url = new URL(sb.toString()); + } catch (MalformedURLException ex) { + throw new InvalidUriException(sb.toString()); + } + + List requestHeaders = new ArrayList(); + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_VERSION, skv)); + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.CONTENT_TYPE, "application/x-www-form-urlencoded")); + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.AUTHORIZATION, 
getAuthorizationHeader(account, appID, appSecret, sktid))); + + final StringBuilder requestBody = new StringBuilder(512); + requestBody.append(""); + requestBody.append(skt); + requestBody.append(""); + requestBody.append(ske); + requestBody.append(""); + + AbfsHttpOperation op = new AbfsHttpOperation(url, method, requestHeaders); + + byte[] requestBuffer = requestBody.toString().getBytes(StandardCharsets.UTF_8.toString()); + op.sendRequest(requestBuffer, 0, requestBuffer.length); + + byte[] responseBuffer = new byte[4 * 1024]; + op.processResponse(responseBuffer, 0, responseBuffer.length); + + String responseBody = new String(responseBuffer, 0, (int) op.getBytesReceived(), StandardCharsets.UTF_8); + int beginIndex = responseBody.indexOf("") + "".length(); + int endIndex = responseBody.indexOf(""); + String value = responseBody.substring(beginIndex, endIndex); + return Base64.decode(value); + } + + /** + * Invokes the authorizer to obtain a SAS token. + * + * @param accountName the name of the storage account. + * @param fileSystem the name of the fileSystem. + * @param path the file or directory path. + * @param operation the operation to be performed on the path. + * @return a SAS token to perform the request operation. + * @throws IOException if there is a network error. + * @throws AccessControlException if access is denied. + */ + @Override + public String getSASToken(String accountName, String fileSystem, String path, + String operation) throws IOException, AccessControlException { + // Except for the special case where we test without an agent, + // the user for these tests is always TEST_OWNER. The check access operation + // requires suoid to check permissions for the user and will throw if the + // user does not have access and otherwise succeed. + String saoid = null; + String suoid = null; + if (path == null || !path.endsWith(NO_AGENT_PATH)) { + saoid = (operation == SASTokenProvider.CHECK_ACCESS_OPERATION) ? 
null : TEST_OWNER; + suoid = (operation == SASTokenProvider.CHECK_ACCESS_OPERATION) ? TEST_OWNER : null; + } + return generator.getDelegationSAS(accountName, fileSystem, path, operation, + saoid, suoid, CORRELATION_ID); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java index de841b0b29909..50ac20970f45f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java @@ -25,7 +25,7 @@ import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.utils.Base64; -import org.apache.hadoop.fs.azurebfs.utils.SASGenerator; +import org.apache.hadoop.fs.azurebfs.utils.ServiceSASGenerator; /** * A mock SAS token provider implementation @@ -33,7 +33,7 @@ public class MockSASTokenProvider implements SASTokenProvider { private byte[] accountKey; - private SASGenerator generator; + private ServiceSASGenerator generator; private boolean skipAuthorizationForTestSetup = false; // For testing we use a container SAS for all operations. 
@@ -49,7 +49,7 @@ public void initialize(Configuration configuration, String accountName) throws I } catch (Exception ex) { throw new IOException(ex); } - generator = new SASGenerator(accountKey); + generator = new ServiceSASGenerator(accountKey); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockWithPrefixSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockWithPrefixSASTokenProvider.java new file mode 100644 index 0000000000000..ed701c4669c02 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockWithPrefixSASTokenProvider.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.extensions; + +import java.io.IOException; + +public class MockWithPrefixSASTokenProvider extends MockSASTokenProvider { + + /** + * Function to return an already generated SAS Token with a '?' prefix + * @param accountName the name of the storage account. + * @param fileSystem the name of the fileSystem. + * @param path the file or directory path. + * @param operation the operation to be performed on the path. 
+ * @return + * @throws IOException + */ + @Override + public String getSASToken(String accountName, String fileSystem, String path, + String operation) throws IOException { + String token = super.getSASToken(accountName, fileSystem, path, operation); + return "?" + token; + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/StubAbfsTokenIdentifier.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/StubAbfsTokenIdentifier.java index 4271ba6dfdf56..b04517c8ac4df 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/StubAbfsTokenIdentifier.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/StubAbfsTokenIdentifier.java @@ -29,7 +29,7 @@ import java.util.Objects; import java.util.UUID; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.token.Token; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/oauth2/RetryTestTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/oauth2/RetryTestTokenProvider.java index 3566ebbaaaa2a..7427add29086c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/oauth2/RetryTestTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/oauth2/RetryTestTokenProvider.java @@ -30,12 +30,12 @@ */ public class RetryTestTokenProvider implements CustomTokenProviderAdaptee { - // Need to track first token fetch otherwise will get counted as a retry too. 
- private static boolean isThisFirstTokenFetch = true; - public static int reTryCount = 0; + private static final Logger LOG = LoggerFactory.getLogger( + RetryTestTokenProvider.class); - private static final Logger LOG = LoggerFactory - .getLogger(RetryTestTokenProvider.class); + // Need to track first token fetch otherwise will get counted as a retry too. + private boolean isThisFirstTokenFetch = true; + private int retryCount = 0; @Override public void initialize(Configuration configuration, String accountName) @@ -43,9 +43,13 @@ public void initialize(Configuration configuration, String accountName) } - public static void ResetStatusToFirstTokenFetch() { + /** + * Clear earlier retry details and reset RetryTestTokenProvider instance to + * state of first access token fetch call. + */ + public void resetStatusToFirstTokenFetch() { isThisFirstTokenFetch = true; - reTryCount = 0; + retryCount = 0; } @Override @@ -53,7 +57,7 @@ public String getAccessToken() throws IOException { if (isThisFirstTokenFetch) { isThisFirstTokenFetch = false; } else { - reTryCount++; + retryCount++; } LOG.debug("RetryTestTokenProvider: Throw an exception in fetching tokens"); @@ -64,4 +68,13 @@ public String getAccessToken() throws IOException { public Date getExpiryTime() { return new Date(); } + + public static RetryTestTokenProvider getCurrentRetryTestProviderInstance( + AccessTokenProvider customTokenProvider) { + return (RetryTestTokenProvider) ((CustomTokenProviderAdapter) customTokenProvider).getCustomTokenProviderAdaptee(); + } + + public int getRetryCount() { + return retryCount; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java new file mode 100644 index 0000000000000..c031e5daa6c44 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -0,0 
+1,586 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.net.HttpURLConnection; +import java.net.ProtocolException; +import java.net.URL; +import java.util.List; +import java.util.Random; +import java.util.regex.Pattern; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import 
org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; + +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APN_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CLIENT_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VENDOR; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_ARCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_NAME; +import 
static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SEMICOLON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLUSTER_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLUSTER_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_VALUE_UNKNOWN; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; + +/** + * Tests the User-Agent string built by AbfsClient and the append retry + * without the expect header after an append sent with it fails; also + * provides static helpers that build real and mocked AbfsClient + * instances for tests. + */ +public final class ITestAbfsClient extends AbstractAbfsIntegrationTest { + + private static final String ACCOUNT_NAME = "bogusAccountName.dfs.core.windows.net"; + private static final String FS_AZURE_USER_AGENT_PREFIX = "Partner Service"; + private static final String TEST_PATH = "/testfile"; + public static final int REDUCED_RETRY_COUNT = 2; + public static final int REDUCED_BACKOFF_INTERVAL = 100; + public static final int BUFFER_LENGTH = 5; + public static final int BUFFER_OFFSET = 0; + + /** Pattern that every generated User-Agent string is expected to match. */ + private final Pattern userAgentStringPattern; + + /** + * Builds the expected User-Agent pattern from the APN and client version + * constants and the java vendor, java version, OS name, OS version and + * OS arch system properties, followed by optional provider segments, + * cluster name and type, and an optional trailing prefix. + */ + public ITestAbfsClient() throws Exception { + StringBuilder regEx = new StringBuilder(); + regEx.append("^"); + regEx.append(APN_VERSION); + regEx.append(SINGLE_WHITE_SPACE); + regEx.append(CLIENT_VERSION); + regEx.append(SINGLE_WHITE_SPACE); + regEx.append("\\("); + regEx.append(System.getProperty(JAVA_VENDOR) + .replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)); + regEx.append(SINGLE_WHITE_SPACE); + regEx.append("JavaJRE"); + regEx.append(SINGLE_WHITE_SPACE); + regEx.append(System.getProperty(JAVA_VERSION)); + regEx.append(SEMICOLON); + regEx.append(SINGLE_WHITE_SPACE); + regEx.append(System.getProperty(OS_NAME) + .replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)); + regEx.append(SINGLE_WHITE_SPACE); + 
regEx.append(System.getProperty(OS_VERSION)); + regEx.append(FORWARD_SLASH); + regEx.append(System.getProperty(OS_ARCH)); + regEx.append(SEMICOLON); + regEx.append("([a-zA-Z].*; )?"); // Regex for sslProviderName + regEx.append("([a-zA-Z].*; )?"); // Regex for tokenProvider + regEx.append(" ?"); + regEx.append(".+"); // cluster name + regEx.append(FORWARD_SLASH); + regEx.append(".+"); // cluster type + regEx.append("\\)"); + regEx.append("( .*)?"); // Regex for user agent prefix + regEx.append("$"); + this.userAgentStringPattern = Pattern.compile(regEx.toString()); + } + + /** + * Builds a throwaway AbfsClient and returns the User-Agent string it generates. + * @param config configuration passed to the client and to initializeUserAgent + * @param includeSSLProvider when true the provider name of the default + * DelegatingSSLSocketFactory is passed, otherwise null is passed + * @return the value returned by client.initializeUserAgent + * @throws IOException declared by the calls made while building the client + */ + private String getUserAgentString(AbfsConfiguration config, + boolean includeSSLProvider) throws IOException { + AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().build(); + AbfsClient client = new AbfsClient(new URL("https://azure.com"), null, + config, (AccessTokenProvider) null, abfsClientContext); + String sslProviderName = null; + if (includeSSLProvider) { + sslProviderName = DelegatingSSLSocketFactory.getDefaultFactory() + .getProviderName(); + } + return client.initializeUserAgent(config, sslProviderName); + } + + /** + * Verifies the basic User-Agent contents for a configuration loaded from + * the test configuration file, with no SSL provider name supplied. + */ + @Test + public void verifybBasicInfo() throws Exception { + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + verifybBasicInfo(getUserAgentString(abfsConfiguration, false)); + } + + /** + * Asserts that the string matches the expected User-Agent pattern and + * contains the java vendor, java version, OS name, OS version and OS arch. + * @param userAgentStr User-Agent string to check + */ + private void verifybBasicInfo(String userAgentStr) { + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string [" + userAgentStr + + "] should be of the pattern: " + this.userAgentStringPattern.pattern()) + .matches(this.userAgentStringPattern) + .describedAs("User-Agent string should contain java vendor") + .contains(System.getProperty(JAVA_VENDOR) + .replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)) + .describedAs("User-Agent string should contain java version") + .contains(System.getProperty(JAVA_VERSION)) + 
.describedAs("User-Agent string should contain OS name") + .contains(System.getProperty(OS_NAME) + .replaceAll(SINGLE_WHITE_SPACE, EMPTY_STRING)) + .describedAs("User-Agent string should contain OS version") + .contains(System.getProperty(OS_VERSION)) + .describedAs("User-Agent string should contain OS arch") + .contains(System.getProperty(OS_ARCH)); + } + + /** + * Verifies that the configured user agent prefix appears in the User-Agent + * string, and no longer appears once the prefix key is unset. + */ + @Test + public void verifyUserAgentPrefix() + throws IOException, IllegalAccessException { + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, FS_AZURE_USER_AGENT_PREFIX); + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + String userAgentStr = getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should contain " + FS_AZURE_USER_AGENT_PREFIX) + .contains(FS_AZURE_USER_AGENT_PREFIX); + + configuration.unset(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY); + abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + userAgentStr = getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should not contain " + FS_AZURE_USER_AGENT_PREFIX) + .doesNotContain(FS_AZURE_USER_AGENT_PREFIX); + } + + /** + * Verifies, with the SSL channel mode set to Default_JSSE, that the SSL + * provider name is present in the User-Agent string when it is supplied + * and absent when it is not. + */ + @Test + public void verifyUserAgentWithoutSSLProvider() throws Exception { + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.set(ConfigurationKeys.FS_AZURE_SSL_CHANNEL_MODE_KEY, + DelegatingSSLSocketFactory.SSLChannelMode.Default_JSSE.name()); + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + String userAgentStr = getUserAgentString(abfsConfiguration, true); + + verifybBasicInfo(userAgentStr); + 
Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should contain sslProvider") + .contains(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName()); + + userAgentStr = getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should not contain sslProvider") + .doesNotContain(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName()); + } + + /** + * Verifies that a configured cluster name appears in the User-Agent string + * and that DEFAULT_VALUE_UNKNOWN appears instead once the key is unset. + */ + @Test + public void verifyUserAgentClusterName() throws Exception { + final String clusterName = "testClusterName"; + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.set(FS_AZURE_CLUSTER_NAME, clusterName); + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + String userAgentStr = getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should contain cluster name") + .contains(clusterName); + + configuration.unset(FS_AZURE_CLUSTER_NAME); + abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + userAgentStr = getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should not contain cluster name") + .doesNotContain(clusterName) + .describedAs("User-Agent string should contain UNKNOWN as cluster name config is absent") + .contains(DEFAULT_VALUE_UNKNOWN); + } + + /** + * Verifies that a configured cluster type appears in the User-Agent string + * and that DEFAULT_VALUE_UNKNOWN appears instead once the key is unset. + */ + @Test + public void verifyUserAgentClusterType() throws Exception { + final String clusterType = "testClusterType"; + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.set(FS_AZURE_CLUSTER_TYPE, clusterType); + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + String userAgentStr 
= getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should contain cluster type") + .contains(clusterType); + + configuration.unset(FS_AZURE_CLUSTER_TYPE); + abfsConfiguration = new AbfsConfiguration(configuration, + ACCOUNT_NAME); + userAgentStr = getUserAgentString(abfsConfiguration, false); + + verifybBasicInfo(userAgentStr); + Assertions.assertThat(userAgentStr) + .describedAs("User-Agent string should not contain cluster type") + .doesNotContain(clusterType) + .describedAs("User-Agent string should contain UNKNOWN as cluster type config is absent") + .contains(DEFAULT_VALUE_UNKNOWN); + } + + /** + * Creates a real AbfsClient for the base URL of the given client. + * SharedKeyCredentials are supplied only for SharedKey auth and the token + * provider only for OAuth auth; the other argument is passed as null. + * @param baseAbfsClientInstance client whose base URL is reused + * @param abfsConfig configuration supplying account name, auth type, + * credentials and max IO retries + * @return the newly constructed client + * @throws IOException declared by the configuration and client calls + */ + public static AbfsClient createTestClientFromCurrentContext( + AbfsClient baseAbfsClientInstance, + AbfsConfiguration abfsConfig) throws IOException { + AuthType currentAuthType = abfsConfig.getAuthType( + abfsConfig.getAccountName()); + + AbfsPerfTracker tracker = new AbfsPerfTracker("test", + abfsConfig.getAccountName(), + abfsConfig); + + AbfsClientContext abfsClientContext = + new AbfsClientContextBuilder().withAbfsPerfTracker(tracker) + .withExponentialRetryPolicy( + new ExponentialRetryPolicy(abfsConfig.getMaxIoRetries())) + .build(); + + // Create test AbfsClient + AbfsClient testClient = new AbfsClient( + baseAbfsClientInstance.getBaseUrl(), + (currentAuthType == AuthType.SharedKey + ? new SharedKeyCredentials( + abfsConfig.getAccountName().substring(0, + abfsConfig.getAccountName().indexOf(DOT)), + abfsConfig.getStorageAccountKey()) + : null), + abfsConfig, + (currentAuthType == AuthType.OAuth + ? 
abfsConfig.getTokenProvider() + : null), + abfsClientContext); + + return testClient; + } + + /** + * Creates a Mockito mock of AbfsClient that is usable for building requests. + * The calling test is skipped through Assume unless the auth type is + * SharedKey or OAuth. Selected getters are stubbed, selected request + * building methods call the real implementation, and the configuration, + * base URL, credentials and a fixed User-Agent are set on the mock + * through reflection. + * @param baseAbfsClientInstance client whose base URL is copied + * @param abfsConfig configuration the mock reports and derives credentials from + * @return the configured mock + * @throws Exception if stubbing or the reflective field updates fail + */ + public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, + AbfsConfiguration abfsConfig) throws Exception { + AuthType currentAuthType = abfsConfig.getAuthType( + abfsConfig.getAccountName()); + + org.junit.Assume.assumeTrue( + (currentAuthType == AuthType.SharedKey) + || (currentAuthType == AuthType.OAuth)); + + AbfsClient client = mock(AbfsClient.class); + AbfsPerfTracker tracker = new AbfsPerfTracker( + "test", + abfsConfig.getAccountName(), + abfsConfig); + + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.getAuthType()).thenReturn(currentAuthType); + when(client.getRetryPolicy()).thenReturn( + new ExponentialRetryPolicy(1)); + + when(client.createDefaultUriQueryBuilder()).thenCallRealMethod(); + when(client.createRequestUrl(any(), any())).thenCallRealMethod(); + when(client.getAccessToken()).thenCallRealMethod(); + when(client.getSharedKeyCredentials()).thenCallRealMethod(); + when(client.createDefaultHeaders()).thenCallRealMethod(); + when(client.getAbfsConfiguration()).thenReturn(abfsConfig); + when(client.getIntercept()).thenReturn( + AbfsThrottlingInterceptFactory.getInstance( + abfsConfig.getAccountName().substring(0, + abfsConfig.getAccountName().indexOf(DOT)), abfsConfig)); + // override abfsConfiguration + client = ITestAbfsClient.setAbfsClientField(client, "abfsConfiguration", + abfsConfig); + + // override baseurl + client = ITestAbfsClient.setAbfsClientField(client, "baseUrl", + baseAbfsClientInstance.getBaseUrl()); + + // override auth provider + if (currentAuthType == AuthType.SharedKey) { + client = ITestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials", + new SharedKeyCredentials( + abfsConfig.getAccountName().substring(0, + abfsConfig.getAccountName().indexOf(DOT)), + abfsConfig.getStorageAccountKey())); + } else { + client = ITestAbfsClient.setAbfsClientField(client, "tokenProvider", + 
abfsConfig.getTokenProvider()); + } + + // override user agent + String userAgent = "APN/1.0 Azure Blob FS/3.4.0-SNAPSHOT (PrivateBuild " + + "JavaJRE 1.8.0_252; Linux 5.3.0-59-generic/amd64; openssl-1.0; " + + "UNKNOWN/UNKNOWN) MSFT"; + client = ITestAbfsClient.setAbfsClientField(client, "userAgent", userAgent); + + return client; + } + + /** + * Sets a declared field of AbfsClient on the given instance through + * reflection, clearing the FINAL bit of the field first so that final + * fields can be written. + * NOTE(review): looking up the modifiers field of java.lang.reflect.Field + * is understood to be blocked on newer JDKs (12 and later) - confirm the + * JDK this test targets. + * @param client instance to modify + * @param fieldName name of the field declared on AbfsClient + * @param fieldObject value to store in the field + * @return the same client instance + * @throws Exception if the field is missing or cannot be written + */ + private static AbfsClient setAbfsClientField( + final AbfsClient client, + final String fieldName, + Object fieldObject) throws Exception { + + Field field = AbfsClient.class.getDeclaredField(fieldName); + field.setAccessible(true); + Field modifiersField = Field.class.getDeclaredField("modifiers"); + modifiersField.setAccessible(true); + modifiersField.setInt(field, + field.getModifiers() & ~java.lang.reflect.Modifier.FINAL); + field.set(client, fieldObject); + return client; + } + + /** + * Test helper method to access private createRequestUrl method. + * @param client test AbfsClient instance + * @param path path to generate Url + * @return return store path url + * @throws AzureBlobFileSystemException + */ + public static URL getTestUrl(AbfsClient client, String path) throws + AzureBlobFileSystemException { + final AbfsUriQueryBuilder abfsUriQueryBuilder + = client.createDefaultUriQueryBuilder(); + return client.createRequestUrl(path, abfsUriQueryBuilder.toString()); + } + + /** + * Test helper method to access private createDefaultHeaders method. + * @param client test AbfsClient instance + * @return List of AbfsHttpHeaders + */ + public static List getTestRequestHeaders(AbfsClient client) { + return client.createDefaultHeaders(); + } + + /** + * Test helper method to create an AbfsRestOperation instance. 
+ * @param type RestOpType + * @param client AbfsClient + * @param method HttpMethod + * @param url Test path url + * @param requestHeaders request headers + * @return instance of AbfsRestOperation + */ + public static AbfsRestOperation getRestOp(AbfsRestOperationType type, + AbfsClient client, + String method, + URL url, + List requestHeaders) { + return new AbfsRestOperation( + type, + client, + method, + url, + requestHeaders); + } + + /** + * Test helper method exposing the token provider of a client. + * @param client AbfsClient to query + * @return the value returned by client.getTokenProvider() + */ + public static AccessTokenProvider getAccessTokenProvider(AbfsClient client) { + return client.getTokenProvider(); + } + + /** + * Test helper method to get random bytes array. + * @param length The length of byte buffer. + * @return byte buffer. + */ + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Test to verify that client retries append request without + * expect header enabled if append with expect header enabled fails + * with 4xx kind of error. + * @throws Exception + */ + @Test + public void testExpectHundredContinue() throws Exception { + // Get the filesystem. + final AzureBlobFileSystem fs = getFileSystem(); + + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsClient abfsClient = fs.getAbfsStore().getClient(); + + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); + + // Update the configuration with reduced retry count and reduced backoff interval. + AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + abfsConfiguration, + REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL); + + // Gets the client. + AbfsClient testClient = Mockito.spy( + ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig)); + + // Create the append request params with expect header enabled initially. 
+ AppendRequestParameters appendRequestParameters + = new AppendRequestParameters( + BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH, + AppendRequestParameters.Mode.APPEND_MODE, false, null, true); + + byte[] buffer = getRandomBytesArray(BUFFER_LENGTH); + + // Create a test file to append the data to. + Path testPath = path(TEST_PATH); + fs.create(testPath); + String finalTestPath = testPath.toString() + .substring(testPath.toString().lastIndexOf("/")); + + // Creates a list of request headers. + final List requestHeaders + = ITestAbfsClient.getTestRequestHeaders(testClient); + requestHeaders.add( + new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (appendRequestParameters.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + + // Updates the query parameters. + final AbfsUriQueryBuilder abfsUriQueryBuilder + = testClient.createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, + Long.toString(appendRequestParameters.getPosition())); + + // Creates the url for the specified path. + URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); + + // Create a mock of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation. + AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.Append, + testClient, + HTTP_METHOD_PUT, + url, + requestHeaders, buffer, + appendRequestParameters.getoffset(), + appendRequestParameters.getLength(), null)); + + AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, + HTTP_METHOD_PUT, requestHeaders)); + + // Sets the expect request property if expect header is enabled. 
+ if (appendRequestParameters.isExpectHeaderEnabled()) { + Mockito.doReturn(HUNDRED_CONTINUE).when(abfsHttpOperation) + .getConnProperty(EXPECT); + } + + HttpURLConnection urlConnection = mock(HttpURLConnection.class); + Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); + Mockito.doReturn(url).when(urlConnection).getURL(); + Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); + + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); + + // Give user error code 404 when processResponse is called. + Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); + Mockito.doReturn(HTTP_NOT_FOUND).when(abfsHttpOperation).getConnResponseCode(); + Mockito.doReturn("Resource Not Found") + .when(abfsHttpOperation) + .getConnResponseMessage(); + + // Make the getOutputStream throw IOException to see it returns from the sendRequest correctly. + Mockito.doThrow(new ProtocolException("Server rejected Operation")) + .when(abfsHttpOperation) + .getConnOutputStream(); + + // Sets the httpOperation for the rest operation. + Mockito.doReturn(abfsHttpOperation) + .when(op) + .createHttpOperation(); + + // Mock the restOperation for the client. + Mockito.doReturn(op) + .when(testClient) + .getAbfsRestOperationForAppend(Mockito.any(), + Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), + Mockito.nullable(int.class), Mockito.nullable(int.class), + Mockito.any()); + + TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", + "abcde", FSOperationType.APPEND, + TracingHeaderFormat.ALL_ID_FORMAT, null)); + + // Check that expect header is enabled before the append call. 
+ Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled()) + .describedAs("The expect header is not true before the append call") + .isTrue(); + + intercept(AzureBlobFileSystemException.class, + () -> testClient.append(finalTestPath, buffer, appendRequestParameters, null, tracingContext)); + + // Verify that the request was not exponentially retried because of user error. + Assertions.assertThat(tracingContext.getRetryCount()) + .describedAs("The retry count is incorrect") + .isEqualTo(0); + + // Verify that the same request was retried with expect header disabled. + Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled()) + .describedAs("The expect header is not false") + .isFalse(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStream.java new file mode 100644 index 0000000000000..66f072501dc4d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStream.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.junit.Test; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.spy; + +/** + * Integration tests for reads through AbfsInputStream: chunked reads with + * the footer-read optimization off, and a read whose first remote read + * fails. Also holds protected helpers (file creation, seek checks, buffer + * assertions) that subclasses reuse. + */ +public class ITestAbfsInputStream extends AbstractAbfsIntegrationTest { + + protected static final int HUNDRED = 100; + + public ITestAbfsInputStream() throws Exception { + } + + /** + * Runs the chunked-read check on files of 2 to 7 MB with the footer-read + * optimization turned off, seeking to position HUNDRED first. + */ + @Test + public void testWithNoOptimization() throws Exception { + for (int i = 2; i <= 7; i++) { + int fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(false, false, fileSize); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + testWithNoOptimization(fs, testFilePath, HUNDRED, fileContent); + } + } + + /** + * Seeks to seekPos and reads the rest of the file in chunks of + * HUNDRED * HUNDRED bytes, checking the length and content of every read + * and lower bounds on the stream cursors. The stream is closed in a + * finally block. + * @param fs filesystem holding the file + * @param testFilePath file to read + * @param seekPos position to seek to before reading + * @param fileContent bytes that were written to the file + * @throws IOException if opening, seeking or reading fails + */ + protected void 
testWithNoOptimization(final FileSystem fs, + final Path testFilePath, final int seekPos, final byte[] fileContent) + throws IOException { + FSDataInputStream iStream = fs.open(testFilePath); + try { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + + iStream = new FSDataInputStream(abfsInputStream); + seek(iStream, seekPos); + long totalBytesRead = 0; + int length = HUNDRED * HUNDRED; + do { + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + totalBytesRead += bytesRead; + if ((totalBytesRead + seekPos) >= fileContent.length) { + length = (fileContent.length - seekPos) % length; + } + assertEquals(length, bytesRead); + assertContentReadCorrectly(fileContent, + (int) (seekPos + totalBytesRead - length), length, buffer, testFilePath); + + assertTrue(abfsInputStream.getFCursor() >= seekPos + totalBytesRead); + assertTrue(abfsInputStream.getFCursorAfterLastRead() >= seekPos + totalBytesRead); + assertTrue(abfsInputStream.getBCursor() >= totalBytesRead % abfsInputStream.getBufferSize()); + assertTrue(abfsInputStream.getLimit() >= totalBytesRead % abfsInputStream.getBufferSize()); + } while (totalBytesRead + seekPos < fileContent.length); + } finally { + iStream.close(); + } + } + + /** + * Runs the failing-first-read check on files of 2 to 7 MB with the + * footer-read optimization turned on, seeking to HUNDRED bytes before the + * end of the file and requesting a quarter of the file size. + */ + @Test + public void testExceptionInOptimization() throws Exception { + for (int i = 2; i <= 7; i++) { + int fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(true, true, fileSize); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + testExceptionInOptimization(fs, testFilePath, fileSize - HUNDRED, + fileSize / 4, fileContent); + } + } + + /** + * Makes the first readRemote call on a spied stream throw IOException and + * later calls run the real method, then seeks, reads once and checks the + * bytes read and the resulting cursor positions. + * @param fs filesystem holding the file + * @param testFilePath file to read + * @param seekPos position to seek to before reading + * @param length number of bytes requested from the read + * @param fileContent bytes that were written to the file + * @throws IOException if opening, seeking or reading fails + */ + private void testExceptionInOptimization(final FileSystem fs, + final Path testFilePath, + final int seekPos, final int length, final byte[] fileContent) + throws IOException { + + FSDataInputStream iStream = fs.open(testFilePath); + try { 
+ AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + abfsInputStream = spy(abfsInputStream); + doThrow(new IOException()) + .doCallRealMethod() + .when(abfsInputStream) + .readRemote(anyLong(), any(), anyInt(), anyInt(), + any(TracingContext.class)); + + iStream = new FSDataInputStream(abfsInputStream); + verifyBeforeSeek(abfsInputStream); + seek(iStream, seekPos); + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + long actualLength = length; + if (seekPos + length > fileContent.length) { + long delta = seekPos + length - fileContent.length; + actualLength = length - delta; + } + assertEquals(bytesRead, actualLength); + assertContentReadCorrectly(fileContent, seekPos, (int) actualLength, buffer, testFilePath); + assertEquals(fileContent.length, abfsInputStream.getFCursor()); + assertEquals(fileContent.length, abfsInputStream.getFCursorAfterLastRead()); + assertEquals(actualLength, abfsInputStream.getBCursor()); + assertTrue(abfsInputStream.getLimit() >= actualLength); + } finally { + iStream.close(); + } + } + + /** + * Returns the test filesystem after setting readSmallFilesCompletely on + * its configuration. + * @param readSmallFilesCompletely value to set + * @return the test filesystem + * @throws IOException declared by getFileSystem + */ + protected AzureBlobFileSystem getFileSystem(boolean readSmallFilesCompletely) + throws IOException { + final AzureBlobFileSystem fs = getFileSystem(); + getAbfsStore(fs).getAbfsConfiguration() + .setReadSmallFilesCompletely(readSmallFilesCompletely); + return fs; + } + + /** + * Returns the test filesystem after setting optimizeFooterRead on its + * configuration; readSmallFilesCompletely is only set when fileSize does + * not exceed the configured read buffer size. + * @param optimizeFooterRead value to set for the footer-read optimization + * @param readSmallFileCompletely value to set when the file fits in the read buffer + * @param fileSize size of the file the test is going to read + * @return the test filesystem + * @throws IOException declared by getFileSystem + */ + private AzureBlobFileSystem getFileSystem(boolean optimizeFooterRead, + boolean readSmallFileCompletely, int fileSize) throws IOException { + final AzureBlobFileSystem fs = getFileSystem(); + getAbfsStore(fs).getAbfsConfiguration() + .setOptimizeFooterRead(optimizeFooterRead); + if (fileSize <= getAbfsStore(fs).getAbfsConfiguration() + .getReadBufferSize()) { + getAbfsStore(fs).getAbfsConfiguration() + .setReadSmallFilesCompletely(readSmallFileCompletely); + } + return fs; + } + + /** + * Returns a new array of the given length filled with random bytes. + * @param length length of the array + * @return the random byte array + */ + protected byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Creates the named file under the test path and writes fileContent to it. + * @param fs filesystem to create the file on + * @param fileName name resolved through path() + * @param fileContent bytes to write + * @return path of the created file + * @throws IOException if creating or writing the file fails + */ + 
protected Path createFileWithContent(FileSystem fs, String fileName, + byte[] fileContent) throws IOException { + Path testFilePath = path(fileName); + try (FSDataOutputStream oStream = fs.create(testFilePath)) { + oStream.write(fileContent); + oStream.flush(); + } + return testFilePath; + } + + /** + * Reads the private abfsStore field of the given filesystem through + * reflection; fs is cast to AzureBlobFileSystem. + * @param fs filesystem to read the store from + * @return the store held by the filesystem + */ + protected AzureBlobFileSystemStore getAbfsStore(FileSystem fs) + throws NoSuchFieldException, IllegalAccessException { + AzureBlobFileSystem abfs = (AzureBlobFileSystem) fs; + Field abfsStoreField = AzureBlobFileSystem.class + .getDeclaredField("abfsStore"); + abfsStoreField.setAccessible(true); + return (AzureBlobFileSystemStore) abfsStoreField.get(abfs); + } + + /** + * Reads the private abfsCounters field of the given filesystem through + * reflection and returns the result of its toMap() call. + * NOTE(review): the return type is a raw Map here; the key and value + * types are not visible in this file - confirm against AbfsCounters. + * @param fs filesystem to read the counters from + * @return map produced by AbfsCounters.toMap() + */ + protected Map getInstrumentationMap(FileSystem fs) + throws NoSuchFieldException, IllegalAccessException { + AzureBlobFileSystem abfs = (AzureBlobFileSystem) fs; + Field abfsCountersField = AzureBlobFileSystem.class + .getDeclaredField("abfsCounters"); + abfsCountersField.setAccessible(true); + AbfsCounters abfsCounters = (AbfsCounters) abfsCountersField.get(abfs); + return abfsCounters.toMap(); + } + + /** + * Asserts that the first len bytes of contentRead equal the len bytes of + * actualFileContent starting at index from. + * @param actualFileContent bytes that were written to the file + * @param from offset into actualFileContent + * @param len number of bytes to compare + * @param contentRead buffer filled by the read + * @param testFilePath path included in the assertion message + */ + protected void assertContentReadCorrectly(byte[] actualFileContent, int from, + int len, byte[] contentRead, Path testFilePath) { + for (int i = 0; i < len; i++) { + assertEquals("The test file path is " + testFilePath, contentRead[i], actualFileContent[i + from]); + } + } + + /** + * Asserts that the compared prefix of the two buffers is not fully equal; + * delegates to assertBufferEquality with assertEqual set to false. + */ + protected void assertBuffersAreNotEqual(byte[] actualContent, + byte[] contentRead, AbfsConfiguration conf, Path testFilePath) { + assertBufferEquality(actualContent, contentRead, conf, false, testFilePath); + } + + /** + * Asserts that the compared prefix of the two buffers is fully equal; + * delegates to assertBufferEquality with assertEqual set to true. + */ + protected void assertBuffersAreEqual(byte[] actualContent, byte[] contentRead, + AbfsConfiguration conf, Path testFilePath) { + assertBufferEquality(actualContent, contentRead, conf, true, testFilePath); + } + + /** + * Counts matching bytes over the first n positions, where n is the smaller + * of the actual content length and the configured read buffer size, and + * asserts that the count equals n (assertEqual) or differs from n. + * @param actualContent bytes that were written to the file + * @param contentRead buffer to compare against + * @param conf configuration supplying the read buffer size + * @param assertEqual whether the prefixes are expected to be fully equal + * @param testFilePath path included in the assertion message + */ + private void assertBufferEquality(byte[] actualContent, byte[] contentRead, + AbfsConfiguration conf, boolean assertEqual, Path testFilePath) { + int bufferSize = conf.getReadBufferSize(); + int actualContentSize = 
actualContent.length; + int n = (actualContentSize < bufferSize) ? actualContentSize : bufferSize; + int matches = 0; + for (int i = 0; i < n; i++) { + if (actualContent[i] == contentRead[i]) { + matches++; + } + } + if (assertEqual) { + assertEquals("The test file path is " + testFilePath, n, matches); + } else { + assertNotEquals("The test file path is " + testFilePath, n, matches); + } + } + + /** + * Seeks the stream to seekPos, running verifyBeforeSeek before the seek + * and verifyAfterSeek after it; the wrapped stream is cast to + * AbfsInputStream. + * @param iStream stream to seek + * @param seekPos position to seek to + * @throws IOException if the seek fails + */ + protected void seek(FSDataInputStream iStream, long seekPos) + throws IOException { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream.getWrappedStream(); + verifyBeforeSeek(abfsInputStream); + iStream.seek(seekPos); + verifyAfterSeek(abfsInputStream, seekPos); + } + + /** + * Asserts the cursor state expected before any seek or read: fCursor 0, + * fCursorAfterLastRead -1, limit 0 and bCursor 0. + */ + private void verifyBeforeSeek(AbfsInputStream abfsInputStream){ + assertEquals(0, abfsInputStream.getFCursor()); + assertEquals(-1, abfsInputStream.getFCursorAfterLastRead()); + assertEquals(0, abfsInputStream.getLimit()); + assertEquals(0, abfsInputStream.getBCursor()); + } + + /** + * Asserts the state expected right after a seek: getPos equals seekPos, + * fCursorAfterLastRead is -1, and limit and bCursor are 0. + */ + private void verifyAfterSeek(AbfsInputStream abfsInputStream, long seekPos) throws IOException { + assertEquals(seekPos, abfsInputStream.getPos()); + assertEquals(-1, abfsInputStream.getFCursorAfterLastRead()); + assertEquals(0, abfsInputStream.getLimit()); + assertEquals(0, abfsInputStream.getBCursor()); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStreamReadFooter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStreamReadFooter.java new file mode 100644 index 0000000000000..cb3eaffe029c9 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStreamReadFooter.java @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Map; + +import org.junit.Test; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static java.lang.Math.max; +import static java.lang.Math.min; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; + +public class ITestAbfsInputStreamReadFooter extends ITestAbfsInputStream { + + private static final int TEN = 10; + private static final int TWENTY = 20; + + public ITestAbfsInputStreamReadFooter() throws Exception { + } + + @Test + public void testOnlyOneServerCallIsMadeWhenTheConfIsTrue() throws Exception { + testNumBackendCalls(true); + } + + @Test + public void testMultipleServerCallsAreMadeWhenTheConfIsFalse() + throws Exception { + testNumBackendCalls(false); + } + + private void testNumBackendCalls(boolean optimizeFooterRead) + throws Exception { + for (int i = 1; i <= 4; i++) { + int fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = 
getFileSystem(optimizeFooterRead, + fileSize); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + int length = AbfsInputStream.FOOTER_SIZE; + try (FSDataInputStream iStream = fs.open(testFilePath)) { + byte[] buffer = new byte[length]; + + Map metricMap = getInstrumentationMap(fs); + long requestsMadeBeforeTest = metricMap + .get(CONNECTIONS_MADE.getStatName()); + + iStream.seek(fileSize - 8); + iStream.read(buffer, 0, length); + + iStream.seek(fileSize - (TEN * ONE_KB)); + iStream.read(buffer, 0, length); + + iStream.seek(fileSize - (TWENTY * ONE_KB)); + iStream.read(buffer, 0, length); + + metricMap = getInstrumentationMap(fs); + long requestsMadeAfterTest = metricMap + .get(CONNECTIONS_MADE.getStatName()); + + if (optimizeFooterRead) { + assertEquals(1, requestsMadeAfterTest - requestsMadeBeforeTest); + } else { + assertEquals(3, requestsMadeAfterTest - requestsMadeBeforeTest); + } + } + } + } + + @Test + public void testSeekToBeginAndReadWithConfTrue() throws Exception { + testSeekAndReadWithConf(true, SeekTo.BEGIN); + } + + @Test + public void testSeekToBeginAndReadWithConfFalse() throws Exception { + testSeekAndReadWithConf(false, SeekTo.BEGIN); + } + + @Test + public void testSeekToBeforeFooterAndReadWithConfTrue() throws Exception { + testSeekAndReadWithConf(true, SeekTo.BEFORE_FOOTER_START); + } + + @Test + public void testSeekToBeforeFooterAndReadWithConfFalse() throws Exception { + testSeekAndReadWithConf(false, SeekTo.BEFORE_FOOTER_START); + } + + @Test + public void testSeekToFooterAndReadWithConfTrue() throws Exception { + testSeekAndReadWithConf(true, SeekTo.AT_FOOTER_START); + } + + @Test + public void testSeekToFooterAndReadWithConfFalse() throws Exception { + testSeekAndReadWithConf(false, SeekTo.AT_FOOTER_START); + } + + @Test + public void testSeekToAfterFooterAndReadWithConfTrue() throws Exception { + 
testSeekAndReadWithConf(true, SeekTo.AFTER_FOOTER_START); + } + + @Test + public void testSeekToToAfterFooterAndReadWithConfFalse() throws Exception { + testSeekAndReadWithConf(false, SeekTo.AFTER_FOOTER_START); + } + + @Test + public void testSeekToEndAndReadWithConfTrue() throws Exception { + testSeekAndReadWithConf(true, SeekTo.END); + } + + @Test + public void testSeekToEndAndReadWithConfFalse() throws Exception { + testSeekAndReadWithConf(false, SeekTo.END); + } + + private void testSeekAndReadWithConf(boolean optimizeFooterRead, + SeekTo seekTo) throws Exception { + for (int i = 2; i <= 6; i++) { + int fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(optimizeFooterRead, + fileSize); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + seekReadAndTest(fs, testFilePath, seekPos(seekTo, fileSize), HUNDRED, + fileContent); + } + } + + private int seekPos(SeekTo seekTo, int fileSize) { + if (seekTo == SeekTo.BEGIN) { + return 0; + } + if (seekTo == SeekTo.BEFORE_FOOTER_START) { + return fileSize - AbfsInputStream.FOOTER_SIZE - 1; + } + if (seekTo == SeekTo.AT_FOOTER_START) { + return fileSize - AbfsInputStream.FOOTER_SIZE; + } + if (seekTo == SeekTo.END) { + return fileSize - 1; + } + //seekTo == SeekTo.AFTER_FOOTER_START + return fileSize - AbfsInputStream.FOOTER_SIZE + 1; + } + + private void seekReadAndTest(final FileSystem fs, final Path testFilePath, + final int seekPos, final int length, final byte[] fileContent) + throws IOException, NoSuchFieldException, IllegalAccessException { + AbfsConfiguration conf = getAbfsStore(fs).getAbfsConfiguration(); + long actualContentLength = fileContent.length; + try (FSDataInputStream iStream = fs.open(testFilePath)) { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + long bufferSize = abfsInputStream.getBufferSize(); + seek(iStream, 
seekPos); + byte[] buffer = new byte[length]; + long bytesRead = iStream.read(buffer, 0, length); + + long footerStart = max(0, + actualContentLength - AbfsInputStream.FOOTER_SIZE); + boolean optimizationOn = + conf.optimizeFooterRead() && seekPos >= footerStart; + + long actualLength = length; + if (seekPos + length > actualContentLength) { + long delta = seekPos + length - actualContentLength; + actualLength = length - delta; + } + long expectedLimit; + long expectedBCurson; + long expectedFCursor; + if (optimizationOn) { + if (actualContentLength <= bufferSize) { + expectedLimit = actualContentLength; + expectedBCurson = seekPos + actualLength; + } else { + expectedLimit = bufferSize; + long lastBlockStart = max(0, actualContentLength - bufferSize); + expectedBCurson = seekPos - lastBlockStart + actualLength; + } + expectedFCursor = actualContentLength; + } else { + if (seekPos + bufferSize < actualContentLength) { + expectedLimit = bufferSize; + expectedFCursor = bufferSize; + } else { + expectedLimit = actualContentLength - seekPos; + expectedFCursor = min(seekPos + bufferSize, actualContentLength); + } + expectedBCurson = actualLength; + } + + assertEquals(expectedFCursor, abfsInputStream.getFCursor()); + assertEquals(expectedFCursor, abfsInputStream.getFCursorAfterLastRead()); + assertEquals(expectedLimit, abfsInputStream.getLimit()); + assertEquals(expectedBCurson, abfsInputStream.getBCursor()); + assertEquals(actualLength, bytesRead); + // Verify user-content read + assertContentReadCorrectly(fileContent, seekPos, (int) actualLength, buffer, testFilePath); + // Verify data read to AbfsInputStream buffer + int from = seekPos; + if (optimizationOn) { + from = (int) max(0, actualContentLength - bufferSize); + } + assertContentReadCorrectly(fileContent, from, (int) abfsInputStream.getLimit(), + abfsInputStream.getBuffer(), testFilePath); + } + } + + @Test + public void testPartialReadWithNoData() + throws Exception { + for (int i = 2; i <= 6; i++) { + int 
fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(true, fileSize); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + testPartialReadWithNoData(fs, testFilePath, + fileSize - AbfsInputStream.FOOTER_SIZE, AbfsInputStream.FOOTER_SIZE, + fileContent); + } + } + + private void testPartialReadWithNoData(final FileSystem fs, + final Path testFilePath, final int seekPos, final int length, + final byte[] fileContent) + throws IOException, NoSuchFieldException, IllegalAccessException { + FSDataInputStream iStream = fs.open(testFilePath); + try { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + abfsInputStream = spy(abfsInputStream); + doReturn(10).doReturn(10).doCallRealMethod().when(abfsInputStream) + .readRemote(anyLong(), any(), anyInt(), anyInt(), + any(TracingContext.class)); + + iStream = new FSDataInputStream(abfsInputStream); + seek(iStream, seekPos); + + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + assertEquals(length, bytesRead); + assertContentReadCorrectly(fileContent, seekPos, length, buffer, testFilePath); + assertEquals(fileContent.length, abfsInputStream.getFCursor()); + assertEquals(length, abfsInputStream.getBCursor()); + assertTrue(abfsInputStream.getLimit() >= length); + } finally { + iStream.close(); + } + } + + @Test + public void testPartialReadWithSomeDat() + throws Exception { + for (int i = 3; i <= 6; i++) { + int fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(true, fileSize); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + testPartialReadWithSomeDat(fs, testFilePath, + fileSize - AbfsInputStream.FOOTER_SIZE, AbfsInputStream.FOOTER_SIZE, + fileContent); + } + } + + 
private void testPartialReadWithSomeDat(final FileSystem fs, + final Path testFilePath, final int seekPos, final int length, + final byte[] fileContent) + throws IOException, NoSuchFieldException, IllegalAccessException { + FSDataInputStream iStream = fs.open(testFilePath); + try { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + abfsInputStream = spy(abfsInputStream); + // first readRemote, will return first 10 bytes + // second readRemote returns data till the last 2 bytes + int someDataLength = 2; + int secondReturnSize = + min(fileContent.length, abfsInputStream.getBufferSize()) - 10 + - someDataLength; + doReturn(10).doReturn(secondReturnSize).doCallRealMethod() + .when(abfsInputStream) + .readRemote(anyLong(), any(), anyInt(), anyInt(), + any(TracingContext.class)); + + iStream = new FSDataInputStream(abfsInputStream); + seek(iStream, seekPos); + + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + assertEquals(length, bytesRead); + assertEquals(fileContent.length, abfsInputStream.getFCursor()); + // someDataLength(2), because in the do-while loop in read, the 2nd loop + // will go to readoneblock and that resets the bCursor to 0 as + // bCursor == limit finally when the 2 bytes are read bCursor and limit + // will be at someDataLength(2) + assertEquals(someDataLength, abfsInputStream.getBCursor()); + assertEquals(someDataLength, abfsInputStream.getLimit()); + } finally { + iStream.close(); + } + } + + private AzureBlobFileSystem getFileSystem(boolean optimizeFooterRead, + int fileSize) throws IOException { + final AzureBlobFileSystem fs = getFileSystem(); + getAbfsStore(fs).getAbfsConfiguration() + .setOptimizeFooterRead(optimizeFooterRead); + if (fileSize <= getAbfsStore(fs).getAbfsConfiguration() + .getReadBufferSize()) { + getAbfsStore(fs).getAbfsConfiguration() + .setReadSmallFilesCompletely(false); + } + return fs; + } + + private enum SeekTo { + BEGIN, AT_FOOTER_START, 
BEFORE_FOOTER_START, AFTER_FOOTER_START, END + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStreamSmallFileReads.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStreamSmallFileReads.java new file mode 100644 index 0000000000000..baa664d6e7fee --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsInputStreamSmallFileReads.java @@ -0,0 +1,329 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Map; + +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; + +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; + +public class ITestAbfsInputStreamSmallFileReads extends ITestAbfsInputStream { + + public ITestAbfsInputStreamSmallFileReads() throws Exception { + } + + @Test + public void testOnlyOneServerCallIsMadeWhenTheConfIsTrue() throws Exception { + testNumBackendCalls(true); + } + + @Test + public void testMultipleServerCallsAreMadeWhenTheConfIsFalse() + throws Exception { + testNumBackendCalls(false); + } + + private void testNumBackendCalls(boolean readSmallFilesCompletely) + throws Exception { + final AzureBlobFileSystem fs = getFileSystem(readSmallFilesCompletely); + for (int i = 1; i <= 4; i++) { + String fileName = methodName.getMethodName() + i; + int fileSize = i * ONE_MB; + byte[] fileContent = 
getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + int length = ONE_KB; + try (FSDataInputStream iStream = fs.open(testFilePath)) { + byte[] buffer = new byte[length]; + + Map metricMap = getInstrumentationMap(fs); + long requestsMadeBeforeTest = metricMap + .get(CONNECTIONS_MADE.getStatName()); + + iStream.seek(seekPos(SeekTo.END, fileSize, length)); + iStream.read(buffer, 0, length); + + iStream.seek(seekPos(SeekTo.MIDDLE, fileSize, length)); + iStream.read(buffer, 0, length); + + iStream.seek(seekPos(SeekTo.BEGIN, fileSize, length)); + iStream.read(buffer, 0, length); + + metricMap = getInstrumentationMap(fs); + long requestsMadeAfterTest = metricMap + .get(CONNECTIONS_MADE.getStatName()); + + if (readSmallFilesCompletely) { + assertEquals(1, requestsMadeAfterTest - requestsMadeBeforeTest); + } else { + assertEquals(3, requestsMadeAfterTest - requestsMadeBeforeTest); + } + } + } + } + + @Test + public void testSeekToBeginingAndReadSmallFileWithConfTrue() + throws Exception { + testSeekAndReadWithConf(SeekTo.BEGIN, 2, 4, true); + } + + @Test + public void testSeekToBeginingAndReadSmallFileWithConfFalse() + throws Exception { + testSeekAndReadWithConf(SeekTo.BEGIN, 2, 4, false); + } + + @Test + public void testSeekToBeginingAndReadBigFileWithConfTrue() throws Exception { + testSeekAndReadWithConf(SeekTo.BEGIN, 5, 6, true); + } + + @Test + public void testSeekToBeginingAndReadBigFileWithConfFalse() throws Exception { + testSeekAndReadWithConf(SeekTo.BEGIN, 5, 6, false); + } + + @Test + public void testSeekToEndAndReadSmallFileWithConfTrue() throws Exception { + testSeekAndReadWithConf(SeekTo.END, 2, 4, true); + } + + @Test + public void testSeekToEndAndReadSmallFileWithConfFalse() throws Exception { + testSeekAndReadWithConf(SeekTo.END, 2, 4, false); + } + + @Test + public void testSeekToEndAndReadBigFileWithConfTrue() throws Exception { + testSeekAndReadWithConf(SeekTo.END, 5, 6, true); + } + + @Test + public 
void testSeekToEndAndReaBigFiledWithConfFalse() throws Exception { + testSeekAndReadWithConf(SeekTo.END, 5, 6, false); + } + + @Test + public void testSeekToMiddleAndReadSmallFileWithConfTrue() throws Exception { + testSeekAndReadWithConf(SeekTo.MIDDLE, 2, 4, true); + } + + @Test + public void testSeekToMiddleAndReadSmallFileWithConfFalse() throws Exception { + testSeekAndReadWithConf(SeekTo.MIDDLE, 2, 4, false); + } + + @Test + public void testSeekToMiddleAndReaBigFileWithConfTrue() throws Exception { + testSeekAndReadWithConf(SeekTo.MIDDLE, 5, 6, true); + } + + @Test + public void testSeekToMiddleAndReadBigFileWithConfFalse() throws Exception { + testSeekAndReadWithConf(SeekTo.MIDDLE, 5, 6, false); + } + + private void testSeekAndReadWithConf(SeekTo seekTo, int startFileSizeInMB, + int endFileSizeInMB, boolean readSmallFilesCompletely) throws Exception { + final AzureBlobFileSystem fs = getFileSystem(readSmallFilesCompletely); + for (int i = startFileSizeInMB; i <= endFileSizeInMB; i++) { + String fileName = methodName.getMethodName() + i; + int fileSize = i * ONE_MB; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + int length = ONE_KB; + int seekPos = seekPos(seekTo, fileSize, length); + seekReadAndTest(fs, testFilePath, seekPos, length, fileContent); + } + } + + private int seekPos(SeekTo seekTo, int fileSize, int length) { + if (seekTo == SeekTo.BEGIN) { + return 0; + } + if (seekTo == SeekTo.END) { + return fileSize - length; + } + return fileSize / 2; + } + + private void seekReadAndTest(FileSystem fs, Path testFilePath, int seekPos, + int length, byte[] fileContent) + throws IOException, NoSuchFieldException, IllegalAccessException { + AbfsConfiguration conf = getAbfsStore(fs).getAbfsConfiguration(); + try (FSDataInputStream iStream = fs.open(testFilePath)) { + seek(iStream, seekPos); + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + 
assertEquals(bytesRead, length); + assertContentReadCorrectly(fileContent, seekPos, length, buffer, testFilePath); + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + + final int readBufferSize = conf.getReadBufferSize(); + final int fileContentLength = fileContent.length; + final boolean smallFile = fileContentLength <= readBufferSize; + int expectedLimit, expectedFCursor; + int expectedBCursor; + if (conf.readSmallFilesCompletely() && smallFile) { + assertBuffersAreEqual(fileContent, abfsInputStream.getBuffer(), conf, testFilePath); + expectedFCursor = fileContentLength; + expectedLimit = fileContentLength; + expectedBCursor = seekPos + length; + } else { + if ((seekPos == 0)) { + assertBuffersAreEqual(fileContent, abfsInputStream.getBuffer(), conf, testFilePath); + } else { + assertBuffersAreNotEqual(fileContent, abfsInputStream.getBuffer(), + conf, testFilePath); + } + expectedBCursor = length; + expectedFCursor = (fileContentLength < (seekPos + readBufferSize)) + ? fileContentLength + : (seekPos + readBufferSize); + expectedLimit = (fileContentLength < (seekPos + readBufferSize)) + ? 
(fileContentLength - seekPos) + : readBufferSize; + } + assertEquals(expectedFCursor, abfsInputStream.getFCursor()); + assertEquals(expectedFCursor, abfsInputStream.getFCursorAfterLastRead()); + assertEquals(expectedBCursor, abfsInputStream.getBCursor()); + assertEquals(expectedLimit, abfsInputStream.getLimit()); + } + } + + @Test + public void testPartialReadWithNoData() throws Exception { + for (int i = 2; i <= 4; i++) { + int fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(true); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + partialReadWithNoData(fs, testFilePath, fileSize / 2, fileSize / 4, + fileContent); + } + } + + private void partialReadWithNoData(final FileSystem fs, + final Path testFilePath, + final int seekPos, final int length, final byte[] fileContent) + throws IOException { + + FSDataInputStream iStream = fs.open(testFilePath); + try { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + abfsInputStream = spy(abfsInputStream); + doReturn(10) + .doReturn(10) + .doCallRealMethod() + .when(abfsInputStream) + .readRemote(anyLong(), any(), anyInt(), anyInt(), + any(TracingContext.class)); + + iStream = new FSDataInputStream(abfsInputStream); + seek(iStream, seekPos); + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + assertEquals(bytesRead, length); + assertContentReadCorrectly(fileContent, seekPos, length, buffer, testFilePath); + assertEquals(fileContent.length, abfsInputStream.getFCursor()); + assertEquals(fileContent.length, + abfsInputStream.getFCursorAfterLastRead()); + assertEquals(length, abfsInputStream.getBCursor()); + assertTrue(abfsInputStream.getLimit() >= length); + } finally { + iStream.close(); + } + } + + @Test + public void testPartialReadWithSomeData() throws Exception { + for (int i = 2; i <= 4; i++) { + int 
fileSize = i * ONE_MB; + final AzureBlobFileSystem fs = getFileSystem(true); + String fileName = methodName.getMethodName() + i; + byte[] fileContent = getRandomBytesArray(fileSize); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + partialReadWithSomeData(fs, testFilePath, fileSize / 2, + fileSize / 4, fileContent); + } + } + + private void partialReadWithSomeData(final FileSystem fs, + final Path testFilePath, + final int seekPos, final int length, final byte[] fileContent) + throws IOException, NoSuchFieldException, IllegalAccessException { + FSDataInputStream iStream = fs.open(testFilePath); + try { + AbfsInputStream abfsInputStream = (AbfsInputStream) iStream + .getWrappedStream(); + abfsInputStream = spy(abfsInputStream); + // first readRemote, will return first 10 bytes + // second readRemote, seekPos - someDataLength(10) will reach the + // seekPos as 10 bytes are already read in the first call. Plus + // someDataLength(10) + int someDataLength = 10; + int secondReturnSize = seekPos - 10 + someDataLength; + doReturn(10) + .doReturn(secondReturnSize) + .doCallRealMethod() + .when(abfsInputStream) + .readRemote(anyLong(), any(), anyInt(), anyInt(), + any(TracingContext.class)); + + iStream = new FSDataInputStream(abfsInputStream); + seek(iStream, seekPos); + + byte[] buffer = new byte[length]; + int bytesRead = iStream.read(buffer, 0, length); + assertEquals(length, bytesRead); + assertTrue(abfsInputStream.getFCursor() > seekPos + length); + assertTrue(abfsInputStream.getFCursorAfterLastRead() > seekPos + length); + // Optimized read was no complete but it got some user requested data + // from server. 
So obviously the buffer will contain data more than + // seekPos + len + assertEquals(length - someDataLength, abfsInputStream.getBCursor()); + assertTrue(abfsInputStream.getLimit() > length - someDataLength); + } finally { + iStream.close(); + } + } + + private enum SeekTo {BEGIN, MIDDLE, END} + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java new file mode 100644 index 0000000000000..431c456ae3daa --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; + +/** + * Test create operation. + */ +public class ITestAbfsOutputStream extends AbstractAbfsIntegrationTest { + private static final String TEST_FILE_PATH = "testfile"; + + public ITestAbfsOutputStream() throws Exception { + super(); + } + + @Test + public void testMaxRequestsAndQueueCapacityDefaults() throws Exception { + Configuration conf = getRawConfiguration(); + final AzureBlobFileSystem fs = getFileSystem(conf); + try (FSDataOutputStream out = fs.create(path(TEST_FILE_PATH))) { + AbfsOutputStream stream = (AbfsOutputStream) out.getWrappedStream(); + + int maxConcurrentRequests + = getConfiguration().getWriteMaxConcurrentRequestCount(); + if (stream.isAppendBlobStream()) { + maxConcurrentRequests = 1; + } + + Assertions.assertThat(stream.getMaxConcurrentRequestCount()).describedAs( + "maxConcurrentRequests should be " + maxConcurrentRequests) + .isEqualTo(maxConcurrentRequests); + Assertions.assertThat(stream.getMaxRequestsThatCanBeQueued()).describedAs( + "maxRequestsToQueue should be " + getConfiguration() + .getMaxWriteRequestsToQueue()) + .isEqualTo(getConfiguration().getMaxWriteRequestsToQueue()); + } + } + + @Test + public void testMaxRequestsAndQueueCapacity() throws Exception { + Configuration conf = 
getRawConfiguration(); + int maxConcurrentRequests = 6; + int maxRequestsToQueue = 10; + conf.set(ConfigurationKeys.AZURE_WRITE_MAX_CONCURRENT_REQUESTS, + "" + maxConcurrentRequests); + conf.set(ConfigurationKeys.AZURE_WRITE_MAX_REQUESTS_TO_QUEUE, + "" + maxRequestsToQueue); + final AzureBlobFileSystem fs = getFileSystem(conf); + try (FSDataOutputStream out = fs.create(path(TEST_FILE_PATH))) { + AbfsOutputStream stream = (AbfsOutputStream) out.getWrappedStream(); + + if (stream.isAppendBlobStream()) { + maxConcurrentRequests = 1; + } + + Assertions.assertThat(stream.getMaxConcurrentRequestCount()).describedAs( + "maxConcurrentRequests should be " + maxConcurrentRequests).isEqualTo(maxConcurrentRequests); + Assertions.assertThat(stream.getMaxRequestsThatCanBeQueued()).describedAs("maxRequestsToQueue should be " + maxRequestsToQueue) + .isEqualTo(maxRequestsToQueue); + } + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPositionedRead.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPositionedRead.java new file mode 100644 index 0000000000000..25f33db1cae9e --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPositionedRead.java @@ -0,0 +1,233 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Arrays; +import java.util.concurrent.ExecutionException; + +import org.junit.Rule; +import org.junit.rules.TestName; +import org.junit.Test; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FutureDataInputStreamBuilder; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.StreamStatisticNames; +import org.assertj.core.api.Assertions; + +public class ITestAbfsPositionedRead extends AbstractAbfsIntegrationTest { + + private static final int TEST_FILE_DATA_SIZE = 100; + + @Rule + public TestName methodName = new TestName(); + + public ITestAbfsPositionedRead() throws Exception { + } + + @Test + public void testPositionedRead() throws IOException { + describe("Testing positioned reads in AbfsInputStream"); + Path dest = path(methodName.getMethodName()); + + byte[] data = ContractTestUtils.dataset(TEST_FILE_DATA_SIZE, 'a', 'z'); + ContractTestUtils.writeDataset(getFileSystem(), dest, data, data.length, + TEST_FILE_DATA_SIZE, true); + int bytesToRead = 10; + try (FSDataInputStream inputStream = getFileSystem().open(dest)) { + assertTrue( + "unexpected stream type " + + inputStream.getWrappedStream().getClass().getSimpleName(), + inputStream.getWrappedStream() 
instanceof AbfsInputStream); + byte[] readBuffer = new byte[bytesToRead]; + int readPos = 0; + Assertions + .assertThat(inputStream.read(readPos, readBuffer, 0, bytesToRead)) + .describedAs( + "AbfsInputStream pread did not read the correct number of bytes") + .isEqualTo(bytesToRead); + Assertions.assertThat(readBuffer) + .describedAs("AbfsInputStream pread did not read correct data") + .containsExactly( + Arrays.copyOfRange(data, readPos, readPos + bytesToRead)); + // Read only 10 bytes from offset 0. But by default it will do the seek + // and read where the entire 100 bytes get read into the + // AbfsInputStream buffer. + Assertions + .assertThat(Arrays.copyOfRange( + ((AbfsInputStream) inputStream.getWrappedStream()).getBuffer(), 0, + TEST_FILE_DATA_SIZE)) + .describedAs( + "AbfsInputStream pread did not read more data into its buffer") + .containsExactly(data); + // Check statistics + assertStatistics(inputStream.getIOStatistics(), bytesToRead, 1, 1, + TEST_FILE_DATA_SIZE); + + readPos = 50; + Assertions + .assertThat(inputStream.read(readPos, readBuffer, 0, bytesToRead)) + .describedAs( + "AbfsInputStream pread did not read the correct number of bytes") + .isEqualTo(bytesToRead); + Assertions.assertThat(readBuffer) + .describedAs("AbfsInputStream pread did not read correct data") + .containsExactly( + Arrays.copyOfRange(data, readPos, readPos + bytesToRead)); + // Check statistics + assertStatistics(inputStream.getIOStatistics(), 2 * bytesToRead, 2, 1, + TEST_FILE_DATA_SIZE); + // Did positioned read from pos 0 and then 50 but the stream pos should + // remain at 0. 
+ Assertions.assertThat(inputStream.getPos()) + .describedAs("AbfsInputStream positioned reads moved stream position") + .isEqualTo(0); + } + } + + private void assertStatistics(IOStatistics ioStatistics, + long expectedBytesRead, long expectedReadOps, long expectedRemoteReadOps, + long expectedRemoteReadBytes) { + Assertions + .assertThat(ioStatistics.counters() + .get(StreamStatisticNames.STREAM_READ_BYTES).longValue()) + .describedAs("Mismatch in bytesRead statistics") + .isEqualTo(expectedBytesRead); + Assertions + .assertThat(ioStatistics.counters() + .get(StreamStatisticNames.STREAM_READ_OPERATIONS).longValue()) + .describedAs("Mismatch in readOps statistics") + .isEqualTo(expectedReadOps); + Assertions + .assertThat(ioStatistics.counters() + .get(StreamStatisticNames.REMOTE_READ_OP).longValue()) + .describedAs("Mismatch in remoteReadOps statistics") + .isEqualTo(expectedRemoteReadOps); + Assertions + .assertThat(ioStatistics.counters() + .get(StreamStatisticNames.REMOTE_BYTES_READ).longValue()) + .describedAs("Mismatch in remoteReadBytes statistics") + .isEqualTo(expectedRemoteReadBytes); + } + + @Test + public void testPositionedReadWithBufferedReadDisabled() throws IOException { + describe("Testing positioned reads in AbfsInputStream with BufferedReadDisabled"); + Path dest = path(methodName.getMethodName()); + byte[] data = ContractTestUtils.dataset(TEST_FILE_DATA_SIZE, 'a', 'z'); + ContractTestUtils.writeDataset(getFileSystem(), dest, data, data.length, + TEST_FILE_DATA_SIZE, true); + FutureDataInputStreamBuilder builder = getFileSystem().openFile(dest); + builder.opt(ConfigurationKeys.FS_AZURE_BUFFERED_PREAD_DISABLE, true); + FSDataInputStream inputStream = null; + try { + inputStream = builder.build().get(); + } catch (IllegalArgumentException | UnsupportedOperationException + | InterruptedException | ExecutionException e) { + throw new IOException( + "Exception opening " + dest + " with FutureDataInputStreamBuilder", + e); + } + assertNotNull("Null 
InputStream over " + dest, inputStream); + int bytesToRead = 10; + try { + AbfsInputStream abfsIs = (AbfsInputStream) inputStream.getWrappedStream(); + byte[] readBuffer = new byte[bytesToRead]; + int readPos = 10; + Assertions + .assertThat(inputStream.read(readPos, readBuffer, 0, bytesToRead)) + .describedAs( + "AbfsInputStream pread did not read the correct number of bytes") + .isEqualTo(bytesToRead); + Assertions.assertThat(readBuffer) + .describedAs("AbfsInputStream pread did not read correct data") + .containsExactly( + Arrays.copyOfRange(data, readPos, readPos + bytesToRead)); + // Read only 10 bytes from offset 10. This time, as buffered pread is + // disabled, it will only read the exact bytes as requested and no data + // will get read into the AbfsInputStream#buffer. In fact the buffer won't + // even get initialized. + assertNull("AbfsInputStream pread caused the internal buffer creation", + abfsIs.getBuffer()); + // Check statistics + assertStatistics(inputStream.getIOStatistics(), bytesToRead, 1, 1, + bytesToRead); + readPos = 40; + Assertions + .assertThat(inputStream.read(readPos, readBuffer, 0, bytesToRead)) + .describedAs( + "AbfsInputStream pread did not read the correct number of bytes") + .isEqualTo(bytesToRead); + Assertions.assertThat(readBuffer) + .describedAs("AbfsInputStream pread did not read correct data") + .containsExactly( + Arrays.copyOfRange(data, readPos, readPos + bytesToRead)); + assertStatistics(inputStream.getIOStatistics(), 2 * bytesToRead, 2, 2, + 2 * bytesToRead); + // Now make a seek and read so that internal buffer gets created + inputStream.seek(0); + Assertions.assertThat(inputStream.read(readBuffer)).describedAs( + "AbfsInputStream seek+read did not read the correct number of bytes") + .isEqualTo(bytesToRead); + // This read would have fetched all 100 bytes into internal buffer. 
+ Assertions + .assertThat(Arrays.copyOfRange( + ((AbfsInputStream) inputStream.getWrappedStream()).getBuffer(), 0, + TEST_FILE_DATA_SIZE)) + .describedAs( + "AbfsInputStream seek+read did not read more data into its buffer") + .containsExactly(data); + assertStatistics(inputStream.getIOStatistics(), 3 * bytesToRead, 3, 3, + TEST_FILE_DATA_SIZE + 2 * bytesToRead); + resetBuffer(abfsIs.getBuffer()); + // Now again do pos read and make sure not any extra data being fetched. + readPos = 0; + Assertions + .assertThat(inputStream.read(readPos, readBuffer, 0, bytesToRead)) + .describedAs( + "AbfsInputStream pread did not read the correct number of bytes") + .isEqualTo(bytesToRead); + Assertions.assertThat(readBuffer) + .describedAs("AbfsInputStream pread did not read correct data") + .containsExactly( + Arrays.copyOfRange(data, readPos, readPos + bytesToRead)); + Assertions + .assertThat(Arrays.copyOfRange( + ((AbfsInputStream) inputStream.getWrappedStream()).getBuffer(), 0, + TEST_FILE_DATA_SIZE)) + .describedAs( + "AbfsInputStream pread read more data into its buffer than expected") + .doesNotContain(data); + assertStatistics(inputStream.getIOStatistics(), 4 * bytesToRead, 4, 4, + TEST_FILE_DATA_SIZE + 3 * bytesToRead); + } finally { + inputStream.close(); + } + } + + private void resetBuffer(byte[] buf) { + for (int i = 0; i < buf.length; i++) { + buf[i] = (byte) 0; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java new file mode 100644 index 0000000000000..6ffe2e2773bbf --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java @@ -0,0 +1,358 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.ProtocolException; +import java.net.URL; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; + +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_OK; +import static 
java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; + +@RunWith(Parameterized.class) +public class ITestAbfsRestOperation extends AbstractAbfsIntegrationTest { + + // Specifies whether getOutputStream() or write() throws IOException. + public enum ErrorType {OUTPUTSTREAM, WRITE}; + + private static final int HTTP_EXPECTATION_FAILED = 417; + private static final int HTTP_ERROR = 0; + private static final int ZERO = 0; + private static final int REDUCED_RETRY_COUNT = 2; + private static final int REDUCED_BACKOFF_INTERVAL = 100; + private static final int BUFFER_LENGTH = 5; + private static final int BUFFER_OFFSET = 0; + private static final String TEST_PATH = "/testfile"; + + // Specifies whether the expect header is enabled or not. + @Parameterized.Parameter + public boolean expectHeaderEnabled; + + // Gives the http response code. 
+ @Parameterized.Parameter(1) + public int responseCode; + + // Gives the http response message. + @Parameterized.Parameter(2) + public String responseMessage; + + // Gives the errorType based on the enum. + @Parameterized.Parameter(3) + public ErrorType errorType; + + // The intercept. + private AbfsThrottlingIntercept intercept; + + /* + HTTP_OK = 200, + HTTP_UNAVAILABLE = 503, + HTTP_NOT_FOUND = 404, + HTTP_EXPECTATION_FAILED = 417, + HTTP_ERROR = 0. + */ + @Parameterized.Parameters(name = "expect={0}-code={1}-ErrorType={3}") + public static Iterable params() { + return Arrays.asList(new Object[][]{ + {true, HTTP_OK, "OK", ErrorType.WRITE}, + {false, HTTP_OK, "OK", ErrorType.WRITE}, + {true, HTTP_UNAVAILABLE, "ServerBusy", ErrorType.OUTPUTSTREAM}, + {true, HTTP_NOT_FOUND, "Resource Not Found", ErrorType.OUTPUTSTREAM}, + {true, HTTP_EXPECTATION_FAILED, "Expectation Failed", ErrorType.OUTPUTSTREAM}, + {true, HTTP_ERROR, "Error", ErrorType.OUTPUTSTREAM} + }); + } + + public ITestAbfsRestOperation() throws Exception { + super(); + } + + /** + * Test helper method to get random bytes array. + * @param length The length of byte buffer + * @return byte buffer + */ + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Gives the AbfsRestOperation. + * @return abfsRestOperation. + */ + private AbfsRestOperation getRestOperation() throws Exception { + // Get the filesystem. + final AzureBlobFileSystem fs = getFileSystem(); + + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsClient abfsClient = fs.getAbfsStore().getClient(); + + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); + + // Update the configuration with reduced retry count and reduced backoff interval. 
+ AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + abfsConfiguration, + REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL); + + intercept = Mockito.mock(AbfsThrottlingIntercept.class); + Mockito.doNothing().when(intercept).updateMetrics(Mockito.any(), Mockito.any()); + + // Gets the client. + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig)); + + Mockito.doReturn(intercept).when(testClient).getIntercept(); + + // Expect header is enabled or not based on the parameter. + AppendRequestParameters appendRequestParameters + = new AppendRequestParameters( + BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH, + AppendRequestParameters.Mode.APPEND_MODE, false, null, + expectHeaderEnabled); + + byte[] buffer = getRandomBytesArray(5); + + // Create a test container to upload the data. + Path testPath = path(TEST_PATH); + fs.create(testPath); + String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); + + // Creates a list of request headers. + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (appendRequestParameters.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + + // Updates the query parameters. + final AbfsUriQueryBuilder abfsUriQueryBuilder = testClient.createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(appendRequestParameters.getPosition())); + + // Creates the url for the specified path. + URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); + + // Create a mock of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation. 
+ AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.Append, + testClient, + HTTP_METHOD_PUT, + url, + requestHeaders, buffer, + appendRequestParameters.getoffset(), + appendRequestParameters.getLength(), null)); + + AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, HTTP_METHOD_PUT, requestHeaders)); + + // Sets the expect request property if expect header is enabled. + if (expectHeaderEnabled) { + Mockito.doReturn(HUNDRED_CONTINUE) + .when(abfsHttpOperation) + .getConnProperty(EXPECT); + } + + HttpURLConnection urlConnection = mock(HttpURLConnection.class); + Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); + Mockito.doReturn(url).when(urlConnection).getURL(); + Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); + + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); + Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); + + switch (errorType) { + case OUTPUTSTREAM: + // If the getOutputStream() throws IOException and Expect Header is + // enabled, it returns back to processResponse and hence we have + // mocked the response code and the response message to check different + // behaviour based on response code. + Mockito.doReturn(responseCode).when(abfsHttpOperation).getConnResponseCode(); + Mockito.doReturn(responseMessage) + .when(abfsHttpOperation) + .getConnResponseMessage(); + Mockito.doThrow(new ProtocolException("Server rejected Operation")) + .when(abfsHttpOperation) + .getConnOutputStream(); + break; + case WRITE: + // If write() throws IOException and Expect Header is + // enabled or not, it should throw back the exception. 
+ OutputStream outputStream = Mockito.spy(new OutputStream() { + @Override + public void write(final int i) throws IOException { + } + }); + Mockito.doReturn(outputStream).when(abfsHttpOperation).getConnOutputStream(); + Mockito.doThrow(new IOException()) + .when(outputStream) + .write(buffer, appendRequestParameters.getoffset(), + appendRequestParameters.getLength()); + break; + default: + break; + } + + // Sets the httpOperation for the rest operation. + Mockito.doReturn(abfsHttpOperation) + .when(op) + .createHttpOperation(); + return op; + } + + void assertTraceContextState(int retryCount, int assertRetryCount, int bytesSent, int assertBytesSent, + int expectedBytesSent, int assertExpectedBytesSent) { + // Assert that the request is retried or not. + Assertions.assertThat(retryCount) + .describedAs("The retry count is incorrect") + .isEqualTo(assertRetryCount); + + // Assert that metrics will be updated correctly. + Assertions.assertThat(bytesSent) + .describedAs("The bytes sent is incorrect") + .isEqualTo(assertBytesSent); + Assertions.assertThat(expectedBytesSent) + .describedAs("The expected bytes sent is incorrect") + .isEqualTo(assertExpectedBytesSent); + } + + /** + * Test the functionalities based on whether getOutputStream() or write() + * throws exception and what is the corresponding response code. + */ + @Test + public void testExpectHundredContinue() throws Exception { + // Gets the AbfsRestOperation. + AbfsRestOperation op = getRestOperation(); + AbfsHttpOperation httpOperation = op.createHttpOperation(); + + TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", + "abcde", FSOperationType.APPEND, + TracingHeaderFormat.ALL_ID_FORMAT, null)); + + switch (errorType) { + case WRITE: + // If write() throws IOException and Expect Header is + // enabled or not, it should throw back the exception + // which is caught and exponential retry logic comes into place. 
+ intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), BUFFER_LENGTH, + 0, 0); + break; + case OUTPUTSTREAM: + switch (responseCode) { + case HTTP_UNAVAILABLE: + // In the case of 503 i.e. throttled case, we should retry. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), ZERO, + httpOperation.getExpectedBytesToBeSent(), BUFFER_LENGTH); + + // Verifies that update Metrics call is made for throttle case and for the first without retry + + // for the retried cases as well. + Mockito.verify(intercept, times(REDUCED_RETRY_COUNT + 1)) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + case HTTP_ERROR: + // In the case of http status code 0 i.e. ErrorType case, we should retry. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), + ZERO, 0, 0); + + // Verifies that update Metrics call is made for ErrorType case and for the first without retry + + // for the retried cases as well. + Mockito.verify(intercept, times(REDUCED_RETRY_COUNT + 1)) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + case HTTP_NOT_FOUND: + case HTTP_EXPECTATION_FAILED: + // In the case of 4xx ErrorType. i.e. user ErrorType, retry should not happen. + intercept(AzureBlobFileSystemException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), ZERO, 0, + 0, 0, 0); + + // Verifies that update Metrics call is not made for user ErrorType case. 
+ Mockito.verify(intercept, never()) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + default: + break; + } + break; + default: + break; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestReadBufferManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestReadBufferManager.java new file mode 100644 index 0000000000000..a57430fa808cc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestReadBufferManager.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.io.IOUtils; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_READ_AHEAD_BLOCK_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_READ_AHEAD_QUEUE_DEPTH; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; +import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD; +import static org.apache.hadoop.test.LambdaTestUtils.eventually; + +public class ITestReadBufferManager extends AbstractAbfsIntegrationTest { + + /** + * Time before the JUnit test times out for eventually() clauses + * to fail. This copes with slow network connections and debugging + * sessions, yet still allows for tests to fail with meaningful + * messages. + */ + public static final int TIMEOUT_OFFSET = 5 * 60_000; + + /** + * Interval between eventually() probes. 
+ */ + public static final int PROBE_INTERVAL_MILLIS = 1_000; + + public ITestReadBufferManager() throws Exception { + } + + @Test + public void testPurgeBufferManagerForParallelStreams() throws Exception { + describe("Testing purging of buffers from ReadBufferManager for " + + "parallel input streams"); + final int numBuffers = 16; + final LinkedList freeList = new LinkedList<>(); + for (int i=0; i < numBuffers; i++) { + freeList.add(i); + } + ExecutorService executorService = Executors.newFixedThreadPool(4); + AzureBlobFileSystem fs = getABFSWithReadAheadConfig(); + // verify that the fs has the capability to validate the fix + Assertions.assertThat(fs.hasPathCapability(new Path("/"), CAPABILITY_SAFE_READAHEAD)) + .describedAs("path capability %s in %s", CAPABILITY_SAFE_READAHEAD, fs) + .isTrue(); + + try { + for (int i = 0; i < 4; i++) { + final String fileName = methodName.getMethodName() + i; + executorService.submit((Callable) () -> { + byte[] fileContent = getRandomBytesArray(ONE_MB); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + try (FSDataInputStream iStream = fs.open(testFilePath)) { + iStream.read(); + } + return null; + }); + } + } finally { + executorService.shutdown(); + // wait for all tasks to finish + executorService.awaitTermination(1, TimeUnit.MINUTES); + } + + ReadBufferManager bufferManager = ReadBufferManager.getBufferManager(); + // readahead queue is empty + assertListEmpty("ReadAheadQueue", bufferManager.getReadAheadQueueCopy()); + // verify the in progress list eventually empties out. 
+ eventually(getTestTimeoutMillis() - TIMEOUT_OFFSET, PROBE_INTERVAL_MILLIS, () -> + assertListEmpty("InProgressList", bufferManager.getInProgressCopiedList())); + } + + private void assertListEmpty(String listName, List list) { + Assertions.assertThat(list) + .describedAs("After closing all streams %s should be empty", listName) + .hasSize(0); + } + + @Test + public void testPurgeBufferManagerForSequentialStream() throws Exception { + describe("Testing purging of buffers in ReadBufferManager for " + + "sequential input streams"); + AzureBlobFileSystem fs = getABFSWithReadAheadConfig(); + final String fileName = methodName.getMethodName(); + byte[] fileContent = getRandomBytesArray(ONE_MB); + Path testFilePath = createFileWithContent(fs, fileName, fileContent); + + AbfsInputStream iStream1 = null; + // stream1 will be closed right away. + try { + iStream1 = (AbfsInputStream) fs.open(testFilePath).getWrappedStream(); + // Just reading one byte will trigger all read ahead calls. + iStream1.read(); + } finally { + IOUtils.closeStream(iStream1); + } + ReadBufferManager bufferManager = ReadBufferManager.getBufferManager(); + AbfsInputStream iStream2 = null; + try { + iStream2 = (AbfsInputStream) fs.open(testFilePath).getWrappedStream(); + iStream2.read(); + // After closing stream1, no queued buffers of stream1 should be present + // assertions can't be made about the state of the other lists as it is + // too prone to race conditions. + assertListDoesnotContainBuffersForIstream(bufferManager.getReadAheadQueueCopy(), iStream1); + } finally { + // closing the stream later. + IOUtils.closeStream(iStream2); + } + // After closing stream2, no queued buffers of stream2 should be present. + assertListDoesnotContainBuffersForIstream(bufferManager.getReadAheadQueueCopy(), iStream2); + + // After closing both the streams, read queue should be empty. 
+ assertListEmpty("ReadAheadQueue", bufferManager.getReadAheadQueueCopy()); + + } + + + private void assertListDoesnotContainBuffersForIstream(List list, + AbfsInputStream inputStream) { + for (ReadBuffer buffer : list) { + Assertions.assertThat(buffer.getStream()) + .describedAs("Buffers associated with closed input streams shouldn't be present") + .isNotEqualTo(inputStream); + } + } + + private AzureBlobFileSystem getABFSWithReadAheadConfig() throws Exception { + Configuration conf = getRawConfiguration(); + conf.setLong(FS_AZURE_READ_AHEAD_QUEUE_DEPTH, 8); + conf.setInt(AZURE_READ_BUFFER_SIZE, MIN_BUFFER_SIZE); + conf.setInt(FS_AZURE_READ_AHEAD_BLOCK_SIZE, MIN_BUFFER_SIZE); + return (AzureBlobFileSystem) FileSystem.newInstance(conf); + } + + protected byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + protected Path createFileWithContent(FileSystem fs, String fileName, + byte[] fileContent) throws IOException { + Path testFilePath = path(fileName); + try (FSDataOutputStream oStream = fs.create(testFilePath)) { + oStream.write(fileContent); + oStream.flush(); + } + return testFilePath; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java deleted file mode 100644 index deca8b355a9f7..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ /dev/null @@ -1,97 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azurebfs.services; - -import java.net.URL; -import java.util.regex.Pattern; - -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -import org.junit.Assert; -import org.junit.Test; - -import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; -import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; -import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; -import org.apache.hadoop.util.VersionInfo; - -/** - * Test useragent of abfs client. 
- * - */ -public final class TestAbfsClient { - - private final String accountName = "bogusAccountName.dfs.core.windows.net"; - - private void validateUserAgent(String expectedPattern, - URL baseUrl, - AbfsConfiguration config, - boolean includeSSLProvider) - throws AzureBlobFileSystemException { - AbfsClient client = new AbfsClient(baseUrl, null, - config, null, (AccessTokenProvider) null, null); - String sslProviderName = null; - if (includeSSLProvider) { - sslProviderName = DelegatingSSLSocketFactory.getDefaultFactory().getProviderName(); - } - String userAgent = client.initializeUserAgent(config, sslProviderName); - Pattern pattern = Pattern.compile(expectedPattern); - Assert.assertTrue("Incorrect User Agent String", - pattern.matcher(userAgent).matches()); - } - - @Test - public void verifyUnknownUserAgent() throws Exception { - String clientVersion = "Azure Blob FS/" + VersionInfo.getVersion(); - String expectedUserAgentPattern = String.format(clientVersion - + " %s", "\\(JavaJRE ([^\\)]+)\\)"); - final Configuration configuration = new Configuration(); - configuration.unset(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY); - AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, accountName); - validateUserAgent(expectedUserAgentPattern, new URL("http://azure.com"), - abfsConfiguration, false); - } - - @Test - public void verifyUserAgent() throws Exception { - String clientVersion = "Azure Blob FS/" + VersionInfo.getVersion(); - String expectedUserAgentPattern = String.format(clientVersion - + " %s", "\\(JavaJRE ([^\\)]+)\\) Partner Service"); - final Configuration configuration = new Configuration(); - configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, "Partner Service"); - AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, accountName); - validateUserAgent(expectedUserAgentPattern, new URL("http://azure.com"), - abfsConfiguration, false); - } - - @Test - public void 
verifyUserAgentWithSSLProvider() throws Exception { - String clientVersion = "Azure Blob FS/" + VersionInfo.getVersion(); - String expectedUserAgentPattern = String.format(clientVersion - + " %s", "\\(JavaJRE ([^\\)]+)\\) Partner Service"); - final Configuration configuration = new Configuration(); - configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, "Partner Service"); - configuration.set(ConfigurationKeys.FS_AZURE_SSL_CHANNEL_MODE_KEY, - DelegatingSSLSocketFactory.SSLChannelMode.Default_JSSE.name()); - AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, accountName); - validateUserAgent(expectedUserAgentPattern, new URL("https://azure.com"), - abfsConfiguration, true); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java index 3f680e499300d..22649cd190d83 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java @@ -18,9 +18,15 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Test; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ANALYSIS_PERIOD; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -33,6 +39,15 @@ public class TestAbfsClientThrottlingAnalyzer { + ANALYSIS_PERIOD / 10; private static final long MEGABYTE = 1024 * 1024; private static 
final int MAX_ACCEPTABLE_PERCENT_DIFFERENCE = 20; + private AbfsConfiguration abfsConfiguration; + + public TestAbfsClientThrottlingAnalyzer() throws IOException, IllegalAccessException { + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.setInt(FS_AZURE_ANALYSIS_PERIOD, 1000); + this.abfsConfiguration = new AbfsConfiguration(configuration, + "dummy"); + } private void sleep(long milliseconds) { try { @@ -82,8 +97,7 @@ private void validateLessThanOrEqual(long maxExpected, long actual) { @Test public void testNoMetricUpdatesThenNoWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); validate(0, analyzer.getSleepDuration()); @@ -96,8 +110,7 @@ public void testNoMetricUpdatesThenNoWaiting() { @Test public void testOnlySuccessThenNoWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); analyzer.addBytesTransferred(8 * MEGABYTE, false); validate(0, analyzer.getSleepDuration()); sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); @@ -112,8 +125,7 @@ public void testOnlySuccessThenNoWaiting() { @Test public void testOnlyErrorsAndWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); analyzer.addBytesTransferred(4 * MEGABYTE, true); sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); @@ -132,8 +144,7 @@ public void testOnlyErrorsAndWaiting() { @Test public void testSuccessAndErrorsAndWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); analyzer.addBytesTransferred(8 * MEGABYTE, false); 
analyzer.addBytesTransferred(2 * MEGABYTE, true); @@ -157,8 +168,7 @@ public void testSuccessAndErrorsAndWaiting() { @Test public void testManySuccessAndErrorsAndWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); final int numberOfRequests = 20; for (int i = 0; i < numberOfRequests; i++) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java new file mode 100644 index 0000000000000..36914a4e4f365 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsHttpOperation.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; + +public class TestAbfsHttpOperation { + + @Test + public void testMaskingAndEncoding() + throws MalformedURLException, UnsupportedEncodingException { + testIfMaskAndEncodeSuccessful("Where sig is the only query param", + "http://www.testurl.net?sig=abcd", "http://www.testurl.net?sig=XXXXX"); + + testIfMaskAndEncodeSuccessful("Where oid is the only query param", + "http://www.testurl.net?saoid=abcdef", + "http://www.testurl.net?saoid=abcXXX"); + + testIfMaskAndEncodeSuccessful("Where sig is the first query param, oid is last", + "http://www.testurl.net?sig=abcd&abc=xyz&saoid=pqrs456", + "http://www.testurl.net?sig=XXXXX&abc=xyz&saoid=pqrsXXX"); + + testIfMaskAndEncodeSuccessful( + "Where sig/oid are neither first nor last query param", + "http://www.testurl.net?lmn=abc&sig=abcd&suoid=mnop789&abc=xyz", + "http://www.testurl.net?lmn=abc&sig=XXXXX&suoid=mnopXXX&abc=xyz"); + + testIfMaskAndEncodeSuccessful("Where sig is the last query param, oid is first", + "http://www.testurl.net?skoid=pqrs123&abc=xyz&sig=abcd", + "http://www.testurl.net?skoid=pqrsXXX&abc=xyz&sig=XXXXX"); + + testIfMaskAndEncodeSuccessful("Where sig/oid query param are not present", + "http://www.testurl.net?abc=xyz", "http://www.testurl.net?abc=xyz"); + + testIfMaskAndEncodeSuccessful( + "Where sig/oid query param are not present but mysig and myoid", + 
"http://www.testurl.net?abc=xyz&mysig=qwerty&mysaoid=uvw", + "http://www.testurl.net?abc=xyz&mysig=qwerty&mysaoid=uvw"); + + testIfMaskAndEncodeSuccessful( + "Where sig/oid query param is not present but sigmy and oidmy", + "http://www.testurl.net?abc=xyz&sigmy=qwerty&skoidmy=uvw", + "http://www.testurl.net?abc=xyz&sigmy=qwerty&skoidmy=uvw"); + + testIfMaskAndEncodeSuccessful( + "Where sig/oid query param is not present but values sig and oid", + "http://www.testurl.net?abc=xyz&mnop=sig&pqr=saoid", + "http://www.testurl.net?abc=xyz&mnop=sig&pqr=saoid"); + + testIfMaskAndEncodeSuccessful( + "Where sig/oid query param is not present but a value ends with sig/oid", + "http://www.testurl.net?abc=xyzsaoid&mnop=abcsig", + "http://www.testurl.net?abc=xyzsaoid&mnop=abcsig"); + + testIfMaskAndEncodeSuccessful( + "Where sig/oid query param is not present but a value starts with sig/oid", + "http://www.testurl.net?abc=saoidxyz&mnop=sigabc", + "http://www.testurl.net?abc=saoidxyz&mnop=sigabc"); + } + + @Test + public void testUrlWithNullValues() + throws MalformedURLException, UnsupportedEncodingException { + testIfMaskAndEncodeSuccessful("Where param to be masked has null value", + "http://www.testurl.net?abc=xyz&saoid=&mnop=abcsig", + "http://www.testurl.net?abc=xyz&saoid=&mnop=abcsig"); + testIfMaskAndEncodeSuccessful("Where visible param has null value", + "http://www.testurl.net?abc=xyz&pqr=&mnop=abcd", + "http://www.testurl.net?abc=xyz&pqr=&mnop=abcd"); + testIfMaskAndEncodeSuccessful("Where last param has null value", + "http://www.testurl.net?abc=xyz&pqr=&mnop=", + "http://www.testurl.net?abc=xyz&pqr=&mnop="); + } + + private void testIfMaskAndEncodeSuccessful(final String scenario, + final String url, final String expectedMaskedUrl) + throws UnsupportedEncodingException, MalformedURLException { + + Assertions.assertThat(UriUtils.getMaskedUrl(new URL(url))) + .describedAs(url + " (" + scenario + ") after masking should be: " + + 
expectedMaskedUrl).isEqualTo(expectedMaskedUrl); + + final String expectedMaskedEncodedUrl = URLEncoder + .encode(expectedMaskedUrl, "UTF-8"); + Assertions.assertThat(UriUtils.encodedUrlStr(expectedMaskedUrl)) + .describedAs( + url + " (" + scenario + ") after masking and encoding should " + + "be: " + expectedMaskedEncodedUrl) + .isEqualTo(expectedMaskedEncodedUrl); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java new file mode 100644 index 0000000000000..0395c4183b9b7 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java @@ -0,0 +1,921 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Optional; +import java.util.Random; +import java.util.concurrent.ExecutionException; + +import org.assertj.core.api.Assertions; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FutureDataInputStreamBuilder; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TimeoutException; +import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus; +import org.apache.hadoop.fs.azurebfs.utils.TestCachedSASToken; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.impl.OpenFileParameters; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_READ_AHEAD_QUEUE_DEPTH; + +/** + * 
Unit test AbfsInputStream. + */ +public class TestAbfsInputStream extends + AbstractAbfsIntegrationTest { + + private static final int ONE_KB = 1 * 1024; + private static final int TWO_KB = 2 * 1024; + private static final int THREE_KB = 3 * 1024; + private static final int SIXTEEN_KB = 16 * ONE_KB; + private static final int FORTY_EIGHT_KB = 48 * ONE_KB; + private static final int ONE_MB = 1 * 1024 * 1024; + private static final int FOUR_MB = 4 * ONE_MB; + private static final int EIGHT_MB = 8 * ONE_MB; + private static final int TEST_READAHEAD_DEPTH_2 = 2; + private static final int TEST_READAHEAD_DEPTH_4 = 4; + private static final int REDUCED_READ_BUFFER_AGE_THRESHOLD = 3000; // 3 sec + private static final int INCREASED_READ_BUFFER_AGE_THRESHOLD = + REDUCED_READ_BUFFER_AGE_THRESHOLD * 10; // 30 sec + private static final int ALWAYS_READ_BUFFER_SIZE_TEST_FILE_SIZE = 16 * ONE_MB; + + @Override + public void teardown() throws Exception { + super.teardown(); + ReadBufferManager.getBufferManager().testResetReadBufferManager(); + } + + private AbfsRestOperation getMockRestOp() { + AbfsRestOperation op = mock(AbfsRestOperation.class); + AbfsHttpOperation httpOp = mock(AbfsHttpOperation.class); + when(httpOp.getBytesReceived()).thenReturn(1024L); + when(op.getResult()).thenReturn(httpOp); + when(op.getSasToken()).thenReturn(TestCachedSASToken.getTestCachedSASTokenInstance().get()); + return op; + } + + private AbfsClient getMockAbfsClient() { + // Mock failure for client.read() + AbfsClient client = mock(AbfsClient.class); + AbfsPerfTracker tracker = new AbfsPerfTracker( + "test", + this.getAccountName(), + this.getConfiguration()); + when(client.getAbfsPerfTracker()).thenReturn(tracker); + + return client; + } + + private AbfsInputStream getAbfsInputStream(AbfsClient mockAbfsClient, + String fileName) throws IOException { + AbfsInputStreamContext inputStreamContext = new AbfsInputStreamContext(-1); + // Create AbfsInputStream with the client instance + 
AbfsInputStream inputStream = new AbfsInputStream( + mockAbfsClient, + null, + FORWARD_SLASH + fileName, + THREE_KB, + inputStreamContext.withReadBufferSize(ONE_KB).withReadAheadQueueDepth(10).withReadAheadBlockSize(ONE_KB), + "eTag", + getTestTracingContext(null, false)); + + inputStream.setCachedSasToken( + TestCachedSASToken.getTestCachedSASTokenInstance()); + + return inputStream; + } + + public AbfsInputStream getAbfsInputStream(AbfsClient abfsClient, + String fileName, + int fileSize, + String eTag, + int readAheadQueueDepth, + int readBufferSize, + boolean alwaysReadBufferSize, + int readAheadBlockSize) throws IOException { + AbfsInputStreamContext inputStreamContext = new AbfsInputStreamContext(-1); + // Create AbfsInputStream with the client instance + AbfsInputStream inputStream = new AbfsInputStream( + abfsClient, + null, + FORWARD_SLASH + fileName, + fileSize, + inputStreamContext.withReadBufferSize(readBufferSize) + .withReadAheadQueueDepth(readAheadQueueDepth) + .withShouldReadBufferSizeAlways(alwaysReadBufferSize) + .withReadAheadBlockSize(readAheadBlockSize), + eTag, + getTestTracingContext(getFileSystem(), false)); + + inputStream.setCachedSasToken( + TestCachedSASToken.getTestCachedSASTokenInstance()); + + return inputStream; + } + + private void queueReadAheads(AbfsInputStream inputStream) { + // Mimic AbfsInputStream readAhead queue requests + ReadBufferManager.getBufferManager() + .queueReadAhead(inputStream, 0, ONE_KB, inputStream.getTracingContext()); + ReadBufferManager.getBufferManager() + .queueReadAhead(inputStream, ONE_KB, ONE_KB, + inputStream.getTracingContext()); + ReadBufferManager.getBufferManager() + .queueReadAhead(inputStream, TWO_KB, TWO_KB, + inputStream.getTracingContext()); + } + + private void verifyReadCallCount(AbfsClient client, int count) throws + AzureBlobFileSystemException, InterruptedException { + // ReadAhead threads are triggered asynchronously. + // Wait a second before verifying the number of total calls. 
+ Thread.sleep(1000); + verify(client, times(count)).read(any(String.class), any(Long.class), + any(byte[].class), any(Integer.class), any(Integer.class), + any(String.class), any(String.class), any(TracingContext.class)); + } + + private void checkEvictedStatus(AbfsInputStream inputStream, int position, boolean expectedToThrowException) + throws Exception { + // Sleep for the eviction threshold time + Thread.sleep(ReadBufferManager.getBufferManager().getThresholdAgeMilliseconds() + 1000); + + // Eviction is done only when AbfsInputStream tries to queue new items. + // 1 tryEvict will remove 1 eligible item. To ensure that the current test buffer + // will get evicted (considering there could be other tests running in parallel), + // call tryEvict for the number of items that are there in completedReadList. + int numOfCompletedReadListItems = ReadBufferManager.getBufferManager().getCompletedReadListSize(); + while (numOfCompletedReadListItems > 0) { + ReadBufferManager.getBufferManager().callTryEvict(); + numOfCompletedReadListItems--; + } + + if (expectedToThrowException) { + intercept(IOException.class, + () -> inputStream.read(position, new byte[ONE_KB], 0, ONE_KB)); + } else { + inputStream.read(position, new byte[ONE_KB], 0, ONE_KB); + } + } + + public TestAbfsInputStream() throws Exception { + super(); + // Reduce thresholdAgeMilliseconds to 3 sec for the tests + ReadBufferManager.getBufferManager().setThresholdAgeMilliseconds(REDUCED_READ_BUFFER_AGE_THRESHOLD); + } + + private void writeBufferToNewFile(Path testFile, byte[] buffer) throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + fs.create(testFile); + FSDataOutputStream out = fs.append(testFile); + out.write(buffer); + out.close(); + } + + private void verifyOpenWithProvidedStatus(Path path, FileStatus fileStatus, + byte[] buf, AbfsRestOperationType source) + throws IOException, ExecutionException, InterruptedException { + byte[] readBuf = new byte[buf.length]; + AzureBlobFileSystem fs = 
getFileSystem(); + FutureDataInputStreamBuilder builder = fs.openFile(path); + builder.withFileStatus(fileStatus); + FSDataInputStream in = builder.build().get(); + assertEquals(String.format( + "Open with fileStatus [from %s result]: Incorrect number of bytes read", + source), buf.length, in.read(readBuf)); + assertArrayEquals(String + .format("Open with fileStatus [from %s result]: Incorrect read data", + source), readBuf, buf); + } + + private void checkGetPathStatusCalls(Path testFile, FileStatus fileStatus, + AzureBlobFileSystemStore abfsStore, AbfsClient mockClient, + AbfsRestOperationType source, TracingContext tracingContext) + throws IOException { + + // verify GetPathStatus not invoked when FileStatus is provided + abfsStore.openFileForRead(testFile, Optional + .ofNullable(new OpenFileParameters().withStatus(fileStatus)), null, tracingContext); + verify(mockClient, times(0).description((String.format( + "FileStatus [from %s result] provided, GetFileStatus should not be invoked", + source)))).getPathStatus(anyString(), anyBoolean(), any(TracingContext.class)); + + // verify GetPathStatus invoked when FileStatus not provided + abfsStore.openFileForRead(testFile, + Optional.empty(), null, + tracingContext); + verify(mockClient, times(1).description( + "GetPathStatus should be invoked when FileStatus not provided")) + .getPathStatus(anyString(), anyBoolean(), any(TracingContext.class)); + + Mockito.reset(mockClient); //clears invocation count for next test case + } + + @Test + public void testOpenFileWithOptions() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + String testFolder = "/testFolder"; + Path smallTestFile = new Path(testFolder + "/testFile0"); + Path largeTestFile = new Path(testFolder + "/testFile1"); + fs.mkdirs(new Path(testFolder)); + int readBufferSize = getConfiguration().getReadBufferSize(); + byte[] smallBuffer = new byte[5]; + byte[] largeBuffer = new byte[readBufferSize + 5]; + new Random().nextBytes(smallBuffer); + new 
Random().nextBytes(largeBuffer); + writeBufferToNewFile(smallTestFile, smallBuffer); + writeBufferToNewFile(largeTestFile, largeBuffer); + + FileStatus[] getFileStatusResults = {fs.getFileStatus(smallTestFile), + fs.getFileStatus(largeTestFile)}; + FileStatus[] listStatusResults = fs.listStatus(new Path(testFolder)); + + // open with fileStatus from GetPathStatus + verifyOpenWithProvidedStatus(smallTestFile, getFileStatusResults[0], + smallBuffer, AbfsRestOperationType.GetPathStatus); + verifyOpenWithProvidedStatus(largeTestFile, getFileStatusResults[1], + largeBuffer, AbfsRestOperationType.GetPathStatus); + + // open with fileStatus from ListStatus + verifyOpenWithProvidedStatus(smallTestFile, listStatusResults[0], smallBuffer, + AbfsRestOperationType.ListPaths); + verifyOpenWithProvidedStatus(largeTestFile, listStatusResults[1], largeBuffer, + AbfsRestOperationType.ListPaths); + + // verify number of GetPathStatus invocations + AzureBlobFileSystemStore abfsStore = getAbfsStore(fs); + AbfsClient mockClient = spy(getAbfsClient(abfsStore)); + setAbfsClient(abfsStore, mockClient); + TracingContext tracingContext = getTestTracingContext(fs, false); + checkGetPathStatusCalls(smallTestFile, getFileStatusResults[0], + abfsStore, mockClient, AbfsRestOperationType.GetPathStatus, tracingContext); + checkGetPathStatusCalls(largeTestFile, getFileStatusResults[1], + abfsStore, mockClient, AbfsRestOperationType.GetPathStatus, tracingContext); + checkGetPathStatusCalls(smallTestFile, listStatusResults[0], + abfsStore, mockClient, AbfsRestOperationType.ListPaths, tracingContext); + checkGetPathStatusCalls(largeTestFile, listStatusResults[1], + abfsStore, mockClient, AbfsRestOperationType.ListPaths, tracingContext); + + // Verify with incorrect filestatus + getFileStatusResults[0].setPath(new Path("wrongPath")); + intercept(ExecutionException.class, + () -> verifyOpenWithProvidedStatus(smallTestFile, + getFileStatusResults[0], smallBuffer, + AbfsRestOperationType.GetPathStatus)); 
+ } + + /** + * This test expects AbfsInputStream to throw the exception that readAhead + * thread received on read. The readAhead thread must be initiated from the + * active read request itself. + * Also checks that the ReadBuffers are evicted as per the ReadBufferManager + * threshold criteria. + * @throws Exception + */ + @Test + public void testFailedReadAhead() throws Exception { + AbfsClient client = getMockAbfsClient(); + AbfsRestOperation successOp = getMockRestOp(); + + // Stub : + // Read request leads to 3 readahead calls: Fail all 3 readahead-client.read() + // Actual read request fails with the failure in readahead thread + doThrow(new TimeoutException("Internal Server error for RAH-Thread-X")) + .doThrow(new TimeoutException("Internal Server error for RAH-Thread-Y")) + .doThrow(new TimeoutException("Internal Server error RAH-Thread-Z")) + .doReturn(successOp) // Any extra calls to read, pass it. + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testFailedReadAhead.txt"); + + // Scenario: ReadAhead triggered from current active read call failed + // Before the change to return exception from readahead buffer, + // AbfsInputStream would have triggered an extra readremote on noticing + // data absent in readahead buffers + // In this test, a read should trigger 3 client.read() calls as file is 3 KB + // and readahead buffer size set in AbfsInputStream is 1 KB + // There should only be a total of 3 client.read() in this test. + intercept(IOException.class, + () -> inputStream.read(new byte[ONE_KB])); + + // Only the 3 readAhead threads should have triggered client.read + verifyReadCallCount(client, 3); + + // Stub returns success for the 4th read request, if ReadBuffers still + // persisted, ReadAheadManager getBlock would have returned exception. 
+ checkEvictedStatus(inputStream, 0, false); + } + + @Test + public void testFailedReadAheadEviction() throws Exception { + AbfsClient client = getMockAbfsClient(); + AbfsRestOperation successOp = getMockRestOp(); + ReadBufferManager.setThresholdAgeMilliseconds(INCREASED_READ_BUFFER_AGE_THRESHOLD); + // Stub : + // Read request leads to 3 readahead calls: Fail all 3 readahead-client.read() + // Actual read request fails with the failure in readahead thread + doThrow(new TimeoutException("Internal Server error")) + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testFailedReadAheadEviction.txt"); + + // Add a failed buffer to completed queue and set to no free buffers to read ahead. + ReadBuffer buff = new ReadBuffer(); + buff.setStatus(ReadBufferStatus.READ_FAILED); + ReadBufferManager.getBufferManager().testMimicFullUseAndAddFailedBuffer(buff); + + // if read failed buffer eviction is tagged as a valid eviction, it will lead to + // wrong assumption of queue logic that a buffer is freed up and can lead to : + // java.util.EmptyStackException + // at java.util.Stack.peek(Stack.java:102) + // at java.util.Stack.pop(Stack.java:84) + // at org.apache.hadoop.fs.azurebfs.services.ReadBufferManager.queueReadAhead + ReadBufferManager.getBufferManager().queueReadAhead(inputStream, 0, ONE_KB, + getTestTracingContext(getFileSystem(), true)); + } + + /** + * + * The test expects AbfsInputStream to initiate a remote read request for + * the request offset and length when previous read ahead on the offset had failed. + * Also checks that the ReadBuffers are evicted as per the ReadBufferManager + * threshold criteria. 
+ * @throws Exception + */ + @Test + public void testOlderReadAheadFailure() throws Exception { + AbfsClient client = getMockAbfsClient(); + AbfsRestOperation successOp = getMockRestOp(); + + // Stub : + // First Read request leads to 3 readahead calls: Fail all 3 readahead-client.read() + // A second read request will see that readahead had failed for data in + // the requested offset range and also that its is an older readahead request. + // So attempt a new read only for the requested range. + doThrow(new TimeoutException("Internal Server error for RAH-X")) + .doThrow(new TimeoutException("Internal Server error for RAH-Y")) + .doThrow(new TimeoutException("Internal Server error for RAH-Z")) + .doReturn(successOp) // pass the read for second read request + .doReturn(successOp) // pass success for post eviction test + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testOlderReadAheadFailure.txt"); + + // First read request that fails as the readahead triggered from this request failed. + intercept(IOException.class, + () -> inputStream.read(new byte[ONE_KB])); + + // Only the 3 readAhead threads should have triggered client.read + verifyReadCallCount(client, 3); + + // Sleep for thresholdAgeMs so that the read ahead buffer qualifies for being old. + Thread.sleep(ReadBufferManager.getBufferManager().getThresholdAgeMilliseconds()); + + // Second read request should retry the read (and not issue any new readaheads) + inputStream.read(ONE_KB, new byte[ONE_KB], 0, ONE_KB); + + // Once created, mock will remember all interactions. 
So total number of read + // calls will be one more from earlier (there is a reset mock which will reset the + // count, but the mock stub is erased as well which needs AbsInputStream to be recreated, + // which beats the purpose) + verifyReadCallCount(client, 4); + + // Stub returns success for the 5th read request, if ReadBuffers still + // persisted request would have failed for position 0. + checkEvictedStatus(inputStream, 0, false); + } + + /** + * The test expects AbfsInputStream to utilize any data read ahead for + * requested offset and length. + * @throws Exception + */ + @Test + public void testSuccessfulReadAhead() throws Exception { + // Mock failure for client.read() + AbfsClient client = getMockAbfsClient(); + + // Success operation mock + AbfsRestOperation op = getMockRestOp(); + + // Stub : + // Pass all readAheads and fail the post eviction request to + // prove ReadAhead buffer is used + // for post eviction check, fail all read aheads + doReturn(op) + .doReturn(op) + .doReturn(op) + .doThrow(new TimeoutException("Internal Server error for RAH-X")) + .doThrow(new TimeoutException("Internal Server error for RAH-Y")) + .doThrow(new TimeoutException("Internal Server error for RAH-Z")) + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testSuccessfulReadAhead.txt"); + int beforeReadCompletedListSize = ReadBufferManager.getBufferManager().getCompletedReadListSize(); + + // First read request that triggers readAheads. 
+ inputStream.read(new byte[ONE_KB]); + + // Only the 3 readAhead threads should have triggered client.read + verifyReadCallCount(client, 3); + int newAdditionsToCompletedRead = + ReadBufferManager.getBufferManager().getCompletedReadListSize() + - beforeReadCompletedListSize; + // read buffer might be dumped if the ReadBufferManager getblock preceded + // the action of buffer being picked for reading from readaheadqueue, so that + // inputstream can proceed with read and not be blocked on readahead thread + // availability. So the count of buffers in completedReadQueue for the stream + // can be same or lesser than the requests triggered to queue readahead. + Assertions.assertThat(newAdditionsToCompletedRead) + .describedAs( + "New additions to completed reads should be same or less than as number of readaheads") + .isLessThanOrEqualTo(3); + + // Another read request whose requested data is already read ahead. + inputStream.read(ONE_KB, new byte[ONE_KB], 0, ONE_KB); + + // Once created, mock will remember all interactions. + // As the above read should not have triggered any server calls, total + // number of read calls made at this point will be same as last. + verifyReadCallCount(client, 3); + + // Stub will throw exception for client.read() for 4th and later calls + // if not using the read-ahead buffer exception will be thrown on read + checkEvictedStatus(inputStream, 0, true); + } + + /** + * This test expects InProgressList is not purged by the inputStream close. + */ + @Test + public void testStreamPurgeDuringReadAheadCallExecuting() throws Exception { + AbfsClient client = getMockAbfsClient(); + AbfsRestOperation successOp = getMockRestOp(); + final Long serverCommunicationMockLatency = 3_000L; + final Long readBufferTransferToInProgressProbableTime = 1_000L; + final Integer readBufferQueuedCount = 3; + + Mockito.doAnswer(invocationOnMock -> { + //sleeping thread to mock the network latency from client to backend. 
+ Thread.sleep(serverCommunicationMockLatency); + return successOp; + }) + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + final ReadBufferManager readBufferManager + = ReadBufferManager.getBufferManager(); + + final int readBufferTotal = readBufferManager.getNumBuffers(); + final int expectedFreeListBufferCount = readBufferTotal + - readBufferQueuedCount; + + try (AbfsInputStream inputStream = getAbfsInputStream(client, + "testSuccessfulReadAhead.txt")) { + // As this is try-with-resources block, the close() method of the created + // abfsInputStream object shall be called on the end of the block. + queueReadAheads(inputStream); + + //Sleeping to give ReadBufferWorker to pick the readBuffers for processing. + Thread.sleep(readBufferTransferToInProgressProbableTime); + + Assertions.assertThat(readBufferManager.getInProgressCopiedList()) + .describedAs(String.format("InProgressList should have %d elements", + readBufferQueuedCount)) + .hasSize(readBufferQueuedCount); + Assertions.assertThat(readBufferManager.getFreeListCopy()) + .describedAs(String.format("FreeList should have %d elements", + expectedFreeListBufferCount)) + .hasSize(expectedFreeListBufferCount); + Assertions.assertThat(readBufferManager.getCompletedReadListCopy()) + .describedAs("CompletedList should have 0 elements") + .hasSize(0); + } + + Assertions.assertThat(readBufferManager.getInProgressCopiedList()) + .describedAs(String.format("InProgressList should have %d elements", + readBufferQueuedCount)) + .hasSize(readBufferQueuedCount); + Assertions.assertThat(readBufferManager.getFreeListCopy()) + .describedAs(String.format("FreeList should have %d elements", + expectedFreeListBufferCount)) + .hasSize(expectedFreeListBufferCount); + Assertions.assertThat(readBufferManager.getCompletedReadListCopy()) + .describedAs("CompletedList should have 0 elements") + 
.hasSize(0); + } + + /** + * This test expects ReadAheadManager to throw exception if the read ahead + * thread had failed within the last thresholdAgeMilliseconds. + * Also checks that the ReadBuffers are evicted as per the ReadBufferManager + * threshold criteria. + * @throws Exception + */ + @Test + public void testReadAheadManagerForFailedReadAhead() throws Exception { + AbfsClient client = getMockAbfsClient(); + AbfsRestOperation successOp = getMockRestOp(); + + // Stub : + // Read request leads to 3 readahead calls: Fail all 3 readahead-client.read() + // Actual read request fails with the failure in readahead thread + doThrow(new TimeoutException("Internal Server error for RAH-Thread-X")) + .doThrow(new TimeoutException("Internal Server error for RAH-Thread-Y")) + .doThrow(new TimeoutException("Internal Server error RAH-Thread-Z")) + .doReturn(successOp) // Any extra calls to read, pass it. + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testReadAheadManagerForFailedReadAhead.txt"); + + queueReadAheads(inputStream); + + // AbfsInputStream Read would have waited for the read-ahead for the requested offset + // as we are testing from ReadAheadManager directly, sleep for a sec to + // get the read ahead threads to complete + Thread.sleep(1000); + + // if readAhead failed for specific offset, getBlock should + // throw exception from the ReadBuffer that failed within last thresholdAgeMilliseconds sec + intercept(IOException.class, + () -> ReadBufferManager.getBufferManager().getBlock( + inputStream, + 0, + ONE_KB, + new byte[ONE_KB])); + + // Only the 3 readAhead threads should have triggered client.read + verifyReadCallCount(client, 3); + + // Stub returns success for the 4th read request, if ReadBuffers still + // persisted, ReadAheadManager getBlock would have 
returned exception. + checkEvictedStatus(inputStream, 0, false); + } + + /** + * The test expects ReadAheadManager to return 0 receivedBytes when previous + * read ahead on the offset had failed and not throw exception received then. + * Also checks that the ReadBuffers are evicted as per the ReadBufferManager + * threshold criteria. + * @throws Exception + */ + @Test + public void testReadAheadManagerForOlderReadAheadFailure() throws Exception { + AbfsClient client = getMockAbfsClient(); + AbfsRestOperation successOp = getMockRestOp(); + + // Stub : + // First Read request leads to 3 readahead calls: Fail all 3 readahead-client.read() + // A second read request will see that readahead had failed for data in + // the requested offset range but also that its is an older readahead request. + // System issue could have resolved by now, so attempt a new read only for the requested range. + doThrow(new TimeoutException("Internal Server error for RAH-X")) + .doThrow(new TimeoutException("Internal Server error for RAH-X")) + .doThrow(new TimeoutException("Internal Server error for RAH-X")) + .doReturn(successOp) // pass the read for second read request + .doReturn(successOp) // pass success for post eviction test + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testReadAheadManagerForOlderReadAheadFailure.txt"); + + queueReadAheads(inputStream); + + // AbfsInputStream Read would have waited for the read-ahead for the requested offset + // as we are testing from ReadAheadManager directly, sleep for thresholdAgeMilliseconds so that + // read buffer qualifies for to be an old buffer + Thread.sleep(ReadBufferManager.getBufferManager().getThresholdAgeMilliseconds()); + + // Only the 3 readAhead threads should have triggered client.read + verifyReadCallCount(client, 3); + + // 
getBlock from a new read request should return 0 if there is a failure + // 30 sec before in read ahead buffer for respective offset. + int bytesRead = ReadBufferManager.getBufferManager().getBlock( + inputStream, + ONE_KB, + ONE_KB, + new byte[ONE_KB]); + Assert.assertEquals("bytesRead should be zero when previously read " + + "ahead buffer had failed", 0, bytesRead); + + // Stub returns success for the 5th read request, if ReadBuffers still + // persisted request would have failed for position 0. + checkEvictedStatus(inputStream, 0, false); + } + + /** + * The test expects ReadAheadManager to return data from previously read + * ahead data of same offset. + * @throws Exception + */ + @Test + public void testReadAheadManagerForSuccessfulReadAhead() throws Exception { + // Mock failure for client.read() + AbfsClient client = getMockAbfsClient(); + + // Success operation mock + AbfsRestOperation op = getMockRestOp(); + + // Stub : + // Pass all readAheads and fail the post eviction request to + // prove ReadAhead buffer is used + doReturn(op) + .doReturn(op) + .doReturn(op) + .doThrow(new TimeoutException("Internal Server error for RAH-X")) // for post eviction request + .doThrow(new TimeoutException("Internal Server error for RAH-Y")) + .doThrow(new TimeoutException("Internal Server error for RAH-Z")) + .when(client) + .read(any(String.class), any(Long.class), any(byte[].class), + any(Integer.class), any(Integer.class), any(String.class), + any(String.class), any(TracingContext.class)); + + AbfsInputStream inputStream = getAbfsInputStream(client, "testSuccessfulReadAhead.txt"); + + queueReadAheads(inputStream); + + // AbfsInputStream Read would have waited for the read-ahead for the requested offset + // as we are testing from ReadAheadManager directly, sleep for a sec to + // get the read ahead threads to complete + Thread.sleep(1000); + + // Only the 3 readAhead threads should have triggered client.read + verifyReadCallCount(client, 3); + + // getBlock for a new 
read should return the buffer read-ahead + int bytesRead = ReadBufferManager.getBufferManager().getBlock( + inputStream, + ONE_KB, + ONE_KB, + new byte[ONE_KB]); + + Assert.assertTrue("bytesRead should be non-zero from the " + + "buffer that was read-ahead", bytesRead > 0); + + // Once created, mock will remember all interactions. + // As the above read should not have triggered any server calls, total + // number of read calls made at this point will be same as last. + verifyReadCallCount(client, 3); + + // Stub will throw exception for client.read() for 4th and later calls + // if not using the read-ahead buffer exception will be thrown on read + checkEvictedStatus(inputStream, 0, true); + } + + /** + * Test readahead with different config settings for request request size and + * readAhead block size + * @throws Exception + */ + @Test + public void testDiffReadRequestSizeAndRAHBlockSize() throws Exception { + // Set requestRequestSize = 4MB and readAheadBufferSize=8MB + resetReadBufferManager(FOUR_MB, INCREASED_READ_BUFFER_AGE_THRESHOLD); + testReadAheadConfigs(FOUR_MB, TEST_READAHEAD_DEPTH_4, false, EIGHT_MB); + + // Test for requestRequestSize =16KB and readAheadBufferSize=16KB + resetReadBufferManager(SIXTEEN_KB, INCREASED_READ_BUFFER_AGE_THRESHOLD); + AbfsInputStream inputStream = testReadAheadConfigs(SIXTEEN_KB, + TEST_READAHEAD_DEPTH_2, true, SIXTEEN_KB); + testReadAheads(inputStream, SIXTEEN_KB, SIXTEEN_KB); + + // Test for requestRequestSize =16KB and readAheadBufferSize=48KB + resetReadBufferManager(FORTY_EIGHT_KB, INCREASED_READ_BUFFER_AGE_THRESHOLD); + inputStream = testReadAheadConfigs(SIXTEEN_KB, TEST_READAHEAD_DEPTH_2, true, + FORTY_EIGHT_KB); + testReadAheads(inputStream, SIXTEEN_KB, FORTY_EIGHT_KB); + + // Test for requestRequestSize =48KB and readAheadBufferSize=16KB + resetReadBufferManager(FORTY_EIGHT_KB, INCREASED_READ_BUFFER_AGE_THRESHOLD); + inputStream = testReadAheadConfigs(FORTY_EIGHT_KB, TEST_READAHEAD_DEPTH_2, + true, + SIXTEEN_KB); + 
testReadAheads(inputStream, FORTY_EIGHT_KB, SIXTEEN_KB); + } + + @Test + public void testDefaultReadaheadQueueDepth() throws Exception { + Configuration config = getRawConfiguration(); + config.unset(FS_AZURE_READ_AHEAD_QUEUE_DEPTH); + AzureBlobFileSystem fs = getFileSystem(config); + Path testFile = path("/testFile"); + fs.create(testFile).close(); + FSDataInputStream in = fs.open(testFile); + Assertions.assertThat( + ((AbfsInputStream) in.getWrappedStream()).getReadAheadQueueDepth()) + .describedAs("readahead queue depth should be set to default value 2") + .isEqualTo(2); + in.close(); + } + + + private void testReadAheads(AbfsInputStream inputStream, + int readRequestSize, + int readAheadRequestSize) + throws Exception { + if (readRequestSize > readAheadRequestSize) { + readAheadRequestSize = readRequestSize; + } + + byte[] firstReadBuffer = new byte[readRequestSize]; + byte[] secondReadBuffer = new byte[readAheadRequestSize]; + + // get the expected bytes to compare + byte[] expectedFirstReadAheadBufferContents = new byte[readRequestSize]; + byte[] expectedSecondReadAheadBufferContents = new byte[readAheadRequestSize]; + getExpectedBufferData(0, readRequestSize, expectedFirstReadAheadBufferContents); + getExpectedBufferData(readRequestSize, readAheadRequestSize, + expectedSecondReadAheadBufferContents); + + Assertions.assertThat(inputStream.read(firstReadBuffer, 0, readRequestSize)) + .describedAs("Read should be of exact requested size") + .isEqualTo(readRequestSize); + + assertTrue("Data mismatch found in RAH1", + Arrays.equals(firstReadBuffer, + expectedFirstReadAheadBufferContents)); + + Assertions.assertThat(inputStream.read(secondReadBuffer, 0, readAheadRequestSize)) + .describedAs("Read should be of exact requested size") + .isEqualTo(readAheadRequestSize); + + assertTrue("Data mismatch found in RAH2", + Arrays.equals(secondReadBuffer, + expectedSecondReadAheadBufferContents)); + } + + public AbfsInputStream testReadAheadConfigs(int readRequestSize, + 
int readAheadQueueDepth, + boolean alwaysReadBufferSizeEnabled, + int readAheadBlockSize) throws Exception { + Configuration + config = new Configuration( + this.getRawConfiguration()); + config.set("fs.azure.read.request.size", Integer.toString(readRequestSize)); + config.set("fs.azure.readaheadqueue.depth", + Integer.toString(readAheadQueueDepth)); + config.set("fs.azure.read.alwaysReadBufferSize", + Boolean.toString(alwaysReadBufferSizeEnabled)); + config.set("fs.azure.read.readahead.blocksize", + Integer.toString(readAheadBlockSize)); + if (readRequestSize > readAheadBlockSize) { + readAheadBlockSize = readRequestSize; + } + + Path testPath = path("/testReadAheadConfigs"); + final AzureBlobFileSystem fs = createTestFile(testPath, + ALWAYS_READ_BUFFER_SIZE_TEST_FILE_SIZE, config); + byte[] byteBuffer = new byte[ONE_MB]; + AbfsInputStream inputStream = this.getAbfsStore(fs) + .openFileForRead(testPath, null, getTestTracingContext(fs, false)); + + Assertions.assertThat(inputStream.getBufferSize()) + .describedAs("Unexpected AbfsInputStream buffer size") + .isEqualTo(readRequestSize); + + Assertions.assertThat(inputStream.getReadAheadQueueDepth()) + .describedAs("Unexpected ReadAhead queue depth") + .isEqualTo(readAheadQueueDepth); + + Assertions.assertThat(inputStream.shouldAlwaysReadBufferSize()) + .describedAs("Unexpected AlwaysReadBufferSize settings") + .isEqualTo(alwaysReadBufferSizeEnabled); + + Assertions.assertThat(ReadBufferManager.getBufferManager().getReadAheadBlockSize()) + .describedAs("Unexpected readAhead block size") + .isEqualTo(readAheadBlockSize); + + return inputStream; + } + + private void getExpectedBufferData(int offset, int length, byte[] b) { + boolean startFillingIn = false; + int indexIntoBuffer = 0; + char character = 'a'; + + for (int i = 0; i < (offset + length); i++) { + if (i == offset) { + startFillingIn = true; + } + + if ((startFillingIn) && (indexIntoBuffer < length)) { + b[indexIntoBuffer] = (byte) character; + 
indexIntoBuffer++; + } + + character = (character == 'z') ? 'a' : (char) ((int) character + 1); + } + } + + private AzureBlobFileSystem createTestFile(Path testFilePath, long testFileSize, + Configuration config) throws Exception { + AzureBlobFileSystem fs; + + if (config == null) { + fs = this.getFileSystem(); + } else { + final AzureBlobFileSystem currentFs = getFileSystem(); + fs = (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + } + + if (fs.exists(testFilePath)) { + FileStatus status = fs.getFileStatus(testFilePath); + if (status.getLen() >= testFileSize) { + return fs; + } + } + + byte[] buffer = new byte[EIGHT_MB]; + char character = 'a'; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (byte) character; + character = (character == 'z') ? 'a' : (char) ((int) character + 1); + } + + try (FSDataOutputStream outputStream = fs.create(testFilePath)) { + int bytesWritten = 0; + while (bytesWritten < testFileSize) { + outputStream.write(buffer); + bytesWritten += buffer.length; + } + } + + Assertions.assertThat(fs.getFileStatus(testFilePath).getLen()) + .describedAs("File not created of expected size") + .isEqualTo(testFileSize); + + return fs; + } + + private void resetReadBufferManager(int bufferSize, int threshold) { + ReadBufferManager.getBufferManager() + .testResetReadBufferManager(bufferSize, threshold); + // Trigger GC as aggressive recreation of ReadBufferManager buffers + // by successive tests can lead to OOM based on the dev VM/machine capacity. 
+ System.gc(); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java new file mode 100644 index 0000000000000..e26ba938cf5db --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java @@ -0,0 +1,576 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; + +import org.junit.Test; + +import org.mockito.ArgumentCaptor; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +import org.apache.hadoop.fs.store.DataBlocks; +import org.apache.hadoop.util.BlockingThreadPoolExecutorService; +import org.apache.hadoop.util.SemaphoredDelegatingExecutor; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DATA_BLOCKS_BUFFER_DEFAULT; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.ArgumentMatchers.refEq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.anyBoolean; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.anyLong; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters.Mode.APPEND_MODE; + +public final class TestAbfsOutputStream { + + private static final 
int BUFFER_SIZE = 4096; + private static final int WRITE_SIZE = 1000; + private static final String PATH = "~/testpath"; + private final String globalKey = "fs.azure.configuration"; + private final String accountName1 = "account1"; + private final String accountKey1 = globalKey + "." + accountName1; + private final String accountValue1 = "one"; + + private AbfsOutputStreamContext populateAbfsOutputStreamContext( + int writeBufferSize, + boolean isFlushEnabled, + boolean disableOutputStreamFlush, + boolean isAppendBlob, + boolean isExpectHeaderEnabled, + AbfsClient client, + String path, + TracingContext tracingContext, + ExecutorService executorService) throws IOException, + IllegalAccessException { + AbfsConfiguration abfsConf = new AbfsConfiguration(new Configuration(), + accountName1); + String blockFactoryName = + abfsConf.getRawConfiguration().getTrimmed(DATA_BLOCKS_BUFFER, + DATA_BLOCKS_BUFFER_DEFAULT); + DataBlocks.BlockFactory blockFactory = + DataBlocks.createFactory(FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR, + abfsConf.getRawConfiguration(), + blockFactoryName); + + return new AbfsOutputStreamContext(2) + .withWriteBufferSize(writeBufferSize) + .enableExpectHeader(isExpectHeaderEnabled) + .enableFlush(isFlushEnabled) + .disableOutputStreamFlush(disableOutputStreamFlush) + .withStreamStatistics(new AbfsOutputStreamStatisticsImpl()) + .withAppendBlob(isAppendBlob) + .withWriteMaxConcurrentRequestCount(abfsConf.getWriteMaxConcurrentRequestCount()) + .withMaxWriteRequestsToQueue(abfsConf.getMaxWriteRequestsToQueue()) + .withClient(client) + .withPath(path) + .withTracingContext(tracingContext) + .withExecutorService(executorService) + .withBlockFactory(blockFactory) + .build(); + } + + /** + * The test verifies OutputStream shortwrite case(2000bytes write followed by flush, hflush, hsync) is making correct HTTP calls to the server + */ + @Test + public void verifyShortWriteRequest() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation 
op = mock(AbfsRestOperation.class); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), + any(AppendRequestParameters.class), any(), any(TracingContext.class))) + .thenReturn(op); + when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), + isNull(), any(TracingContext.class))).thenReturn(op); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + false, + true, + client, + PATH, + new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", + FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), + null), + createExecutorService(abfsConf))); + final byte[] b = new byte[WRITE_SIZE]; + new Random().nextBytes(b); + out.write(b); + out.hsync(); + + final byte[] b1 = new byte[2*WRITE_SIZE]; + new Random().nextBytes(b1); + out.write(b1); + out.flush(); + out.hflush(); + + out.hsync(); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, WRITE_SIZE, APPEND_MODE, false, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), + any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), any(TracingContext.class)); + } + + /** + * The test verifies OutputStream Write of WRITE_SIZE(1000 bytes) followed by a close is 
making correct HTTP calls to the server + */ + @Test + public void verifyWriteRequest() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation op = mock(AbfsRestOperation.class); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + TracingContext tracingContext = new TracingContext("test-corr-id", + "test-fs-id", FSOperationType.WRITE, + TracingHeaderFormat.ALL_ID_FORMAT, null); + + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), any(AppendRequestParameters.class), any(), any(TracingContext.class))).thenReturn(op); + when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(TracingContext.class))).thenReturn(op); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + false, + true, + client, + PATH, + tracingContext, + createExecutorService(abfsConf))); + final byte[] b = new byte[WRITE_SIZE]; + new Random().nextBytes(b); + + for (int i = 0; i < 5; i++) { + out.write(b); + } + out.close(); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + BUFFER_SIZE, 0, 5*WRITE_SIZE-BUFFER_SIZE, APPEND_MODE, false, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), + any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), + any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), + 
any(TracingContext.class)); + + ArgumentCaptor acFlushPath = ArgumentCaptor.forClass(String.class); + ArgumentCaptor acFlushPosition = ArgumentCaptor.forClass(Long.class); + ArgumentCaptor acTracingContext = ArgumentCaptor + .forClass(TracingContext.class); + ArgumentCaptor acFlushRetainUnCommittedData = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor acFlushClose = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor acFlushSASToken = ArgumentCaptor.forClass(String.class); + + verify(client, times(1)).flush(acFlushPath.capture(), acFlushPosition.capture(), acFlushRetainUnCommittedData.capture(), acFlushClose.capture(), + acFlushSASToken.capture(), isNull(), acTracingContext.capture()); + assertThat(Arrays.asList(PATH)).describedAs("path").isEqualTo(acFlushPath.getAllValues()); + assertThat(Arrays.asList(Long.valueOf(5*WRITE_SIZE))).describedAs("position").isEqualTo(acFlushPosition.getAllValues()); + assertThat(Arrays.asList(false)).describedAs("RetainUnCommittedData flag").isEqualTo(acFlushRetainUnCommittedData.getAllValues()); + assertThat(Arrays.asList(true)).describedAs("Close flag").isEqualTo(acFlushClose.getAllValues()); + } + + /** + * The test verifies OutputStream Write of BUFFER_SIZE(4KB) followed by a close is making correct HTTP calls to the server + */ + @Test + public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation op = mock(AbfsRestOperation.class); + AbfsHttpOperation httpOp = mock(AbfsHttpOperation.class); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + TracingContext tracingContext = new TracingContext( + abfsConf.getClientCorrelationId(), "test-fs-id", + FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), null); + + 
when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), any(AppendRequestParameters.class), any(), any(TracingContext.class))).thenReturn(op); + when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(TracingContext.class))).thenReturn(op); + when(op.getSasToken()).thenReturn("testToken"); + when(op.getResult()).thenReturn(httpOp); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + false, + true, + client, + PATH, + tracingContext, + createExecutorService(abfsConf))); + final byte[] b = new byte[BUFFER_SIZE]; + new Random().nextBytes(b); + + for (int i = 0; i < 2; i++) { + out.write(b); + } + out.close(); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), any(TracingContext.class)); + + ArgumentCaptor acFlushPath = ArgumentCaptor.forClass(String.class); + ArgumentCaptor acFlushPosition = ArgumentCaptor.forClass(Long.class); + ArgumentCaptor acTracingContext = ArgumentCaptor + .forClass(TracingContext.class); + ArgumentCaptor acFlushRetainUnCommittedData = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor acFlushClose = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor acFlushSASToken = ArgumentCaptor.forClass(String.class); + + verify(client, times(1)).flush(acFlushPath.capture(), 
acFlushPosition.capture(), acFlushRetainUnCommittedData.capture(), acFlushClose.capture(), + acFlushSASToken.capture(), isNull(), acTracingContext.capture()); + assertThat(Arrays.asList(PATH)).describedAs("path").isEqualTo(acFlushPath.getAllValues()); + assertThat(Arrays.asList(Long.valueOf(2*BUFFER_SIZE))).describedAs("position").isEqualTo(acFlushPosition.getAllValues()); + assertThat(Arrays.asList(false)).describedAs("RetainUnCommittedData flag").isEqualTo(acFlushRetainUnCommittedData.getAllValues()); + assertThat(Arrays.asList(true)).describedAs("Close flag").isEqualTo(acFlushClose.getAllValues()); + } + + /** + * The test verifies OutputStream Write of BUFFER_SIZE(4KB) is making correct HTTP calls to the server + */ + @Test + public void verifyWriteRequestOfBufferSize() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation op = mock(AbfsRestOperation.class); + AbfsHttpOperation httpOp = mock(AbfsHttpOperation.class); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), + any(AppendRequestParameters.class), any(), any(TracingContext.class))) + .thenReturn(op); + when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), + any(), isNull(), any(TracingContext.class))).thenReturn(op); + when(op.getSasToken()).thenReturn("testToken"); + when(op.getResult()).thenReturn(httpOp); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + false, + true, + client, + PATH, + new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", + FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), + null), + createExecutorService(abfsConf))); + final byte[] 
b = new byte[BUFFER_SIZE]; + new Random().nextBytes(b); + + for (int i = 0; i < 2; i++) { + out.write(b); + } + Thread.sleep(1000); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), any(TracingContext.class)); + } + + /** + * The test verifies OutputStream Write of BUFFER_SIZE(4KB) on a AppendBlob based stream is making correct HTTP calls to the server + */ + @Test + public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation op = mock(AbfsRestOperation.class); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), + any(AppendRequestParameters.class), any(), any(TracingContext.class))) + .thenReturn(op); + when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), + isNull(), any(TracingContext.class))).thenReturn(op); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + true, + true, + client, + PATH, + new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", + 
FSOperationType.OPEN, abfsConf.getTracingHeaderFormat(), + null), + createExecutorService(abfsConf))); + final byte[] b = new byte[BUFFER_SIZE]; + new Random().nextBytes(b); + + for (int i = 0; i < 2; i++) { + out.write(b); + } + Thread.sleep(1000); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, BUFFER_SIZE, APPEND_MODE, true, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, true, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), any(TracingContext.class)); + } + + /** + * The test verifies OutputStream Write of BUFFER_SIZE(4KB) followed by a hflush call is making correct HTTP calls to the server + */ + @Test + public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation op = mock(AbfsRestOperation.class); + when(op.getSasToken()).thenReturn(""); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + TracingContext tracingContext = new TracingContext( + abfsConf.getClientCorrelationId(), "test-fs-id", + FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), null); + + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), + any(AppendRequestParameters.class), any(), any(TracingContext.class))) + .thenReturn(op); + when(client.flush(anyString(), anyLong(), 
anyBoolean(), anyBoolean(), any(), + isNull(), any(TracingContext.class))).thenReturn(op); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + false, + true, + client, + PATH, + new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", + FSOperationType.OPEN, abfsConf.getTracingHeaderFormat(), + null), + createExecutorService(abfsConf))); + final byte[] b = new byte[BUFFER_SIZE]; + new Random().nextBytes(b); + + for (int i = 0; i < 2; i++) { + out.write(b); + } + out.hflush(); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), any(TracingContext.class)); + + ArgumentCaptor acFlushPath = ArgumentCaptor.forClass(String.class); + ArgumentCaptor acFlushPosition = ArgumentCaptor.forClass(Long.class); + ArgumentCaptor acTracingContext = ArgumentCaptor + .forClass(TracingContext.class); + ArgumentCaptor acFlushRetainUnCommittedData = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor acFlushClose = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor acFlushSASToken = ArgumentCaptor.forClass(String.class); + + verify(client, times(1)).flush(acFlushPath.capture(), acFlushPosition.capture(), acFlushRetainUnCommittedData.capture(), acFlushClose.capture(), + acFlushSASToken.capture(), isNull(), acTracingContext.capture()); + 
assertThat(Arrays.asList(PATH)).describedAs("path").isEqualTo(acFlushPath.getAllValues()); + assertThat(Arrays.asList(Long.valueOf(2*BUFFER_SIZE))).describedAs("position").isEqualTo(acFlushPosition.getAllValues()); + assertThat(Arrays.asList(false)).describedAs("RetainUnCommittedData flag").isEqualTo(acFlushRetainUnCommittedData.getAllValues()); + assertThat(Arrays.asList(false)).describedAs("Close flag").isEqualTo(acFlushClose.getAllValues()); + } + + /** + * The test verifies OutputStream Write of BUFFER_SIZE(4KB) followed by a flush call is making correct HTTP calls to the server + */ + @Test + public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { + + AbfsClient client = mock(AbfsClient.class); + AbfsRestOperation op = mock(AbfsRestOperation.class); + AbfsConfiguration abfsConf; + final Configuration conf = new Configuration(); + conf.set(accountKey1, accountValue1); + abfsConf = new AbfsConfiguration(conf, accountName1); + AbfsPerfTracker tracker = new AbfsPerfTracker("test", accountName1, abfsConf); + when(client.getAbfsPerfTracker()).thenReturn(tracker); + when(client.append(anyString(), any(byte[].class), + any(AppendRequestParameters.class), any(), any(TracingContext.class))) + .thenReturn(op); + when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), + isNull(), any(TracingContext.class))).thenReturn(op); + + AbfsOutputStream out = new AbfsOutputStream( + populateAbfsOutputStreamContext( + BUFFER_SIZE, + true, + false, + false, + true, + client, + PATH, + new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", + FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), + null), + createExecutorService(abfsConf))); + final byte[] b = new byte[BUFFER_SIZE]; + new Random().nextBytes(b); + + for (int i = 0; i < 2; i++) { + out.write(b); + } + Thread.sleep(1000); + out.flush(); + Thread.sleep(1000); + + AppendRequestParameters firstReqParameters = new AppendRequestParameters( + 0, 0, BUFFER_SIZE, APPEND_MODE, 
false, null, true); + AppendRequestParameters secondReqParameters = new AppendRequestParameters( + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); + + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); + verify(client, times(1)).append( + eq(PATH), any(byte[].class), refEq(secondReqParameters), any(), any(TracingContext.class)); + // confirm there were only 2 invocations in all + verify(client, times(2)).append( + eq(PATH), any(byte[].class), any(), any(), any(TracingContext.class)); + } + + /** + * Method to create an executor Service for AbfsOutputStream. + * @param abfsConf Configuration. + * @return ExecutorService. + */ + private ExecutorService createExecutorService( + AbfsConfiguration abfsConf) { + ExecutorService executorService = + new SemaphoredDelegatingExecutor(BlockingThreadPoolExecutorService.newInstance( + abfsConf.getWriteMaxConcurrentRequestCount(), + abfsConf.getMaxWriteRequestsToQueue(), + 10L, TimeUnit.SECONDS, + "abfs-test-bounded"), + BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT, true); + return executorService; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java index 4f4210287ce75..191d6e77ae09b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java @@ -34,6 +34,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; + import static org.assertj.core.api.Assertions.assertThat; /** @@ -405,4 +407,15 @@ private void verifyNoException(AbfsPerfTracker abfsPerfTracker) throws Exception 
tracker13.registerResult(httpOperation).registerSuccess(false).registerAggregates(Instant.MIN, TEST_AGGREGATE_COUNT); } } + + /** + * Test helper method to create an AbfsPerfTracker instance. + * @param abfsConfig active test abfs config + * @return instance of AbfsPerfTracker + */ + public static AbfsPerfTracker getAPerfTrackerInstance(AbfsConfiguration abfsConfig) { + AbfsPerfTracker tracker = new AbfsPerfTracker("test", + abfsConfig.getAccountName(), abfsConfig); + return tracker; + } } \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java new file mode 100644 index 0000000000000..cef1c9ae5a1e7 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -0,0 +1,545 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.SocketException; +import java.net.URL; +import java.time.Duration; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.EtagSource; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; +import org.apache.hadoop.fs.azurebfs.commit.ResilientCommitByRename; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.PATH_ALREADY_EXISTS; +import static 
org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Testing Abfs Rename recovery using Mockito. + */ +public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { + + private static final Logger LOG = + LoggerFactory.getLogger(TestAbfsRenameRetryRecovery.class); + + private boolean isNamespaceEnabled; + + public TestAbfsRenameRetryRecovery() throws Exception { + isNamespaceEnabled = getConfiguration() + .getBoolean(TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + } + + /** + * Mock the AbfsClient to run a metadata incomplete scenario with recovery + * rename. + */ + @Test + public void testRenameFailuresDueToIncompleteMetadata() throws Exception { + String sourcePath = getMethodName() + "Source"; + String destNoParentPath = "/NoParent/Dest"; + AzureBlobFileSystem fs = getFileSystem(); + + AbfsClient mockClient = ITestAbfsClient.getMockAbfsClient( + fs.getAbfsStore().getClient(), + fs.getAbfsStore().getAbfsConfiguration()); + + AbfsCounters abfsCounters = mock(AbfsCounters.class); + when(mockClient.getAbfsCounters()).thenReturn(abfsCounters); + // SuccessFul Result. 
+ AbfsRestOperation successOp = + new AbfsRestOperation(AbfsRestOperationType.RenamePath, mockClient, + HTTP_METHOD_PUT, null, null); + AbfsClientRenameResult successResult = mock(AbfsClientRenameResult.class); + doReturn(successOp).when(successResult).getOp(); + when(successResult.isIncompleteMetadataState()).thenReturn(false); + + // Failed Result. + AbfsRestOperation failedOp = new AbfsRestOperation(AbfsRestOperationType.RenamePath, mockClient, + HTTP_METHOD_PUT, null, null); + AbfsClientRenameResult recoveredMetaDataIncompleteResult = + mock(AbfsClientRenameResult.class); + + doReturn(failedOp).when(recoveredMetaDataIncompleteResult).getOp(); + when(recoveredMetaDataIncompleteResult.isIncompleteMetadataState()).thenReturn(true); + + // No destination Parent dir exception. + AzureBlobFileSystemException destParentNotFound + = getMockAbfsRestOperationException( + RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getStatusCode(), + RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()); + + // We need to throw an exception once a rename is triggered with + // destination having no parent, but after a retry it needs to succeed. + when(mockClient.renamePath(sourcePath, destNoParentPath, null, null, + null, false, isNamespaceEnabled)) + .thenThrow(destParentNotFound) + .thenReturn(recoveredMetaDataIncompleteResult); + + // Dest parent not found exc. to be raised. 
+ intercept(AzureBlobFileSystemException.class, + () -> mockClient.renamePath(sourcePath, + destNoParentPath, null, null, + null, false, isNamespaceEnabled)); + + AbfsClientRenameResult resultOfSecondRenameCall = + mockClient.renamePath(sourcePath, + destNoParentPath, null, null, + null, false, isNamespaceEnabled); + + // the second rename call should be the recoveredResult due to + // metaDataIncomplete + Assertions.assertThat(resultOfSecondRenameCall) + .describedAs("This result should be recovered result due to MetaData " + + "being in incomplete state") + .isSameAs(recoveredMetaDataIncompleteResult); + // Verify Incomplete metadata state happened for our second rename call. + assertTrue("Metadata incomplete state should be true if a rename is " + + "retried after no Parent directory is found", + resultOfSecondRenameCall.isIncompleteMetadataState()); + + + // Verify renamePath occurred two times implying a retry was attempted. + verify(mockClient, times(2)) + .renamePath(sourcePath, destNoParentPath, null, null, null, false, + isNamespaceEnabled); + + } + + AbfsClient getMockAbfsClient() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + + // adding mock objects to current AbfsClient + AbfsClient spyClient = Mockito.spy(fs.getAbfsStore().getClient()); + + Mockito.doAnswer(answer -> { + AbfsRestOperation op = new AbfsRestOperation(AbfsRestOperationType.RenamePath, + spyClient, HTTP_METHOD_PUT, answer.getArgument(0), answer.getArgument(1)); + AbfsRestOperation spiedOp = Mockito.spy(op); + addSpyBehavior(spiedOp, op, spyClient); + return spiedOp; + }).when(spyClient).createRenameRestOperation(Mockito.any(URL.class), anyList()); + + return spyClient; + + } + + /** + * Spies on a rest operation to inject transient failure. + * the first createHttpOperation() invocation will return an abfs rest operation + * which will fail. 
+ * @param spiedRestOp spied operation whose createHttpOperation() will fail first time + * @param normalRestOp normal operation the good operation + * @param client client. + * @throws IOException failure + */ + private void addSpyBehavior(final AbfsRestOperation spiedRestOp, + final AbfsRestOperation normalRestOp, + final AbfsClient client) + throws IOException { + AbfsHttpOperation failingOperation = Mockito.spy(normalRestOp.createHttpOperation()); + AbfsHttpOperation normalOp1 = normalRestOp.createHttpOperation(); + executeThenFail(client, normalRestOp, failingOperation, normalOp1); + AbfsHttpOperation normalOp2 = normalRestOp.createHttpOperation(); + normalOp2.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + + when(spiedRestOp.createHttpOperation()) + .thenReturn(failingOperation) + .thenReturn(normalOp2); + } + + /** + * Mock an idempotency failure by executing the normal operation, then + * raising an IOE. + * @param normalRestOp the rest operation used to sign the requests. 
+ * @param failingOperation operation whose sendRequest() is stubbed to fail + * @param normalOp operation actually signed, sent and processed first + * @throws IOException failure + */ + private void executeThenFail(final AbfsClient client, + final AbfsRestOperation normalRestOp, + final AbfsHttpOperation failingOperation, + final AbfsHttpOperation normalOp) + throws IOException { + + Mockito.doAnswer(answer -> { + LOG.info("Executing first attempt with post-operation fault injection"); + final byte[] buffer = answer.getArgument(0); + final int offset = answer.getArgument(1); + final int length = answer.getArgument(2); + normalRestOp.signRequest(normalOp, length); + normalOp.sendRequest(buffer, offset, length); + normalOp.processResponse(buffer, offset, length); + LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure", + normalOp.getStatusCode(), + normalOp.getStorageErrorCode(), + normalOp.getStorageErrorMessage()); + throw new SocketException("connection-reset"); + }).when(failingOperation).sendRequest(Mockito.nullable(byte[].class), + Mockito.nullable(int.class), Mockito.nullable(int.class)); + + } + + /** + * This is the good outcome: resilient rename.
+ */ + @Test + public void testRenameRecoveryEtagMatchFsLevel() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + touch(new Path(path1)); + + setAbfsClient(abfsStore, mockClient); + + // snapshot the counters before the rename so the deltas can be asserted + AbfsCounters counter = mockClient.getAbfsCounters(); + IOStatistics ioStats = counter.getIOStatistics(); + + Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName()); + Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); + + // 404 and retry, send sourceEtag as null + // source eTag matches -> rename should pass even when execute throws exception + fs.rename(new Path(path1), new Path(path2)); + + // validating stat counters after rename + // 4 calls should have happened in total for rename + // 1 -> original rename rest call, 2 -> first retry, + // +2 for getPathStatus calls + assertThatStatisticCounter(ioStats, + CONNECTIONS_MADE.getStatName()) + .isEqualTo(4 + connMadeBeforeRename); + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 + // retries happen internally within AbfsRestOperation execute() + // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called + assertThatStatisticCounter(ioStats, + RENAME_PATH_ATTEMPTS.getStatName()) + .isEqualTo(1 + renamePathAttemptsBeforeRename); + + } + + /** + * Execute a failing rename but have the file at the far end not match. + * This is done by explicitly passing in a made up etag for the source + * etag and creating a file at the far end.
+ * The first rename will actually fail with a path exists exception, + * but as that is swallowed, it's not a problem. + */ + @Test + public void testRenameRecoveryEtagMismatchFsLevel() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + fs.create(new Path(path2)); + + setAbfsClient(abfsStore, mockClient); + + // rename must return false; NOTE(review): path1 is never created in this test - confirm intent + assertEquals(false, fs.rename(new Path(path1), new Path(path2))); + + } + + @Test + public void testRenameRecoveryFailsForDirFsLevel() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String dir1 = "/dummyDir1"; + String dir2 = "/dummyDir2"; + + Path path1 = new Path(dir1); + Path path2 = new Path(dir2); + + fs.mkdirs(path1); + + setAbfsClient(abfsStore, mockClient); + + // snapshot the counters before the rename so the deltas can be asserted + AbfsCounters counter = mockClient.getAbfsCounters(); + IOStatistics ioStats = counter.getIOStatistics(); + + Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName()); + Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); + + // source eTag does not match -> rename should be a failure + boolean renameResult = fs.rename(path1, path2); + assertEquals(false, renameResult); + + // validating stat counters after rename + // 3 calls should
have happened in total for rename + // 1 -> original rename rest call, 2 -> first retry, + // +1 for getPathStatus calls + // last getPathStatus call should be skipped + assertThatStatisticCounter(ioStats, + CONNECTIONS_MADE.getStatName()) + .isEqualTo(3 + connMadeBeforeRename); + + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 + // retries happen internally within AbfsRestOperation execute() + // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called + assertThatStatisticCounter(ioStats, + RENAME_PATH_ATTEMPTS.getStatName()) + .isEqualTo(1 + renamePathAttemptsBeforeRename); + } + + /** + * Assert that an exception carries the expected error code; rethrow it otherwise. + * @param code expected error code + * @param e exception to check + * @throws AbfsRestOperationException the supplied exception, if its code differs + */ + private static void expectErrorCode(final AzureServiceErrorCode code, + final AbfsRestOperationException e) throws AbfsRestOperationException { + if (e.getErrorCode() != code) { + throw e; + } + } + + /** + * Directory rename failure is unrecoverable.
+ */ + @Test + public void testDirRenameRecoveryUnsupported() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient spyClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + fs.mkdirs(new Path(path1)); + + // source is a directory -> the rename is expected to raise SOURCE_PATH_NOT_FOUND + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, testTracingContext, null, false, + isNamespaceEnabled))); + } + + /** + * Even with failures injected, a rename onto an existing path is rejected. + */ + @Test + public void testExistingPathCorrectlyRejected() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient spyClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + + touch(new Path(path1)); + touch(new Path(path2)); + + // destination already exists -> the rename is expected to raise PATH_ALREADY_EXISTS + expectErrorCode(PATH_ALREADY_EXISTS, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, testTracingContext, null, false, + isNamespaceEnabled))); + } + + /** + * Test that rename recovery remains unsupported for + * FNS configurations.
+ */ + @Test + public void testRenameRecoveryUnsupportedForFlatNamespace() throws Exception { + Assume.assumeTrue(!isNamespaceEnabled); + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + touch(new Path(path1)); + + setAbfsClient(abfsStore, mockClient); + + // snapshot the counters before the rename so the deltas can be asserted + AbfsCounters counter = mockClient.getAbfsCounters(); + IOStatistics ioStats = counter.getIOStatistics(); + + Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName()); + Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); + + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + mockClient.renamePath(path1, path2, null, testTracingContext, null, false, + isNamespaceEnabled))); + + // validating stat counters after rename + + // only 2 calls should have happened in total for rename + // 1 -> original rename rest call, 2 -> first retry, + // no getPathStatus calls + // (the CONNECTIONS_MADE delta asserted below therefore stays at 2) + assertThatStatisticCounter(ioStats, + CONNECTIONS_MADE.getStatName()) + .isEqualTo(2 + connMadeBeforeRename); + + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 + // retries happen internally within AbfsRestOperation execute() + // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called + assertThatStatisticCounter(ioStats, + RENAME_PATH_ATTEMPTS.getStatName()) + .isEqualTo(1 + renamePathAttemptsBeforeRename); + } + + /** + * Test the resilient commit code works through fault injection, including + * reporting recovery.
+ */ + @Test + public void testResilientCommitOperation() throws Throwable { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + final AzureBlobFileSystemStore store = fs.getAbfsStore(); + Assume.assumeTrue(store.getIsNamespaceEnabled(testTracingContext)); + + // patch in the mock abfs client to the filesystem, for the resilient + // commit API to pick up. + setAbfsClient(store, getMockAbfsClient()); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + + final Path source = new Path(path1); + touch(source); + final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag(); + + final ResilientCommitByRename commit = fs.createResilientCommitSupport(source); + final Pair outcome = + commit.commitSingleFileByRename(source, new Path(path2), sourceTag); + Assertions.assertThat(outcome.getKey()) + .describedAs("recovery flag") + .isTrue(); + } + /** + * Test that the resilient commit fails with a FileNotFoundException when the + * source etag passed in does not match. + */ + @Test + public void testResilientCommitOperationTagMismatch() throws Throwable { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + final AzureBlobFileSystemStore store = fs.getAbfsStore(); + Assume.assumeTrue(store.getIsNamespaceEnabled(testTracingContext)); + + // patch in the mock abfs client to the filesystem, for the resilient + // commit API to pick up.
+ setAbfsClient(store, getMockAbfsClient()); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + + final Path source = new Path(path1); + touch(source); + final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag(); + + final ResilientCommitByRename commit = fs.createResilientCommitSupport(source); + intercept(FileNotFoundException.class, () -> + commit.commitSingleFileByRename(source, new Path(path2), "not the right tag")); + } + + /** + * Create an AbfsRestOperationException reporting a missing destination parent. + * @param statusCode status code to be used. + * @param errorCode error code to be used. + * @return the exception. + */ + private AbfsRestOperationException getMockAbfsRestOperationException( + int statusCode, String errorCode) { + return new AbfsRestOperationException(statusCode, errorCode, + "No Parent found for the Destination file", + new Exception()); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java new file mode 100644 index 0000000000000..bfa524a25e600 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java @@ -0,0 +1,302 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.HttpURLConnection; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; +import java.util.ArrayList; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.stubbing.Stubber; + +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import static java.net.HttpURLConnection.HTTP_OK; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.services.AuthType.OAuth; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; +import static 
org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.nullable; + +public class TestAbfsRestOperationMockFailures { + + @Test + public void testClientRequestIdForConnectTimeoutRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE); + abbreviations[0] = CONNECTION_TIMEOUT_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForConnectAndReadTimeoutRetry() + throws Exception { + Exception[] exceptions = new Exception[2]; + String[] abbreviations = new String[2]; + exceptions[0] = new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE); + abbreviations[0] = CONNECTION_TIMEOUT_ABBREVIATION; + exceptions[1] = new SocketTimeoutException(READ_TIMEOUT_JDK_MESSAGE); + abbreviations[1] = READ_TIMEOUT_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForReadTimeoutRetry() throws Exception { + Exception[] exceptions = new Exception[1]; 
+ String[] abbreviations = new String[1]; + exceptions[0] = new SocketTimeoutException(READ_TIMEOUT_JDK_MESSAGE); + abbreviations[0] = READ_TIMEOUT_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + /** One UnknownHostException from processResponse(): the next constructHeader() call must receive UNKNOWN_HOST_EXCEPTION_ABBREVIATION. */ + @Test + public void testClientRequestIdForUnknownHostRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new UnknownHostException(); + abbreviations[0] = UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + /** An exception whose message starts with CONNECTION_RESET_MESSAGE must yield CONNECTION_RESET_ABBREVIATION. NOTE(review): the carrier is a SocketTimeoutException, so presumably only the message is matched - confirm against RetryReason. */ + @Test + public void testClientRequestIdForConnectionResetRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketTimeoutException(CONNECTION_RESET_MESSAGE + " by peer"); + abbreviations[0] = CONNECTION_RESET_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + /** A SocketException whose message is the literal text unknown must yield SOCKET_EXCEPTION_ABBREVIATION. */ + @Test + public void testClientRequestIdForUnknownSocketExRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketException("unknown"); + abbreviations[0] = SOCKET_EXCEPTION_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + /** An InterruptedIOException without a message must yield IO_EXCEPTION_ABBREVIATION. */ + @Test + public void testClientRequestIdForIOERetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new InterruptedIOException(); + abbreviations[0] = IO_EXCEPTION_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + /** HTTP_BAD_REQUEST with an empty storage error message must be reported as 400. */ + @Test + public void testClientRequestIdFor400Retry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_BAD_REQUEST, "", "400"); + } + /** HTTP_INTERNAL_ERROR with an empty storage error message must be reported as 500. */ + @Test + public void testClientRequestIdFor500Retry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_INTERNAL_ERROR, "", "500"); + } + /** HTTP_UNAVAILABLE with the INGRESS_OVER_ACCOUNT_LIMIT error message must be reported as INGRESS_LIMIT_BREACH_ABBREVIATION. */ + @Test + public void testClientRequestIdFor503INGRetry() 
throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, + INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage(), + INGRESS_LIMIT_BREACH_ABBREVIATION); + } + /** HTTP_UNAVAILABLE with the EGRESS_OVER_ACCOUNT_LIMIT error message must be reported as EGRESS_LIMIT_BREACH_ABBREVIATION. */ + @Test + public void testClientRequestIdFor503egrRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, + EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage(), + EGRESS_LIMIT_BREACH_ABBREVIATION); + } + /** HTTP_UNAVAILABLE with OPERATION_BREACH_MESSAGE must be reported as OPERATION_LIMIT_BREACH_ABBREVIATION. */ + @Test + public void testClientRequestIdFor503OPRRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, + OPERATION_BREACH_MESSAGE, OPERATION_LIMIT_BREACH_ABBREVIATION); + } + /** HTTP_UNAVAILABLE with any other storage error message must be reported as plain 503. */ + @Test + public void testClientRequestIdFor503OtherRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, "Other.", "503"); + } + /** Executes a spied AbfsRestOperation whose mocked HTTP operation initially reports the given status and serverErrorMessage, then checks that the second constructHeader() call receives keyExpected and that constructHeader() is called exactly twice. */ + private void testClientRequestIdForStatusRetry(int status, + String serverErrorMessage, + String keyExpected) throws Exception { + + AbfsClient abfsClient = Mockito.mock(AbfsClient.class); + ExponentialRetryPolicy retryPolicy = Mockito.mock( + ExponentialRetryPolicy.class); + addMockBehaviourToAbfsClient(abfsClient, retryPolicy); + + + AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.ReadFile, + abfsClient, + "PUT", + null, + new ArrayList<>() + )); + + AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); + addMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation); + + Mockito.doNothing() + .doNothing() + .when(httpOperation) + .processResponse(nullable(byte[].class), nullable(int.class), + nullable(int.class)); + /* The first six getStatusCode() calls report the failing status; every later call reports HTTP_OK. The one-element array lets the lambda mutate the counter. */ + int[] statusCount = new int[1]; + statusCount[0] = 0; + Mockito.doAnswer(answer -> { + if (statusCount[0] <= 5) { + statusCount[0]++; + return status; + } + return HTTP_OK; + }).when(httpOperation).getStatusCode(); + + Mockito.doReturn(serverErrorMessage) + .when(httpOperation) + .getStorageErrorMessage(); + + TracingContext tracingContext = Mockito.mock(TracingContext.class); + 
Mockito.doNothing().when(tracingContext).setRetryCount(nullable(int.class)); + /* Only the second constructHeader() call is checked: its argument at index 1 must equal keyExpected. */ + int[] count = new int[1]; + count[0] = 0; + Mockito.doAnswer(invocationOnMock -> { + if (count[0] == 1) { + Assertions.assertThat((String) invocationOnMock.getArgument(1)) + .isEqualTo(keyExpected); + } + count[0]++; + return null; + }).when(tracingContext).constructHeader(any(), any()); + + abfsRestOperation.execute(tracingContext); + Assertions.assertThat(count[0]).isEqualTo(2); + + } + /** Executes a spied AbfsRestOperation whose mocked processResponse() throws the first len entries of exceptions before completing normally, then checks the abbreviation handed to each following constructHeader() call; len + 1 calls are expected in total. */ + private void testClientRequestIdForTimeoutRetry(Exception[] exceptions, + String[] abbreviationsExpected, + int len) throws Exception { + AbfsClient abfsClient = Mockito.mock(AbfsClient.class); + ExponentialRetryPolicy retryPolicy = Mockito.mock( + ExponentialRetryPolicy.class); + addMockBehaviourToAbfsClient(abfsClient, retryPolicy); + + + AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.ReadFile, + abfsClient, + "PUT", + null, + new ArrayList<>() + )); + + AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); + addMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation); + /* processResponse() throws exceptions[0] .. exceptions[len - 1] on successive calls and does nothing afterwards. */ + Stubber stubber = Mockito.doThrow(exceptions[0]); + for (int iteration = 1; iteration < len; iteration++) { + stubber.doThrow(exceptions[iteration]); + } + stubber + .doNothing() + .when(httpOperation) + .processResponse(nullable(byte[].class), nullable(int.class), + nullable(int.class)); + + Mockito.doReturn(HTTP_OK).when(httpOperation).getStatusCode(); + + TracingContext tracingContext = Mockito.mock(TracingContext.class); + Mockito.doNothing().when(tracingContext).setRetryCount(nullable(int.class)); + /* constructHeader() call k, for 1 <= k <= len, must receive abbreviationsExpected[k - 1]; call 0 is not checked. */ + int[] count = new int[1]; + count[0] = 0; + Mockito.doAnswer(invocationOnMock -> { + if (count[0] > 0 && count[0] <= len) { + Assertions.assertThat((String) invocationOnMock.getArgument(1)) + .isEqualTo(abbreviationsExpected[count[0] - 1]); + } + count[0]++; + return null; + }).when(tracingContext).constructHeader(any(), any()); + + 
abfsRestOperation.execute(tracingContext); + Assertions.assertThat(count[0]).isEqualTo(len + 1); + } + /** Wires the spy to the mocked HTTP operation: createHttpOperation() returns httpOperation, getClientLatency() returns an empty string, and getConnection() returns a mocked HttpURLConnection. */ + private void addMockBehaviourToRestOpAndHttpOp(final AbfsRestOperation abfsRestOperation, + final AbfsHttpOperation httpOperation) throws IOException { + HttpURLConnection httpURLConnection = Mockito.mock(HttpURLConnection.class); + Mockito.doNothing() + .when(httpURLConnection) + .setRequestProperty(nullable(String.class), nullable(String.class)); + Mockito.doReturn(httpURLConnection).when(httpOperation).getConnection(); + Mockito.doReturn("").when(abfsRestOperation).getClientLatency(); + Mockito.doReturn(httpOperation).when(abfsRestOperation).createHttpOperation(); + } + /** Stubs the mocked client with OAuth as auth type, an empty access token, a mocked intercept whose sendingRequest() and updateMetrics() do nothing, and the supplied retry policy. */ + private void addMockBehaviourToAbfsClient(final AbfsClient abfsClient, + final ExponentialRetryPolicy retryPolicy) throws IOException { + Mockito.doReturn(OAuth).when(abfsClient).getAuthType(); + Mockito.doReturn("").when(abfsClient).getAccessToken(); + AbfsThrottlingIntercept intercept = Mockito.mock( + AbfsThrottlingIntercept.class); + Mockito.doReturn(intercept).when(abfsClient).getIntercept(); + Mockito.doNothing() + .when(intercept) + .sendingRequest(any(), nullable(AbfsCounters.class)); + Mockito.doNothing().when(intercept).updateMetrics(any(), any()); + /* shouldRetry() answers true by default; the later, more specific stubs answer false for the argument pairs (1, HTTP_OK) and (2, HTTP_OK). */ + Mockito.doReturn(retryPolicy).when(abfsClient).getRetryPolicy(); + Mockito.doReturn(true) + .when(retryPolicy) + .shouldRetry(nullable(Integer.class), nullable(Integer.class)); + Mockito.doReturn(false).when(retryPolicy).shouldRetry(1, HTTP_OK); + Mockito.doReturn(false).when(retryPolicy).shouldRetry(2, HTTP_OK); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAzureADAuthenticator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAzureADAuthenticator.java new file mode 100644 index 0000000000000..8e79288cf6e7d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAzureADAuthenticator.java @@ 
-0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_OAUTH_TOKEN_FETCH_RETRY_COUNT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ACCOUNT_NAME; +/** Checks that the OAuth token-fetch retry policy built by AbfsConfiguration reflects the defaults and explicitly configured values. */ +public class TestAzureADAuthenticator extends AbstractAbfsIntegrationTest { + // Values configured and then expected back by testOAuthTokenFetchRetryPolicy. + private static final int TEST_RETRY_COUNT = 10; + private static final int TEST_MIN_BACKOFF = 20; + private static final int TEST_MAX_BACKOFF = 30; + private static final int TEST_DELTA_BACKOFF = 40; + 
+ public TestAzureADAuthenticator() throws Exception { + super(); + } + /** With the four token-fetch retry keys unset, the policy must expose the DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_* values. */ + @Test + public void testDefaultOAuthTokenFetchRetryPolicy() throws Exception { + getConfiguration().unset(AZURE_OAUTH_TOKEN_FETCH_RETRY_COUNT); + getConfiguration().unset(AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF); + getConfiguration().unset(AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF); + getConfiguration().unset(AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF); + + String accountName = getConfiguration().get(FS_AZURE_ACCOUNT_NAME); + AbfsConfiguration abfsConfig = new AbfsConfiguration(getRawConfiguration(), + accountName); + + ExponentialRetryPolicy retryPolicy = abfsConfig + .getOauthTokenFetchRetryPolicy(); + // describedAs() formats its message with String.format, so the placeholder is %s rather than {}. + Assertions.assertThat(retryPolicy.getRetryCount()).describedAs( + "retryCount should be the default value %s as the same " + + "is not configured", + DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS) + .isEqualTo(DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS); + Assertions.assertThat(retryPolicy.getMinBackoff()).describedAs( + "minBackOff should be the default value %s as the same is " + + "not configured", + DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF_INTERVAL) + .isEqualTo(DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF_INTERVAL); + Assertions.assertThat(retryPolicy.getMaxBackoff()).describedAs( + "maxBackOff should be the default value %s as the same is " + + "not configured", + DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL) + .isEqualTo(DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL); + Assertions.assertThat(retryPolicy.getDeltaBackoff()).describedAs( + "deltaBackOff should be the default value %s as the same " + "is " + + "not configured", + DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF) + .isEqualTo(DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF); + + } + /** Retry count and min/max/delta backoff set through the configuration keys must be returned unchanged by the policy. */ + @Test + public void testOAuthTokenFetchRetryPolicy() + throws IOException, IllegalAccessException { + + getConfiguration() + .set(AZURE_OAUTH_TOKEN_FETCH_RETRY_COUNT, 
String.valueOf(TEST_RETRY_COUNT)); + getConfiguration().set(AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF, + String.valueOf(TEST_MIN_BACKOFF)); + getConfiguration().set(AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF, + String.valueOf(TEST_MAX_BACKOFF)); + getConfiguration().set(AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF, + String.valueOf(TEST_DELTA_BACKOFF)); + + String accountName = getConfiguration().get(FS_AZURE_ACCOUNT_NAME); + AbfsConfiguration abfsConfig = new AbfsConfiguration(getRawConfiguration(), + accountName); + + ExponentialRetryPolicy retryPolicy = abfsConfig + .getOauthTokenFetchRetryPolicy(); + + Assertions.assertThat(retryPolicy.getRetryCount()) + .describedAs("retryCount should be %s", TEST_RETRY_COUNT) + .isEqualTo(TEST_RETRY_COUNT); + Assertions.assertThat(retryPolicy.getMinBackoff()) + .describedAs("minBackOff should be %s", TEST_MIN_BACKOFF) + .isEqualTo(TEST_MIN_BACKOFF); + Assertions.assertThat(retryPolicy.getMaxBackoff()) + .describedAs("maxBackOff should be %s", TEST_MAX_BACKOFF) + .isEqualTo(TEST_MAX_BACKOFF); + Assertions.assertThat(retryPolicy.getDeltaBackoff()) + .describedAs("deltaBackOff should be %s", TEST_DELTA_BACKOFF) + .isEqualTo(TEST_DELTA_BACKOFF); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java new file mode 100644 index 0000000000000..12ab4e9ead688 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -0,0 +1,310 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MIN_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_AUTOTHROTTLING; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT1_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; + +import static org.junit.Assume.assumeTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.fs.FSDataInputStream; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import 
org.mockito.Mockito; + +import java.net.URI; +import java.util.Random; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; + +/** + * Tests for {@link ExponentialRetryPolicy} settings and for selection of the ABFS throttling intercept. + */ +public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { + private final int maxRetryCount = 30; + private final int noRetryCount = 0; + private final int retryCount = new Random().nextInt(maxRetryCount); + private final int retryCountBeyondMax = maxRetryCount + 1; + private static final String TEST_PATH = "/testfile"; + private static final double MULTIPLYING_FACTOR = 1.5; + private static final int ANALYSIS_PERIOD = 10000; + + + public TestExponentialRetryPolicy() throws Exception { + super(); + } + /** Runs testMaxIOConfig with zero retries, a random count below maxRetryCount, and a count above it. */ + @Test + public void testDifferentMaxIORetryCount() throws Exception { + AbfsConfiguration abfsConfig = getAbfsConfig(); + abfsConfig.setMaxIoRetries(noRetryCount); + testMaxIOConfig(abfsConfig); + abfsConfig.setMaxIoRetries(retryCount); + testMaxIOConfig(abfsConfig); + abfsConfig.setMaxIoRetries(retryCountBeyondMax); + testMaxIOConfig(abfsConfig); + } + /** A configuration without an explicit setting must report maxRetryCount I/O retries. */ + @Test + public void testDefaultMaxIORetryCount() throws Exception { + AbfsConfiguration abfsConfig = getAbfsConfig(); + Assert.assertEquals( + String.format("default maxIORetry count is %s.", maxRetryCount), + maxRetryCount, abfsConfig.getMaxIoRetries()); + testMaxIOConfig(abfsConfig); + } + /** The client must expose AbfsNoOpThrottlingIntercept when auto-throttling is off and AbfsClientThrottlingIntercept when it is on. */ + @Test + public void testThrottlingIntercept() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.setBoolean(FS_AZURE_ENABLE_AUTOTHROTTLING, false); + + // On disabling 
throttling AbfsNoOpThrottlingIntercept object is returned + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + "dummy.dfs.core.windows.net"); + AbfsThrottlingIntercept intercept; + AbfsClient abfsClient = ITestAbfsClient.createTestClientFromCurrentContext(fs.getAbfsStore().getClient(), abfsConfiguration); + intercept = abfsClient.getIntercept(); + Assertions.assertThat(intercept) + .describedAs("AbfsNoOpThrottlingIntercept instance expected") + .isInstanceOf(AbfsNoOpThrottlingIntercept.class); + + configuration.setBoolean(FS_AZURE_ENABLE_AUTOTHROTTLING, true); + configuration.setBoolean(FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED, true); + // On enabling throttling AbfsClientThrottlingIntercept object is returned + AbfsConfiguration abfsConfiguration1 = new AbfsConfiguration(configuration, + "dummy1.dfs.core.windows.net"); + AbfsClient abfsClient1 = ITestAbfsClient.createTestClientFromCurrentContext(fs.getAbfsStore().getClient(), abfsConfiguration1); + intercept = abfsClient1.getIntercept(); + Assertions.assertThat(intercept) + .describedAs("AbfsClientThrottlingIntercept instance expected") + .isInstanceOf(AbfsClientThrottlingIntercept.class); + } + /** With account-level throttling off the factory must return one shared intercept for different accounts; with it on, a distinct intercept per account name that is reused for the same name. */ + @Test + public void testCreateMultipleAccountThrottling() throws Exception { + Configuration config = new Configuration(getRawConfiguration()); + String accountName = config.get(FS_AZURE_ACCOUNT_NAME); + if (accountName == null) { + // check if accountName is set using different config key + accountName = config.get(FS_AZURE_ABFS_ACCOUNT1_NAME); + } + assumeTrue("Not set: " + FS_AZURE_ABFS_ACCOUNT1_NAME, + accountName != null && !accountName.isEmpty()); + // NOTE(review): rawConfig1, successOp and http500Op are not read again in this test. + Configuration rawConfig1 = new Configuration(); + rawConfig1.addResource(TEST_CONFIGURATION_FILE_NAME); + + AbfsRestOperation successOp = mock(AbfsRestOperation.class); + AbfsHttpOperation http500Op = mock(AbfsHttpOperation.class); + when(http500Op.getStatusCode()).thenReturn(HTTP_INTERNAL_ERROR); + 
when(successOp.getResult()).thenReturn(http500Op); + + AbfsConfiguration configuration = Mockito.mock(AbfsConfiguration.class); + when(configuration.getAnalysisPeriod()).thenReturn(ANALYSIS_PERIOD); + when(configuration.isAutoThrottlingEnabled()).thenReturn(true); + when(configuration.accountThrottlingEnabled()).thenReturn(false); + + AbfsThrottlingIntercept instance1 = AbfsThrottlingInterceptFactory.getInstance(accountName, configuration); + String accountName1 = config.get(FS_AZURE_ABFS_ACCOUNT1_NAME); + + assumeTrue("Not set: " + FS_AZURE_ABFS_ACCOUNT1_NAME, + accountName1 != null && !accountName1.isEmpty()); + + AbfsThrottlingIntercept instance2 = AbfsThrottlingInterceptFactory.getInstance(accountName1, configuration); + //if singleton is enabled, for different accounts both the instances should return same value + Assertions.assertThat(instance1) + .describedAs( + "if singleton is enabled, for different accounts both the instances should return same value") + .isEqualTo(instance2); + + when(configuration.accountThrottlingEnabled()).thenReturn(true); + AbfsThrottlingIntercept instance3 = AbfsThrottlingInterceptFactory.getInstance(accountName, configuration); + AbfsThrottlingIntercept instance4 = AbfsThrottlingInterceptFactory.getInstance(accountName1, configuration); + AbfsThrottlingIntercept instance5 = AbfsThrottlingInterceptFactory.getInstance(accountName, configuration); + //if singleton is not enabled, for different accounts instances should return different value + Assertions.assertThat(instance3) + .describedAs( + "if singleton is not enabled, for different accounts instances should return different value") + .isNotEqualTo(instance4); + + //if singleton is not enabled, for same accounts instances should return same value + Assertions.assertThat(instance3) + .describedAs( + "if singleton is not enabled, for same accounts instances should return same value") + .isEqualTo(instance5); + } + /** After the first account is left untouched for 1.5 times the configured idle timeout and then only read from, its write analyzer must be flagged idle while its read analyzer and the second account's write analyzer are not; a further write clears the flag. */ + @Test + public void testOperationOnAccountIdle() throws Exception 
{ + //Get the filesystem. + AzureBlobFileSystem fs = getFileSystem(); + AbfsClient client = fs.getAbfsStore().getClient(); + AbfsConfiguration configuration1 = client.getAbfsConfiguration(); + Assume.assumeTrue(configuration1.isAutoThrottlingEnabled()); + Assume.assumeTrue(configuration1.accountThrottlingEnabled()); + + AbfsClientThrottlingIntercept accountIntercept + = (AbfsClientThrottlingIntercept) client.getIntercept(); + final byte[] b = new byte[2 * MIN_BUFFER_SIZE]; + new Random().nextBytes(b); + + Path testPath = path(TEST_PATH); + + //Do an operation on the filesystem. + try (FSDataOutputStream stream = fs.create(testPath)) { + stream.write(b); + } + + //Don't perform any operation on the account. + int sleepTime = (int) ((getAbfsConfig().getAccountOperationIdleTimeout()) * MULTIPLYING_FACTOR); + Thread.sleep(sleepTime); + + try (FSDataInputStream streamRead = fs.open(testPath)) { + streamRead.read(b); + } + + //Perform operations on another account. + AzureBlobFileSystem fs1 = new AzureBlobFileSystem(); + Configuration config = new Configuration(getRawConfiguration()); + String accountName1 = config.get(FS_AZURE_ABFS_ACCOUNT1_NAME); + assumeTrue("Not set: " + FS_AZURE_ABFS_ACCOUNT1_NAME, + accountName1 != null && !accountName1.isEmpty()); + final String abfsUrl1 = this.getFileSystemName() + "12" + "@" + accountName1; + URI defaultUri1 = null; + defaultUri1 = new URI("abfss", abfsUrl1, null, null, null); + fs1.initialize(defaultUri1, getRawConfiguration()); + AbfsClient client1 = fs1.getAbfsStore().getClient(); + AbfsClientThrottlingIntercept accountIntercept1 + = (AbfsClientThrottlingIntercept) client1.getIntercept(); + try (FSDataOutputStream stream1 = fs1.create(testPath)) { + stream1.write(b); + } + + //Verify the write analyzer for first account is idle but the read analyzer is not idle. 
+ Assertions.assertThat(accountIntercept.getWriteThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs("Write analyzer for first account should be idle the first time") + .isTrue(); + + Assertions.assertThat( + accountIntercept.getReadThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs("Read analyzer for first account should not be idle") + .isFalse(); + + //Verify the write analyzer for second account is not idle. + Assertions.assertThat( + accountIntercept1.getWriteThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs("Write analyzer for second account should not be idle") + .isFalse(); + + //Again perform an operation on the first account. + try (FSDataOutputStream stream2 = fs.create(testPath)) { + stream2.write(b); + } + + //Verify the write analyzer on first account is not idle. + Assertions.assertThat( + + accountIntercept.getWriteThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs( + "Write analyzer for first account should not be idle second time") + .isFalse(); + } + /** A policy built from an AbfsConfiguration must pick up the retry count and the min/max/delta backoff written into the underlying Configuration. */ + @Test + public void testAbfsConfigConstructor() throws Exception { + // Ensure we choose expected values that are not defaults + ExponentialRetryPolicy template = new ExponentialRetryPolicy( + getAbfsConfig().getMaxIoRetries()); + int testModifier = 1; + int expectedMaxRetries = template.getRetryCount() + testModifier; + int expectedMinBackoff = template.getMinBackoff() + testModifier; + int expectedMaxBackoff = template.getMaxBackoff() + testModifier; + int expectedDeltaBackoff = template.getDeltaBackoff() + testModifier; + + Configuration config = new Configuration(this.getRawConfiguration()); + config.setInt(AZURE_MAX_IO_RETRIES, expectedMaxRetries); + config.setInt(AZURE_MIN_BACKOFF_INTERVAL, expectedMinBackoff); + config.setInt(AZURE_MAX_BACKOFF_INTERVAL, expectedMaxBackoff); + config.setInt(AZURE_BACKOFF_INTERVAL, expectedDeltaBackoff); + + ExponentialRetryPolicy policy = new ExponentialRetryPolicy( + new 
AbfsConfiguration(config, "dummyAccountName")); + + Assert.assertEquals("Max retry count was not set as expected.", expectedMaxRetries, policy.getRetryCount()); + Assert.assertEquals("Min backoff interval was not set as expected.", expectedMinBackoff, policy.getMinBackoff()); + Assert.assertEquals("Max backoff interval was not set as expected.", expectedMaxBackoff, policy.getMaxBackoff()); + Assert.assertEquals("Delta backoff interval was not set as expected.", expectedDeltaBackoff, policy.getDeltaBackoff()); + } + /** Builds an AbfsConfiguration for the account name dummyAccountName from a copy of the raw test configuration. */ + private AbfsConfiguration getAbfsConfig() throws Exception { + Configuration + config = new Configuration(this.getRawConfiguration()); + return new AbfsConfiguration(config, "dummyAccountName"); + } + /** Asserts that shouldRetry(n, -1) is true for every n below the max I/O retries configured in abfsConfig. */ + private void testMaxIOConfig(AbfsConfiguration abfsConfig) { + ExponentialRetryPolicy retryPolicy = new ExponentialRetryPolicy( + abfsConfig.getMaxIoRetries()); + int localRetryCount = 0; + + while (localRetryCount < abfsConfig.getMaxIoRetries()) { + Assert.assertTrue( + "Retry should be allowed when retryCount less than max count configured.", + retryPolicy.shouldRetry(localRetryCount, -1)); + localRetryCount++; + } + + Assert.assertEquals( + "When all retries are exhausted, the retryCount will be same as max configured", + abfsConfig.getMaxIoRetries(), localRetryCount); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java new file mode 100644 index 0000000000000..76fcc6dc2c8a4 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import static java.net.HttpURLConnection.HTTP_FORBIDDEN; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; +import static 
org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + +public class TestRetryReason { + + @Test + public void test4xxStatusRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_FORBIDDEN, null)) + .describedAs("Abbreviation for 4xx should be equal to 4xx") + .isEqualTo(HTTP_FORBIDDEN + ""); + } + + @Test + public void testConnectionResetRetryReason() { + SocketException connReset = new SocketException(CONNECTION_RESET_MESSAGE.toUpperCase()); + Assertions.assertThat(RetryReason.getAbbreviation(connReset, null, null)).isEqualTo(CONNECTION_RESET_ABBREVIATION); + } + + @Test + public void testConnectionTimeoutRetryReason() { + SocketTimeoutException connectionTimeoutException = new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE); + Assertions.assertThat(RetryReason.getAbbreviation(connectionTimeoutException, null, null)).isEqualTo( + CONNECTION_TIMEOUT_ABBREVIATION + ); + } + + @Test + public void testReadTimeoutRetryReason() { + SocketTimeoutException connectionTimeoutException = new SocketTimeoutException(READ_TIMEOUT_JDK_MESSAGE); + Assertions.assertThat(RetryReason.getAbbreviation(connectionTimeoutException, null, null)).isEqualTo( + READ_TIMEOUT_ABBREVIATION + ); + } + + @Test + public void testEgressLimitRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage())).isEqualTo( + EGRESS_LIMIT_BREACH_ABBREVIATION + ); + } + + @Test + public void testIngressLimitRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage())).isEqualTo( + 
INGRESS_LIMIT_BREACH_ABBREVIATION + ); + } + + @Test + public void testOperationLimitRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, OPERATION_BREACH_MESSAGE)).isEqualTo( + OPERATION_LIMIT_BREACH_ABBREVIATION + ); + } + + @Test + public void test503UnknownRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, null)).isEqualTo( + "503" + ); + } + + @Test + public void test500RetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_INTERNAL_ERROR, null)).isEqualTo( + "500" + ); + } + + @Test + public void testUnknownHostRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(new UnknownHostException(), null, null)).isEqualTo( + UNKNOWN_HOST_EXCEPTION_ABBREVIATION + ); + } + + @Test + public void testUnknownIOExceptionRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(new IOException(), null, null)).isEqualTo( + IO_EXCEPTION_ABBREVIATION + ); + } + + @Test + public void testUnknownSocketException() { + Assertions.assertThat(RetryReason.getAbbreviation(new SocketException(), null, null)).isEqualTo( + SOCKET_EXCEPTION_ABBREVIATION + ); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestTextFileBasedIdentityHandler.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestTextFileBasedIdentityHandler.java new file mode 100644 index 0000000000000..f9950faf944df --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestTextFileBasedIdentityHandler.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.Charset; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.azurebfs.utils.TextFileBasedIdentityHandler; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class TestTextFileBasedIdentityHandler { + + @ClassRule + public static TemporaryFolder tempDir = new TemporaryFolder(); + private static File userMappingFile = null; + private static File groupMappingFile = null; + private static final String NEW_LINE = "\n"; + private static String testUserDataLine1 = + "a2b27aec-77bd-46dd-8c8c-39611a333331:user1:11000:21000:spi-user1:abcf86e9-5a5b-49e2-a253-f5c9e2afd4ec" + + NEW_LINE; + private static String testUserDataLine2 = + "#i2j27aec-77bd-46dd-8c8c-39611a333331:user2:41000:21000:spi-user2:mnof86e9-5a5b-49e2-a253-f5c9e2afd4ec" + + NEW_LINE; + private static String testUserDataLine3 = + "c2d27aec-77bd-46dd-8c8c-39611a333331:user2:21000:21000:spi-user2:deff86e9-5a5b-49e2-a253-f5c9e2afd4ec" + + NEW_LINE; + private static String testUserDataLine4 = "e2f27aec-77bd-46dd-8c8c-39611a333331c" + NEW_LINE; + private static String 
testUserDataLine5 = + "g2h27aec-77bd-46dd-8c8c-39611a333331:user4:41000:21000:spi-user4:jklf86e9-5a5b-49e2-a253-f5c9e2afd4ec" + + NEW_LINE; + private static String testUserDataLine6 = " " + NEW_LINE; + private static String testUserDataLine7 = + "i2j27aec-77bd-46dd-8c8c-39611a333331:user5:41000:21000:spi-user5:mknf86e9-5a5b-49e2-a253-f5c9e2afd4ec" + + NEW_LINE; + + private static String testGroupDataLine1 = "1d23024d-957c-4456-aac1-a57f9e2de914:group1:21000:sgp-group1" + NEW_LINE; + private static String testGroupDataLine2 = "3d43024d-957c-4456-aac1-a57f9e2de914:group2:21000:sgp-group2" + NEW_LINE; + private static String testGroupDataLine3 = "5d63024d-957c-4456-aac1-a57f9e2de914" + NEW_LINE; + private static String testGroupDataLine4 = " " + NEW_LINE; + private static String testGroupDataLine5 = "7d83024d-957c-4456-aac1-a57f9e2de914:group4:21000:sgp-group4" + NEW_LINE; + + @BeforeClass + public static void init() throws IOException { + userMappingFile = tempDir.newFile("user-mapping.conf"); + groupMappingFile = tempDir.newFile("group-mapping.conf"); + + //Stage data for user mapping + FileUtils.writeStringToFile(userMappingFile, testUserDataLine1, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, testUserDataLine2, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, testUserDataLine3, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, testUserDataLine4, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, testUserDataLine5, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, testUserDataLine6, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, testUserDataLine7, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(userMappingFile, NEW_LINE, Charset.forName("UTF-8"), true); + + //Stage data for group mapping + FileUtils.writeStringToFile(groupMappingFile, testGroupDataLine1, 
Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(groupMappingFile, testGroupDataLine2, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(groupMappingFile, testGroupDataLine3, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(groupMappingFile, testGroupDataLine4, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(groupMappingFile, testGroupDataLine5, Charset.forName("UTF-8"), true); + FileUtils.writeStringToFile(groupMappingFile, NEW_LINE, Charset.forName("UTF-8"), true); + } + + private void assertUserLookup(TextFileBasedIdentityHandler handler, String userInTest, String expectedUser) + throws IOException { + String actualUser = handler.lookupForLocalUserIdentity(userInTest); + Assert.assertEquals("Wrong user identity for ", expectedUser, actualUser); + } + + @Test + public void testLookupForUser() throws IOException { + TextFileBasedIdentityHandler handler = + new TextFileBasedIdentityHandler(userMappingFile.getPath(), groupMappingFile.getPath()); + + //Success scenario => user in test -> user2. + assertUserLookup(handler, testUserDataLine3.split(":")[0], testUserDataLine3.split(":")[1]); + + //No username found in the mapping file. + assertUserLookup(handler, "bogusIdentity", ""); + + //Edge case when username is empty string. 
+ assertUserLookup(handler, "", ""); + } + + @Test + public void testLookupForUserFileNotFound() throws Exception { + TextFileBasedIdentityHandler handler = + new TextFileBasedIdentityHandler(userMappingFile.getPath() + ".test", groupMappingFile.getPath()); + intercept(FileNotFoundException.class, "FileNotFoundException", + () -> handler.lookupForLocalUserIdentity(testUserDataLine3.split(":")[0])); + } + + private void assertGroupLookup(TextFileBasedIdentityHandler handler, String groupInTest, String expectedGroup) + throws IOException { + String actualGroup = handler.lookupForLocalGroupIdentity(groupInTest); + Assert.assertEquals("Wrong group identity for ", expectedGroup, actualGroup); + } + + @Test + public void testLookupForGroup() throws IOException { + TextFileBasedIdentityHandler handler = + new TextFileBasedIdentityHandler(userMappingFile.getPath(), groupMappingFile.getPath()); + + //Success scenario. + assertGroupLookup(handler, testGroupDataLine2.split(":")[0], testGroupDataLine2.split(":")[1]); + + //No group name found in the mapping file. + assertGroupLookup(handler, "bogusIdentity", ""); + + //Edge case when group name is empty string. 
+ assertGroupLookup(handler, "", ""); + } + + @Test + public void testLookupForGroupFileNotFound() throws Exception { + TextFileBasedIdentityHandler handler = + new TextFileBasedIdentityHandler(userMappingFile.getPath(), groupMappingFile.getPath() + ".test"); + intercept(FileNotFoundException.class, "FileNotFoundException", + () -> handler.lookupForLocalGroupIdentity(testGroupDataLine2.split(":")[0])); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java new file mode 100644 index 0000000000000..6f2209a6e8ced --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.time.Instant; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; +import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; + + +/** + * Test Delegation SAS generator. 
+ */ +public class DelegationSASGenerator extends SASGenerator { + private final String skoid; + private final String sktid; + private final String skt; + private final String ske; + private final String sks = "b"; + private final String skv; + + public DelegationSASGenerator(byte[] userDelegationKey, String skoid, String sktid, String skt, String ske, String skv) { + super(userDelegationKey); + this.skoid = skoid; + this.sktid = sktid; + this.skt = skt; + this.ske = ske; + this.skv = skv; + } + + public String getDelegationSAS(String accountName, String containerName, String path, String operation, + String saoid, String suoid, String scid) { + + final String sv = AuthenticationVersion.Feb20.toString(); + final String st = ISO_8601_FORMATTER.format(Instant.now().minus(FIVE_MINUTES)); + final String se = ISO_8601_FORMATTER.format(Instant.now().plus(ONE_DAY)); + String sr = "b"; + String sdd = null; + String sp; + + switch (operation) { + case SASTokenProvider.CREATE_FILE_OPERATION: + case SASTokenProvider.CREATE_DIRECTORY_OPERATION: + case SASTokenProvider.WRITE_OPERATION: + case SASTokenProvider.SET_PROPERTIES_OPERATION: + sp = "w"; + break; + case SASTokenProvider.DELETE_OPERATION: + sp = "d"; + break; + case SASTokenProvider.DELETE_RECURSIVE_OPERATION: + sp = "d"; + sr = "d"; + sdd = Integer.toString(StringUtils.countMatches(path, "/")); + break; + case SASTokenProvider.CHECK_ACCESS_OPERATION: + case SASTokenProvider.GET_ACL_OPERATION: + case SASTokenProvider.GET_STATUS_OPERATION: + sp = "e"; + break; + case SASTokenProvider.LIST_OPERATION: + sp = "l"; + break; + case SASTokenProvider.GET_PROPERTIES_OPERATION: + case SASTokenProvider.READ_OPERATION: + sp = "r"; + break; + case SASTokenProvider.RENAME_DESTINATION_OPERATION: + case SASTokenProvider.RENAME_SOURCE_OPERATION: + sp = "m"; + break; + case SASTokenProvider.SET_ACL_OPERATION: + case SASTokenProvider.SET_PERMISSION_OPERATION: + sp = "p"; + break; + case SASTokenProvider.SET_OWNER_OPERATION: + sp = "o"; + 
break; + default: + throw new IllegalArgumentException(operation); + } + + String signature = computeSignatureForSAS(sp, st, se, sv, sr, accountName, containerName, + path, saoid, suoid, scid); + + AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); + qb.addQuery("skoid", skoid); + qb.addQuery("sktid", sktid); + qb.addQuery("skt", skt); + qb.addQuery("ske", ske); + qb.addQuery("sks", sks); + qb.addQuery("skv", skv); + if (saoid != null) { + qb.addQuery("saoid", saoid); + } + if (suoid != null) { + qb.addQuery("suoid", suoid); + } + if (scid != null) { + qb.addQuery("scid", scid); + } + qb.addQuery("sp", sp); + qb.addQuery("st", st); + qb.addQuery("se", se); + qb.addQuery("sv", sv); + qb.addQuery("sr", sr); + if (sdd != null) { + qb.addQuery("sdd", sdd); + } + qb.addQuery("sig", signature); + return qb.toString().substring(1); + } + + private String computeSignatureForSAS(String sp, String st, String se, String sv, + String sr, String accountName, String containerName, + String path, String saoid, String suoid, String scid) { + + StringBuilder sb = new StringBuilder(); + sb.append(sp); + sb.append("\n"); + sb.append(st); + sb.append("\n"); + sb.append(se); + sb.append("\n"); + // canonicalized resource + sb.append("/blob/"); + sb.append(accountName); + sb.append("/"); + sb.append(containerName); + if (path != null && !sr.equals("c")) { + sb.append(path); + } + sb.append("\n"); + sb.append(skoid); + sb.append("\n"); + sb.append(sktid); + sb.append("\n"); + sb.append(skt); + sb.append("\n"); + sb.append(ske); + sb.append("\n"); + sb.append(sks); + sb.append("\n"); + sb.append(skv); + sb.append("\n"); + if (saoid != null) { + sb.append(saoid); + } + sb.append("\n"); + if (suoid != null) { + sb.append(suoid); + } + sb.append("\n"); + if (scid != null) { + sb.append(scid); + } + sb.append("\n"); + + sb.append("\n"); // sip + sb.append("\n"); // spr + sb.append(sv); + sb.append("\n"); + sb.append(sr); + sb.append("\n"); + sb.append("\n"); // - For optional : rscc - 
ResponseCacheControl + sb.append("\n"); // - For optional : rscd - ResponseContentDisposition + sb.append("\n"); // - For optional : rsce - ResponseContentEncoding + sb.append("\n"); // - For optional : rscl - ResponseContentLanguage + sb.append("\n"); // - For optional : rsct - ResponseContentType + + String stringToSign = sb.toString(); + LOG.debug("Delegation SAS stringToSign: " + stringToSign.replace("\n", ".")); + return computeHmac256(stringToSign); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java index 19bf9e2c45327..2e9289d8d44c7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java @@ -19,104 +19,76 @@ package org.apache.hadoop.fs.azurebfs.utils; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.time.Duration; import java.time.format.DateTimeFormatter; -import java.time.Instant; import java.time.ZoneId; import java.util.Locale; import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; -import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; -import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * Test container SAS generator. + * Test SAS generator. 
*/ -public class SASGenerator { +public abstract class SASGenerator { + + public enum AuthenticationVersion { + Nov18("2018-11-09"), + Dec19("2019-12-12"), + Feb20("2020-02-10"); + + private final String ver; + + AuthenticationVersion(String version) { + this.ver = version; + } - private static final String HMAC_SHA256 = "HmacSHA256"; - private static final int TOKEN_START_PERIOD_IN_SECONDS = 5 * 60; - private static final int TOKEN_EXPIRY_PERIOD_IN_SECONDS = 24 * 60 * 60; - public static final DateTimeFormatter ISO_8601_UTC_DATE_FORMATTER = + @Override + public String toString() { + return ver; + } + } + + protected static final Logger LOG = LoggerFactory.getLogger(SASGenerator.class); + public static final Duration FIVE_MINUTES = Duration.ofMinutes(5); + public static final Duration ONE_DAY = Duration.ofDays(1); + public static final DateTimeFormatter ISO_8601_FORMATTER = DateTimeFormatter .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT) .withZone(ZoneId.of("UTC")); + private Mac hmacSha256; private byte[] key; - public SASGenerator(byte[] key) { - this.key = key; - initializeMac(); - } - - public String getContainerSASWithFullControl(String accountName, String containerName) { - String sp = "rcwdl"; - String sv = "2018-11-09"; - String sr = "c"; - String st = ISO_8601_UTC_DATE_FORMATTER.format(Instant.now().minusSeconds(TOKEN_START_PERIOD_IN_SECONDS)); - String se = - ISO_8601_UTC_DATE_FORMATTER.format(Instant.now().plusSeconds(TOKEN_EXPIRY_PERIOD_IN_SECONDS)); - - String signature = computeSignatureForSAS(sp, st, se, sv, "c", - accountName, containerName); - - AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); - qb.addQuery("sp", sp); - qb.addQuery("st", st); - qb.addQuery("se", se); - qb.addQuery("sv", sv); - qb.addQuery("sr", sr); - qb.addQuery("sig", signature); - return qb.toString().substring(1); + // hide default constructor + private SASGenerator() { } - private String computeSignatureForSAS(String sp, String st, - String se, String sv, String sr, 
String accountName, String containerName) { - - StringBuilder sb = new StringBuilder(); - sb.append(sp); - sb.append("\n"); - sb.append(st); - sb.append("\n"); - sb.append(se); - sb.append("\n"); - // canonicalized resource - sb.append("/blob/"); - sb.append(accountName); - sb.append("/"); - sb.append(containerName); - sb.append("\n"); - sb.append("\n"); // si - sb.append("\n"); // sip - sb.append("\n"); // spr - sb.append(sv); - sb.append("\n"); - sb.append(sr); - sb.append("\n"); - sb.append("\n"); // - For optional : rscc - ResponseCacheControl - sb.append("\n"); // - For optional : rscd - ResponseContentDisposition - sb.append("\n"); // - For optional : rsce - ResponseContentEncoding - sb.append("\n"); // - For optional : rscl - ResponseContentLanguage - sb.append("\n"); // - For optional : rsct - ResponseContentType - - String stringToSign = sb.toString(); - return computeHmac256(stringToSign); + /** + * Called by subclasses to initialize the cryptographic SHA-256 HMAC provider. + * @param key - a 256-bit secret key + */ + protected SASGenerator(byte[] key) { + this.key = key; + initializeMac(); } private void initializeMac() { // Initializes the HMAC-SHA256 Mac and SecretKey. 
try { - hmacSha256 = Mac.getInstance(HMAC_SHA256); - hmacSha256.init(new SecretKeySpec(key, HMAC_SHA256)); + hmacSha256 = Mac.getInstance("HmacSHA256"); + hmacSha256.init(new SecretKeySpec(key, "HmacSHA256")); } catch (final Exception e) { throw new IllegalArgumentException(e); } } - private String computeHmac256(final String stringToSign) { + protected String computeHmac256(final String stringToSign) { byte[] utf8Bytes; try { - utf8Bytes = stringToSign.getBytes(AbfsHttpConstants.UTF_8); + utf8Bytes = stringToSign.getBytes(StandardCharsets.UTF_8.toString()); } catch (final UnsupportedEncodingException e) { throw new IllegalArgumentException(e); } @@ -126,4 +98,4 @@ private String computeHmac256(final String stringToSign) { } return Base64.encode(hmac); } -} +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java new file mode 100644 index 0000000000000..24a1cea255b4a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.time.Instant; + +import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; + +/** + * Test Service SAS generator. + */ +public class ServiceSASGenerator extends SASGenerator { + + /** + * Creates a SAS Generator for Service SAS + * (https://docs.microsoft.com/en-us/rest/api/storageservices/create-service-sas). + * @param accountKey - the storage account key + */ + public ServiceSASGenerator(byte[] accountKey) { + super(accountKey); + } + + public String getContainerSASWithFullControl(String accountName, String containerName) { + String sp = "rcwdl"; + String sv = AuthenticationVersion.Feb20.toString(); + String sr = "c"; + String st = ISO_8601_FORMATTER.format(Instant.now().minus(FIVE_MINUTES)); + String se = ISO_8601_FORMATTER.format(Instant.now().plus(ONE_DAY)); + + String signature = computeSignatureForSAS(sp, st, se, sv, "c", + accountName, containerName, null); + + AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); + qb.addQuery("sp", sp); + qb.addQuery("st", st); + qb.addQuery("se", se); + qb.addQuery("sv", sv); + qb.addQuery("sr", sr); + qb.addQuery("sig", signature); + return qb.toString().substring(1); + } + + private String computeSignatureForSAS(String sp, String st, String se, String sv, + String sr, String accountName, String containerName, String path) { + + StringBuilder sb = new StringBuilder(); + sb.append(sp); + sb.append("\n"); + sb.append(st); + sb.append("\n"); + sb.append(se); + sb.append("\n"); + // canonicalized resource + sb.append("/blob/"); + sb.append(accountName); + sb.append("/"); + sb.append(containerName); + if (path != null && !sr.equals("c")) { + //sb.append("/"); + sb.append(path); + } + sb.append("\n"); + sb.append("\n"); // si + sb.append("\n"); // sip + sb.append("\n"); // spr + sb.append(sv); + sb.append("\n"); + sb.append(sr); + 
sb.append("\n"); + sb.append("\n"); // - For optional : rscc - ResponseCacheControl + sb.append("\n"); // - For optional : rscd - ResponseContentDisposition + sb.append("\n"); // - For optional : rsce - ResponseContentEncoding + sb.append("\n"); // - For optional : rscl - ResponseContentLanguage + sb.append("\n"); // - For optional : rsct - ResponseContentType + + String stringToSign = sb.toString(); + LOG.debug("Service SAS stringToSign: " + stringToSign.replace("\n", ".")); + return computeHmac256(stringToSign); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestCachedSASToken.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestCachedSASToken.java new file mode 100644 index 0000000000000..cbba80877206f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestCachedSASToken.java @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.io.IOException; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.UUID; + +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS; +import static java.time.temporal.ChronoUnit.SECONDS; +import static java.time.temporal.ChronoUnit.DAYS; + +/** + * Test CachedSASToken. + */ +public final class TestCachedSASToken { + + @Test + public void testUpdateAndGet() throws IOException { + CachedSASToken cachedSasToken = new CachedSASToken(); + + String se1 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS * 2, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + String token1 = "se=" + se1; + + // set first time and ensure reference equality + cachedSasToken.update(token1); + String cachedToken = cachedSasToken.get(); + Assert.assertTrue(token1 == cachedToken); + + // update with same token and ensure reference equality + cachedSasToken.update(token1); + cachedToken = cachedSasToken.get(); + Assert.assertTrue(token1 == cachedToken); + + // renew and ensure reference equality + String se2 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS * 2, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + String token2 = "se=" + se2; + cachedSasToken.update(token2); + cachedToken = cachedSasToken.get(); + Assert.assertTrue(token2 == cachedToken); + + // renew and ensure reference equality with ske 
+ String se3 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS * 4, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + + String ske3 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS * 2, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + String token3 = "se=" + se3 + "&ske=" + ske3; + cachedSasToken.update(token3); + cachedToken = cachedSasToken.get(); + Assert.assertTrue(token3 == cachedToken); + } + + @Test + public void testGetExpiration() throws IOException { + CachedSASToken cachedSasToken = new CachedSASToken(); + + String se = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS - 1, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + OffsetDateTime seDate = OffsetDateTime.parse(se, DateTimeFormatter.ISO_DATE_TIME); + String token = "se=" + se; + + // By-pass the normal validation provided by update method + // by callng set with expired SAS, then ensure the get + // method returns null (auto expiration as next REST operation will use + // SASTokenProvider to get a new SAS). 
+ cachedSasToken.setForTesting(token, seDate); + String cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + } + + @Test + public void testUpdateAndGetWithExpiredToken() throws IOException { + CachedSASToken cachedSasToken = new CachedSASToken(); + + String se1 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS - 1, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + String token1 = "se=" + se1; + + // set expired token and ensure not cached + cachedSasToken.update(token1); + String cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + + String se2 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS * 2, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + + String ske2 = OffsetDateTime.now(ZoneOffset.UTC).plus( + DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS - 1, + SECONDS).format(DateTimeFormatter.ISO_DATE_TIME); + String token2 = "se=" + se2 + "&ske=" + ske2; + + // set with expired ske and ensure not cached + cachedSasToken.update(token2); + cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + + } + + @Test + public void testUpdateAndGetWithInvalidToken() throws IOException { + CachedSASToken cachedSasToken = new CachedSASToken(); + + // set and ensure reference that it is not cached + String token1 = "se="; + cachedSasToken.update(token1); + String cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + + // set and ensure reference that it is not cached + String token2 = "se=xyz"; + cachedSasToken.update(token2); + cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + + // set and ensure reference that it is not cached + String token3 = "se=2100-01-01T00:00:00Z&ske="; + cachedSasToken.update(token3); + cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + + // set and ensure reference that it is not cached + String token4 = 
"se=2100-01-01T00:00:00Z&ske=xyz&"; + cachedSasToken.update(token4); + cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + + // set and ensure reference that it is not cached + String token5 = "se=abc&ske=xyz&"; + cachedSasToken.update(token5); + cachedToken = cachedSasToken.get(); + Assert.assertNull(cachedToken); + } + + public static CachedSASToken getTestCachedSASTokenInstance() { + String expiryPostADay = OffsetDateTime.now(ZoneOffset.UTC) + .plus(1, DAYS) + .format(DateTimeFormatter.ISO_DATE_TIME); + String version = "2020-20-20"; + + StringBuilder sb = new StringBuilder(); + sb.append("skoid="); + sb.append(UUID.randomUUID().toString()); + sb.append("&sktid="); + sb.append(UUID.randomUUID().toString()); + sb.append("&skt="); + sb.append(OffsetDateTime.now(ZoneOffset.UTC) + .minus(1, DAYS) + .format(DateTimeFormatter.ISO_DATE_TIME)); + sb.append("&ske="); + sb.append(expiryPostADay); + sb.append("&sks=b"); + sb.append("&skv="); + sb.append(version); + sb.append("&sp=rw"); + sb.append("&sr=b"); + sb.append("&se="); + sb.append(expiryPostADay); + sb.append("&sv=2"); + sb.append(version); + + CachedSASToken cachedSASToken = new CachedSASToken(); + cachedSASToken.update(sb.toString()); + return cachedSASToken; + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestMockHelpers.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestMockHelpers.java new file mode 100644 index 0000000000000..e25a099a00ef3 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestMockHelpers.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; + +/** + * Test Mock Helpers. + */ +public final class TestMockHelpers { + + /** + * Sets a class field by reflection. + * @param type + * @param obj + * @param fieldName + * @param fieldObject + * @param + * @return + * @throws Exception + */ + public static T setClassField( + Class type, + final T obj, + final String fieldName, + Object fieldObject) throws Exception { + + Field field = type.getDeclaredField(fieldName); + field.setAccessible(true); + Field modifiersField = Field.class.getDeclaredField("modifiers"); + modifiersField.setAccessible(true); + modifiersField.setInt(field, + field.getModifiers() & ~Modifier.FINAL); + field.set(obj, fieldObject); + + return obj; + } + + private TestMockHelpers() { + // Not called. 
- For checkstyle: HideUtilityClassConstructor + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java index 690e56c510559..25d3f7caa4f39 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestUriUtils.java @@ -18,9 +18,21 @@ package org.apache.hadoop.fs.azurebfs.utils; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + import org.junit.Assert; import org.junit.Test; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.apache.http.message.BasicNameValuePair; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + /** * Test ABFS UriUtils. */ @@ -45,4 +57,79 @@ public void testExtractRawAccountName() throws Exception { Assert.assertEquals(null, UriUtils.extractAccountNameFromHostName(null)); Assert.assertEquals(null, UriUtils.extractAccountNameFromHostName("abfs.dfs.cores.windows.net")); } + + @Test + // If a config for partial masking is introduced, this test will have to be + // modified for the config-controlled partial mask length + public void testMaskUrlQueryParameters() throws Exception { + Set fullMask = new HashSet<>(Arrays.asList("abc", "bcd")); + Set partialMask = new HashSet<>(Arrays.asList("pqr", "xyz")); + + //Partial and full masking test + List keyValueList = URLEncodedUtils + .parse("abc=123&pqr=45678&def=789&bcd=012&xyz=678", + StandardCharsets.UTF_8); + Assert.assertEquals("Incorrect masking", + "abc=XXXXX&pqr=456XX&def=789&bcd=XXXXX&xyz=67X", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Mask GUIDs + keyValueList = URLEncodedUtils + 
.parse("abc=123&pqr=256877f2-c094-48c8-83df-ddb5825694fd&def=789", + StandardCharsets.UTF_8); + Assert.assertEquals("Incorrect partial masking for guid", + "abc=XXXXX&pqr=256877f2-c094-48c8XXXXXXXXXXXXXXXXXX&def=789", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //For params entered for both full and partial masks, full mask applies + partialMask.add("abc"); + Assert.assertEquals("Full mask should apply", + "abc=XXXXX&pqr=256877f2-c094-48c8XXXXXXXXXXXXXXXXXX&def=789", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Duplicate key (to be masked) with different values + keyValueList = URLEncodedUtils + .parse("abc=123&pqr=4561234&abc=789", StandardCharsets.UTF_8); + Assert.assertEquals("Duplicate key: Both values should get masked", + "abc=XXXXX&pqr=4561XXX&abc=XXXXX", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Duplicate key (not to be masked) with different values + keyValueList = URLEncodedUtils + .parse("abc=123&def=456&pqrs=789&def=000", StandardCharsets.UTF_8); + Assert.assertEquals("Duplicate key: Values should not get masked", + "abc=XXXXX&def=456&pqrs=789&def=000", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Empty param value + keyValueList = URLEncodedUtils + .parse("abc=123&def=&pqr=789&s=1", StandardCharsets.UTF_8); + Assert.assertEquals("Incorrect url with empty query value", + "abc=XXXXX&def=&pqr=78X&s=1", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Empty param key + keyValueList = URLEncodedUtils + .parse("def=2&pqr=789&s=1", StandardCharsets.UTF_8); + keyValueList.add(new BasicNameValuePair("", "m1")); + List finalKeyValueList = keyValueList; + intercept(IllegalArgumentException.class, () -> UriUtils + .maskUrlQueryParameters(finalKeyValueList, fullMask, partialMask)); + + //Param (not to be masked) with null value + keyValueList = URLEncodedUtils + .parse("abc=123&s=1", 
StandardCharsets.UTF_8); + keyValueList.add(new BasicNameValuePair("null1", null)); + Assert.assertEquals("Null value, incorrect query construction", + "abc=XXXXX&s=1&null1=", + UriUtils.maskUrlQueryParameters(keyValueList, fullMask, partialMask)); + + //Param (to be masked) with null value + keyValueList.add(new BasicNameValuePair("null2", null)); + fullMask.add("null2"); + Assert.assertEquals("No mask should be added for null value", + "abc=XXXXX&s=1&null1=&null2=", UriUtils + .maskUrlQueryParameters(keyValueList, fullMask, + partialMask)); //no mask + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java new file mode 100644 index 0000000000000..7569c80d67c61 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.assertj.core.api.Assertions; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; + +/** + * Used to validate correlation identifiers provided during testing against + * values that get associated with a request through its TracingContext instance + */ +public class TracingHeaderValidator implements Listener { + private String clientCorrelationId; + private String fileSystemId; + private String primaryRequestId = EMPTY_STRING; + private boolean needsPrimaryRequestId; + private String streamID = ""; + private FSOperationType operation; + private int retryNum; + private TracingHeaderFormat format; + + private static final String GUID_PATTERN = "^[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$"; + + @Override + public void callTracingHeaderValidator(String tracingContextHeader, + TracingHeaderFormat format) { + this.format = format; + validateTracingHeader(tracingContextHeader); + } + + @Override + public TracingHeaderValidator getClone() { + TracingHeaderValidator tracingHeaderValidator = new TracingHeaderValidator( + clientCorrelationId, fileSystemId, operation, needsPrimaryRequestId, + retryNum, streamID); + tracingHeaderValidator.primaryRequestId = primaryRequestId; + return tracingHeaderValidator; + } + + public TracingHeaderValidator(String clientCorrelationId, String fileSystemId, + FSOperationType operation, boolean needsPrimaryRequestId, int retryNum) { + this.clientCorrelationId = clientCorrelationId; + this.fileSystemId = fileSystemId; + this.operation = operation; + this.retryNum = retryNum; + this.needsPrimaryRequestId = needsPrimaryRequestId; + } + + public TracingHeaderValidator(String clientCorrelationId, String fileSystemId, + FSOperationType operation, boolean needsPrimaryRequestId, int retryNum, + String streamID) { + this(clientCorrelationId, fileSystemId, operation, 
needsPrimaryRequestId, + retryNum); + this.streamID = streamID; + } + + private void validateTracingHeader(String tracingContextHeader) { + String[] idList = tracingContextHeader.split(":"); + validateBasicFormat(idList); + if (format != TracingHeaderFormat.ALL_ID_FORMAT) { + return; + } + if (!primaryRequestId.isEmpty() && !idList[3].isEmpty()) { + Assertions.assertThat(idList[3]) + .describedAs("PrimaryReqID should be common for these requests") + .isEqualTo(primaryRequestId); + } + if (!streamID.isEmpty()) { + Assertions.assertThat(idList[4]) + .describedAs("Stream id should be common for these requests") + .isEqualTo(streamID); + } + } + + private void validateBasicFormat(String[] idList) { + if (format == TracingHeaderFormat.ALL_ID_FORMAT) { + Assertions.assertThat(idList) + .describedAs("header should have 7 elements").hasSize(7); + } else if (format == TracingHeaderFormat.TWO_ID_FORMAT) { + Assertions.assertThat(idList) + .describedAs("header should have 2 elements").hasSize(2); + } else { + Assertions.assertThat(idList).describedAs("header should have 1 element") + .hasSize(1); + Assertions.assertThat(idList[0]) + .describedAs("Client request ID is a guid").matches(GUID_PATTERN); + return; + } + + if (clientCorrelationId.matches("[a-zA-Z0-9-]*")) { + Assertions.assertThat(idList[0]) + .describedAs("Correlation ID should match config") + .isEqualTo(clientCorrelationId); + } else { + Assertions.assertThat(idList[0]) + .describedAs("Invalid config should be replaced with empty string") + .isEmpty(); + } + Assertions.assertThat(idList[1]).describedAs("Client request ID is a guid") + .matches(GUID_PATTERN); + + if (format != TracingHeaderFormat.ALL_ID_FORMAT) { + return; + } + + Assertions.assertThat(idList[2]).describedAs("Filesystem ID incorrect") + .isEqualTo(fileSystemId); + if (needsPrimaryRequestId && !operation + .equals(FSOperationType.READ)) { + Assertions.assertThat(idList[3]).describedAs("should have primaryReqId") + .isNotEmpty(); + } + 
Assertions.assertThat(idList[5]).describedAs("Operation name incorrect") + .isEqualTo(operation.toString()); + if (idList[6].contains("_")) { + idList[6] = idList[6].split("_")[0]; + } + int retryCount = Integer.parseInt(idList[6]); + Assertions.assertThat(retryCount) + .describedAs("Retry was required due to issue on server side") + .isEqualTo(retryNum); + } + + /** + * Sets the value of expected Hadoop operation + * @param operation Hadoop operation code (String of two characters) + */ + @Override + public void setOperation(FSOperationType operation) { + this.operation = operation; + } + + @Override + public void updatePrimaryRequestID(String primaryRequestId) { + this.primaryRequestId = primaryRequestId; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/resources/abfs.xml b/hadoop-tools/hadoop-azure/src/test/resources/abfs.xml index 1561da2234c6f..f06e5cac9b8b2 100644 --- a/hadoop-tools/hadoop-azure/src/test/resources/abfs.xml +++ b/hadoop-tools/hadoop-azure/src/test/resources/abfs.xml @@ -66,4 +66,20 @@ fs.contract.supports-unbuffer true + + + fs.contract.supports-hflush + true + + + + fs.contract.supports-hsync + true + + + + fs.contract.metadata_updated_on_hsync + true + + diff --git a/hadoop-tools/hadoop-azure/src/test/resources/azure-auth-keys.xml.template b/hadoop-tools/hadoop-azure/src/test/resources/azure-auth-keys.xml.template new file mode 100644 index 0000000000000..12dbbfab47970 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/resources/azure-auth-keys.xml.template @@ -0,0 +1,175 @@ + + + + + + + + + + + fs.azure.account.auth.type + SharedKey + + + + + + fs.azure.account.key.{ABFS_ACCOUNT_NAME}.dfs.core.windows.net + {ACCOUNT_ACCESS_KEY} + Account access key + + + + fs.azure.account.oauth.provider.type.{ABFS_ACCOUNT_NAME}.dfs.core.windows.net + + org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider + OAuth token provider implementation class + + + + fs.azure.account.oauth2.client.endpoint.{ABFS_ACCOUNT_NAME}.dfs.core.windows.net + 
+ https://login.microsoftonline.com/{TENANTID}/oauth2/token + Token end point, this can be found through Azure portal + + + + + fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}.dfs.core.windows.net + + {client id} + AAD client id. + + + + fs.azure.account.oauth2.client.secret.{ABFS_ACCOUNT_NAME}.dfs.core.windows.net + + {client secret} + AAD client secret + + + + + fs.contract.test.fs.abfs + abfs://{CONTAINER_NAME}@{ACCOUNT_NAME}.dfs.core.windows.net + + + fs.contract.test.fs.abfss + abfss://{CONTAINER_NAME}@{ACCOUNT_NAME}.dfs.core.windows.net + + + + + fs.azure.wasb.account.name + {WASB_ACCOUNT_NAME}.blob.core.windows.net + + + fs.azure.account.key.{WASB_ACCOUNT_NAME}.blob.core.windows.net + WASB account key + + + fs.contract.test.fs.wasb + wasb://{WASB_FILESYSTEM}@{WASB_ACCOUNT_NAME}.blob.core.windows.net + + + + + + fs.azure.account.oauth2.contributor.client.id + {Client id of SP with RBAC Storage Blob Data Contributor} + + + fs.azure.account.oauth2.contributor.client.secret + {Client secret of SP with RBAC Storage Blob Data Contributor} + + + fs.azure.account.oauth2.reader.client.id + {Client id of SP with RBAC Storage Blob Data Reader} + + + fs.azure.account.oauth2.reader.client.secret + {Client secret of SP with RBAC Storage Blob Data Reader} + + + + + + + fs.azure.account.test.oauth2.client.id + {client id} + The client id(app id) for the app created on step 1 + + + + fs.azure.account.test.oauth2.client.secret + {client secret} + +The client secret(application's secret) for the app created on step 1 + + + + fs.azure.check.access.testuser.guid + {guid} + The guid fetched on step 2 + + + fs.azure.account.oauth2.client.endpoint.{account name}.dfs.core +.windows.net + https://login.microsoftonline.com/{TENANTID}/oauth2/token + +Token end point. This can be found through Azure portal. As part of CheckAccess +test cases. The access will be tested for an FS instance created with the +above mentioned client credentials. 
So this configuration is necessary to +create the test FS instance. + + + + + + fs.azure.test.appendblob.enabled + false + If made true, tests will be running under the assumption that + append blob is enabled and the root directory and contract test root + directory will be part of the append blob directories. Should be false for + non-HNS accounts. + + + + diff --git a/hadoop-tools/hadoop-azure/src/test/resources/azure-test.xml b/hadoop-tools/hadoop-azure/src/test/resources/azure-test.xml index d43d67e7b5fa3..24ffeb5d107a0 100644 --- a/hadoop-tools/hadoop-azure/src/test/resources/azure-test.xml +++ b/hadoop-tools/hadoop-azure/src/test/resources/azure-test.xml @@ -38,6 +38,11 @@ false + + fs.contract.rename-returns-false-if-dest-exists + true + + @@ -61,4 +66,8 @@ + + + + diff --git a/hadoop-tools/hadoop-azure/src/test/resources/core-site.xml b/hadoop-tools/hadoop-azure/src/test/resources/core-site.xml new file mode 100644 index 0000000000000..7d2d11c04eff3 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/resources/core-site.xml @@ -0,0 +1,25 @@ + + + + + + + + + diff --git a/hadoop-tools/hadoop-benchmark/pom.xml b/hadoop-tools/hadoop-benchmark/pom.xml new file mode 100644 index 0000000000000..ae575f1b3de16 --- /dev/null +++ b/hadoop-tools/hadoop-benchmark/pom.xml @@ -0,0 +1,87 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 3.3.6 + ../../hadoop-project/pom.xml + + hadoop-benchmark + 3.3.6 + jar + + Apache Hadoop Common Benchmark + Apache Hadoop Common Benchmark + + + + org.apache.hadoop + hadoop-common + + + org.openjdk.jmh + jmh-core + + + org.openjdk.jmh + jmh-generator-annprocess + + + + + + + maven-assembly-plugin + + + + org.apache.hadoop.benchmark.VectoredReadBenchmark + + + + src/main/assembly/uber.xml + + + + + make-assembly + package + + single + + + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/src/main/findbugs/exclude.xml + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + diff --git 
a/hadoop-tools/hadoop-benchmark/src/main/assembly/uber.xml b/hadoop-tools/hadoop-benchmark/src/main/assembly/uber.xml new file mode 100644 index 0000000000000..014eab951b3cf --- /dev/null +++ b/hadoop-tools/hadoop-benchmark/src/main/assembly/uber.xml @@ -0,0 +1,33 @@ + + + uber + + jar + + false + + + / + true + true + runtime + + + + + metaInf-services + + + diff --git a/hadoop-tools/hadoop-benchmark/src/main/findbugs/exclude.xml b/hadoop-tools/hadoop-benchmark/src/main/findbugs/exclude.xml new file mode 100644 index 0000000000000..05f2a067cf01e --- /dev/null +++ b/hadoop-tools/hadoop-benchmark/src/main/findbugs/exclude.xml @@ -0,0 +1,22 @@ + + + + + + + + + + diff --git a/hadoop-tools/hadoop-benchmark/src/main/java/org/apache/hadoop/benchmark/VectoredReadBenchmark.java b/hadoop-tools/hadoop-benchmark/src/main/java/org/apache/hadoop/benchmark/VectoredReadBenchmark.java new file mode 100644 index 0000000000000..5df46c36786df --- /dev/null +++ b/hadoop-tools/hadoop-benchmark/src/main/java/org/apache/hadoop/benchmark/VectoredReadBenchmark.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.benchmark; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.AsynchronousFileChannel; +import java.nio.channels.CompletionHandler; +import java.nio.file.FileSystems; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.IntFunction; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileRange; +import org.apache.hadoop.fs.impl.FileRangeImpl; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +public class VectoredReadBenchmark { + + static final Path DATA_PATH = getTestDataPath(); + static final String DATA_PATH_PROPERTY = "bench.data"; + static final int READ_SIZE = 64 * 1024; + static final long SEEK_SIZE = 1024L * 1024; + + + static Path getTestDataPath() { + String value = System.getProperty(DATA_PATH_PROPERTY); + return new Path(value == null ? 
"/tmp/taxi.orc" : value); + } + + @State(Scope.Thread) + public static class FileSystemChoice { + + @Param({"local", "raw"}) + private String fileSystemKind; + + private Configuration conf; + private FileSystem fs; + + @Setup(Level.Trial) + public void setup() { + conf = new Configuration(); + try { + LocalFileSystem local = FileSystem.getLocal(conf); + fs = "raw".equals(fileSystemKind) ? local.getRaw() : local; + } catch (IOException e) { + throw new IllegalArgumentException("Can't get filesystem", e); + } + } + } + + @State(Scope.Thread) + public static class BufferChoice { + @Param({"direct", "array"}) + private String bufferKind; + + private IntFunction allocate; + @Setup(Level.Trial) + public void setup() { + allocate = "array".equals(bufferKind) + ? ByteBuffer::allocate : ByteBuffer::allocateDirect; + } + } + + @Benchmark + public void asyncRead(FileSystemChoice fsChoice, + BufferChoice bufferChoice, + Blackhole blackhole) throws Exception { + FSDataInputStream stream = fsChoice.fs.open(DATA_PATH); + List ranges = new ArrayList<>(); + for(int m=0; m < 100; ++m) { + FileRange range = FileRange.createFileRange(m * SEEK_SIZE, READ_SIZE); + ranges.add(range); + } + stream.readVectored(ranges, bufferChoice.allocate); + for(FileRange range: ranges) { + blackhole.consume(range.getData().get()); + } + stream.close(); + } + + static class Joiner implements CompletionHandler { + private int remaining; + private final ByteBuffer[] result; + private Throwable exception = null; + + Joiner(int total) { + remaining = total; + result = new ByteBuffer[total]; + } + + synchronized void finish() { + remaining -= 1; + if (remaining == 0) { + notify(); + } + } + + synchronized ByteBuffer[] join() throws InterruptedException, IOException { + while (remaining > 0 && exception == null) { + wait(); + } + if (exception != null) { + throw new IOException("problem reading", exception); + } + return result; + } + + + @Override + public synchronized void completed(ByteBuffer buffer, 
FileRange attachment) { + result[--remaining] = buffer; + if (remaining == 0) { + notify(); + } + } + + @Override + public synchronized void failed(Throwable exc, FileRange attachment) { + this.exception = exc; + notify(); + } + } + + static class FileRangeCallback extends FileRangeImpl implements + CompletionHandler { + private final AsynchronousFileChannel channel; + private final ByteBuffer buffer; + private int completed = 0; + private final Joiner joiner; + + FileRangeCallback(AsynchronousFileChannel channel, long offset, + int length, Joiner joiner, ByteBuffer buffer) { + super(offset, length, null); + this.channel = channel; + this.joiner = joiner; + this.buffer = buffer; + } + + @Override + public void completed(Integer result, FileRangeCallback attachment) { + final int bytes = result; + if (bytes == -1) { + failed(new EOFException("Read past end of file"), this); + } + completed += bytes; + if (completed < this.getLength()) { + channel.read(buffer, this.getOffset() + completed, this, this); + } else { + buffer.flip(); + joiner.finish(); + } + } + + @Override + public void failed(Throwable exc, FileRangeCallback attachment) { + joiner.failed(exc, this); + } + } + + @Benchmark + public void asyncFileChanArray(BufferChoice bufferChoice, + Blackhole blackhole) throws Exception { + java.nio.file.Path path = FileSystems.getDefault().getPath(DATA_PATH.toString()); + AsynchronousFileChannel channel = AsynchronousFileChannel.open(path, StandardOpenOption.READ); + List ranges = new ArrayList<>(); + Joiner joiner = new Joiner(100); + for(int m=0; m < 100; ++m) { + ByteBuffer buffer = bufferChoice.allocate.apply(READ_SIZE); + FileRangeCallback range = new FileRangeCallback(channel, m * SEEK_SIZE, + READ_SIZE, joiner, buffer); + ranges.add(range); + channel.read(buffer, range.getOffset(), range, range); + } + joiner.join(); + channel.close(); + blackhole.consume(ranges); + } + + @Benchmark + public void syncRead(FileSystemChoice fsChoice, + Blackhole blackhole) throws 
Exception { + FSDataInputStream stream = fsChoice.fs.open(DATA_PATH); + List result = new ArrayList<>(); + for(int m=0; m < 100; ++m) { + byte[] buffer = new byte[READ_SIZE]; + stream.readFully(m * SEEK_SIZE, buffer); + result.add(buffer); + } + blackhole.consume(result); + stream.close(); + } + + /** + * Run the benchmarks. + * @param args the pathname of a 100MB data file + * @throws Exception any ex. + */ + public static void main(String[] args) throws Exception { + OptionsBuilder opts = new OptionsBuilder(); + opts.include("VectoredReadBenchmark"); + opts.jvmArgs("-server", "-Xms256m", "-Xmx2g", + "-D" + DATA_PATH_PROPERTY + "=" + args[0]); + opts.forks(1); + new Runner(opts.build()).run(); + } +} diff --git a/hadoop-tools/hadoop-benchmark/src/main/java/org/apache/hadoop/benchmark/package-info.java b/hadoop-tools/hadoop-benchmark/src/main/java/org/apache/hadoop/benchmark/package-info.java new file mode 100644 index 0000000000000..95d6977e3aba7 --- /dev/null +++ b/hadoop-tools/hadoop-benchmark/src/main/java/org/apache/hadoop/benchmark/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Benchmark for Vectored Read IO operations. 
+ */ +package org.apache.hadoop.benchmark; diff --git a/hadoop-tools/hadoop-datajoin/pom.xml b/hadoop-tools/hadoop-datajoin/pom.xml index 2353cfbff6626..7a663a26613f0 100644 --- a/hadoop-tools/hadoop-datajoin/pom.xml +++ b/hadoop-tools/hadoop-datajoin/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-datajoin - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Data Join Apache Hadoop Data Join jar @@ -108,10 +108,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${basedir}/dev-support/findbugs-exclude.xml diff --git a/hadoop-tools/hadoop-distcp/pom.xml b/hadoop-tools/hadoop-distcp/pom.xml index 711c3fcff7eef..4455c01a80db2 100644 --- a/hadoop-tools/hadoop-distcp/pom.xml +++ b/hadoop-tools/hadoop-distcp/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-distcp - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Distributed Copy Apache Hadoop Distributed Copy jar @@ -128,7 +128,6 @@ org.apache.maven.plugins maven-surefire-plugin - ${ignoreTestFailure} 1 false 600 @@ -137,24 +136,12 @@ **/Test*.java true - - - test.build.data - ${basedir}/target/test/data - - - hadoop.log.dir - target/test/logs - - - org.apache.commons.logging.Log - org.apache.commons.logging.impl.SimpleLog - - - org.apache.commons.logging.simplelog.defaultlog - warn - - + + ${basedir}/target/test/data + target/test/logs + org.apache.commons.logging.impl.SimpleLog + warn + diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyFilter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyFilter.java index f5f00f17cf5c1..a3a435a83d289 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyFilter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyFilter.java @@ -46,6 +46,14 @@ public void initialize() {} */ public abstract boolean shouldCopy(Path 
path); + public boolean shouldCopy(CopyListingFileStatus fileStatus){ + return shouldCopy(fileStatus.getPath()); + } + + public boolean supportFileStatus(){ + return false; + } + /** * Public factory method which returns the appropriate implementation of * CopyFilter. diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java index 6f8aa34b29584..164d014876dbd 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java @@ -35,7 +35,7 @@ import java.net.URI; import java.util.Set; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; /** * The CopyListing abstraction is responsible for how the list of diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java index 9f6f136c6e6f2..0d722abcdaba6 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java @@ -40,9 +40,9 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; -import com.google.common.base.Objects; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.base.Objects; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * CopyListingFileStatus is a view of {@link FileStatus}, recording additional diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DirCopyFilter.java 
b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DirCopyFilter.java new file mode 100644 index 0000000000000..2020b506d74c8 --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DirCopyFilter.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.tools; + +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DirCopyFilter extends FileStatusCopyFilter { + private static final Logger LOG = LoggerFactory.getLogger(DirCopyFilter.class); + Configuration conf; + + public DirCopyFilter(Configuration conf) { + this.conf = conf; + } + + @Override + public boolean shouldCopy(Path path) { + try { + FileSystem fs = path.getFileSystem(this.conf); + if (fs.getFileStatus(path).isDirectory()) { + return true; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + LOG.debug("Skipping {} as it is not a directory", path.toString()); + return false; + } + + @Override + public boolean shouldCopy(CopyListingFileStatus fileStatus) { + return fileStatus.isDirectory(); + } +} diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java index c36335afc161c..21ef47e2b05df 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.util.Random; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -44,7 +44,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * DistCp is the main driver-class for DistCpV2. 
@@ -214,6 +214,8 @@ public Job createAndSubmitJob() throws Exception { String jobID = job.getJobID().toString(); job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID); + // Set the jobId for the applications running through run method. + getConf().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID); LOG.info("DistCp job-id: " + jobID); return job; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 25815687c2973..0291d949a18ec 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -139,6 +139,27 @@ private DistCpConstants() { public static final String CONF_LABEL_BLOCKS_PER_CHUNK = "distcp.blocks.per.chunk"; + public static final String CONF_LABEL_USE_ITERATOR = "distcp.use.iterator"; + + /** + * Enabling {@code distcp -update} to use modification time of source and + * target file to check while copying same file with same size but + * different content. + * + * The check would verify if the target file is perceived as older than the + * source then it indicates that the source has been recently updated and it + * is a newer version than what was synced, so we should not skip the copy. + * {@value} + */ + public static final String CONF_LABEL_UPDATE_MOD_TIME = + "distcp.update.modification.time"; + + /** + * Default value for 'distcp.update.modification.time' configuration. 
+ */ + public static final boolean CONF_LABEL_UPDATE_MOD_TIME_DEFAULT = + true; + /** * Constants for DistCp return code to shell / consumer of ToolRunner's run */ diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java index 1e63d802e876c..8e9d64a376e9f 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java @@ -171,6 +171,10 @@ public int getBlocksPerChunk() { return options.getBlocksPerChunk(); } + public boolean shouldUseIterator() { + return options.shouldUseIterator(); + } + public final boolean splitLargeFile() { return options.getBlocksPerChunk() > 0; } @@ -200,7 +204,7 @@ public String toString() { return options.toString() + ", sourcePaths=" + sourcePaths + ", targetPathExists=" + targetPathExists + - ", preserveRawXattrs" + preserveRawXattrs; + ", preserveRawXattrs=" + preserveRawXattrs; } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java index 49ffc59344e75..39da614a6093c 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java @@ -235,7 +235,12 @@ public enum DistCpOptionSwitch { */ DIRECT_WRITE(DistCpConstants.CONF_LABEL_DIRECT_WRITE, new Option("direct", false, "Write files directly to the" - + " target location, avoiding temporary file rename.")); + + " target location, avoiding temporary file rename.")), + + USE_ITERATOR(DistCpConstants.CONF_LABEL_USE_ITERATOR, + new Option("useiterator", false, + "Use single threaded list status iterator to build " + + "the listing to save the memory utilisation at the client")); 
public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct"; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index 4a6552fed6b55..c9c7661569afa 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,8 +30,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.util.DistCpUtils; +import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.NoSuchElementException; import java.util.Set; @@ -158,6 +160,8 @@ public final class DistCpOptions { /** Whether data should be written directly to the target paths. */ private final boolean directWrite; + private final boolean useIterator; + /** * File attributes for preserve. * @@ -221,6 +225,8 @@ private DistCpOptions(Builder builder) { this.trackPath = builder.trackPath; this.directWrite = builder.directWrite; + + this.useIterator = builder.useIterator; } public Path getSourceFileListing() { @@ -229,7 +235,18 @@ public Path getSourceFileListing() { public List getSourcePaths() { return sourcePaths == null ? 
- null : Collections.unmodifiableList(sourcePaths); + null : + Collections.unmodifiableList(getUniquePaths(sourcePaths)); + } + + private List getUniquePaths(List srcPaths) { + Set uniquePaths = new LinkedHashSet<>(); + for (Path path : srcPaths) { + if (!uniquePaths.add(path)) { + LOG.info("Path: {} added multiple times, ignoring the redundant entry.", path); + } + } + return new ArrayList<>(uniquePaths); } public Path getTargetPath() { @@ -352,6 +369,10 @@ public boolean shouldDirectWrite() { return directWrite; } + public boolean shouldUseIterator() { + return useIterator; + } + /** * Add options to configuration. These will be used in the Mapper/committer * @@ -402,6 +423,9 @@ public void appendToConf(Configuration conf) { } DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.DIRECT_WRITE, String.valueOf(directWrite)); + + DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.USE_ITERATOR, + String.valueOf(useIterator)); } /** @@ -439,6 +463,7 @@ public String toString() { ", copyBufferSize=" + copyBufferSize + ", verboseLog=" + verboseLog + ", directWrite=" + directWrite + + ", useiterator=" + useIterator + '}'; } @@ -490,6 +515,8 @@ public static class Builder { private boolean directWrite = false; + private boolean useIterator = false; + public Builder(List sourcePaths, Path targetPath) { Preconditions.checkArgument(sourcePaths != null && !sourcePaths.isEmpty(), "Source paths should not be null or empty!"); @@ -642,11 +669,28 @@ public Builder withAppend(boolean newAppend) { return this; } + /** + * whether builder with crc. + * @param newSkipCRC whether to skip crc check + * @return Builder object whether to skip crc check + * @deprecated Use {@link #withSkipCRC(boolean)} instead. + */ + @Deprecated public Builder withCRC(boolean newSkipCRC) { this.skipCRC = newSkipCRC; return this; } + /** + * whether builder with crc. 
+ * @param newSkipCRC whether to skip crc check + * @return Builder object whether to skip crc check + */ + public Builder withSkipCRC(boolean newSkipCRC) { + this.skipCRC = newSkipCRC; + return this; + } + public Builder withBlocking(boolean newBlocking) { this.blocking = newBlocking; return this; @@ -747,6 +791,11 @@ public Builder withDirectWrite(boolean newDirectWrite) { this.directWrite = newDirectWrite; return this; } + + public Builder withUseIterator(boolean useItr) { + this.useIterator = useItr; + return this; + } } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 35ef3e4ab77e7..e2fe56d341015 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.tools; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -36,6 +37,7 @@ import java.util.EnumMap; import java.util.ArrayList; import java.util.HashSet; +import java.util.Collections; /** * This class provides the basic functionality to sync two FileSystems based on @@ -57,6 +59,9 @@ class DistCpSync { // private EnumMap> diffMap; private DiffInfo[] renameDiffs; + // entries which are marked deleted because of rename to a excluded target + // path + private List deletedByExclusionDiffs; private CopyFilter copyFilter; DistCpSync(DistCpContext context, Configuration conf) { @@ -66,6 +71,11 @@ class DistCpSync { this.copyFilter.initialize(); } + @VisibleForTesting + public void setCopyFilter(CopyFilter copyFilter) { + this.copyFilter = copyFilter; + } + private boolean isRdiff() { return context.shouldUseRdiff(); } @@ -206,7 +216,7 @@ 
private boolean getAllDiffs() throws IOException { SnapshotDiffReport.DiffType.values()) { diffMap.put(type, new ArrayList()); } - + deletedByExclusionDiffs = null; for (SnapshotDiffReport.DiffReportEntry entry : report.getDiffList()) { // If the entry is the snapshot root, usually a item like "M\t." // in the diff report. We don't need to handle it and cannot handle it, @@ -234,8 +244,13 @@ private boolean getAllDiffs() throws IOException { list.add(new DiffInfo(source, target, dt)); } else { list = diffMap.get(SnapshotDiffReport.DiffType.DELETE); - list.add(new DiffInfo(source, target, - SnapshotDiffReport.DiffType.DELETE)); + DiffInfo info = new DiffInfo(source, null, + SnapshotDiffReport.DiffType.DELETE); + list.add(info); + if (deletedByExclusionDiffs == null) { + deletedByExclusionDiffs = new ArrayList<>(); + } + deletedByExclusionDiffs.add(info); } } else if (copyFilter.shouldCopy(relativeTarget)) { list = diffMap.get(SnapshotDiffReport.DiffType.CREATE); @@ -244,6 +259,9 @@ private boolean getAllDiffs() throws IOException { } } } + if (deletedByExclusionDiffs != null) { + Collections.sort(deletedByExclusionDiffs, DiffInfo.sourceComparator); + } return true; } catch (IOException e) { DistCp.LOG.warn("Failed to compute snapshot diff on " + ssDir, e); @@ -493,6 +511,33 @@ private DiffInfo getRenameItem(DiffInfo diff, DiffInfo[] renameDiffArray) { return null; } + /** + * checks if a parent dir is marked deleted as a part of dir rename happening + * to a path which is excluded by the filter. + * @return true if it's marked deleted + */ + private boolean isParentOrSelfMarkedDeleted(DiffInfo diff, + List deletedDirDiffArray) { + for (DiffInfo item : deletedDirDiffArray) { + if (item.getSource().equals(diff.getSource())) { + // The same path string may appear in: + // 1. both deleted and modified snapshot diff entries. + // 2. both deleted and created snapshot diff entries. 
+ // Case 1 is about the same file/directory, whereas case 2 + // is about two different files/directories. + // We are finding case 1 here, thus we check against DiffType.MODIFY. + if (diff.getType() == SnapshotDiffReport.DiffType.MODIFY) { + return true; + } + } else if (isParentOf(item.getSource(), diff.getSource())) { + // If deleted entry is the parent of diff entry, then both MODIFY and + // CREATE diff entries should be handled. + return true; + } + } + return false; + } + /** * For a given sourcePath, get its real path if it or its parent was renamed. * @@ -545,6 +590,19 @@ public ArrayList prepareDiffListForCopyListing() { renameDiffsList.toArray(new DiffInfo[renameDiffsList.size()]); Arrays.sort(renameDiffArray, DiffInfo.sourceComparator); for (DiffInfo diff : modifyAndCreateDiffs) { + // Handles cases where files/dirs got created after a snapshot is taken + // and then the parent dir is moved to a location which is excluded by + // the filters. For example, files/dirs created inside a dir in an + // encryption zone in HDFS. When the parent dir gets deleted, it will be + // moved to the trash, which is inside the encryption zone itself. + // If the trash path gets excluded by filters, the dir will be marked + // for DELETE for the target location. All the subsequent creates + // for such dirs should be ignored as well as the modify operation + // on the dir itself. 
+ if (deletedByExclusionDiffs != null && isParentOrSelfMarkedDeleted(diff, + deletedByExclusionDiffs)) { + continue; + } DiffInfo renameItem = getRenameItem(diff, renameDiffArray); if (renameItem == null) { diff.setTarget(diff.getSource()); diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileStatusCopyFilter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileStatusCopyFilter.java new file mode 100644 index 0000000000000..25158fa853bee --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileStatusCopyFilter.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tools; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Constructor; + +/** + * Interface for excluding files from DistCp. 
+ * + */ +public abstract class FileStatusCopyFilter extends CopyFilter{ + @Override + public boolean supportFileStatus() { + return true; + } +} diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java index 3b9d13b3b0308..a596d9c798688 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java @@ -33,7 +33,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; /** * The OptionsParser parses out the command-line options passed to DistCp, @@ -108,14 +108,16 @@ public static DistCpOptions parse(String[] args) command.hasOption(DistCpOptionSwitch.OVERWRITE.getSwitch())) .withAppend( command.hasOption(DistCpOptionSwitch.APPEND.getSwitch())) - .withCRC( + .withSkipCRC( command.hasOption(DistCpOptionSwitch.SKIP_CRC.getSwitch())) .withBlocking( !command.hasOption(DistCpOptionSwitch.BLOCKING.getSwitch())) .withVerboseLog( command.hasOption(DistCpOptionSwitch.VERBOSE_LOG.getSwitch())) .withDirectWrite( - command.hasOption(DistCpOptionSwitch.DIRECT_WRITE.getSwitch())); + command.hasOption(DistCpOptionSwitch.DIRECT_WRITE.getSwitch())) + .withUseIterator( + command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch())); if (command.hasOption(DistCpOptionSwitch.DIFF.getSwitch())) { String[] snapshots = getVals(command, @@ -197,9 +199,6 @@ public static DistCpOptions parse(String[] args) if (command.hasOption(DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) { builder.preserve( getVal(command, DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())); - } else { - // No "preserve" settings specified. Preserve block-size. 
- builder.preserve(DistCpOptions.FileAttribute.BLOCKSIZE); } if (command.hasOption(DistCpOptionSwitch.FILE_LIMIT.getSwitch())) { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java index 2f4ea69e8fd5c..080bcef7956e9 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java @@ -35,7 +35,7 @@ import java.util.List; import java.util.regex.Pattern; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** * A CopyFilter which compares Java Regex Patterns to each Path to determine diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index 7e5a26a36abe7..27b9a552b01a2 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -18,13 +18,15 @@ package org.apache.hadoop.tools; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.statistics.IOStatisticsLogging; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; import org.apache.hadoop.io.SequenceFile; @@ -36,10 +38,12 @@ import org.apache.hadoop.tools.util.WorkReport; import org.apache.hadoop.tools.util.WorkRequest; import 
org.apache.hadoop.tools.util.WorkRequestProcessor; +import org.apache.hadoop.util.DurationInfo; +import org.apache.hadoop.util.functional.RemoteIterators; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.security.Credentials; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import java.io.FileNotFoundException; import java.io.IOException; @@ -49,6 +53,7 @@ import java.util.List; import java.util.Random; import java.util.LinkedList; +import java.util.Stack; import static org.apache.hadoop.tools.DistCpConstants .HDFS_RESERVED_RAW_DIRECTORY_NAME; @@ -60,7 +65,8 @@ * Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths. */ public class SimpleCopyListing extends CopyListing { - private static final Logger LOG = LoggerFactory.getLogger(SimpleCopyListing.class); + public static final Logger LOG = + LoggerFactory.getLogger(SimpleCopyListing.class); public static final int DEFAULT_FILE_STATUS_SIZE = 1000; public static final boolean DEFAULT_RANDOMIZE_FILE_LISTING = true; @@ -94,11 +100,9 @@ protected SimpleCopyListing(Configuration configuration, Credentials credentials randomizeFileListing = getConf().getBoolean( DistCpConstants.CONF_LABEL_SIMPLE_LISTING_RANDOMIZE_FILES, DEFAULT_RANDOMIZE_FILE_LISTING); - if (LOG.isDebugEnabled()) { - LOG.debug("numListstatusThreads=" + numListstatusThreads - + ", fileStatusLimit=" + fileStatusLimit - + ", randomizeFileListing=" + randomizeFileListing); - } + LOG.debug( + "numListstatusThreads={}, fileStatusLimit={}, randomizeFileListing={}", + numListstatusThreads, fileStatusLimit, randomizeFileListing); copyFilter = CopyFilter.getCopyFilter(getConf()); copyFilter.initialize(); } @@ -286,10 +290,8 @@ protected void doBuildListingWithSnapshotDiff( FileStatus sourceStatus = sourceFS.getFileStatus(diff.getTarget()); if (sourceStatus.isDirectory()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Adding 
source dir for traverse: " + - sourceStatus.getPath()); - } + LOG.debug("Adding source dir for traverse: {}", + sourceStatus.getPath()); HashSet excludeList = distCpSync.getTraverseExcludeList(diff.getSource(), @@ -298,8 +300,9 @@ protected void doBuildListingWithSnapshotDiff( ArrayList sourceDirs = new ArrayList<>(); sourceDirs.add(sourceStatus); - traverseDirectory(fileListWriter, sourceFS, sourceDirs, - sourceRoot, context, excludeList, fileStatuses); + new TraverseDirectory(fileListWriter, sourceFS, sourceDirs, + sourceRoot, context, excludeList, fileStatuses) + .traverseDirectory(); } } } @@ -366,9 +369,8 @@ protected void doBuildListing(SequenceFile.Writer fileListWriter, if (explore) { ArrayList sourceDirs = new ArrayList(); for (FileStatus sourceStatus: sourceFiles) { - if (LOG.isDebugEnabled()) { - LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy."); - } + LOG.debug("Recording source-path: {} for copy.", + sourceStatus.getPath()); LinkedList sourceCopyListingStatus = DistCpUtils.toCopyListingFileStatus(sourceFS, sourceStatus, preserveAcls && sourceStatus.isDirectory(), @@ -384,14 +386,13 @@ protected void doBuildListing(SequenceFile.Writer fileListWriter, } } if (sourceStatus.isDirectory()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Adding source dir for traverse: " + sourceStatus.getPath()); - } + LOG.debug("Adding source dir for traverse: {}", + sourceStatus.getPath()); sourceDirs.add(sourceStatus); } } - traverseDirectory(fileListWriter, sourceFS, sourceDirs, - sourcePathRoot, context, null, statusList); + new TraverseDirectory(fileListWriter, sourceFS, sourceDirs, + sourcePathRoot, context, null, statusList).traverseDirectory(); } } if (randomizeFileListing) { @@ -429,16 +430,12 @@ private void writeToFileListing(List fileStatusInfoList, */ Collections.shuffle(fileStatusInfoList, rnd); for (FileStatusInfo fileStatusInfo : fileStatusInfoList) { - if (LOG.isDebugEnabled()) { - LOG.debug("Adding " + 
fileStatusInfo.fileStatus.getPath()); - } + LOG.debug("Adding {}", fileStatusInfo.fileStatus.getPath()); writeToFileListing(fileListWriter, fileStatusInfo.fileStatus, fileStatusInfo.sourceRootPath); } - if (LOG.isDebugEnabled()) { - LOG.debug("Number of paths written to fileListing=" - + fileStatusInfoList.size()); - } + LOG.debug("Number of paths written to fileListing={}", + fileStatusInfoList.size()); fileStatusInfoList.clear(); } @@ -494,6 +491,10 @@ protected boolean shouldCopy(Path path) { return copyFilter.shouldCopy(path); } + protected boolean shouldCopy(CopyListingFileStatus fileStatus){ + return copyFilter.shouldCopy(fileStatus); + } + /** {@inheritDoc} */ @Override protected long getBytesToCopy() { @@ -590,8 +591,8 @@ public WorkReport processItem( result = new WorkReport(getFileStatus(parent.getPath()), retry, true); } catch (FileNotFoundException fnf) { - LOG.error("FileNotFoundException exception in listStatus: " + - fnf.getMessage()); + LOG.error("FileNotFoundException exception in listStatus: {}", + fnf.getMessage()); result = new WorkReport(new FileStatus[0], retry, true, fnf); } catch (Exception e) { @@ -605,8 +606,7 @@ public WorkReport processItem( } private void printStats() { - LOG.info("Paths (files+dirs) cnt = " + totalPaths + - "; dirCnt = " + totalDirs); + LOG.info("Paths (files+dirs) cnt = {}; dirCnt = {}", totalPaths, totalDirs); } private void maybePrintStats() { @@ -615,79 +615,6 @@ private void maybePrintStats() { } } - private void traverseDirectory(SequenceFile.Writer fileListWriter, - FileSystem sourceFS, - ArrayList sourceDirs, - Path sourcePathRoot, - DistCpContext context, - HashSet excludeList, - List fileStatuses) - throws IOException { - final boolean preserveAcls = context.shouldPreserve(FileAttribute.ACL); - final boolean preserveXAttrs = context.shouldPreserve(FileAttribute.XATTR); - final boolean preserveRawXattrs = context.shouldPreserveRawXattrs(); - - assert numListstatusThreads > 0; - if (LOG.isDebugEnabled()) { - 
LOG.debug("Starting thread pool of " + numListstatusThreads + - " listStatus workers."); - } - ProducerConsumer workers = - new ProducerConsumer(numListstatusThreads); - for (int i = 0; i < numListstatusThreads; i++) { - workers.addWorker( - new FileStatusProcessor(sourcePathRoot.getFileSystem(getConf()), - excludeList)); - } - - for (FileStatus status : sourceDirs) { - workers.put(new WorkRequest(status, 0)); - } - - while (workers.hasWork()) { - try { - WorkReport workResult = workers.take(); - int retry = workResult.getRetry(); - for (FileStatus child: workResult.getItem()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Recording source-path: " + child.getPath() + " for copy."); - } - if (workResult.getSuccess()) { - LinkedList childCopyListingStatus = - DistCpUtils.toCopyListingFileStatus(sourceFS, child, - preserveAcls && child.isDirectory(), - preserveXAttrs && child.isDirectory(), - preserveRawXattrs && child.isDirectory(), - context.getBlocksPerChunk()); - - for (CopyListingFileStatus fs : childCopyListingStatus) { - if (randomizeFileListing) { - addToFileListing(fileStatuses, - new FileStatusInfo(fs, sourcePathRoot), fileListWriter); - } else { - writeToFileListing(fileListWriter, fs, sourcePathRoot); - } - } - } - if (retry < maxRetries) { - if (child.isDirectory()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Traversing into source dir: " + child.getPath()); - } - workers.put(new WorkRequest(child, retry)); - } - } else { - LOG.error("Giving up on " + child.getPath() + - " after " + retry + " retries."); - } - } - } catch (InterruptedException ie) { - LOG.error("Could not get item from childQueue. 
Retrying..."); - } - } - workers.shutdown(); - } - private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, LinkedList fileStatus, Path sourcePathRoot, DistCpContext context) throws IOException { @@ -697,9 +624,7 @@ private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, if (fs.getPath().equals(sourcePathRoot) && fs.isDirectory() && syncOrOverwrite) { // Skip the root-paths when syncOrOverwrite - if (LOG.isDebugEnabled()) { - LOG.debug("Skip " + fs.getPath()); - } + LOG.debug("Skip {}", fs.getPath()); return; } writeToFileListing(fileListWriter, fs, sourcePathRoot); @@ -709,12 +634,15 @@ private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, private void writeToFileListing(SequenceFile.Writer fileListWriter, CopyListingFileStatus fileStatus, Path sourcePathRoot) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot, - fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath()); - } + LOG.debug("REL PATH: {}, FULL PATH: {}", + DistCpUtils.getRelativePath(sourcePathRoot, fileStatus.getPath()), + fileStatus.getPath()); - if (!shouldCopy(fileStatus.getPath())) { + if(copyFilter.supportFileStatus()){ + if(!shouldCopy(fileStatus)){ + return; + } + }else if (!shouldCopy(fileStatus.getPath())) { return; } @@ -730,4 +658,159 @@ private void writeToFileListing(SequenceFile.Writer fileListWriter, totalPaths++; maybePrintStats(); } + + /** + * A utility class to traverse a directory. 
+ */ + private final class TraverseDirectory { + + private SequenceFile.Writer fileListWriter; + private FileSystem sourceFS; + private ArrayList sourceDirs; + private Path sourcePathRoot; + private DistCpContext context; + private HashSet excludeList; + private List fileStatuses; + private final boolean preserveAcls; + private final boolean preserveXAttrs; + private final boolean preserveRawXattrs; + + private TraverseDirectory(SequenceFile.Writer fileListWriter, + FileSystem sourceFS, ArrayList sourceDirs, + Path sourcePathRoot, DistCpContext context, HashSet excludeList, + List fileStatuses) { + this.fileListWriter = fileListWriter; + this.sourceFS = sourceFS; + this.sourceDirs = sourceDirs; + this.sourcePathRoot = sourcePathRoot; + this.context = context; + this.excludeList = excludeList; + this.fileStatuses = fileStatuses; + this.preserveAcls = context.shouldPreserve(FileAttribute.ACL); + this.preserveXAttrs = context.shouldPreserve(FileAttribute.XATTR); + this.preserveRawXattrs = context.shouldPreserveRawXattrs(); + } + + public void traverseDirectory() throws IOException { + if (context.shouldUseIterator()) { + try (DurationInfo ignored = new DurationInfo(LOG, + "Building listing using iterator mode for %s", sourcePathRoot)) { + traverseDirectoryLegacy(); + } + } else { + try (DurationInfo ignored = new DurationInfo(LOG, + "Building listing using multi threaded approach for %s", + sourcePathRoot)) { + traverseDirectoryMultiThreaded(); + } + } + } + + public void traverseDirectoryMultiThreaded() throws IOException { + assert numListstatusThreads > 0; + + LOG.debug("Starting thread pool of {} listStatus workers.", + numListstatusThreads); + + ProducerConsumer workers = + new ProducerConsumer(numListstatusThreads); + try { + for (int i = 0; i < numListstatusThreads; i++) { + workers.addWorker( + new FileStatusProcessor(sourcePathRoot.getFileSystem(getConf()), + excludeList)); + } + + for (FileStatus status : sourceDirs) { + workers.put(new WorkRequest(status, 
0)); + } + + while (workers.hasWork()) { + try { + WorkReport workResult = workers.take(); + int retry = workResult.getRetry(); + for (FileStatus child : workResult.getItem()) { + LOG.debug("Recording source-path: {} for copy.", child.getPath()); + boolean isChildDirectory = child.isDirectory(); + if (workResult.getSuccess()) { + LinkedList childCopyListingStatus = + DistCpUtils.toCopyListingFileStatus(sourceFS, child, + preserveAcls && isChildDirectory, + preserveXAttrs && isChildDirectory, + preserveRawXattrs && isChildDirectory, + context.getBlocksPerChunk()); + + for (CopyListingFileStatus fs : childCopyListingStatus) { + if (randomizeFileListing) { + addToFileListing(fileStatuses, + new FileStatusInfo(fs, sourcePathRoot), fileListWriter); + } else { + writeToFileListing(fileListWriter, fs, sourcePathRoot); + } + } + } + if (retry < maxRetries) { + if (isChildDirectory) { + LOG.debug("Traversing into source dir: {}", child.getPath()); + workers.put(new WorkRequest(child, retry)); + } + } else { + LOG.error("Giving up on {} after {} retries.", child.getPath(), + retry); + } + } + } catch (InterruptedException ie) { + LOG.error("Could not get item from childQueue. 
Retrying..."); + } + } + } finally { + workers.shutdown(); + } + } + + private void traverseDirectoryLegacy() throws IOException { + Stack pathStack = new Stack(); + for (FileStatus fs : sourceDirs) { + if (excludeList == null || !excludeList + .contains(fs.getPath().toUri().getPath())) { + pathStack.add(fs); + } + } + while (!pathStack.isEmpty()) { + prepareListing(pathStack.pop().getPath()); + } + } + + private void prepareListing(Path path) throws IOException { + LOG.debug("Recording source-path: {} for copy.", path); + RemoteIterator listStatus = RemoteIterators + .filteringRemoteIterator(sourceFS.listStatusIterator(path), + i -> excludeList == null || !excludeList + .contains(i.getPath().toUri().getPath())); + while (listStatus.hasNext()) { + FileStatus child = listStatus.next(); + LinkedList childCopyListingStatus = DistCpUtils + .toCopyListingFileStatus(sourceFS, child, + preserveAcls && child.isDirectory(), + preserveXAttrs && child.isDirectory(), + preserveRawXattrs && child.isDirectory(), + context.getBlocksPerChunk()); + for (CopyListingFileStatus fs : childCopyListingStatus) { + if (randomizeFileListing) { + addToFileListing(fileStatuses, + new FileStatusInfo(fs, sourcePathRoot), fileListWriter); + } else { + writeToFileListing(fileListWriter, fs, sourcePathRoot); + } + } + if (child.isDirectory()) { + LOG.debug("Traversing into source dir: {}", child.getPath()); + prepareListing(child.getPath()); + } + } + IOStatisticsLogging + .logIOStatisticsAtDebug(LOG, "RemoteIterator Statistics: {}", + listStatus); + } + } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index 139bd08fd7abc..1cafb23febe9d 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -149,9 
+149,15 @@ public void abortJob(JobContext jobContext, } private void cleanupTempFiles(JobContext context) { - try { - Configuration conf = context.getConfiguration(); + Configuration conf = context.getConfiguration(); + + final boolean directWrite = conf.getBoolean( + DistCpOptionSwitch.DIRECT_WRITE.getConfigLabel(), false); + if (directWrite) { + return; + } + try { Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); FileSystem targetFS = targetWorkPath.getFileSystem(conf); @@ -318,8 +324,10 @@ private void preserveFileAttributesForDirectories(Configuration conf) SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sourceListing)); long totalLen = clusterFS.getFileStatus(sourceListing).getLen(); - - Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); + // For Atomic Copy the Final & Work Path are different & atomic copy has + // already moved it to final path. + Path targetRoot = + new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH)); long preservedEntries = 0; try { @@ -551,10 +559,6 @@ private Path listTargetFiles(final Configuration conf, conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH)); List targets = new ArrayList<>(1); targets.add(targetFinalPath); - Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath) - .toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME) - ? 
DistCpConstants.RAW_NONE_PATH - : DistCpConstants.NONE_PATH; // // Set up options to be the same from the CopyListing.buildListing's // perspective, so to collect similar listings as when doing the copy @@ -562,12 +566,15 @@ private Path listTargetFiles(final Configuration conf, // thread count is picked up from the job int threads = conf.getInt(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS, DistCpConstants.DEFAULT_LISTSTATUS_THREADS); + boolean useIterator = + conf.getBoolean(DistCpConstants.CONF_LABEL_USE_ITERATOR, false); LOG.info("Scanning destination directory {} with thread count: {}", targetFinalPath, threads); - DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath) + DistCpOptions options = new DistCpOptions.Builder(targets, targetFinalPath) .withOverwrite(overwrite) .withSyncFolder(syncFolder) .withNumListstatusThreads(threads) + .withUseIterator(useIterator) .build(); DistCpContext distCpContext = new DistCpContext(options); distCpContext.setTargetPathExists(targetPathExists); diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java index f3c5b4ba7aedb..d0b34f0db390a 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java @@ -41,6 +41,8 @@ import org.apache.hadoop.tools.util.DistCpUtils; import org.apache.hadoop.util.StringUtils; +import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_UPDATE_MOD_TIME_DEFAULT; + /** * Mapper class that executes the DistCp copy operation. * Implements the o.a.h.mapreduce.Mapper interface. @@ -74,6 +76,15 @@ static enum FileAction { OVERWRITE, // Overwrite the whole file } + /** + * Indicates the checksum comparison result. + */ + public enum ChecksumComparison { + TRUE, // checksum comparison is compatible and true. 
+ FALSE, // checksum comparison is compatible and false. + INCOMPATIBLE, // checksum comparison is not compatible. + } + private static Logger LOG = LoggerFactory.getLogger(CopyMapper.class); private Configuration conf; @@ -85,6 +96,7 @@ static enum FileAction { private boolean append = false; private boolean verboseLog = false; private boolean directWrite = false; + private boolean useModTimeToUpdate; private EnumSet preserve = EnumSet.noneOf(FileAttribute.class); private FileSystem targetFS = null; @@ -114,6 +126,9 @@ public void setup(Context context) throws IOException, InterruptedException { PRESERVE_STATUS.getConfigLabel())); directWrite = conf.getBoolean( DistCpOptionSwitch.DIRECT_WRITE.getConfigLabel(), false); + useModTimeToUpdate = + conf.getBoolean(DistCpConstants.CONF_LABEL_UPDATE_MOD_TIME, + CONF_LABEL_UPDATE_MOD_TIME_DEFAULT); targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); Path targetFinalPath = new Path(conf.get( @@ -350,13 +365,65 @@ private boolean canSkip(FileSystem sourceFS, CopyListingFileStatus source, boolean sameLength = target.getLen() == source.getLen(); boolean sameBlockSize = source.getBlockSize() == target.getBlockSize() || !preserve.contains(FileAttribute.BLOCKSIZE); + // Skip the copy if a 0 size file is being copied. + if (sameLength && source.getLen() == 0) { + return true; + } + // If the src and target file have same size and block size, we would + // check if the skipCrc flag is enabled or not. If enabled, and the + // modTime comparison is enabled then return true if target file is not + // older than the source file, since this indicates that the target file + // was updated and the source has not changed more recently than that + // update, we can skip the copy else we would copy.
+ // If skipCrc flag is disabled, we would check the checksum comparison + // which is an enum representing 3 values, of which if the comparison + // returns INCOMPATIBLE, we'll try to check modtime again, else return + // the result of checksum comparison which are compatible(true or false). + // + // Note: Different object stores can have different checksum algorithms + // resulting in no checksum comparison that results in return true + // always, having the modification time enabled can help in these + // scenarios to not incorrectly skip a copy. Refer: HADOOP-18596. + if (sameLength && sameBlockSize) { - return skipCrc || - DistCpUtils.checksumsAreEqual(sourceFS, source.getPath(), null, - targetFS, target.getPath(), source.getLen()); - } else { - return false; + if (skipCrc) { + return maybeUseModTimeToCompare(source, target); + } else { + ChecksumComparison checksumComparison = DistCpUtils + .checksumsAreEqual(sourceFS, source.getPath(), null, + targetFS, target.getPath(), source.getLen()); + LOG.debug("Result of checksum comparison between src {} and target " + + "{} : {}", source, target, checksumComparison); + if (checksumComparison.equals(ChecksumComparison.INCOMPATIBLE)) { + return maybeUseModTimeToCompare(source, target); + } + // if skipCrc is disabled and checksumComparison is compatible we + // need not check the mod time. + return checksumComparison.equals(ChecksumComparison.TRUE); + } + } + return false; + } + + /** + * If the mod time comparison is enabled, check the mod time else return + * true. + * Comparison: If the target file appears to have greater or equal mod time + * (not older) than the source file, we can assume that there has been no new + * changes that occurred in the source file, hence we should return true to + * skip the copy of the file. + * + * @param source Source fileStatus. + * @param target Target fileStatus. + * @return boolean representing result of modTime check.
+ */ + private boolean maybeUseModTimeToCompare( + CopyListingFileStatus source, FileStatus target) { + if (useModTimeToUpdate) { + return source.getModificationTime() <= target.getModificationTime(); } + // if we cannot check mod time, return true (skip the copy). + return true; } @Override diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/DeletedDirTracker.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/DeletedDirTracker.java index 64431f7e5459c..7e11add9b7a79 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/DeletedDirTracker.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/DeletedDirTracker.java @@ -18,9 +18,9 @@ package org.apache.hadoop.tools.mapred; -import com.google.common.base.Preconditions; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.CopyListingFileStatus; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java index 4ed856fcb7deb..d6825f75d8c71 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java @@ -45,7 +45,11 @@ import org.apache.hadoop.tools.util.RetriableCommand; import org.apache.hadoop.tools.util.ThrottledInputStream; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.classification.VisibleForTesting; + +import static 
org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; +import static org.apache.hadoop.util.functional.FutureIO.awaitFuture; /** * This class extends RetriableCommand to implement the copy of files, @@ -145,7 +149,7 @@ private long doCopy(CopyListingFileStatus source, Path target, if (!source.isSplit()) { DistCpUtils.compareFileLengthsAndChecksums(source.getLen(), sourceFS, sourcePath, sourceChecksum, targetFS, - targetPath, skipCrc, source.getLen()); + targetPath, skipCrc, offset + bytesRead); } // it's not append or direct write (preferred for s3a) case, thus we first // write to a temporary file, then rename it to the target path. @@ -328,7 +332,11 @@ private static ThrottledInputStream getInputStream(Path path, FileSystem fs = path.getFileSystem(conf); float bandwidthMB = conf.getFloat(DistCpConstants.CONF_LABEL_BANDWIDTH_MB, DistCpConstants.DEFAULT_BANDWIDTH_MB); - FSDataInputStream in = fs.open(path); + // open with sequential read, but not whole-file + FSDataInputStream in = awaitFuture(fs.openFile(path) + .opt(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL) + .build()); return new ThrottledInputStream(in, bandwidthMB * 1024 * 1024); } catch (IOException e) { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformRecordInputFormat.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformRecordInputFormat.java new file mode 100644 index 0000000000000..6d83ab16b95f6 --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformRecordInputFormat.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tools.mapred; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader; +import org.apache.hadoop.tools.CopyListingFileStatus; +import org.apache.hadoop.tools.util.DistCpUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class UniformRecordInputFormat extends InputFormat { + private static final Logger LOG = LoggerFactory.getLogger(UniformRecordInputFormat.class); + + public List getSplits(JobContext context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + int numSplits = getNumSplits(conf); + if (numSplits == 0) return new ArrayList(); + + return createSplits(conf, numSplits, getNumberOfRecords(conf)); + } + + private List createSplits(Configuration 
configuration, int numSplits, long numRecords) throws IOException { + List splits = new ArrayList(numSplits); + long nRecordsPerSplit = (long) Math.floor(numRecords * 1.0 / numSplits); + if (LOG.isDebugEnabled()) { + LOG.debug("Average records per map: " + nRecordsPerSplit + + ", Number of maps: " + numSplits + ", total records: " + numRecords); + } + + Path listingFilePath = getListingFilePath(configuration); + CopyListingFileStatus srcFileStatus = new CopyListingFileStatus(); + Text srcRelPath = new Text(); + long lastPosition = 0L; + long count = 0L; + long remains = numRecords - nRecordsPerSplit * (long) numSplits; + + SequenceFile.Reader reader = null; + try { + reader = getListingFileReader(configuration); + while (reader.next(srcRelPath, srcFileStatus)) { + count++; + + if((remains > 0 && count % (nRecordsPerSplit + 1) == 0) || + (remains == 0 && count % nRecordsPerSplit == 0)){ + + long currentPosition = reader.getPosition(); + FileSplit split = new FileSplit(listingFilePath, lastPosition, + currentPosition - lastPosition, null); + if(LOG.isDebugEnabled()){ + LOG.debug("Creating split: " + split + ", records in split: " + count); + } + + splits.add(split); + lastPosition = currentPosition; + if(remains > 0){ + remains--; + } + count = 0L; + } + } + + return splits; + } finally { + IOUtils.closeStream(reader); + } + } + + public RecordReader createRecordReader( + InputSplit split, TaskAttemptContext context) + throws IOException, InterruptedException { + return new SequenceFileRecordReader(); + } + + private static Path getListingFilePath(Configuration configuration) { + String listingFilePathString = + configuration.get("distcp.listing.file.path", ""); + + assert !listingFilePathString.equals("") + : "Couldn't find listing file. 
Invalid input."; + return new Path(listingFilePathString); + } + + private SequenceFile.Reader getListingFileReader(Configuration conf) { + Path listingFilePath = getListingFilePath(conf); + + try { + FileSystem fs = listingFilePath.getFileSystem(conf); + if (!fs.exists(listingFilePath)) { + throw new IllegalArgumentException("Listing file doesn't exist at: " + + listingFilePath); + } + return new SequenceFile.Reader(conf, + SequenceFile.Reader.file(listingFilePath)); + } catch (IOException exception) { + LOG.error("Couldn't find listing file at: " + listingFilePath, exception); + throw new IllegalArgumentException("Couldn't find listing-file at: " + + listingFilePath, exception); + } + } + + private static long getNumberOfRecords(Configuration configuration) { + return DistCpUtils.getLong(configuration, "mapred.number.of.records"); + } + + private static int getNumSplits(Configuration configuration) { + return DistCpUtils.getInt(configuration, "mapreduce.job.maps"); + } +} diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java index 73c49bb8f1a61..e77b2031a76db 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java @@ -18,7 +18,7 @@ package org.apache.hadoop.tools.util; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +41,7 @@ import org.apache.hadoop.tools.CopyListingFileStatus; import org.apache.hadoop.tools.DistCpContext; import org.apache.hadoop.tools.DistCpOptions.FileAttribute; +import org.apache.hadoop.tools.mapred.CopyMapper; import org.apache.hadoop.tools.mapred.UniformSizeInputFormat; import org.apache.hadoop.util.StringUtils; @@ -199,6 +200,9 @@ public static 
void preserve(FileSystem targetFS, Path path, EnumSet attributes, boolean preserveRawXattrs) throws IOException { + // strip out those attributes we don't need any more + attributes.remove(FileAttribute.BLOCKSIZE); + attributes.remove(FileAttribute.CHECKSUMTYPE); // If not preserving anything from FileStatus, don't bother fetching it. FileStatus targetFileStatus = attributes.isEmpty() ? null : targetFS.getFileStatus(path); @@ -565,10 +569,12 @@ public static String getStringDescriptionFor(long nBytes) { * and false otherwise. * @throws IOException if there's an exception while retrieving checksums. */ - public static boolean checksumsAreEqual(FileSystem sourceFS, Path source, - FileChecksum sourceChecksum, - FileSystem targetFS, - Path target, long sourceLen) + public static CopyMapper.ChecksumComparison checksumsAreEqual( + FileSystem sourceFS, + Path source, + FileChecksum sourceChecksum, + FileSystem targetFS, + Path target, long sourceLen) throws IOException { FileChecksum targetChecksum = null; try { @@ -582,8 +588,15 @@ public static boolean checksumsAreEqual(FileSystem sourceFS, Path source, } catch (IOException e) { LOG.error("Unable to retrieve checksum for " + source + " or " + target, e); } - return (sourceChecksum == null || targetChecksum == null || - sourceChecksum.equals(targetChecksum)); + // If the source or target checksum is null, that means there is no + // comparison that took place and return not compatible. + // else if matched, return compatible with the matched result. + if (sourceChecksum == null || targetChecksum == null) { + return CopyMapper.ChecksumComparison.INCOMPATIBLE; + } else if (sourceChecksum.equals(targetChecksum)) { + return CopyMapper.ChecksumComparison.TRUE; + } + return CopyMapper.ChecksumComparison.FALSE; } /** @@ -610,8 +623,12 @@ public static void compareFileLengthsAndChecksums(long srcLen, //At this point, src & dest lengths are same. 
if length==0, we skip checksum if ((srcLen != 0) && (!skipCrc)) { - if (!checksumsAreEqual(sourceFS, source, sourceChecksum, - targetFS, target, srcLen)) { + CopyMapper.ChecksumComparison + checksumComparison = checksumsAreEqual(sourceFS, source, sourceChecksum, + targetFS, target, srcLen); + // If Checksum comparison is false set it to false, else set to true. + boolean checksumResult = !checksumComparison.equals(CopyMapper.ChecksumComparison.FALSE); + if (!checksumResult) { StringBuilder errorMessage = new StringBuilder(DistCpConstants.CHECKSUM_MISMATCH_ERROR_MSG) .append(source).append(" and ").append(target).append("."); diff --git a/hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml b/hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml index 6e1154ef1a9dd..9e7d1416fd1e7 100644 --- a/hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml +++ b/hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml @@ -19,6 +19,12 @@ + + distcp.record.strategy.impl + org.apache.hadoop.tools.mapred.UniformRecordInputFormat + Implementation of record input format + + distcp.dynamic.strategy.impl org.apache.hadoop.tools.mapred.lib.DynamicInputFormat diff --git a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm index bf5b89135fccb..0aba6d900a98e 100644 --- a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm +++ b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm @@ -337,7 +337,7 @@ Command Line Options | Flag | Description | Notes | | ----------------- | ------------------------------------ | -------- | -| `-p[rbugpcaxt]` | Preserve r: replication number b: block size u: user g: group p: permission c: checksum-type a: ACL x: XAttr t: timestamp | When `-update` is specified, status updates will **not** be synchronized unless the file sizes also differ (i.e. unless the file is re-created). 
If -pa is specified, DistCp preserves the permissions also because ACLs are a super-set of permissions. The option -pr is only valid if both source and target directory are not erasure coded. **Note:** If -p option's are not specified, then by default block size is preserved. | +| `-p[rbugpcaxt]` | Preserve r: replication number b: block size u: user g: group p: permission c: checksum-type a: ACL x: XAttr t: timestamp | When `-update` is specified, status updates will **not** be synchronized unless the file sizes also differ (i.e. unless the file is re-created). If -pa is specified, DistCp preserves the permissions also because ACLs are a super-set of permissions. The option -pr is only valid if both source and target directory are not erasure coded. | | `-i` | Ignore failures | As explained in the Appendix, this option will keep more accurate statistics about the copy than the default case. It also preserves logs from failed copies, which can be valuable for debugging. Finally, a failing map will not cause the job to fail before all splits are attempted. | | `-log ` | Write logs to \ | DistCp keeps logs of each file it attempts to copy as map output. If a map fails, the log output will not be retained if it is re-executed. | | `-v` | Log additional info (path, size) in the SKIP/COPY log | This option can only be used with -log option. | @@ -362,6 +362,7 @@ Command Line Options | `-copybuffersize ` | Size of the copy buffer to use. By default, `` is set to 8192B | | | `-xtrack ` | Save information about missing source files to the specified path. | This option is only valid with `-update` option. This is an experimental property and it cannot be used with `-atomic` option. 
| | `-direct` | Write directly to destination paths | Useful for avoiding potentially very expensive temporary file rename operations when the destination is an object store | +| `-useiterator` | Uses single threaded listStatusIterator to build listing | Useful for saving memory at the client side. Using this option will ignore the numListstatusThreads option | Architecture of DistCp ---------------------- @@ -579,7 +580,7 @@ $H3 MapReduce and other side-effects $H3 DistCp and Object Stores -DistCp works with Object Stores such as Amazon S3, Azure WASB and OpenStack Swift. +DistCp works with Object Stores such as Amazon S3, Azure ABFS and Google GCS. Prequisites @@ -622,21 +623,44 @@ And to use `-update` to only copy changed files. ```bash hadoop distcp -update -numListstatusThreads 20 \ - swift://history.cluster1/2016 \ + s3a://history/2016 \ hdfs://nn1:8020/history/2016 ``` Because object stores are slow to list files, consider setting the `-numListstatusThreads` option when performing a `-update` operation on a large directory tree (the limit is 40 threads). -When `DistCp -update` is used with object stores, -generally only the modification time and length of the individual files are compared, -not any checksums. The fact that most object stores do have valid timestamps -for directories is irrelevant; only the file timestamps are compared. -However, it is important to have the clock of the client computers close -to that of the infrastructure, so that timestamps are consistent between -the client/HDFS cluster and that of the object store. Otherwise, changed files may be -missed/copied too often. +When `DistCp -update` is used with object stores, generally only the +modification time and length of the individual files are compared, not any +checksums if the checksum algorithm between the two stores is different. 
+ +* The `distcp -update` between two object stores with different checksum + algorithms compares the modification times of source and target files along + with the file size to determine whether to skip the file copy. The behavior + is controlled by the property `distcp.update.modification.time`, which is + set to true by default. If the source file is more recently modified than + the target file, it is assumed that the content has changed, and the file + should be updated. + We need to ensure that there is no clock skew between the machines. + The fact that most object stores do have valid timestamps for directories + is irrelevant; only the file timestamps are compared. However, it is + important to have the clock of the client computers close to that of the + infrastructure, so that timestamps are consistent between the client/HDFS + cluster and that of the object store. Otherwise, changed files may be + missed/copied too often. + +* `distcp.update.modification.time` would only be used if either of the two + stores doesn't have checksum validation, resulting in incompatible checksum + comparison between the two. Even if the property is set to true, it won't + be used if there is valid checksum comparison between the two stores.
+ +To turn off the modification time check, set this in your core-site.xml: +```xml +<property> + <name>distcp.update.modification.time</name> + <value>false</value> +</property> +``` **Notes** diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java index ce7d00d2bd732..69e1421f08447 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java @@ -167,6 +167,29 @@ public void testDuplicates() { } } + @Test + public void testDuplicateSourcePaths() throws Exception { + FileSystem fs = FileSystem.get(getConf()); + List srcPaths = new ArrayList(); + try { + srcPaths.add(new Path("/tmp/in")); + srcPaths.add(new Path("/tmp/in")); + TestDistCpUtils.createFile(fs, "/tmp/in/src1/1.txt"); + TestDistCpUtils.createFile(fs, "/tmp/in/src2/1.txt"); + Path target = new Path("/tmp/out"); + Path listingFile = new Path("/tmp/list"); + final DistCpOptions options = + new DistCpOptions.Builder(srcPaths, target).build(); + final DistCpContext context = new DistCpContext(options); + CopyListing listing = + CopyListing.getCopyListing(getConf(), CREDENTIALS, context); + listing.buildListing(listingFile, context); + Assert.assertTrue(fs.exists(listingFile)); + } finally { + TestDistCpUtils.delete(fs, "/tmp"); + } + } + @Test(timeout=10000) public void testBuildListing() { FileSystem fs = null; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java index 7382795dd90d7..37b84f1d7a857 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java @@ -128,7 +128,7 @@ public void testSetSkipCRC() { new
Path("hdfs://localhost:8020/target/")); Assert.assertFalse(builder.build().shouldSkipCRC()); - final DistCpOptions options = builder.withSyncFolder(true).withCRC(true) + final DistCpOptions options = builder.withSyncFolder(true).withSkipCRC(true) .build(); Assert.assertTrue(options.shouldSyncFolder()); Assert.assertTrue(options.shouldSkipCRC()); @@ -289,7 +289,7 @@ public void testToString() { "atomicWorkPath=null, logPath=null, sourceFileListing=abc, " + "sourcePaths=null, targetPath=xyz, filtersFile='null', " + "blocksPerChunk=0, copyBufferSize=8192, verboseLog=false, " + - "directWrite=false}"; + "directWrite=false, useiterator=false}"; String optionString = option.toString(); Assert.assertEquals(val, optionString); Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(), @@ -391,7 +391,7 @@ public void testAppendOption() { new Path("hdfs://localhost:8020/target/")) .withSyncFolder(true) .withAppend(true) - .withCRC(true) + .withSkipCRC(true) .build(); fail("Append should fail if skipCrc option is specified"); } catch (IllegalArgumentException e) { diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index d6bbc25fdc7a8..cd2070484ad8d 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -48,7 +49,9 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; +import 
java.util.regex.Pattern; public class TestDistCpSync { private MiniDFSCluster cluster; @@ -83,7 +86,7 @@ public void setUp() throws Exception { @After public void tearDown() throws Exception { - IOUtils.cleanup(null, dfs); + IOUtils.cleanupWithLogger(null, dfs); if (cluster != null) { cluster.shutdown(); } @@ -280,6 +283,175 @@ public void testSync() throws Exception { verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false); } + /** + * Test the basic functionality. + */ + @Test + public void testSync1() throws Exception { + Path srcpath = new Path(source, "encz-mock"); + dfs.mkdirs(srcpath); + dfs.mkdirs(new Path(source, "encz-mock/datedir")); + enableAndCreateFirstSnapshot(); + + // before sync, make some further changes on source + DFSTestUtil.createFile(dfs, new Path(source, "encz-mock/datedir/file1"), + BLOCK_SIZE, DATA_NUM, 0); + dfs.delete(new Path(source, "encz-mock/datedir"), true); + dfs.mkdirs(new Path(source, "encz-mock/datedir")); + DFSTestUtil.createFile(dfs, new Path(source, "encz-mock/datedir/file2"), + BLOCK_SIZE, DATA_NUM, 0); + dfs.createSnapshot(source, "s2"); + Assert.assertTrue(dfs.exists(new Path(source, "encz-mock/datedir/file2"))); + + SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2"); + System.out.println(report); + + DistCpSync distCpSync = new DistCpSync(context, conf); + + // do the sync + Assert.assertTrue(distCpSync.sync()); + // make sure the source path has been updated to the snapshot path + final Path spath = new Path(source, + HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); + Assert.assertEquals(spath, context.getSourcePaths().get(0)); + + // build copy listing + final Path listingPath = new Path("/tmp/META/fileList.seq"); + CopyListing listing = + new SimpleCopyListing(conf, new Credentials(), distCpSync); + listing.buildListing(listingPath, context); + + Map copyListing = getListing(listingPath); + CopyMapper copyMapper = new CopyMapper(); + StubContext stubContext = new 
StubContext(conf, null, 0); + Mapper.Context mapContext = + stubContext.getContext(); + copyMapper.setup(mapContext); + for (Map.Entry entry : copyListing + .entrySet()) { + copyMapper.map(entry.getKey(), entry.getValue(), mapContext); + } + Assert.assertTrue(dfs.exists(new Path(target, "encz-mock/datedir/file2"))); + // verify the source and target now has the same structure + verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false); + } + + /** + * Test the basic functionality. + */ + @Test + public void testSyncNew() throws Exception { + Path srcpath = new Path(source, "encz-mock"); + dfs.mkdirs(srcpath); + dfs.mkdirs(new Path(source, "encz-mock/datedir")); + dfs.mkdirs(new Path(source, "trash")); + enableAndCreateFirstSnapshot(); + + // before sync, make some further changes on source + DFSTestUtil.createFile(dfs, new Path(source, "encz-mock/datedir/file1"), + BLOCK_SIZE, DATA_NUM, 0); + dfs.rename(new Path(source, "encz-mock/datedir"), + new Path(source, "trash")); + dfs.mkdirs(new Path(source, "encz-mock/datedir")); + DFSTestUtil.createFile(dfs, new Path(source, "encz-mock/datedir/file2"), + BLOCK_SIZE, DATA_NUM, 0); + dfs.createSnapshot(source, "s2"); + Assert.assertTrue(dfs.exists(new Path(source, "encz-mock/datedir/file2"))); + + SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2"); + System.out.println(report); + + DistCpSync distCpSync = new DistCpSync(context, conf); + + // do the sync + Assert.assertTrue(distCpSync.sync()); + // make sure the source path has been updated to the snapshot path + final Path spath = new Path(source, + HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); + Assert.assertEquals(spath, context.getSourcePaths().get(0)); + + // build copy listing + final Path listingPath = new Path("/tmp/META/fileList.seq"); + CopyListing listing = + new SimpleCopyListing(conf, new Credentials(), distCpSync); + listing.buildListing(listingPath, context); + + Map copyListing = getListing(listingPath); + 
CopyMapper copyMapper = new CopyMapper(); + StubContext stubContext = new StubContext(conf, null, 0); + Mapper.Context mapContext = + stubContext.getContext(); + copyMapper.setup(mapContext); + for (Map.Entry entry : copyListing + .entrySet()) { + copyMapper.map(entry.getKey(), entry.getValue(), mapContext); + } + Assert.assertTrue(dfs.exists(new Path(target, "encz-mock/datedir/file2"))); + Assert.assertTrue(dfs.exists(new Path(target, "trash/datedir/file1"))); + // verify the source and target now has the same structure + verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false); + } + + /** + * Test the basic functionality. + */ + @Test + public void testSyncWithFilters() throws Exception { + Path srcpath = new Path(source, "encz-mock"); + dfs.mkdirs(srcpath); + dfs.mkdirs(new Path(source, "encz-mock/datedir")); + dfs.mkdirs(new Path(source, "trash")); + enableAndCreateFirstSnapshot(); + + // before sync, make some further changes on source + DFSTestUtil.createFile(dfs, new Path(source, "encz-mock/datedir/file1"), + BLOCK_SIZE, DATA_NUM, 0); + dfs.rename(new Path(source, "encz-mock/datedir"), + new Path(source, "trash")); + dfs.mkdirs(new Path(source, "encz-mock/datedir")); + DFSTestUtil.createFile(dfs, new Path(source, "encz-mock/datedir/file2"), + BLOCK_SIZE, DATA_NUM, 0); + dfs.createSnapshot(source, "s2"); + Assert.assertTrue(dfs.exists(new Path(source, "encz-mock/datedir/file2"))); + + SnapshotDiffReport report = dfs.getSnapshotDiffReport(source, "s1", "s2"); + System.out.println(report); + List filters = new ArrayList<>(); + filters.add(Pattern.compile(".*trash.*")); + RegexCopyFilter regexCopyFilter = new RegexCopyFilter("fakeFile"); + regexCopyFilter.setFilters(filters); + + DistCpSync distCpSync = new DistCpSync(context, conf); + distCpSync.setCopyFilter(regexCopyFilter); + + // do the sync + Assert.assertTrue(distCpSync.sync()); + // make sure the source path has been updated to the snapshot path + final Path spath = new Path(source, + 
HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); + Assert.assertEquals(spath, context.getSourcePaths().get(0)); + + // build copy listing + final Path listingPath = new Path("/tmp/META/fileList.seq"); + CopyListing listing = + new SimpleCopyListing(conf, new Credentials(), distCpSync); + listing.buildListing(listingPath, context); + + Map copyListing = getListing(listingPath); + CopyMapper copyMapper = new CopyMapper(); + StubContext stubContext = new StubContext(conf, null, 0); + Mapper.Context mapContext = + stubContext.getContext(); + copyMapper.setup(mapContext); + for (Map.Entry entry : copyListing + .entrySet()) { + copyMapper.map(entry.getKey(), entry.getValue(), mapContext); + } + Assert.assertTrue(dfs.exists(new Path(target, "encz-mock/datedir/file2"))); + Assert.assertFalse(dfs.exists(new Path(target, "encz-mock/datedir/file1"))); + Assert.assertFalse(dfs.exists(new Path(target, "trash/datedir/file1"))); + } + private Map getListing(Path listingPath) throws Exception { SequenceFile.Reader reader = new SequenceFile.Reader(conf, @@ -907,4 +1079,82 @@ public void testSync11() throws Exception { deleteFilterFile(filterFile); } } + + @Test + public void testRenameWithFilter() throws Exception { + java.nio.file.Path filterFile = null; + try { + Path sourcePath = new Path(dfs.getWorkingDirectory(), "source"); + + // Create some dir inside source + dfs.mkdirs(new Path(sourcePath, "dir1")); + dfs.mkdirs(new Path(sourcePath, "dir2")); + + // Allow & Create snapshot at source. 
+ dfs.allowSnapshot(sourcePath); + dfs.createSnapshot(sourcePath, "s1"); + + filterFile = Files.createTempFile("filters", "txt"); + String str = ".*filterDir1.*"; + try (BufferedWriter writer = new BufferedWriter( + new FileWriter(filterFile.toString()))) { + writer.write(str); + } + final DistCpOptions.Builder builder = + new DistCpOptions.Builder(new ArrayList<>(Arrays.asList(sourcePath)), + target).withFiltersFile(filterFile.toString()) + .withSyncFolder(true); + new DistCp(conf, builder.build()).execute(); + + // Check the two directories get copied. + ContractTestUtils + .assertPathExists(dfs, "dir1 should get copied to target", + new Path(target, "dir1")); + ContractTestUtils + .assertPathExists(dfs, "dir2 should get copied to target", + new Path(target, "dir2")); + + // Allow & create initial snapshots on target. + dfs.allowSnapshot(target); + dfs.createSnapshot(target, "s1"); + + // Now do a rename to a filtered name on source. + dfs.rename(new Path(sourcePath, "dir1"), + new Path(sourcePath, "filterDir1")); + + ContractTestUtils + .assertPathExists(dfs, "'filterDir1' should be there on source", + new Path(sourcePath, "filterDir1")); + + // Create the incremental snapshot. + dfs.createSnapshot(sourcePath, "s2"); + + final DistCpOptions.Builder diffBuilder = + new DistCpOptions.Builder(new ArrayList<>(Arrays.asList(sourcePath)), + target).withUseDiff("s1", "s2") + .withFiltersFile(filterFile.toString()).withSyncFolder(true); + new DistCp(conf, diffBuilder.build()).execute(); + + // Check the only qualified directory dir2 is there in target + ContractTestUtils.assertPathExists(dfs, "dir2 should be there on target", + new Path(target, "dir2")); + + // Check the filtered directory is not there. + ContractTestUtils.assertPathDoesNotExist(dfs, + "Filtered directory 'filterDir1' shouldn't get copied", + new Path(target, "filterDir1")); + + // Check the renamed directory gets deleted. 
+ ContractTestUtils.assertPathDoesNotExist(dfs, + "Renamed directory 'dir1' should get deleted", + new Path(target, "dir1")); + + // Check the filtered directory isn't there in the home directory. + ContractTestUtils.assertPathDoesNotExist(dfs, + "Filtered directory 'filterDir1' shouldn't get copied to home directory", + new Path("filterDir1")); + } finally { + deleteFilterFile(filterFile); + } + } } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSyncReverseBase.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSyncReverseBase.java index cca1c5381c48a..50f5823656e37 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSyncReverseBase.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSyncReverseBase.java @@ -153,7 +153,7 @@ public void setUp() throws Exception { @After public void tearDown() throws Exception { - IOUtils.cleanup(null, dfs); + IOUtils.cleanupWithLogger(null, dfs); if (cluster != null) { cluster.shutdown(); } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index 14cce42e0f86b..47b850f4ba3e2 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -48,9 +48,7 @@ import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; /** * A JUnit test for copying files recursively. 
@@ -60,9 +58,6 @@ public class TestDistCpSystem { private static final Logger LOG = LoggerFactory.getLogger(TestDistCpSystem.class); - @Rule - public Timeout globalTimeout = new Timeout(30000); - private static final String SRCDAT = "srcdat"; private static final String DSTDAT = "dstdat"; private static final long BLOCK_SIZE = 1024; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithAcls.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithAcls.java index 49613ba57008a..38b79338312ed 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithAcls.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithAcls.java @@ -96,7 +96,7 @@ public static void init() throws Exception { @AfterClass public static void shutdown() { - IOUtils.cleanup(null, fs); + IOUtils.cleanupWithLogger(null, fs); if (cluster != null) { cluster.shutdown(); } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java index 8adc2cfb867fc..4637f6da3d2af 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java @@ -26,13 +26,16 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.tools.util.DistCpTestUtils; +import org.apache.hadoop.util.functional.RemoteIterators; +import org.assertj.core.api.Assertions; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * Tests distcp in combination 
with HDFS raw.* XAttrs. @@ -61,6 +64,7 @@ public class TestDistCpWithRawXAttrs { public static void init() throws Exception { conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_KEY, true); + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 2); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true) .build(); cluster.waitActive(); @@ -69,7 +73,7 @@ public static void init() throws Exception { @AfterClass public static void shutdown() { - IOUtils.cleanup(null, fs); + IOUtils.cleanupWithLogger(null, fs); if (cluster != null) { cluster.shutdown(); } @@ -164,4 +168,23 @@ private void doTestPreserveRawXAttrs(String src, String dest, } } } + + @Test + public void testUseIterator() throws Exception { + + Path source = new Path("/src"); + Path dest = new Path("/dest"); + fs.delete(source, true); + fs.delete(dest, true); + // Create a source dir + fs.mkdirs(source); + + GenericTestUtils.createFiles(fs, source, 3, 10, 10); + + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest.toString(), "-useiterator", conf); + + Assertions.assertThat(RemoteIterators.toList(fs.listFiles(dest, true))) + .describedAs("files").hasSize(1110); + } } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithXAttrs.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithXAttrs.java index 96193e8b1b759..36c6e6a5655c9 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithXAttrs.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithXAttrs.java @@ -40,7 +40,7 @@ import org.junit.BeforeClass; import org.junit.Test; -import com.google.common.collect.Maps; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; /** * Tests distcp in combination with HDFS XAttrs. 
@@ -114,7 +114,7 @@ public static void init() throws Exception { @AfterClass public static void shutdown() { - IOUtils.cleanup(null, fs); + IOUtils.cleanupWithLogger(null, fs); if (cluster != null) { cluster.shutdown(); } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java index 1c92a9c5ef276..389fe367b04b7 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java @@ -72,7 +72,7 @@ private static void mkdirs(String path) throws Exception { recordInExpectedValues(path); } finally { - IOUtils.cleanup(null, fileSystem); + IOUtils.cleanupWithLogger(null, fileSystem); } } @@ -85,7 +85,7 @@ private static void touchFile(String path) throws Exception { recordInExpectedValues(path); } finally { - IOUtils.cleanup(null, fileSystem, outputStream); + IOUtils.cleanupWithLogger(null, fileSystem, outputStream); } } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java index b48355af25ba5..85b312a94b52f 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java @@ -415,7 +415,7 @@ public void testPreserve() { "-f", "hdfs://localhost:8020/source/first", "hdfs://localhost:8020/target/"}); - Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE)); + Assert.assertFalse(options.shouldPreserve(FileAttribute.BLOCKSIZE)); Assert.assertFalse(options.shouldPreserve(FileAttribute.REPLICATION)); Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION)); 
Assert.assertFalse(options.shouldPreserve(FileAttribute.USER)); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index eeaf30a929996..aa42cb968d61f 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -18,7 +18,10 @@ package org.apache.hadoop.tools.contract; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; +import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID; import java.io.IOException; import java.util.Collections; @@ -26,6 +29,7 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; @@ -41,8 +45,12 @@ import org.apache.hadoop.tools.DistCp; import org.apache.hadoop.tools.DistCpConstants; import org.apache.hadoop.tools.DistCpOptions; +import org.apache.hadoop.tools.SimpleCopyListing; import org.apache.hadoop.tools.mapred.CopyMapper; +import org.apache.hadoop.tools.util.DistCpTestUtils; +import org.apache.hadoop.util.functional.RemoteIterators; +import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -57,6 +65,7 @@ * under test. The tests in the suite cover both copying from local to remote * (e.g. a backup use case) and copying from remote to local (e.g. a restore use * case). + * The HDFS contract test needs to be run explicitly. 
*/ public abstract class AbstractContractDistCpTest extends AbstractFSContractTestBase { @@ -64,6 +73,9 @@ public abstract class AbstractContractDistCpTest private static final Logger LOG = LoggerFactory.getLogger(AbstractContractDistCpTest.class); + /** Using offset to change modification time in tests. */ + private static final long MODIFICATION_TIME_OFFSET = 10000; + public static final String SCALE_TEST_DISTCP_FILE_SIZE_KB = "scale.test.distcp.file.size.kb"; @@ -71,6 +83,22 @@ public abstract class AbstractContractDistCpTest protected static final int MB = 1024 * 1024; + /** + * Default depth for a directory tree: {@value}. + */ + protected static final int DEFAULT_DEPTH = 3; + + /** + * Default width for a directory tree: {@value}. + * Total dir size is + *

    +   *   DEFAULT_WIDTH^DEFAULT_DEPTH
    +   * 
    + * So the duration of a test run grows rapidly with this value. + * This has very significant consequences for object storage runs. + */ + protected static final int DEFAULT_WIDTH = 2; + @Rule public TestName testName = new TestName(); @@ -148,13 +176,20 @@ public void setup() throws Exception { localDir = localFS.makeQualified(new Path(new Path( GenericTestUtils.getTestDir().toURI()), testSubDir + "/local")); + localFS.delete(localDir, true); mkdirs(localFS, localDir); - remoteDir = path(testSubDir + "/remote"); - mkdirs(remoteFS, remoteDir); + Path testSubPath = path(testSubDir); + remoteDir = new Path(testSubPath, "remote"); // test teardown does this, but IDE-based test debugging can skip // that teardown; this guarantees the initial state is clean remoteFS.delete(remoteDir, true); - localFS.delete(localDir, true); + } + + @Override + public void teardown() throws Exception { + // if remote FS supports IOStatistics log it. + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, getRemoteFS()); + super.teardown(); } /** @@ -318,7 +353,31 @@ private Job distCpUpdate(final Path srcDir, final Path destDir) Collections.singletonList(srcDir), destDir) .withDeleteMissing(true) .withSyncFolder(true) - .withCRC(true) + .withSkipCRC(true) + .withDirectWrite(shouldUseDirectWrite()) + .withOverwrite(false))); + } + + /** + * Run distcp -update srcDir destDir. + * @param srcDir local source directory + * @param destDir remote destination directory. + * @return the completed job + * @throws Exception any failure. 
+ */ + private Job distCpUpdateWithFs(final Path srcDir, final Path destDir, + FileSystem sourceFs, FileSystem targetFs) + throws Exception { + describe("\nDistcp -update from " + srcDir + " to " + destDir); + lsR("Source Fs to update", sourceFs, srcDir); + lsR("Target Fs before update", targetFs, destDir); + return runDistCp(buildWithStandardOptions( + new DistCpOptions.Builder( + Collections.singletonList(srcDir), destDir) + .withDeleteMissing(true) + .withSyncFolder(true) + .withSkipCRC(false) + .withDirectWrite(shouldUseDirectWrite()) .withOverwrite(false))); } @@ -372,6 +431,7 @@ public void testTrackDeepDirectoryStructureToRemote() throws Exception { inputDirUnderOutputDir) .withTrackMissing(trackDir) .withSyncFolder(true) + .withDirectWrite(shouldUseDirectWrite()) .withOverwrite(false))); lsR("tracked udpate", remoteFS, destDir); @@ -464,6 +524,17 @@ public void testLargeFilesFromRemote() throws Exception { largeFiles(remoteFS, remoteDir, localFS, localDir); } + @Test + public void testSetJobId() throws Exception { + describe("check jobId is set in the conf"); + remoteFS.create(new Path(remoteDir, "file1")).close(); + DistCpTestUtils + .assertRunDistCp(DistCpConstants.SUCCESS, remoteDir.toString(), + localDir.toString(), getDefaultCLIOptionsOrNull(), conf); + assertNotNull("DistCp job id isn't set", + conf.get(CONF_LABEL_DISTCP_JOB_ID)); + } + /** * Executes a DistCp using a file system sub-tree with multiple nesting * levels. 
@@ -515,13 +586,15 @@ private Path distCpDeepDirectoryStructure(FileSystem srcFS, */ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstDir) throws Exception { + int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, + getDefaultDistCPSizeKb()); + if (fileSizeKb < 1) { + skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " is zero"); + } initPathFields(srcDir, dstDir); Path largeFile1 = new Path(inputDir, "file1"); Path largeFile2 = new Path(inputDir, "file2"); Path largeFile3 = new Path(inputDir, "file3"); - mkdirs(srcFS, inputDir); - int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, - DEFAULT_DISTCP_SIZE_KB); int fileSizeMb = fileSizeKb / 1024; getLogger().info("{} with file size {}", testName.getMethodName(), fileSizeMb); byte[] data1 = dataset((fileSizeMb + 1) * MB, 33, 43); @@ -532,22 +605,37 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, createFile(srcFS, largeFile3, true, data3); Path target = new Path(dstDir, "outputDir"); runDistCp(inputDir, target); - ContractTestUtils.assertIsDirectory(dstFS, target); verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1); verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2); verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3); } + /** + * Override point. What is the default distcp size + * for large files if not overridden by + * {@link #SCALE_TEST_DISTCP_FILE_SIZE_KB}. + * If 0 then, unless overridden in the configuration, + * the large file tests will not run. + * @return file size. + */ + protected int getDefaultDistCPSizeKb() { + return DEFAULT_DISTCP_SIZE_KB; + } + /** * Executes DistCp and asserts that the job finished successfully. - * + * The choice of direct/indirect is based on the value of + * {@link #shouldUseDirectWrite()}. 
* @param src source path * @param dst destination path * @throws Exception if there is a failure */ private void runDistCp(Path src, Path dst) throws Exception { - runDistCp(buildWithStandardOptions( - new DistCpOptions.Builder(Collections.singletonList(src), dst))); + if (shouldUseDirectWrite()) { + runDistCpDirectWrite(src, dst); + } else { + runDistCpWithRename(src, dst); + } } /** @@ -590,6 +678,9 @@ private static void mkdirs(FileSystem fs, Path dir) throws Exception { @Test public void testDirectWrite() throws Exception { describe("copy file from local to remote using direct write option"); + if (shouldUseDirectWrite()) { + skip("not needed as all other tests use the -direct option."); + } directWrite(localFS, localDir, remoteFS, remoteDir, true); } @@ -600,6 +691,82 @@ public void testNonDirectWrite() throws Exception { directWrite(localFS, localDir, remoteFS, remoteDir, false); } + @Test + public void testDistCpWithIterator() throws Exception { + describe("Build listing in distCp using the iterator option."); + Path source = new Path(remoteDir, "src"); + Path dest = new Path(localDir, "dest"); + dest = localFS.makeQualified(dest); + + GenericTestUtils + .createFiles(remoteFS, source, getDepth(), getWidth(), getWidth()); + + GenericTestUtils.LogCapturer log = + GenericTestUtils.LogCapturer.captureLogs(SimpleCopyListing.LOG); + + String options = "-useiterator -update -delete" + getDefaultCLIOptions(); + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest.toString(), options, conf); + + // Check the target listing was also done using iterator. 
+ Assertions.assertThat(log.getOutput()).contains( + "Building listing using iterator mode for " + dest.toString()); + + Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) + .describedAs("files").hasSize(getTotalFiles()); + } + + public int getDepth() { + return DEFAULT_DEPTH; + } + + public int getWidth() { + return DEFAULT_WIDTH; + } + + private int getTotalFiles() { + int totalFiles = 0; + for (int i = 1; i <= getDepth(); i++) { + totalFiles += Math.pow(getWidth(), i); + } + return totalFiles; + } + + /** + * Override point: should direct write always be used? + * false by default; enable for stores where rename is slow. + * @return true if direct write should be used in all tests. + */ + protected boolean shouldUseDirectWrite() { + return false; + } + + /** + * Return the default options for distcp, including, + * if {@link #shouldUseDirectWrite()} is true, + * the -direct option. + * Append or prepend this to string CLIs. + * @return default options. + */ + protected String getDefaultCLIOptions() { + return shouldUseDirectWrite() + ? " -direct " + : ""; + } + + /** + * Return the default options for distcp, including, + * if {@link #shouldUseDirectWrite()} is true, + * the -direct option, null if there are no + * defaults. + * @return default options. + */ + protected String getDefaultCLIOptionsOrNull() { + return shouldUseDirectWrite() + ? " -direct " + : null; + } + /** * Executes a test with support for using direct write option. 
* @@ -624,7 +791,7 @@ private void directWrite(FileSystem srcFS, Path srcDir, FileSystem dstFS, if (directWrite) { runDistCpDirectWrite(inputDir, target); } else { - runDistCp(inputDir, target); + runDistCpWithRename(inputDir, target); } ContractTestUtils.assertIsDirectory(dstFS, target); lsR("Destination tree after distcp", dstFS, target); @@ -650,4 +817,188 @@ private Job runDistCpDirectWrite(final Path srcDir, final Path destDir) Collections.singletonList(srcDir), destDir) .withDirectWrite(true))); } + /** + * Run distcp srcDir destDir. + * @param srcDir local source directory + * @param destDir remote destination directory + * @return the completed job + * @throws Exception any failure. + */ + private Job runDistCpWithRename(Path srcDir, final Path destDir) + throws Exception { + describe("\nDistcp from " + srcDir + " to " + destDir); + return runDistCp(buildWithStandardOptions( + new DistCpOptions.Builder( + Collections.singletonList(srcDir), destDir) + .withDirectWrite(false))); + } + + @Test + public void testDistCpWithFile() throws Exception { + describe("Distcp only file"); + + Path source = new Path(remoteDir, "file"); + Path dest = new Path(localDir, "file"); + dest = localFS.makeQualified(dest); + + mkdirs(localFS, localDir); + + int len = 4; + int base = 0x40; + byte[] block = dataset(len, base, base + len); + ContractTestUtils.createFile(remoteFS, source, true, block); + verifyPathExists(remoteFS, "", source); + verifyPathExists(localFS, "", localDir); + + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest.toString(), getDefaultCLIOptionsOrNull(), conf); + + Assertions + .assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) + .describedAs("files").hasSize(1); + verifyFileContents(localFS, dest, block); + } + + @Test + public void testDistCpWithUpdateExistFile() throws Exception { + describe("Now update an existing file."); + + Path source = new Path(remoteDir, "file"); + Path dest = new Path(localDir, 
"file"); + dest = localFS.makeQualified(dest); + + int len = 4; + int base = 0x40; + byte[] block = dataset(len, base, base + len); + byte[] destBlock = dataset(len, base, base + len + 1); + ContractTestUtils.createFile(remoteFS, source, true, block); + ContractTestUtils.createFile(localFS, dest, true, destBlock); + + verifyPathExists(remoteFS, "", source); + verifyPathExists(localFS, "", dest); + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest.toString(), "-delete -update" + getDefaultCLIOptions(), conf); + + Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) + .hasSize(1); + verifyFileContents(localFS, dest, block); + } + + @Test + public void testDistCpUpdateCheckFileSkip() throws Exception { + describe("Distcp update to check file skips."); + + Path source = new Path(remoteDir, "file"); + Path dest = new Path(localDir, "file"); + + Path source0byte = new Path(remoteDir, "file_0byte"); + Path dest0byte = new Path(localDir, "file_0byte"); + dest = localFS.makeQualified(dest); + dest0byte = localFS.makeQualified(dest0byte); + + // Creating a source file with certain dataset. + byte[] sourceBlock = dataset(10, 'a', 'z'); + + // Write the dataset. + ContractTestUtils + .writeDataset(remoteFS, source, sourceBlock, sourceBlock.length, + 1024, true); + + // Create 0 byte source and target files. + ContractTestUtils.createFile(remoteFS, source0byte, true, new byte[0]); + ContractTestUtils.createFile(localFS, dest0byte, true, new byte[0]); + + // Execute the distcp -update job. + Job job = distCpUpdateWithFs(remoteDir, localDir, remoteFS, localFS); + + // First distcp -update would normally copy the source to dest. + verifyFileContents(localFS, dest, sourceBlock); + // Verify 1 file was skipped in the distcp -update (The 0 byte file). + // Verify 1 file was copied in the distcp -update (The new source file). 
+ verifySkipAndCopyCounter(job, 1, 1); + + // Remove the source file and replace with a file with same name and size + // but different content. + remoteFS.delete(source, false); + Path updatedSource = new Path(remoteDir, "file"); + byte[] updatedSourceBlock = dataset(10, 'b', 'z'); + ContractTestUtils.writeDataset(remoteFS, updatedSource, + updatedSourceBlock, updatedSourceBlock.length, 1024, true); + + // For testing purposes we would take the modification time of the + // updated Source file and add an offset or subtract the offset and set + // that time as the modification time for target file, this way we can + // ensure that our test can emulate a scenario where source is either more + // recently changed after -update so that copy takes place or target file + // is more recently changed which would skip the copying since the source + // has not been recently updated. + FileStatus fsSourceUpd = remoteFS.getFileStatus(updatedSource); + long modTimeSourceUpd = fsSourceUpd.getModificationTime(); + + // Add by an offset which would ensure enough gap for the test to + // not fail due to race conditions. + long newTargetModTimeNew = modTimeSourceUpd + MODIFICATION_TIME_OFFSET; + localFS.setTimes(dest, newTargetModTimeNew, -1); + + // Execute the distcp -update job. + Job updatedSourceJobOldSrc = + distCpUpdateWithFs(remoteDir, localDir, remoteFS, + localFS); + + // File contents should remain same since the mod time for target is + // newer than the updatedSource which indicates that the sync happened + // more recently and there is no update. + verifyFileContents(localFS, dest, sourceBlock); + // Skipped both 0 byte file and sourceFile (since mod time of target is + // older than the source it is perceived that source is of older version + // and we can skip it's copy). + verifySkipAndCopyCounter(updatedSourceJobOldSrc, 2, 0); + + // Subtract by an offset which would ensure enough gap for the test to + // not fail due to race conditions. 
+ long newTargetModTimeOld = + Math.min(modTimeSourceUpd - MODIFICATION_TIME_OFFSET, 0); + localFS.setTimes(dest, newTargetModTimeOld, -1); + + // Execute the distcp -update job. + Job updatedSourceJobNewSrc = distCpUpdateWithFs(remoteDir, localDir, + remoteFS, + localFS); + + // Verifying the target directory have both 0 byte file and the content + // file. + Assertions + .assertThat(RemoteIterators.toList(localFS.listFiles(localDir, true))) + .hasSize(2); + // Now the copy should take place and the file contents should change + // since the mod time for target is older than the source file indicating + // that there was an update to the source after the last sync took place. + verifyFileContents(localFS, dest, updatedSourceBlock); + // Verifying we skipped the 0 byte file and copied the updated source + // file (since the modification time of the new source is older than the + // target now). + verifySkipAndCopyCounter(updatedSourceJobNewSrc, 1, 1); + } + + /** + * Method to check the skipped and copied counters of a distcp job. + * + * @param job job to check. + * @param skipExpectedValue expected skip counter value. + * @param copyExpectedValue expected copy counter value. + * @throws IOException throw in case of failures. + */ + private void verifySkipAndCopyCounter(Job job, + int skipExpectedValue, int copyExpectedValue) throws IOException { + // get the skip and copy counters from the job. + long skipActualValue = job.getCounters() + .findCounter(CopyMapper.Counter.SKIP).getValue(); + long copyActualValue = job.getCounters() + .findCounter(CopyMapper.Counter.COPY).getValue(); + // Verify if the actual values equals the expected ones. 
+ assertEquals("Mismatch in COPY counter value", copyExpectedValue, + copyActualValue); + assertEquals("Mismatch in SKIP counter value", skipExpectedValue, + skipActualValue); + } } \ No newline at end of file diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java new file mode 100644 index 0000000000000..61a16b1e816fd --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.tools.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.hdfs.HDFSContract; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.io.IOException; + +/** + * Verifies that the HDFS passes all the tests in + * {@link AbstractContractDistCpTest}. + * As such, it acts as an in-module validation of this contract test itself. + * It does skip the large file test cases for speed. 
+ */ +public class TestHDFSContractDistCp extends AbstractContractDistCpTest { + + @BeforeClass + public static void createCluster() throws IOException { + HDFSContract.createCluster(); + } + + @AfterClass + public static void teardownCluster() throws IOException { + HDFSContract.destroyCluster(); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new HDFSContract(conf); + } + + /** + * Turn off the large file tests as they are very slow and there + * are many other distcp to HDFS tests which verify such things. + * @return 0 + */ + @Override + protected int getDefaultDistCPSizeKb() { + return 0; + } +} diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java index 11118c1f72400..6a537dc6e7de9 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.tools.mapred; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -53,6 +54,8 @@ import java.util.*; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH; +import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_TARGET_WORK_PATH; import static org.apache.hadoop.tools.util.TestDistCpUtils.*; public class TestCopyCommitter { @@ -160,10 +163,10 @@ public void testPreserveStatus() throws IOException { context.setTargetPathExists(false); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); - Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); + Path listingFile = new Path("/tmp1/" + rand.nextLong()); 
listing.buildListing(listingFile, context); - conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); checkDirectoryPermissions(fs, targetBase, sourcePerm); @@ -179,6 +182,45 @@ public void testPreserveStatus() throws IOException { } + @Test + public void testPreserveStatusWithAtomicCommit() throws IOException { + TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); + JobContext jobContext = new JobContextImpl( + taskAttemptContext.getConfiguration(), + taskAttemptContext.getTaskAttemptID().getJobID()); + Configuration conf = jobContext.getConfiguration(); + String sourceBase; + String workBase; + String targetBase; + FileSystem fs = null; + try { + OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); + fs = FileSystem.get(conf); + FsPermission sourcePerm = new FsPermission((short) 511); + FsPermission initialPerm = new FsPermission((short) 448); + sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm); + workBase = TestDistCpUtils.createTestSetup(fs, initialPerm); + targetBase = "/tmp1/" + rand.nextLong(); + final DistCpOptions options = new DistCpOptions.Builder( + Collections.singletonList(new Path(sourceBase)), new Path("/out")) + .preserve(FileAttribute.PERMISSION).build(); + options.appendToConf(conf); + final DistCpContext context = new DistCpContext(options); + context.setTargetPathExists(false); + CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); + Path listingFile = new Path("/tmp1/" + rand.nextLong()); + listing.buildListing(listingFile, context); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_WORK_PATH, workBase); + conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true); + committer.commitJob(jobContext); + checkDirectoryPermissions(fs, targetBase, sourcePerm); + } finally { + TestDistCpUtils.delete(fs, "/tmp1"); + 
conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS); + } + } + @Test public void testDeleteMissing() throws IOException { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); @@ -207,6 +249,51 @@ public void testDeleteMissing() throws IOException { Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, context); + conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); + + committer.commitJob(jobContext); + verifyFoldersAreInSync(fs, targetBase, sourceBase); + verifyFoldersAreInSync(fs, sourceBase, targetBase); + + //Test for idempotent commit + committer.commitJob(jobContext); + verifyFoldersAreInSync(fs, targetBase, sourceBase); + verifyFoldersAreInSync(fs, sourceBase, targetBase); + } finally { + TestDistCpUtils.delete(fs, "/tmp1"); + conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false"); + } + } + + @Test + public void testDeleteMissingWithOnlyFile() throws IOException { + TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); + JobContext jobContext = new JobContextImpl(taskAttemptContext + .getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); + Configuration conf = jobContext.getConfiguration(); + + String sourceBase; + String targetBase; + FileSystem fs = null; + try { + OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); + fs = FileSystem.get(conf); + sourceBase = TestDistCpUtils.createTestSetupWithOnlyFile(fs, + FsPermission.getDefault()); + targetBase = TestDistCpUtils.createTestSetupWithOnlyFile(fs, + FsPermission.getDefault()); + + final DistCpOptions options = new DistCpOptions.Builder( + Collections.singletonList(new Path(sourceBase)), new Path(targetBase)) + .withSyncFolder(true).withDeleteMissing(true).build(); + options.appendToConf(conf); + final DistCpContext context = new DistCpContext(options); + + CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); + 
Path listingFile = new Path(sourceBase); + listing.buildListing(listingFile, context); + conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); @@ -256,8 +343,8 @@ public void testPreserveTimeWithDeleteMiss() throws IOException { Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, context); - conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); - conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); Path sourceListing = new Path( conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH)); @@ -320,8 +407,8 @@ public void testDeleteMissingFlatInterleavedFiles() throws IOException { Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, context); - conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); - conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); verifyFoldersAreInSync(fs, targetBase, sourceBase); @@ -353,8 +440,8 @@ public void testAtomicCommitMissingFinal() throws IOException { fs = FileSystem.get(conf); fs.mkdirs(new Path(workPath)); - conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath); - conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath); + conf.set(CONF_LABEL_TARGET_WORK_PATH, workPath); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, finalPath); conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true); assertPathExists(fs, "Work path", new Path(workPath)); @@ -391,8 +478,8 @@ public void testAtomicCommitExistingFinal() throws IOException { fs.mkdirs(new Path(workPath)); fs.mkdirs(new Path(finalPath)); - 
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath); - conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath); + conf.set(CONF_LABEL_TARGET_WORK_PATH, workPath); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, finalPath); conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true); assertPathExists(fs, "Work path", new Path(workPath)); @@ -450,7 +537,7 @@ private void testCommitWithChecksumMismatch(boolean skipCrc) Collections.singletonList(new Path(sourceBase)), new Path("/out")) .withBlocksPerChunk(blocksPerChunk) - .withCRC(skipCrc) + .withSkipCRC(skipCrc) .build(); options.appendToConf(conf); conf.setBoolean( @@ -463,8 +550,8 @@ private void testCommitWithChecksumMismatch(boolean skipCrc) + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, context); - conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); - conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); OutputCommitter committer = new CopyCommitter( null, taskAttemptContext); @@ -476,9 +563,12 @@ private void testCommitWithChecksumMismatch(boolean skipCrc) Path sourcePath = new Path(sourceBase + srcFilename); CopyListingFileStatus sourceCurrStatus = new CopyListingFileStatus(fs.getFileStatus(sourcePath)); - Assert.assertFalse(DistCpUtils.checksumsAreEqual( - fs, new Path(sourceBase + srcFilename), null, - fs, new Path(targetBase + srcFilename), sourceCurrStatus.getLen())); + Assert.assertEquals("Checksum should not be equal", + CopyMapper.ChecksumComparison.FALSE, + DistCpUtils.checksumsAreEqual( + fs, new Path(sourceBase + srcFilename), null, + fs, new Path(targetBase + srcFilename), + sourceCurrStatus.getLen())); } catch(IOException exception) { if (skipCrc) { LOG.error("Unexpected exception is found", exception); @@ -494,6 +584,74 @@ fs, new Path(sourceBase + srcFilename), null, } } + @Test + public void 
testCommitWithCleanupTempFiles() throws IOException { + testCommitWithCleanup(true); + testCommitWithCleanup(false); + } + + private void testCommitWithCleanup(boolean directWrite) throws IOException { + TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); + JobID jobID = taskAttemptContext.getTaskAttemptID().getJobID(); + JobContext jobContext = new JobContextImpl( + taskAttemptContext.getConfiguration(), + jobID); + Configuration conf = jobContext.getConfiguration(); + + String sourceBase; + String targetBase; + FileSystem fs = null; + try { + fs = FileSystem.get(conf); + sourceBase = "/tmp1/" + rand.nextLong(); + targetBase = "/tmp1/" + rand.nextLong(); + + DistCpOptions options = new DistCpOptions.Builder( + Collections.singletonList(new Path(sourceBase)), + new Path("/out")) + .withAppend(true) + .withSyncFolder(true) + .withDirectWrite(directWrite) + .build(); + options.appendToConf(conf); + + DistCpContext context = new DistCpContext(options); + context.setTargetPathExists(false); + + + conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase); + conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase); + + Path tempFilePath = getTempFile(targetBase, taskAttemptContext); + createDirectory(fs, tempFilePath); + + OutputCommitter committer = new CopyCommitter( + null, taskAttemptContext); + committer.commitJob(jobContext); + + if (directWrite) { + ContractTestUtils.assertPathExists(fs, "Temp files should not be cleanup with append or direct option", + tempFilePath); + } else { + ContractTestUtils.assertPathDoesNotExist( + fs, + "Temp files should be clean up without append or direct option", + tempFilePath); + } + } finally { + TestDistCpUtils.delete(fs, "/tmp1"); + TestDistCpUtils.delete(fs, "/meta"); + } + } + + private Path getTempFile(String targetWorkPath, TaskAttemptContext taskAttemptContext) { + Path tempFile = new Path(targetWorkPath, ".distcp.tmp." + + taskAttemptContext.getTaskAttemptID().toString() + + "." 
+ System.currentTimeMillis()); + LOG.info("Creating temp file: {}", tempFile); + return tempFile; + } + /** * Create a source file and its DistCp working files with different checksum * to test the checksum validation for copying blocks in parallel. diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java index bf3165765d9cd..780d82df2bce3 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java @@ -261,7 +261,7 @@ private static void touchFile(String path, boolean createMultipleBlocks, System.out.println(fileStatus.getReplication()); } finally { - IOUtils.cleanup(null, outputStream); + IOUtils.cleanupWithLogger(null, outputStream); } } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformRecordInputFormat.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformRecordInputFormat.java new file mode 100644 index 0000000000000..e48471b860056 --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformRecordInputFormat.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.tools.mapred; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.task.JobContextImpl; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.tools.CopyListing; +import org.apache.hadoop.tools.CopyListingFileStatus; +import org.apache.hadoop.tools.DistCpContext; +import org.apache.hadoop.tools.DistCpOptions; +import org.apache.hadoop.tools.StubContext; +import org.apache.hadoop.tools.util.DistCpUtils; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + + +public class TestUniformRecordInputFormat { + private static MiniDFSCluster cluster; + private static final int N_FILES = 20; + private static final int SIZEOF_EACH_FILE = 1024; + private static final Random random = new Random(); + private 
static int totalFileSize = 0; + + private static final Credentials CREDENTIALS = new Credentials(); + + + @BeforeClass + public static void setup() throws Exception { + cluster = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1) + .format(true).build(); + totalFileSize = 0; + + for (int i=0; i sourceList = new ArrayList(); + sourceList.add(sourcePath); + return new DistCpOptions.Builder(sourceList, targetPath) + .maxMaps(nMaps) + .build(); + } + + private static int createFile(String path, int fileSize) throws Exception { + FileSystem fileSystem = null; + DataOutputStream outputStream = null; + try { + fileSystem = cluster.getFileSystem(); + outputStream = fileSystem.create(new Path(path), true, 0); + int size = (int) Math.ceil(fileSize + (1 - random.nextFloat()) * fileSize); + outputStream.write(new byte[size]); + return size; + } + finally { + IOUtils.cleanupWithLogger(null, fileSystem, outputStream); + } + } + + @AfterClass + public static void tearDown() { + cluster.shutdown(); + } + + public void testGetSplits(int nMaps) throws Exception { + DistCpContext context = new DistCpContext(getOptions(nMaps)); + Configuration configuration = new Configuration(); + configuration.set("mapred.map.tasks", String.valueOf(context.getMaxMaps())); + Path listFile = new Path(cluster.getFileSystem().getUri().toString() + + "/tmp/testGetSplits_2/fileList.seq"); + CopyListing.getCopyListing(configuration, CREDENTIALS, context) + .buildListing(listFile, context); + + JobContext jobContext = new JobContextImpl(configuration, new JobID()); + UniformRecordInputFormat uniformRecordInputFormat = new UniformRecordInputFormat(); + List splits + = uniformRecordInputFormat.getSplits(jobContext); + + long totalRecords = DistCpUtils.getLong(configuration, "mapred.number.of.records"); + long recordPerMap = totalRecords / nMaps; + + + checkSplits(listFile, splits); + + int doubleCheckedTotalSize = 0; + for (int i=0; i < splits.size(); ++i) { + InputSplit split = splits.get(i); + 
int currentSplitSize = 0; + RecordReader recordReader = + uniformRecordInputFormat.createRecordReader(split, null); + StubContext stubContext = new StubContext(jobContext.getConfiguration(), + recordReader, 0); + final TaskAttemptContext taskAttemptContext + = stubContext.getContext(); + recordReader.initialize(split, taskAttemptContext); + int recordCnt = 0; + while (recordReader.nextKeyValue()) { + recordCnt++; + Path sourcePath = recordReader.getCurrentValue().getPath(); + FileSystem fs = sourcePath.getFileSystem(configuration); + FileStatus fileStatus [] = fs.listStatus(sourcePath); + if (fileStatus.length > 1) { + continue; + } + currentSplitSize += fileStatus[0].getLen(); + } + + Assert.assertTrue(recordCnt == recordPerMap || recordCnt == (recordPerMap + 1)); + doubleCheckedTotalSize += currentSplitSize; + } + + Assert.assertEquals(totalFileSize, doubleCheckedTotalSize); + } + + private void checkSplits(Path listFile, List splits) throws IOException { + long lastEnd = 0; + + //Verify if each split's start is matching with the previous end and + //we are not missing anything + for (InputSplit split : splits) { + FileSplit fileSplit = (FileSplit) split; + long start = fileSplit.getStart(); + Assert.assertEquals(lastEnd, start); + lastEnd = start + fileSplit.getLength(); + } + + //Verify there is nothing more to read from the input file + SequenceFile.Reader reader + = new SequenceFile.Reader(cluster.getFileSystem().getConf(), + SequenceFile.Reader.file(listFile)); + + try { + reader.seek(lastEnd); + CopyListingFileStatus srcFileStatus = new CopyListingFileStatus(); + Text srcRelPath = new Text(); + Assert.assertFalse(reader.next(srcRelPath, srcFileStatus)); + } finally { + IOUtils.closeStream(reader); + } + } + + @Test + public void testGetSplits() throws Exception { + testGetSplits(9); + for (int i=1; i attrs = EnumSet.of(FileAttribute.ACL, + FileAttribute.GROUP, + FileAttribute.PERMISSION, + FileAttribute.TIMES, + FileAttribute.XATTR); + for (FileAttribute 
attr : attrs) { + intercept(FileNotFoundException.class, () -> + DistCpUtils.preserve(fs, dst, srcStatus, + EnumSet.of(attr), + false)); + } + + // but with the preservation flags only used + // in file creation, this does not happen + DistCpUtils.preserve(fs, dst, srcStatus, + EnumSet.of( + FileAttribute.BLOCKSIZE, + FileAttribute.CHECKSUMTYPE), + false); } @Test @@ -258,16 +340,8 @@ public void testPreserveAclsforDefaultACL() throws IOException { // FileStatus.equals only compares path field, must explicitly compare all // fields - Assert.assertEquals("getPermission", srcStatus.getPermission(), - dstStatus.getPermission()); - Assert.assertEquals("Owner", srcStatus.getOwner(), dstStatus.getOwner()); - Assert.assertEquals("Group", srcStatus.getGroup(), dstStatus.getGroup()); - Assert.assertEquals("AccessTime", srcStatus.getAccessTime(), - dstStatus.getAccessTime()); - Assert.assertEquals("ModificationTime", srcStatus.getModificationTime(), - dstStatus.getModificationTime()); - Assert.assertEquals("Replication", srcStatus.getReplication(), - dstStatus.getReplication()); + assertStatusEqual(fs, dest, srcStatus); + Assert.assertArrayEquals(en1.toArray(), dd2.toArray()); } @@ -486,12 +560,7 @@ public void testPreserveNothingOnFile() throws IOException { CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst)); // FileStatus.equals only compares path field, must explicitly compare all fields - Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission())); - Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner())); - Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup())); - Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime()); - Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime()); - Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication()); + assertStatusNotEqual(fs, dst, srcStatus); } @Test @@ -842,13 +911,7 @@ public void 
testPreserveOnFileUpwardRecursion() throws IOException { // FileStatus.equals only compares path field, must explicitly compare all fields // attributes of src -> f2 ? should be yes - CopyListingFileStatus f2Status = new CopyListingFileStatus(fs.getFileStatus(f2)); - Assert.assertTrue(srcStatus.getPermission().equals(f2Status.getPermission())); - Assert.assertTrue(srcStatus.getOwner().equals(f2Status.getOwner())); - Assert.assertTrue(srcStatus.getGroup().equals(f2Status.getGroup())); - Assert.assertTrue(srcStatus.getAccessTime() == f2Status.getAccessTime()); - Assert.assertTrue(srcStatus.getModificationTime() == f2Status.getModificationTime()); - Assert.assertTrue(srcStatus.getReplication() == f2Status.getReplication()); + assertStatusEqual(fs, f2, srcStatus); // attributes of src -> f1 ? should be no CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1)); @@ -1047,13 +1110,7 @@ public void testPreserveOnFileDownwardRecursion() throws IOException { // FileStatus.equals only compares path field, must explicitly compare all fields // attributes of src -> f0 ? should be yes - CopyListingFileStatus f0Status = new CopyListingFileStatus(fs.getFileStatus(f0)); - Assert.assertTrue(srcStatus.getPermission().equals(f0Status.getPermission())); - Assert.assertTrue(srcStatus.getOwner().equals(f0Status.getOwner())); - Assert.assertTrue(srcStatus.getGroup().equals(f0Status.getGroup())); - Assert.assertTrue(srcStatus.getAccessTime() == f0Status.getAccessTime()); - Assert.assertTrue(srcStatus.getModificationTime() == f0Status.getModificationTime()); - Assert.assertTrue(srcStatus.getReplication() == f0Status.getReplication()); + assertStatusEqual(fs, f0, srcStatus); // attributes of src -> f1 ? 
should be no CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1)); @@ -1303,6 +1360,15 @@ private static String getBase(String base) { return base + "/" + location; } + public static String createTestSetupWithOnlyFile(FileSystem fs, + FsPermission perm) throws IOException { + String location = String.valueOf(rand.nextLong()); + fs.mkdirs(new Path("/tmp1/" + location)); + fs.setPermission(new Path("/tmp1/" + location), perm); + createFile(fs, new Path("/tmp1/" + location + "/file")); + return "/tmp1/" + location + "/file"; + } + public static void delete(FileSystem fs, String path) { try { if (fs != null) { diff --git a/hadoop-tools/hadoop-distcp/src/test/resources/contract/hdfs.xml b/hadoop-tools/hadoop-distcp/src/test/resources/contract/hdfs.xml new file mode 100644 index 0000000000000..3c9396f79adec --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/test/resources/contract/hdfs.xml @@ -0,0 +1,139 @@ + + + + + + + fs.contract.test.root-tests-enabled + true + + + + fs.file.contract.test.random-seek-count + 500 + + + + fs.contract.is-case-sensitive + true + + + + fs.contract.supports-append + true + + + + fs.contract.supports-atomic-directory-delete + true + + + + fs.contract.supports-atomic-rename + true + + + + fs.contract.supports-block-locality + true + + + + fs.contract.supports-concat + true + + + + fs.contract.supports-seek + true + + + + fs.contract.rejects-seek-past-eof + true + + + + fs.contract.supports-strict-exceptions + true + + + + fs.contract.supports-unix-permissions + true + + + + fs.contract.rename-returns-false-if-dest-exists + true + + + + fs.contract.rename-returns-false-if-source-missing + true + + + + fs.contract.supports-settimes + true + + + + fs.contract.supports-getfilestatus + true + + + + fs.contract.supports-file-reference + true + + + + fs.contract.supports-content-check + true + + + + fs.contract.supports-unbuffer + true + + + + fs.contract.supports-hflush + true + + + + fs.contract.supports-hsync + true + 
+ + + fs.contract.metadata_updated_on_hsync + false + + + + + dfs.namenode.fs-limits.min-block-size + 0 + + diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml index 6ab4d8f96cafb..2d874c2b75c81 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../../hadoop-project hadoop-dynamometer-blockgen - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Dynamometer Block Listing Generator Apache Hadoop Dynamometer Block Listing Generator jar @@ -36,7 +36,7 @@ org.mockito - mockito-all + mockito-core test diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml index dbd23952dbc71..1a7210281f291 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project-dist - 3.3.0-SNAPSHOT + 3.3.6 ../../../hadoop-project-dist hadoop-dynamometer-dist - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Dynamometer Dist Apache Hadoop Dynamometer Dist jar diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml index 0995c485af185..156cc9cdf79d7 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../../hadoop-project hadoop-dynamometer-infra - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Dynamometer Cluster Simulator Apache Hadoop Dynamometer Cluster Simulator jar @@ -40,8 +40,8 @@ compile - com.google.guava - guava + org.apache.hadoop.thirdparty + 
hadoop-shaded-guava compile @@ -74,7 +74,7 @@ org.mockito - mockito-all + mockito-core test diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AMOptions.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AMOptions.java index 5eb16e523463d..77f8c2ce57224 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AMOptions.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AMOptions.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.tools.dynamometer; -import com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; import java.util.HashMap; import java.util.List; import java.util.Map; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AllowAllImpersonationProvider.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AllowAllImpersonationProvider.java index 490453f298633..14f52ac2b28a2 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AllowAllImpersonationProvider.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/AllowAllImpersonationProvider.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.tools.dynamometer; +import java.net.InetAddress; + import org.apache.hadoop.conf.Configured; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.ImpersonationProvider; @@ -32,8 +34,13 @@ public void init(String configurationPrefix) { // Do nothing } - public void authorize(UserGroupInformation user, String remoteAddress) { + public void 
authorize(UserGroupInformation user, InetAddress remoteAddress) { // Do nothing } + // Although this API was removed from the interface by HADOOP-17367, we need + // to keep it here because TestDynamometerInfra uses an old hadoop binary. + public void authorize(UserGroupInformation user, String remoteAddress) { + // Do nothing + } } diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java index 015a5a692a012..094721b98d58a 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.tools.dynamometer; -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; -import com.google.common.primitives.Ints; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.primitives.Ints; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java index 3c735dd1cfccd..3c8baec15c74f 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java @@ -17,11 +17,11 @@ */ package 
org.apache.hadoop.tools.dynamometer; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.util.Optional; import java.util.function.Supplier; import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java index bc127083a1da6..ee0810d6439d7 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.tools.dynamometer; -import com.google.common.base.Joiner; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/SimulatedDataNodes.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/SimulatedDataNodes.java index 520077e0823cb..0189d1fd1a1f1 100644 --- 
a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/SimulatedDataNodes.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/SimulatedDataNodes.java @@ -131,7 +131,7 @@ public int run(String[] args) throws Exception { + " block listing files; launching DataNodes accordingly."); mc.startDataNodes(getConf(), blockListFiles.size(), null, false, StartupOption.REGULAR, null, null, null, null, false, true, true, - null); + null, null, null); long startTime = Time.monotonicNow(); System.out.println("Waiting for DataNodes to connect to NameNode and " + "init storage directories."); diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/test/java/org/apache/hadoop/tools/dynamometer/TestDynamometerInfra.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/test/java/org/apache/hadoop/tools/dynamometer/TestDynamometerInfra.java index 056b7de70b870..7d219413ac167 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/test/java/org/apache/hadoop/tools/dynamometer/TestDynamometerInfra.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/test/java/org/apache/hadoop/tools/dynamometer/TestDynamometerInfra.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.tools.dynamometer; -import com.google.common.collect.Sets; +import org.apache.hadoop.thirdparty.com.google.common.collect.Sets; import java.util.Optional; import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeUnit; @@ -76,6 +76,7 @@ import org.junit.Assert; import org.junit.Assume; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,6 +112,7 @@ * property to point directly to a Hadoop tarball which is present locally and * no download will occur. 
*/ +@Ignore public class TestDynamometerInfra { private static final Logger LOG = @@ -122,7 +124,7 @@ public class TestDynamometerInfra { private static final String HADOOP_BIN_PATH_KEY = "dyno.hadoop.bin.path"; private static final String HADOOP_BIN_VERSION_KEY = "dyno.hadoop.bin.version"; - private static final String HADOOP_BIN_VERSION_DEFAULT = "3.1.3"; + private static final String HADOOP_BIN_VERSION_DEFAULT = "3.1.4"; private static final String FSIMAGE_FILENAME = "fsimage_0000000000000061740"; private static final String VERSION_FILENAME = "VERSION"; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml index 25a1f91bc2cc7..d284b20e200b0 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../../hadoop-project hadoop-dynamometer-workload - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Dynamometer Workload Simulator Apache Hadoop Dynamometer Workload Simulator jar @@ -39,6 +39,11 @@ junit test + + org.mockito + mockito-core + test + org.apache.hadoop hadoop-minicluster diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java index 33dc81d5a24a1..64b8dc28e67d0 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java @@ -17,7 +17,7 @@ */ package 
org.apache.hadoop.tools.dynamometer.workloadgenerator; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.io.IOException; import java.io.OutputStream; import java.net.URI; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java index 9a1aa243127e4..e649b69be73a7 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit; -import com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; import java.io.IOException; import java.text.DateFormat; import java.text.ParseException; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java index 4dad215409c81..c46f720bd0a87 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java @@ -17,7 +17,7 @@ */ package 
org.apache.hadoop.tools.dynamometer.workloadgenerator.audit; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import java.util.Optional; import java.util.function.Function; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayThread.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayThread.java index 274c5a763bd0f..14e8c9cb82f16 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayThread.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayThread.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit; -import com.google.common.base.Splitter; +import org.apache.hadoop.thirdparty.com.google.common.base.Splitter; import org.apache.hadoop.tools.dynamometer.workloadgenerator.WorkloadDriver; import java.io.IOException; import java.net.URI; diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/test/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/TestWorkloadGenerator.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/test/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/TestWorkloadGenerator.java index 0162352f08f60..f3edc24b70444 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/test/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/TestWorkloadGenerator.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/test/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/TestWorkloadGenerator.java @@ -24,6 +24,7 @@ import 
org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditLogHiveTableParser; import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper; import java.io.IOException; +import java.net.InetAddress; import java.nio.charset.StandardCharsets; import org.apache.hadoop.conf.Configuration; @@ -115,7 +116,7 @@ public void init(String configurationPrefix) { // Do nothing } - public void authorize(UserGroupInformation user, String remoteAddress) + public void authorize(UserGroupInformation user, InetAddress remoteAddress) throws AuthorizationException { try { if (!user.getRealUser().getShortUserName() diff --git a/hadoop-tools/hadoop-dynamometer/pom.xml b/hadoop-tools/hadoop-dynamometer/pom.xml index dd96039295a2c..53bec3f560f0c 100644 --- a/hadoop-tools/hadoop-dynamometer/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-dynamometer - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Dynamometer Apache Hadoop Dynamometer pom diff --git a/hadoop-tools/hadoop-extras/pom.xml b/hadoop-tools/hadoop-extras/pom.xml index f93b0e15aa009..fc58089c48c6c 100644 --- a/hadoop-tools/hadoop-extras/pom.xml +++ b/hadoop-tools/hadoop-extras/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-extras - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Extras Apache Hadoop Extras jar diff --git a/hadoop-tools/hadoop-fs2img/pom.xml b/hadoop-tools/hadoop-fs2img/pom.xml index 6a8c43f1ecbf6..39e699a83ddeb 100644 --- a/hadoop-tools/hadoop-fs2img/pom.xml +++ b/hadoop-tools/hadoop-fs2img/pom.xml @@ -17,12 +17,12 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project org.apache.hadoop hadoop-fs2img - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Image Generation Tool Apache Hadoop Image Generation Tool jar @@ -87,10 +87,9 @@
    - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${basedir}/dev-support/findbugs-exclude.xml Max diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java index cb9614058936f..9c8dc1f2304c1 100644 --- a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java @@ -35,7 +35,7 @@ import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicLong; -import com.google.common.base.Charsets; +import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.hadoop.thirdparty.protobuf.CodedOutputStream; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java index 1932f28e3d95b..ad3474c476dcb 100644 --- a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java @@ -19,7 +19,7 @@ import java.io.IOException; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.protobuf.ByteString; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-tools/hadoop-gridmix/pom.xml b/hadoop-tools/hadoop-gridmix/pom.xml index b9e433bd08180..a5b15a9c28f59 100644 --- a/hadoop-tools/hadoop-gridmix/pom.xml +++ b/hadoop-tools/hadoop-gridmix/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project 
hadoop-gridmix - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Gridmix Apache Hadoop Gridmix jar @@ -123,10 +123,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${basedir}/dev-support/findbugs-exclude.xml Max diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReadRecordFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReadRecordFactory.java index 2cb806e6a4bfc..f95c4b36a5cc1 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReadRecordFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReadRecordFactory.java @@ -79,7 +79,7 @@ public float getProgress() throws IOException { @Override public void close() throws IOException { - IOUtils.cleanup(null, src); + IOUtils.cleanupWithLogger(null, src); factory.close(); } } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java index bdbfc3beec7e3..fe3b5d36d9841 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java @@ -112,7 +112,7 @@ public void run() { } catch (InterruptedException e) { // exit thread; ignore any jobs remaining in the trace } finally { - IOUtils.cleanup(null, jobProducer); + IOUtils.cleanupWithLogger(null, jobProducer); } } } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java index e8c7d61a64d4c..cb05ab63f1c07 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java +++ 
b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java @@ -143,7 +143,7 @@ public void run() { } catch (InterruptedException e) { return; } finally { - IOUtils.cleanup(null, jobProducer); + IOUtils.cleanupWithLogger(null, jobProducer); } } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java index bd15c2999e59f..4e7fc9c2bbd80 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java @@ -247,7 +247,7 @@ public void run() { LOG.error("[STRESS] Interrupted in the main block!", e); return; } finally { - IOUtils.cleanup(null, jobProducer); + IOUtils.cleanupWithLogger(null, jobProducer); } } } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java index bfbf516611110..2fb0becebf29b 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.ArrayList; -import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.gridmix.Progressive; import org.apache.hadoop.tools.rumen.ResourceUsageMetrics; diff --git a/hadoop-tools/hadoop-kafka/pom.xml 
b/hadoop-tools/hadoop-kafka/pom.xml index a227ad4efd029..84167fee11af3 100644 --- a/hadoop-tools/hadoop-kafka/pom.xml +++ b/hadoop-tools/hadoop-kafka/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-kafka - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Kafka Library support This module contains code to support integration with Kafka. @@ -39,10 +39,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true Max @@ -98,6 +97,10 @@ org.xerial.snappy snappy-java + + net.jpountz.lz4 + lz4 + diff --git a/hadoop-tools/hadoop-kafka/src/main/java/org/apache/hadoop/metrics2/sink/KafkaSink.java b/hadoop-tools/hadoop-kafka/src/main/java/org/apache/hadoop/metrics2/sink/KafkaSink.java index c83552b94d311..0856d0f4e0eeb 100644 --- a/hadoop-tools/hadoop-kafka/src/main/java/org/apache/hadoop/metrics2/sink/KafkaSink.java +++ b/hadoop-tools/hadoop-kafka/src/main/java/org/apache/hadoop/metrics2/sink/KafkaSink.java @@ -18,7 +18,7 @@ package org.apache.hadoop.metrics2.sink; -import com.google.common.base.Strings; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.commons.configuration2.SubsetConfiguration; diff --git a/hadoop-tools/hadoop-kafka/src/test/java/org/apache/hadoop/metrics2/impl/TestKafkaMetrics.java b/hadoop-tools/hadoop-kafka/src/test/java/org/apache/hadoop/metrics2/impl/TestKafkaMetrics.java index 8d74bf247567f..fb19172359a5f 100644 --- a/hadoop-tools/hadoop-kafka/src/test/java/org/apache/hadoop/metrics2/impl/TestKafkaMetrics.java +++ b/hadoop-tools/hadoop-kafka/src/test/java/org/apache/hadoop/metrics2/impl/TestKafkaMetrics.java @@ -18,7 +18,7 @@ package org.apache.hadoop.metrics2.impl; -import com.google.common.collect.Lists; +import org.apache.hadoop.thirdparty.com.google.common.collect.Lists; import 
org.apache.commons.configuration2.SubsetConfiguration; import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricType; diff --git a/hadoop-tools/hadoop-openstack/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-openstack/dev-support/findbugs-exclude.xml deleted file mode 100644 index cfb75c73081b5..0000000000000 --- a/hadoop-tools/hadoop-openstack/dev-support/findbugs-exclude.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - - - - - - - - - - - - - diff --git a/hadoop-tools/hadoop-openstack/pom.xml b/hadoop-tools/hadoop-openstack/pom.xml index 0236fbb897eb1..e125bd442602f 100644 --- a/hadoop-tools/hadoop-openstack/pom.xml +++ b/hadoop-tools/hadoop-openstack/pom.xml @@ -19,16 +19,17 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-openstack - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop OpenStack support - This module contains code to support integration with OpenStack. - Currently this consists of a filesystem client to read data from - and write data to an OpenStack Swift object store. + This module used to contain code to support integration with OpenStack. + It has been deleted as unsupported; the JAR is still published so as to + not break applications which declare an explicit maven/ivy/SBT dependency + on the module. 
jar @@ -37,105 +38,18 @@ true - - - tests-off - - - src/test/resources/auth-keys.xml - - - - true - - - - tests-on - - - src/test/resources/auth-keys.xml - - - - false - - - - - - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true - ${basedir}/dev-support/findbugs-exclude.xml - Max - - org.apache.maven.plugins - maven-dependency-plugin - - - deplist - compile - - list - - - - ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt - - - - - - - org.apache.hadoop - hadoop-common - compile - - - org.apache.hadoop - hadoop-common - test - test-jar - - - org.apache.hadoop - hadoop-annotations - compile - - - - org.apache.httpcomponents - httpcore - - - commons-logging - commons-logging - compile - - - junit - junit - provided - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-databind - - diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/ApiKeyAuthenticationRequest.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/ApiKeyAuthenticationRequest.java deleted file mode 100644 index e25d17d2fb898..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/ApiKeyAuthenticationRequest.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -import com.fasterxml.jackson.annotation.JsonProperty; - -/** - * Class that represents authentication request to Openstack Keystone. - * Contains basic authentication information. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS - */ -public class ApiKeyAuthenticationRequest extends AuthenticationRequest { - /** - * Credentials for login - */ - private ApiKeyCredentials apiKeyCredentials; - - /** - * API key auth - * @param tenantName tenant - * @param apiKeyCredentials credentials - */ - public ApiKeyAuthenticationRequest(String tenantName, ApiKeyCredentials apiKeyCredentials) { - this.tenantName = tenantName; - this.apiKeyCredentials = apiKeyCredentials; - } - - /** - * @return credentials for login into Keystone - */ - @JsonProperty("RAX-KSKEY:apiKeyCredentials") - public ApiKeyCredentials getApiKeyCredentials() { - return apiKeyCredentials; - } - - /** - * @param apiKeyCredentials credentials for login into Keystone - */ - public void setApiKeyCredentials(ApiKeyCredentials apiKeyCredentials) { - this.apiKeyCredentials = apiKeyCredentials; - } - - @Override - public String toString() { - return "Auth as " + - "tenant '" + tenantName + "' " - + apiKeyCredentials; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/ApiKeyCredentials.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/ApiKeyCredentials.java deleted file mode 100644 index 412ce81daa300..0000000000000 --- 
a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/ApiKeyCredentials.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - - -/** - * Describes credentials to log in Swift using Keystone authentication. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. 
- */ -public class ApiKeyCredentials { - /** - * user login - */ - private String username; - - /** - * user password - */ - private String apikey; - - /** - * default constructor - */ - public ApiKeyCredentials() { - } - - /** - * @param username user login - * @param apikey user api key - */ - public ApiKeyCredentials(String username, String apikey) { - this.username = username; - this.apikey = apikey; - } - - /** - * @return user api key - */ - public String getApiKey() { - return apikey; - } - - /** - * @param apikey user api key - */ - public void setApiKey(String apikey) { - this.apikey = apikey; - } - - /** - * @return login - */ - public String getUsername() { - return username; - } - - /** - * @param username login - */ - public void setUsername(String username) { - this.username = username; - } - - @Override - public String toString() { - return "user " + - "'" + username + '\'' + - " with key of length " + ((apikey == null) ? 0 : apikey.length()); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationRequest.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationRequest.java deleted file mode 100644 index a2a3b55e76f2e..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationRequest.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * Class that represents authentication request to Openstack Keystone. - * Contains basic authentication information. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -public class AuthenticationRequest { - - /** - * tenant name - */ - protected String tenantName; - - public AuthenticationRequest() { - } - - /** - * @return tenant name for Keystone authorization - */ - public String getTenantName() { - return tenantName; - } - - /** - * @param tenantName tenant name for authorization - */ - public void setTenantName(String tenantName) { - this.tenantName = tenantName; - } - - @Override - public String toString() { - return "AuthenticationRequest{" + - "tenantName='" + tenantName + '\'' + - '}'; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationRequestWrapper.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationRequestWrapper.java deleted file mode 100644 index f30e90dad384d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationRequestWrapper.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * This class is used for correct hierarchy mapping of - * Keystone authentication model and java code. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -public class AuthenticationRequestWrapper { - /** - * authentication request - */ - private AuthenticationRequest auth; - - /** - * default constructor used for json parsing - */ - public AuthenticationRequestWrapper() { - } - - /** - * @param auth authentication requests - */ - public AuthenticationRequestWrapper(AuthenticationRequest auth) { - this.auth = auth; - } - - /** - * @return authentication request - */ - public AuthenticationRequest getAuth() { - return auth; - } - - /** - * @param auth authentication request - */ - public void setAuth(AuthenticationRequest auth) { - this.auth = auth; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationResponse.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationResponse.java deleted file mode 100644 index f09ec0c5fb99e..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationResponse.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -import org.apache.hadoop.fs.swift.auth.entities.AccessToken; -import org.apache.hadoop.fs.swift.auth.entities.Catalog; -import org.apache.hadoop.fs.swift.auth.entities.User; - -import java.util.List; - -/** - * Response from KeyStone deserialized into AuthenticationResponse class. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. 
- */ -public class AuthenticationResponse { - private Object metadata; - private List serviceCatalog; - private User user; - private AccessToken token; - - public Object getMetadata() { - return metadata; - } - - public void setMetadata(Object metadata) { - this.metadata = metadata; - } - - public List getServiceCatalog() { - return serviceCatalog; - } - - public void setServiceCatalog(List serviceCatalog) { - this.serviceCatalog = serviceCatalog; - } - - public User getUser() { - return user; - } - - public void setUser(User user) { - this.user = user; - } - - public AccessToken getToken() { - return token; - } - - public void setToken(AccessToken token) { - this.token = token; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationWrapper.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationWrapper.java deleted file mode 100644 index 6f67a16715e77..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/AuthenticationWrapper.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * This class is used for correct hierarchy mapping of - * Keystone authentication model and java code - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -public class AuthenticationWrapper { - - /** - * authentication response field - */ - private AuthenticationResponse access; - - /** - * @return authentication response - */ - public AuthenticationResponse getAccess() { - return access; - } - - /** - * @param access sets authentication response - */ - public void setAccess(AuthenticationResponse access) { - this.access = access; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/KeyStoneAuthRequest.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/KeyStoneAuthRequest.java deleted file mode 100644 index c3abbac88f452..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/KeyStoneAuthRequest.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * Class that represents authentication to OpenStack Keystone. - * Contains basic authentication information. - * Used when {@link ApiKeyAuthenticationRequest} is not applicable. - * (problem with different Keystone installations/versions/modifications) - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -public class KeyStoneAuthRequest extends AuthenticationRequest { - - /** - * Credentials for Keystone authentication - */ - private KeystoneApiKeyCredentials apiAccessKeyCredentials; - - /** - * @param tenant Keystone tenant name for authentication - * @param apiAccessKeyCredentials Credentials for authentication - */ - public KeyStoneAuthRequest(String tenant, KeystoneApiKeyCredentials apiAccessKeyCredentials) { - this.apiAccessKeyCredentials = apiAccessKeyCredentials; - this.tenantName = tenant; - } - - public KeystoneApiKeyCredentials getApiAccessKeyCredentials() { - return apiAccessKeyCredentials; - } - - public void setApiAccessKeyCredentials(KeystoneApiKeyCredentials apiAccessKeyCredentials) { - this.apiAccessKeyCredentials = apiAccessKeyCredentials; - } - - @Override - public String toString() { - return "KeyStoneAuthRequest as " + - "tenant '" + tenantName + "' " - + apiAccessKeyCredentials; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/KeystoneApiKeyCredentials.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/KeystoneApiKeyCredentials.java deleted file mode 100644 index 75202b3a6d23a..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/KeystoneApiKeyCredentials.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * Class for Keystone authentication. - * Used when {@link ApiKeyCredentials} is not applicable - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -public class KeystoneApiKeyCredentials { - - /** - * User access key - */ - private String accessKey; - - /** - * User access secret - */ - private String secretKey; - - public KeystoneApiKeyCredentials(String accessKey, String secretKey) { - this.accessKey = accessKey; - this.secretKey = secretKey; - } - - public String getAccessKey() { - return accessKey; - } - - public void setAccessKey(String accessKey) { - this.accessKey = accessKey; - } - - public String getSecretKey() { - return secretKey; - } - - public void setSecretKey(String secretKey) { - this.secretKey = secretKey; - } - - @Override - public String toString() { - return "user " + - "'" + accessKey + '\'' + - " with key of length " + ((secretKey == null) ? 
0 : secretKey.length()); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/PasswordAuthenticationRequest.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/PasswordAuthenticationRequest.java deleted file mode 100644 index ee519f3f8daa1..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/PasswordAuthenticationRequest.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * Class that represents authentication request to Openstack Keystone. - * Contains basic authentication information. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. 
- */ -public class PasswordAuthenticationRequest extends AuthenticationRequest { - /** - * Credentials for login - */ - private PasswordCredentials passwordCredentials; - - /** - * @param tenantName tenant - * @param passwordCredentials password credentials - */ - public PasswordAuthenticationRequest(String tenantName, PasswordCredentials passwordCredentials) { - this.tenantName = tenantName; - this.passwordCredentials = passwordCredentials; - } - - /** - * @return credentials for login into Keystone - */ - public PasswordCredentials getPasswordCredentials() { - return passwordCredentials; - } - - /** - * @param passwordCredentials credentials for login into Keystone - */ - public void setPasswordCredentials(PasswordCredentials passwordCredentials) { - this.passwordCredentials = passwordCredentials; - } - - @Override - public String toString() { - return "Authenticate as " + - "tenant '" + tenantName + "' " - + passwordCredentials; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/PasswordCredentials.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/PasswordCredentials.java deleted file mode 100644 index 40d8c77feb49d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/PasswordCredentials.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - - -/** - * Describes credentials to log in Swift using Keystone authentication. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -public class PasswordCredentials { - /** - * user login - */ - private String username; - - /** - * user password - */ - private String password; - - /** - * default constructor - */ - public PasswordCredentials() { - } - - /** - * @param username user login - * @param password user password - */ - public PasswordCredentials(String username, String password) { - this.username = username; - this.password = password; - } - - /** - * @return user password - */ - public String getPassword() { - return password; - } - - /** - * @param password user password - */ - public void setPassword(String password) { - this.password = password; - } - - /** - * @return login - */ - public String getUsername() { - return username; - } - - /** - * @param username login - */ - public void setUsername(String username) { - this.username = username; - } - - @Override - public String toString() { - return "PasswordCredentials{username='" + username + "'}"; - } -} - diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/Roles.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/Roles.java deleted file mode 100644 index 57f2fa6d45108..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/Roles.java +++ /dev/null @@ -1,97 +0,0 @@ 
-/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth; - -/** - * Describes user roles in Openstack system. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. 
- */ -public class Roles { - /** - * role name - */ - private String name; - - /** - * This field user in RackSpace auth model - */ - private String id; - - /** - * This field user in RackSpace auth model - */ - private String description; - - /** - * Service id used in HP public Cloud - */ - private String serviceId; - - /** - * Service id used in HP public Cloud - */ - private String tenantId; - - /** - * @return role name - */ - public String getName() { - return name; - } - - /** - * @param name role name - */ - public void setName(String name) { - this.name = name; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public String getServiceId() { - return serviceId; - } - - public void setServiceId(String serviceId) { - this.serviceId = serviceId; - } - - public String getTenantId() { - return tenantId; - } - - public void setTenantId(String tenantId) { - this.tenantId = tenantId; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/AccessToken.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/AccessToken.java deleted file mode 100644 index b38d4660e5ae6..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/AccessToken.java +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth.entities; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -/** - * Access token representation of Openstack Keystone authentication. - * Class holds token id, tenant and expiration time. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - * - * Example: - *
    - * "token" : {
    - *   "RAX-AUTH:authenticatedBy" : [ "APIKEY" ],
    - *   "expires" : "2013-07-12T05:19:24.685-05:00",
    - *   "id" : "8bbea4215113abdab9d4c8fb0d37",
    - *   "tenant" : { "id" : "01011970",
    - *   "name" : "77777"
    - *   }
    - *  }
    - * 
    - */ -@JsonIgnoreProperties(ignoreUnknown = true) - -public class AccessToken { - /** - * token expiration time - */ - private String expires; - /** - * token id - */ - private String id; - /** - * tenant name for whom id is attached - */ - private Tenant tenant; - - /** - * @return token expiration time - */ - public String getExpires() { - return expires; - } - - /** - * @param expires the token expiration time - */ - public void setExpires(String expires) { - this.expires = expires; - } - - /** - * @return token value - */ - public String getId() { - return id; - } - - /** - * @param id token value - */ - public void setId(String id) { - this.id = id; - } - - /** - * @return tenant authenticated in Openstack Keystone - */ - public Tenant getTenant() { - return tenant; - } - - /** - * @param tenant tenant authenticated in Openstack Keystone - */ - public void setTenant(Tenant tenant) { - this.tenant = tenant; - } - - @Override - public String toString() { - return "AccessToken{" + - "id='" + id + '\'' + - ", tenant=" + tenant + - ", expires='" + expires + '\'' + - '}'; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Catalog.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Catalog.java deleted file mode 100644 index 76e161b064277..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Catalog.java +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth.entities; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import java.util.List; - -/** - * Describes Openstack Swift REST endpoints. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -@JsonIgnoreProperties(ignoreUnknown = true) - -public class Catalog { - /** - * List of valid swift endpoints - */ - private List endpoints; - /** - * endpoint links are additional information description - * which aren't used in Hadoop and Swift integration scope - */ - private List endpoints_links; - /** - * Openstack REST service name. In our case name = "keystone" - */ - private String name; - - /** - * Type of REST service. 
In our case type = "identity" - */ - private String type; - - /** - * @return List of endpoints - */ - public List getEndpoints() { - return endpoints; - } - - /** - * @param endpoints list of endpoints - */ - public void setEndpoints(List endpoints) { - this.endpoints = endpoints; - } - - /** - * @return list of endpoint links - */ - public List getEndpoints_links() { - return endpoints_links; - } - - /** - * @param endpoints_links list of endpoint links - */ - public void setEndpoints_links(List endpoints_links) { - this.endpoints_links = endpoints_links; - } - - /** - * @return name of Openstack REST service - */ - public String getName() { - return name; - } - - /** - * @param name of Openstack REST service - */ - public void setName(String name) { - this.name = name; - } - - /** - * @return type of Openstack REST service - */ - public String getType() { - return type; - } - - /** - * @param type of REST service - */ - public void setType(String type) { - this.type = type; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Endpoint.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Endpoint.java deleted file mode 100644 index b1cbf2acc7b4a..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Endpoint.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth.entities; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import java.net.URI; - -/** - * Openstack Swift endpoint description. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -@JsonIgnoreProperties(ignoreUnknown = true) - -public class Endpoint { - - /** - * endpoint id - */ - private String id; - - /** - * Keystone admin URL - */ - private URI adminURL; - - /** - * Keystone internal URL - */ - private URI internalURL; - - /** - * public accessible URL - */ - private URI publicURL; - - /** - * public accessible URL#2 - */ - private URI publicURL2; - - /** - * Openstack region name - */ - private String region; - - /** - * This field is used in RackSpace authentication model - */ - private String tenantId; - - /** - * This field user in RackSpace auth model - */ - private String versionId; - - /** - * This field user in RackSpace auth model - */ - private String versionInfo; - - /** - * This field user in RackSpace auth model - */ - private String versionList; - - - /** - * @return endpoint id - */ - public String getId() { - return id; - } - - /** - * @param id endpoint id - */ - public void setId(String id) { - this.id = id; - } - - /** - * @return Keystone admin URL - */ - public URI getAdminURL() { - return adminURL; - } - - /** - * @param adminURL Keystone admin URL - */ - public void setAdminURL(URI adminURL) { - this.adminURL = adminURL; - } - - /** - * @return internal Keystone - */ - public URI getInternalURL() 
{ - return internalURL; - } - - /** - * @param internalURL Keystone internal URL - */ - public void setInternalURL(URI internalURL) { - this.internalURL = internalURL; - } - - /** - * @return public accessible URL - */ - public URI getPublicURL() { - return publicURL; - } - - /** - * @param publicURL public URL - */ - public void setPublicURL(URI publicURL) { - this.publicURL = publicURL; - } - - public URI getPublicURL2() { - return publicURL2; - } - - public void setPublicURL2(URI publicURL2) { - this.publicURL2 = publicURL2; - } - - /** - * @return Openstack region name - */ - public String getRegion() { - return region; - } - - /** - * @param region Openstack region name - */ - public void setRegion(String region) { - this.region = region; - } - - public String getTenantId() { - return tenantId; - } - - public void setTenantId(String tenantId) { - this.tenantId = tenantId; - } - - public String getVersionId() { - return versionId; - } - - public void setVersionId(String versionId) { - this.versionId = versionId; - } - - public String getVersionInfo() { - return versionInfo; - } - - public void setVersionInfo(String versionInfo) { - this.versionInfo = versionInfo; - } - - public String getVersionList() { - return versionList; - } - - public void setVersionList(String versionList) { - this.versionList = versionList; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Tenant.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Tenant.java deleted file mode 100644 index 405d2c853682c..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/Tenant.java +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth.entities; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -/** - * Tenant is abstraction in Openstack which describes all account - * information and user privileges in system. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ -@JsonIgnoreProperties(ignoreUnknown = true) -public class Tenant { - - /** - * tenant id - */ - private String id; - - /** - * tenant short description which Keystone returns - */ - private String description; - - /** - * boolean enabled user account or no - */ - private boolean enabled; - - /** - * tenant human readable name - */ - private String name; - - /** - * @return tenant name - */ - public String getName() { - return name; - } - - /** - * @param name tenant name - */ - public void setName(String name) { - this.name = name; - } - - /** - * @return true if account enabled and false otherwise - */ - public boolean isEnabled() { - return enabled; - } - - /** - * @param enabled enable or disable - */ - public void setEnabled(boolean enabled) { - this.enabled = enabled; - } - - /** - * @return account short description - */ - public String getDescription() { - return description; - } - - /** - * @param description set account description - */ - public void setDescription(String description) { - this.description = description; - } - - 
/** - * @return set tenant id - */ - public String getId() { - return id; - } - - /** - * @param id tenant id - */ - public void setId(String id) { - this.id = id; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/User.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/User.java deleted file mode 100644 index da3bac20f2b42..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/auth/entities/User.java +++ /dev/null @@ -1,132 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.auth.entities; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import org.apache.hadoop.fs.swift.auth.Roles; - -import java.util.List; - -/** - * Describes user entity in Keystone - * In different Swift installations User is represented differently. - * To avoid any JSON deserialization failures this entity is ignored. - * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. 
- */ -@JsonIgnoreProperties(ignoreUnknown = true) -public class User { - - /** - * user id in Keystone - */ - private String id; - - /** - * user human readable name - */ - private String name; - - /** - * user roles in Keystone - */ - private List roles; - - /** - * links to user roles - */ - private List roles_links; - - /** - * human readable username in Keystone - */ - private String username; - - /** - * @return user id - */ - public String getId() { - return id; - } - - /** - * @param id user id - */ - public void setId(String id) { - this.id = id; - } - - - /** - * @return user name - */ - public String getName() { - return name; - } - - - /** - * @param name user name - */ - public void setName(String name) { - this.name = name; - } - - /** - * @return user roles - */ - public List getRoles() { - return roles; - } - - /** - * @param roles sets user roles - */ - public void setRoles(List roles) { - this.roles = roles; - } - - /** - * @return user roles links - */ - public List getRoles_links() { - return roles_links; - } - - /** - * @param roles_links user roles links - */ - public void setRoles_links(List roles_links) { - this.roles_links = roles_links; - } - - /** - * @return username - */ - public String getUsername() { - return username; - } - - /** - * @param username human readable user name - */ - public void setUsername(String username) { - this.username = username; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftAuthenticationFailedException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftAuthenticationFailedException.java deleted file mode 100644 index fdb9a3973ad08..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftAuthenticationFailedException.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -import org.apache.http.HttpResponse; - -import java.net.URI; - -/** - * An exception raised when an authentication request was rejected - */ -public class SwiftAuthenticationFailedException extends SwiftInvalidResponseException { - - public SwiftAuthenticationFailedException(String message, - int statusCode, - String operation, - URI uri) { - super(message, statusCode, operation, uri); - } - - public SwiftAuthenticationFailedException(String message, - String operation, - URI uri, - HttpResponse resp) { - super(message, operation, uri, resp); - } - - @Override - public String exceptionTitle() { - return "Authentication Failure"; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftBadRequestException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftBadRequestException.java deleted file mode 100644 index f5b2abde0a9a8..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftBadRequestException.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -import org.apache.http.HttpResponse; - -import java.net.URI; - -/** - * Thrown to indicate that data locality can't be calculated or requested path is incorrect. - * Data locality can't be calculated if Openstack Swift version is old. - */ -public class SwiftBadRequestException extends SwiftInvalidResponseException { - - public SwiftBadRequestException(String message, - String operation, - URI uri, - HttpResponse resp) { - super(message, operation, uri, resp); - } - - public SwiftBadRequestException(String message, - int statusCode, - String operation, - URI uri) { - super(message, statusCode, operation, uri); - } - - @Override - public String exceptionTitle() { - return "BadRequest"; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConfigurationException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConfigurationException.java deleted file mode 100644 index 3651f2e050557..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConfigurationException.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more 
contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -/** - * Exception raised to indicate there is some problem with how the Swift FS - * is configured - */ -public class SwiftConfigurationException extends SwiftException { - public SwiftConfigurationException(String message) { - super(message); - } - - public SwiftConfigurationException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConnectionClosedException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConnectionClosedException.java deleted file mode 100644 index eeaf8a5606fb3..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConnectionClosedException.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.swift.exceptions; - -/** - * Exception raised when an attempt is made to use a closed stream - */ -public class SwiftConnectionClosedException extends SwiftException { - - public static final String MESSAGE = - "Connection to Swift service has been closed"; - - public SwiftConnectionClosedException() { - super(MESSAGE); - } - - public SwiftConnectionClosedException(String reason) { - super(MESSAGE + ": " + reason); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConnectionException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConnectionException.java deleted file mode 100644 index 74607b8915a55..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftConnectionException.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -/** - * Thrown to indicate that connection is lost or failed to be made - */ -public class SwiftConnectionException extends SwiftException { - public SwiftConnectionException() { - } - - public SwiftConnectionException(String message) { - super(message); - } - - public SwiftConnectionException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftException.java deleted file mode 100644 index eba674fee5d55..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftException.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -import java.io.IOException; - -/** - * A Swift-specific exception -subclasses exist - * for various specific problems. - */ -public class SwiftException extends IOException { - public SwiftException() { - super(); - } - - public SwiftException(String message) { - super(message); - } - - public SwiftException(String message, Throwable cause) { - super(message, cause); - } - - public SwiftException(Throwable cause) { - super(cause); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftInternalStateException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftInternalStateException.java deleted file mode 100644 index 0f3e5d988497f..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftInternalStateException.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.exceptions; - -/** - * The internal state of the Swift client is wrong -presumably a sign - * of some bug - */ -public class SwiftInternalStateException extends SwiftException { - - public SwiftInternalStateException(String message) { - super(message); - } - - public SwiftInternalStateException(String message, Throwable cause) { - super(message, cause); - } - - public SwiftInternalStateException(Throwable cause) { - super(cause); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftInvalidResponseException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftInvalidResponseException.java deleted file mode 100644 index e90e57519b939..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftInvalidResponseException.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.exceptions; - -import org.apache.hadoop.fs.swift.util.HttpResponseUtils; -import org.apache.http.HttpResponse; - -import java.io.IOException; -import java.net.URI; - -/** - * Exception raised when the HTTP code is invalid. The status code, - * method name and operation URI are all in the response. - */ -public class SwiftInvalidResponseException extends SwiftConnectionException { - - public final int statusCode; - public final String operation; - public final URI uri; - public final String body; - - public SwiftInvalidResponseException(String message, - int statusCode, - String operation, - URI uri) { - super(message); - this.statusCode = statusCode; - this.operation = operation; - this.uri = uri; - this.body = ""; - } - - public SwiftInvalidResponseException(String message, - String operation, - URI uri, - HttpResponse resp) { - super(message); - this.statusCode = resp.getStatusLine().getStatusCode(); - this.operation = operation; - this.uri = uri; - String bodyAsString; - try { - bodyAsString = HttpResponseUtils.getResponseBodyAsString(resp); - if (bodyAsString == null) { - bodyAsString = ""; - } - } catch (IOException e) { - bodyAsString = ""; - } - this.body = bodyAsString; - } - - public int getStatusCode() { - return statusCode; - } - - public String getOperation() { - return operation; - } - - public URI getUri() { - return uri; - } - - public String getBody() { - return body; - } - - /** - * Override point: title of an exception -this is used in the - * toString() method. 
- * @return the new exception title - */ - public String exceptionTitle() { - return "Invalid Response"; - } - - /** - * Build a description that includes the exception title, the URI, - * the message, the status code -and any body of the response - * @return the string value for display - */ - @Override - public String toString() { - StringBuilder msg = new StringBuilder(); - msg.append(exceptionTitle()); - msg.append(": "); - msg.append(getMessage()); - msg.append(" "); - msg.append(operation); - msg.append(" "); - msg.append(uri); - msg.append(" => "); - msg.append(statusCode); - if (body != null && !body.isEmpty()) { - msg.append(" : "); - msg.append(body); - } - - return msg.toString(); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftJsonMarshallingException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftJsonMarshallingException.java deleted file mode 100644 index 0b078d7f43337..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftJsonMarshallingException.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.exceptions; - -/** - * Exception raised when the J/O mapping fails. - */ -public class SwiftJsonMarshallingException extends SwiftException { - - public SwiftJsonMarshallingException(String message) { - super(message); - } - - public SwiftJsonMarshallingException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftOperationFailedException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftOperationFailedException.java deleted file mode 100644 index 8f78f70f44b5d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftOperationFailedException.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -/** - * Used to relay exceptions upstream from the inner implementation - * to the public API, where it is downgraded to a log+failure. 
- * Making it visible internally aids testing - */ -public class SwiftOperationFailedException extends SwiftException { - - public SwiftOperationFailedException(String message) { - super(message); - } - - public SwiftOperationFailedException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftThrottledRequestException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftThrottledRequestException.java deleted file mode 100644 index 1e7ca67d1b00f..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftThrottledRequestException.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.exceptions; - -import org.apache.http.HttpResponse; - -import java.net.URI; - -/** - * Exception raised if a Swift endpoint returned a HTTP response indicating - * the caller is being throttled. 
- */ -public class SwiftThrottledRequestException extends - SwiftInvalidResponseException { - public SwiftThrottledRequestException(String message, - String operation, - URI uri, - HttpResponse resp) { - super(message, operation, uri, resp); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftUnsupportedFeatureException.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftUnsupportedFeatureException.java deleted file mode 100644 index b7e011c59ab7c..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/exceptions/SwiftUnsupportedFeatureException.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.exceptions; - -/** - * Exception raised on an unsupported feature in the FS API -such as - * append() - */ -public class SwiftUnsupportedFeatureException extends SwiftException { - - public SwiftUnsupportedFeatureException(String message) { - super(message); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/CopyRequest.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/CopyRequest.java deleted file mode 100644 index c25a630cc298d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/CopyRequest.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; - -/** - * Implementation for SwiftRestClient to make copy requests. - * COPY is a method that came with WebDAV (RFC2518), and is not something that - * can be handled by all proxies en-route to a filesystem. 
- */ -class CopyRequest extends HttpEntityEnclosingRequestBase { - - CopyRequest() { - super(); - } - - /** - * @return http method name - */ - @Override - public String getMethod() { - return "COPY"; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/ExceptionDiags.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/ExceptionDiags.java deleted file mode 100644 index d159caa66909a..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/ExceptionDiags.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.net.ConnectException; -import java.net.NoRouteToHostException; -import java.net.SocketTimeoutException; -import java.net.UnknownHostException; - -/** - * Variant of Hadoop NetUtils exception wrapping with URI awareness and - * available in branch-1 too. 
- */ -public class ExceptionDiags { - private static final Logger LOG = - LoggerFactory.getLogger(ExceptionDiags.class); - - /** text to point users elsewhere: {@value} */ - private static final String FOR_MORE_DETAILS_SEE - = " For more details see: "; - /** text included in wrapped exceptions if the host is null: {@value} */ - public static final String UNKNOWN_HOST = "(unknown)"; - /** Base URL of the Hadoop Wiki: {@value} */ - public static final String HADOOP_WIKI = "http://wiki.apache.org/hadoop/"; - - /** - * Take an IOException and a URI, wrap it where possible with - * something that includes the URI - * - * @param dest target URI - * @param operation operation - * @param exception the caught exception. - * @return an exception to throw - */ - public static IOException wrapException(final String dest, - final String operation, - final IOException exception) { - String action = operation + " " + dest; - String xref = null; - - if (exception instanceof ConnectException) { - xref = "ConnectionRefused"; - } else if (exception instanceof UnknownHostException) { - xref = "UnknownHost"; - } else if (exception instanceof SocketTimeoutException) { - xref = "SocketTimeout"; - } else if (exception instanceof NoRouteToHostException) { - xref = "NoRouteToHost"; - } - String msg = action - + " failed on exception: " - + exception; - if (xref != null) { - msg = msg + ";" + see(xref); - } - return wrapWithMessage(exception, msg); - } - - private static String see(final String entry) { - return FOR_MORE_DETAILS_SEE + HADOOP_WIKI + entry; - } - - @SuppressWarnings("unchecked") - private static T wrapWithMessage( - T exception, String msg) { - Class clazz = exception.getClass(); - try { - Constructor ctor = - clazz.getConstructor(String.class); - Throwable t = ctor.newInstance(msg); - return (T) (t.initCause(exception)); - } catch (Throwable e) { - return exception; - } - } - -} diff --git 
a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/HttpBodyContent.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/HttpBodyContent.java deleted file mode 100644 index b471f218e57e1..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/HttpBodyContent.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.swift.http; - -/** - * Response tuple from GET operations; combines the input stream with the content length - */ -public class HttpBodyContent { - private final long contentLength; - private final HttpInputStreamWithRelease inputStream; - - /** - * build a body response - * @param inputStream input stream from the operation - * @param contentLength length of content; may be -1 for "don't know" - */ - public HttpBodyContent(HttpInputStreamWithRelease inputStream, - long contentLength) { - this.contentLength = contentLength; - this.inputStream = inputStream; - } - - public long getContentLength() { - return contentLength; - } - - public HttpInputStreamWithRelease getInputStream() { - return inputStream; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/HttpInputStreamWithRelease.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/HttpInputStreamWithRelease.java deleted file mode 100644 index bd025aca1b8d7..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/HttpInputStreamWithRelease.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.apache.hadoop.fs.swift.exceptions.SwiftConnectionClosedException; -import org.apache.hadoop.fs.swift.util.SwiftUtils; -import org.apache.http.HttpResponse; -import org.apache.http.client.methods.HttpRequestBase; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayInputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; - -/** - * This replaces the input stream release class from JetS3t and AWS; - * # Failures in the constructor are relayed up instead of simply logged. - * # it is set up to be more robust at teardown - * # release logic is thread safe - * Note that the thread safety of the inner stream contains no thread - * safety guarantees -this stream is not to be read across streams. - * The thread safety logic here is to ensure that even if somebody ignores - * that rule, the release code does not get entered twice -and that - * any release in one thread is picked up by read operations in all others. - */ -public class HttpInputStreamWithRelease extends InputStream { - - private static final Logger LOG = - LoggerFactory.getLogger(HttpInputStreamWithRelease.class); - private final URI uri; - private HttpRequestBase req; - private HttpResponse resp; - //flag to say the stream is released -volatile so that read operations - //pick it up even while unsynchronized. - private volatile boolean released; - //volatile flag to verify that data is consumed. - private volatile boolean dataConsumed; - private InputStream inStream; - /** - * In debug builds, this is filled in with the construction-time - * stack, which is then included in logs from the finalize(), method. 
- */ - private final Exception constructionStack; - - /** - * Why the stream is closed - */ - private String reasonClosed = "unopened"; - - public HttpInputStreamWithRelease(URI uri, HttpRequestBase req, - HttpResponse resp) throws IOException { - this.uri = uri; - this.req = req; - this.resp = resp; - constructionStack = LOG.isDebugEnabled() ? new Exception("stack") : null; - if (req == null) { - throw new IllegalArgumentException("Null 'request' parameter "); - } - try { - inStream = resp.getEntity().getContent(); - } catch (IOException e) { - inStream = new ByteArrayInputStream(new byte[]{}); - throw releaseAndRethrow("getResponseBodyAsStream() in constructor -" + e, e); - } - } - - @Override - public void close() throws IOException { - release("close()", null); - } - - /** - * Release logic - * @param reason reason for release (used in debug messages) - * @param ex exception that is a cause -null for non-exceptional releases - * @return true if the release took place here - * @throws IOException if the abort or close operations failed. - */ - private synchronized boolean release(String reason, Exception ex) throws - IOException { - if (!released) { - reasonClosed = reason; - try { - LOG.debug("Releasing connection to {}: {}", uri, reason, ex); - if (req != null) { - if (!dataConsumed) { - req.abort(); - } - req.releaseConnection(); - } - if (inStream != null) { - //this guard may seem un-needed, but a stack trace seen - //on the JetS3t predecessor implied that it - //is useful - inStream.close(); - } - return true; - } finally { - //if something went wrong here, we do not want the release() operation - //to try and do anything in advance. - released = true; - dataConsumed = true; - } - } else { - return false; - } - } - - /** - * Release the method, using the exception as a cause - * @param operation operation that failed - * @param ex the exception which triggered it. 
- * @return the exception to throw - */ - private IOException releaseAndRethrow(String operation, IOException ex) { - try { - release(operation, ex); - } catch (IOException ioe) { - LOG.debug("Exception during release: {}", operation, ioe); - //make this the exception if there was none before - if (ex == null) { - ex = ioe; - } - } - return ex; - } - - /** - * Assume that the connection is not released: throws an exception if it is - * @throws SwiftConnectionClosedException - */ - private synchronized void assumeNotReleased() throws SwiftConnectionClosedException { - if (released || inStream == null) { - throw new SwiftConnectionClosedException(reasonClosed); - } - } - - @Override - public int available() throws IOException { - assumeNotReleased(); - try { - return inStream.available(); - } catch (IOException e) { - throw releaseAndRethrow("available() failed -" + e, e); - } - } - - @Override - public int read() throws IOException { - assumeNotReleased(); - int read = 0; - try { - read = inStream.read(); - } catch (EOFException e) { - LOG.debug("EOF exception", e); - read = -1; - } catch (IOException e) { - throw releaseAndRethrow("read()", e); - } - if (read < 0) { - dataConsumed = true; - release("read() -all data consumed", null); - } - return read; - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - SwiftUtils.validateReadArgs(b, off, len); - if (len == 0) { - return 0; - } - //if the stream is already closed, then report an exception. 
- assumeNotReleased(); - //now read in a buffer, reacting differently to different operations - int read; - try { - read = inStream.read(b, off, len); - } catch (EOFException e) { - LOG.debug("EOF exception", e); - read = -1; - } catch (IOException e) { - throw releaseAndRethrow("read(b, off, " + len + ")", e); - } - if (read < 0) { - dataConsumed = true; - release("read() -all data consumed", null); - } - return read; - } - - /** - * Finalizer does release the stream, but also logs at WARN level - * including the URI at fault - */ - @Override - protected void finalize() { - try { - if (release("finalize()", constructionStack)) { - LOG.warn("input stream of {}" + - " not closed properly -cleaned up in finalize()", uri); - } - } catch (Exception e) { - //swallow anything that failed here - LOG.warn("Exception while releasing {} in finalizer", uri, e); - } - } - - @Override - public String toString() { - return "HttpInputStreamWithRelease working with " + uri - +" released=" + released - +" dataConsumed=" + dataConsumed; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java deleted file mode 100644 index f6917d3ffaea6..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; - -import java.net.URI; -import java.util.Properties; - -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.*; - -/** - * This class implements the binding logic between Hadoop configurations - * and the swift rest client. - *

    - * The swift rest client takes a Properties instance containing - * the string values it uses to bind to a swift endpoint. - *

    - * This class extracts the values for a specific filesystem endpoint - * and then builds an appropriate Properties file. - */ -public final class RestClientBindings { - private static final Logger LOG = - LoggerFactory.getLogger(RestClientBindings.class); - - public static final String E_INVALID_NAME = "Invalid swift hostname '%s':" + - " hostname must in form container.service"; - - /** - * Public for testing : build the full prefix for use in resolving - * configuration items - * - * @param service service to use - * @return the prefix string without any trailing "." - */ - public static String buildSwiftInstancePrefix(String service) { - return SWIFT_SERVICE_PREFIX + service; - } - - /** - * Raise an exception for an invalid service name - * - * @param hostname hostname that was being parsed - * @return an exception to throw - */ - private static SwiftConfigurationException invalidName(String hostname) { - return new SwiftConfigurationException( - String.format(E_INVALID_NAME, hostname)); - } - - /** - * Get the container name from the hostname -the single element before the - * first "." 
in the hostname - * - * @param hostname hostname to split - * @return the container - * @throws SwiftConfigurationException - */ - public static String extractContainerName(String hostname) throws - SwiftConfigurationException { - int i = hostname.indexOf("."); - if (i <= 0) { - throw invalidName(hostname); - } - return hostname.substring(0, i); - } - - public static String extractContainerName(URI uri) throws - SwiftConfigurationException { - return extractContainerName(uri.getHost()); - } - - /** - * Get the service name from a longer hostname string - * - * @param hostname hostname - * @return the separated out service name - * @throws SwiftConfigurationException if the hostname was invalid - */ - public static String extractServiceName(String hostname) throws - SwiftConfigurationException { - int i = hostname.indexOf("."); - if (i <= 0) { - throw invalidName(hostname); - } - String service = hostname.substring(i + 1); - if (service.isEmpty() || service.contains(".")) { - //empty service contains dots in -not currently supported - throw invalidName(hostname); - } - return service; - } - - public static String extractServiceName(URI uri) throws - SwiftConfigurationException { - return extractServiceName(uri.getHost()); - } - - /** - * Build a properties instance bound to the configuration file -using - * the filesystem URI as the source of the information. - * - * @param fsURI filesystem URI - * @param conf configuration - * @return a properties file with the instance-specific properties extracted - * and bound to the swift client properties. 
- * @throws SwiftConfigurationException if the configuration is invalid - */ - public static Properties bind(URI fsURI, Configuration conf) throws - SwiftConfigurationException { - String host = fsURI.getHost(); - if (host == null || host.isEmpty()) { - //expect shortnames -> conf names - throw invalidName(host); - } - - String container = extractContainerName(host); - String service = extractServiceName(host); - - //build filename schema - String prefix = buildSwiftInstancePrefix(service); - if (LOG.isDebugEnabled()) { - LOG.debug("Filesystem " + fsURI - + " is using configuration keys " + prefix); - } - Properties props = new Properties(); - props.setProperty(SWIFT_SERVICE_PROPERTY, service); - props.setProperty(SWIFT_CONTAINER_PROPERTY, container); - copy(conf, prefix + DOT_AUTH_URL, props, SWIFT_AUTH_PROPERTY, true); - copy(conf, prefix + DOT_USERNAME, props, SWIFT_USERNAME_PROPERTY, true); - copy(conf, prefix + DOT_APIKEY, props, SWIFT_APIKEY_PROPERTY, false); - copy(conf, prefix + DOT_PASSWORD, props, SWIFT_PASSWORD_PROPERTY, - props.contains(SWIFT_APIKEY_PROPERTY) ? true : false); - copy(conf, prefix + DOT_TENANT, props, SWIFT_TENANT_PROPERTY, false); - copy(conf, prefix + DOT_REGION, props, SWIFT_REGION_PROPERTY, false); - copy(conf, prefix + DOT_HTTP_PORT, props, SWIFT_HTTP_PORT_PROPERTY, false); - copy(conf, prefix + - DOT_HTTPS_PORT, props, SWIFT_HTTPS_PORT_PROPERTY, false); - - copyBool(conf, prefix + DOT_PUBLIC, props, SWIFT_PUBLIC_PROPERTY, false); - copyBool(conf, prefix + DOT_LOCATION_AWARE, props, - SWIFT_LOCATION_AWARE_PROPERTY, false); - - return props; - } - - /** - * Extract a boolean value from the configuration and copy it to the - * properties instance. 
- * @param conf source configuration - * @param confKey key in the configuration file - * @param props destination property set - * @param propsKey key in the property set - * @param defVal default value - */ - private static void copyBool(Configuration conf, - String confKey, - Properties props, - String propsKey, - boolean defVal) { - boolean b = conf.getBoolean(confKey, defVal); - props.setProperty(propsKey, Boolean.toString(b)); - } - - private static void set(Properties props, String key, String optVal) { - if (optVal != null) { - props.setProperty(key, optVal); - } - } - - /** - * Copy a (trimmed) property from the configuration file to the properties file. - *

    - * If marked as required and not found in the configuration, an - * exception is raised. - * If not required -and missing- then the property will not be set. - * In this case, if the property is already in the Properties instance, - * it will remain untouched. - * - * @param conf source configuration - * @param confKey key in the configuration file - * @param props destination property set - * @param propsKey key in the property set - * @param required is the property required - * @throws SwiftConfigurationException if the property is required but was - * not found in the configuration instance. - */ - public static void copy(Configuration conf, String confKey, Properties props, - String propsKey, - boolean required) throws SwiftConfigurationException { - //TODO: replace. version compatibility issue conf.getTrimmed fails with NoSuchMethodError - String val = conf.get(confKey); - if (val != null) { - val = val.trim(); - } - if (required && val == null) { - throw new SwiftConfigurationException( - "Missing mandatory configuration option: " - + - confKey); - } - set(props, propsKey, val); - } - - -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftProtocolConstants.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftProtocolConstants.java deleted file mode 100644 index a01f32c18b2b9..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftProtocolConstants.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.apache.hadoop.util.VersionInfo; - -/** - * Constants used in the Swift REST protocol, - * and in the properties used to configure the {@link SwiftRestClient}. - */ -public class SwiftProtocolConstants { - /** - * Swift-specific header for authentication: {@value} - */ - public static final String HEADER_AUTH_KEY = "X-Auth-Token"; - - /** - * Default port used by Swift for HTTP - */ - public static final int SWIFT_HTTP_PORT = 8080; - - /** - * Default port used by Swift Auth for HTTPS - */ - public static final int SWIFT_HTTPS_PORT = 443; - - /** HTTP standard {@value} header */ - public static final String HEADER_RANGE = "Range"; - - /** HTTP standard {@value} header */ - public static final String HEADER_DESTINATION = "Destination"; - - /** HTTP standard {@value} header */ - public static final String HEADER_LAST_MODIFIED = "Last-Modified"; - - /** HTTP standard {@value} header */ - public static final String HEADER_CONTENT_LENGTH = "Content-Length"; - - /** HTTP standard {@value} header */ - public static final String HEADER_CONTENT_RANGE = "Content-Range"; - - /** - * Patten for range headers - */ - public static final String SWIFT_RANGE_HEADER_FORMAT_PATTERN = "bytes=%d-%d"; - - /** - * section in the JSON catalog provided after auth listing the swift FS: - * {@value} - */ - public static final String SERVICE_CATALOG_SWIFT = "swift"; - /** - * section in the JSON catalog provided after auth listing the cloud files; - * this is an alternate catalog entry name - * {@value} - */ - public 
static final String SERVICE_CATALOG_CLOUD_FILES = "cloudFiles"; - /** - * section in the JSON catalog provided after auth listing the object store; - * this is an alternate catalog entry name - * {@value} - */ - public static final String SERVICE_CATALOG_OBJECT_STORE = "object-store"; - - /** - * entry in the swift catalog defining the prefix used to talk to objects - * {@value} - */ - public static final String SWIFT_OBJECT_AUTH_ENDPOINT = - "/object_endpoint/"; - /** - * Swift-specific header: object manifest used in the final upload - * of a multipart operation: {@value} - */ - public static final String X_OBJECT_MANIFEST = "X-Object-Manifest"; - /** - * Swift-specific header -#of objects in a container: {@value} - */ - public static final String X_CONTAINER_OBJECT_COUNT = - "X-Container-Object-Count"; - /** - * Swift-specific header: no. of bytes used in a container {@value} - */ - public static final String X_CONTAINER_BYTES_USED = "X-Container-Bytes-Used"; - - /** - * Header to set when requesting the latest version of a file: : {@value} - */ - public static final String X_NEWEST = "X-Newest"; - - /** - * throttled response sent by some endpoints. - */ - public static final int SC_THROTTLED_498 = 498; - /** - * W3C recommended status code for throttled operations - */ - public static final int SC_TOO_MANY_REQUESTS_429 = 429; - - public static final String FS_SWIFT = "fs.swift"; - - /** - * Prefix for all instance-specific values in the configuration: {@value} - */ - public static final String SWIFT_SERVICE_PREFIX = FS_SWIFT + ".service."; - - /** - * timeout for all connections: {@value} - */ - public static final String SWIFT_CONNECTION_TIMEOUT = - FS_SWIFT + ".connect.timeout"; - - /** - * timeout for all connections: {@value} - */ - public static final String SWIFT_SOCKET_TIMEOUT = - FS_SWIFT + ".socket.timeout"; - - /** - * the default socket timeout in millis {@value}. - * This controls how long the connection waits for responses from - * servers. 
- */ - public static final int DEFAULT_SOCKET_TIMEOUT = 60000; - - /** - * connection retry count for all connections: {@value} - */ - public static final String SWIFT_RETRY_COUNT = - FS_SWIFT + ".connect.retry.count"; - - /** - * delay in millis between bulk (delete, rename, copy operations: {@value} - */ - public static final String SWIFT_THROTTLE_DELAY = - FS_SWIFT + ".connect.throttle.delay"; - - /** - * the default throttle delay in millis {@value} - */ - public static final int DEFAULT_THROTTLE_DELAY = 0; - - /** - * blocksize for all filesystems: {@value} - */ - public static final String SWIFT_BLOCKSIZE = - FS_SWIFT + ".blocksize"; - - /** - * the default blocksize for filesystems in KB: {@value} - */ - public static final int DEFAULT_SWIFT_BLOCKSIZE = 32 * 1024; - - /** - * partition size for all filesystems in KB: {@value} - */ - public static final String SWIFT_PARTITION_SIZE = - FS_SWIFT + ".partsize"; - - /** - * The default partition size for uploads: {@value} - */ - public static final int DEFAULT_SWIFT_PARTITION_SIZE = 4608*1024; - - /** - * request size for reads in KB: {@value} - */ - public static final String SWIFT_REQUEST_SIZE = - FS_SWIFT + ".requestsize"; - - /** - * The default request size for reads: {@value} - */ - public static final int DEFAULT_SWIFT_REQUEST_SIZE = 64; - - - public static final String HEADER_USER_AGENT="User-Agent"; - - /** - * The user agent sent in requests. 
- */ - public static final String SWIFT_USER_AGENT= "Apache Hadoop Swift Client " - + VersionInfo.getBuildVersion(); - - /** - * Key for passing the service name as a property -not read from the - * configuration : {@value} - */ - public static final String DOT_SERVICE = ".SERVICE-NAME"; - - /** - * Key for passing the container name as a property -not read from the - * configuration : {@value} - */ - public static final String DOT_CONTAINER = ".CONTAINER-NAME"; - - public static final String DOT_AUTH_URL = ".auth.url"; - public static final String DOT_TENANT = ".tenant"; - public static final String DOT_USERNAME = ".username"; - public static final String DOT_PASSWORD = ".password"; - public static final String DOT_HTTP_PORT = ".http.port"; - public static final String DOT_HTTPS_PORT = ".https.port"; - public static final String DOT_REGION = ".region"; - public static final String DOT_PROXY_HOST = ".proxy.host"; - public static final String DOT_PROXY_PORT = ".proxy.port"; - public static final String DOT_LOCATION_AWARE = ".location-aware"; - public static final String DOT_APIKEY = ".apikey"; - public static final String DOT_USE_APIKEY = ".useApikey"; - - /** - * flag to say use public URL - */ - public static final String DOT_PUBLIC = ".public"; - - public static final String SWIFT_SERVICE_PROPERTY = FS_SWIFT + DOT_SERVICE; - public static final String SWIFT_CONTAINER_PROPERTY = FS_SWIFT + DOT_CONTAINER; - - public static final String SWIFT_AUTH_PROPERTY = FS_SWIFT + DOT_AUTH_URL; - public static final String SWIFT_TENANT_PROPERTY = FS_SWIFT + DOT_TENANT; - public static final String SWIFT_USERNAME_PROPERTY = FS_SWIFT + DOT_USERNAME; - public static final String SWIFT_PASSWORD_PROPERTY = FS_SWIFT + DOT_PASSWORD; - public static final String SWIFT_APIKEY_PROPERTY = FS_SWIFT + DOT_APIKEY; - public static final String SWIFT_HTTP_PORT_PROPERTY = FS_SWIFT + DOT_HTTP_PORT; - public static final String SWIFT_HTTPS_PORT_PROPERTY = FS_SWIFT - + DOT_HTTPS_PORT; - public 
static final String SWIFT_REGION_PROPERTY = FS_SWIFT + DOT_REGION; - public static final String SWIFT_PUBLIC_PROPERTY = FS_SWIFT + DOT_PUBLIC; - - public static final String SWIFT_USE_API_KEY_PROPERTY = FS_SWIFT + DOT_USE_APIKEY; - - public static final String SWIFT_LOCATION_AWARE_PROPERTY = FS_SWIFT + - DOT_LOCATION_AWARE; - - public static final String SWIFT_PROXY_HOST_PROPERTY = FS_SWIFT + DOT_PROXY_HOST; - public static final String SWIFT_PROXY_PORT_PROPERTY = FS_SWIFT + DOT_PROXY_PORT; - public static final String HTTP_ROUTE_DEFAULT_PROXY = - "http.route.default-proxy"; - /** - * Topology to return when a block location is requested - */ - public static final String TOPOLOGY_PATH = "/swift/unknown"; - /** - * Block location to return when a block location is requested - */ - public static final String BLOCK_LOCATION = "/default-rack/swift"; - /** - * Default number of attempts to retry a connect request: {@value} - */ - static final int DEFAULT_RETRY_COUNT = 3; - /** - * Default timeout in milliseconds for connection requests: {@value} - */ - static final int DEFAULT_CONNECT_TIMEOUT = 15000; -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java deleted file mode 100644 index cf6bf9b972a10..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java +++ /dev/null @@ -1,1879 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.apache.hadoop.fs.swift.util.HttpResponseUtils; -import org.apache.http.Header; -import org.apache.http.HttpHost; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.client.HttpClient; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.HttpDelete; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpHead; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.client.methods.HttpRequestBase; -import org.apache.http.client.methods.HttpUriRequest; -import org.apache.http.config.SocketConfig; -import org.apache.http.entity.ContentType; -import org.apache.http.entity.InputStreamEntity; -import org.apache.http.entity.StringEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; -import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.http.message.BasicHeader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.swift.auth.ApiKeyAuthenticationRequest; -import org.apache.hadoop.fs.swift.auth.ApiKeyCredentials; -import org.apache.hadoop.fs.swift.auth.AuthenticationRequest; -import org.apache.hadoop.fs.swift.auth.AuthenticationRequestWrapper; -import org.apache.hadoop.fs.swift.auth.AuthenticationResponse; -import 
org.apache.hadoop.fs.swift.auth.AuthenticationWrapper; -import org.apache.hadoop.fs.swift.auth.KeyStoneAuthRequest; -import org.apache.hadoop.fs.swift.auth.KeystoneApiKeyCredentials; -import org.apache.hadoop.fs.swift.auth.PasswordAuthenticationRequest; -import org.apache.hadoop.fs.swift.auth.PasswordCredentials; -import org.apache.hadoop.fs.swift.auth.entities.AccessToken; -import org.apache.hadoop.fs.swift.auth.entities.Catalog; -import org.apache.hadoop.fs.swift.auth.entities.Endpoint; -import org.apache.hadoop.fs.swift.exceptions.SwiftAuthenticationFailedException; -import org.apache.hadoop.fs.swift.exceptions.SwiftBadRequestException; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; -import org.apache.hadoop.fs.swift.exceptions.SwiftException; -import org.apache.hadoop.fs.swift.exceptions.SwiftInternalStateException; -import org.apache.hadoop.fs.swift.exceptions.SwiftInvalidResponseException; -import org.apache.hadoop.fs.swift.exceptions.SwiftThrottledRequestException; -import org.apache.hadoop.fs.swift.util.Duration; -import org.apache.hadoop.fs.swift.util.DurationStats; -import org.apache.hadoop.fs.swift.util.DurationStatsTable; -import org.apache.hadoop.fs.swift.util.JSONUtil; -import org.apache.hadoop.fs.swift.util.SwiftObjectPath; -import org.apache.hadoop.fs.swift.util.SwiftUtils; - -import java.io.EOFException; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URLEncoder; -import java.util.List; -import java.util.Properties; - -import static org.apache.http.HttpStatus.*; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.*; - -/** - * This implements the client-side of the Swift REST API. - * - * The core actions put, get and query data in the Swift object store, - * after authenticating the client. 
- * - * Logging: - * - * Logging at DEBUG level displays detail about the actions of this - * client, including HTTP requests and responses -excluding authentication - * details. - */ -public final class SwiftRestClient { - private static final Logger LOG = - LoggerFactory.getLogger(SwiftRestClient.class); - - /** - * Header that says "use newest version" -ensures that - * the query doesn't pick up older versions served by - * an eventually consistent filesystem (except in the special case - * of a network partition, at which point no guarantees about - * consistency can be made. - */ - public static final Header NEWEST = - new BasicHeader(SwiftProtocolConstants.X_NEWEST, "true"); - - /** - * the authentication endpoint as supplied in the configuration. - */ - private final URI authUri; - - /** - * Swift region. Some OpenStack installations has more than one region. - * In this case user can specify the region with which Hadoop will be working - */ - private final String region; - - /** - * tenant name. - */ - private final String tenant; - - /** - * username name. - */ - private final String username; - - /** - * user password. - */ - private final String password; - - /** - * user api key. - */ - private final String apiKey; - - /** - * The authentication request used to authenticate with Swift. - */ - private final AuthenticationRequest authRequest; - - /** - * This auth request is similar to @see authRequest, - * with one difference: it has another json representation when - * authRequest one is not applicable. - */ - private AuthenticationRequest keystoneAuthRequest; - - private boolean useKeystoneAuthentication = false; - - /** - * The container this client is working with. - */ - private final String container; - private final String serviceDescription; - - /** - * Access token (Secret). - */ - private AccessToken token; - - /** - * Endpoint for swift operations, obtained after authentication. 
- */ - private URI endpointURI; - - /** - * URI under which objects can be found. - * This is set when the user is authenticated -the URI - * is returned in the body of the success response. - */ - private URI objectLocationURI; - - /** - * The name of the service provider. - */ - private final String serviceProvider; - - /** - * Should the public swift endpoint be used, rather than the in-cluster one? - */ - private final boolean usePublicURL; - - /** - * Number of times to retry a connection. - */ - private final int retryCount; - - /** - * How long (in milliseconds) should a connection be attempted. - */ - private final int connectTimeout; - - /** - * How long (in milliseconds) should a connection be attempted. - */ - private final int socketTimeout; - - /** - * How long (in milliseconds) between bulk operations. - */ - private final int throttleDelay; - - /** - * the name of a proxy host (can be null, in which case there is no proxy). - */ - private String proxyHost; - - /** - * The port of a proxy. This is ignored if {@link #proxyHost} is null. - */ - private int proxyPort; - - /** - * Flag to indicate whether or not the client should - * query for file location data. - */ - private final boolean locationAware; - - private final int partSizeKB; - /** - * The blocksize of this FS - */ - private final int blocksizeKB; - private final int bufferSizeKB; - - private final DurationStatsTable durationStats = new DurationStatsTable(); - /** - * objects query endpoint. This is synchronized - * to handle a simultaneous update of all auth data in one - * go. - */ - private synchronized URI getEndpointURI() { - return endpointURI; - } - - /** - * token for Swift communication. - */ - private synchronized AccessToken getToken() { - return token; - } - - /** - * Setter of authentication and endpoint details. - * Being synchronized guarantees that all three fields are set up together. 
- * It is up to the reader to read all three fields in their own - * synchronized block to be sure that they are all consistent. - * - * @param endpoint endpoint URI - * @param objectLocation object location URI - * @param authToken auth token - */ - private void setAuthDetails(URI endpoint, - URI objectLocation, - AccessToken authToken) { - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("setAuth: endpoint=%s; objectURI=%s; token=%s", - endpoint, objectLocation, authToken)); - } - synchronized (this) { - endpointURI = endpoint; - objectLocationURI = objectLocation; - token = authToken; - } - } - - - /** - * Base class for all Swift REST operations. - * - * @param request - * @param result - */ - private static abstract class HttpRequestProcessor - { - public final M createRequest(String uri) throws IOException { - final M req = doCreateRequest(uri); - setup(req); - return req; - } - - /** - * Override it to return some result after request is executed. - */ - public abstract R extractResult(M req, HttpResponse resp) - throws IOException; - - /** - * Factory method to create a REST method against the given URI. - * - * @param uri target - * @return method to invoke - */ - protected abstract M doCreateRequest(String uri) throws IOException; - - /** - * Override port to set up the request before it is executed. - */ - protected void setup(M req) throws IOException { - } - - /** - * Override point: what are the status codes that this operation supports? 
- * - * @return an array with the permitted status code(s) - */ - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_CREATED, - SC_ACCEPTED, - SC_NO_CONTENT, - SC_PARTIAL_CONTENT, - }; - } - } - - private static abstract class GetRequestProcessor - extends HttpRequestProcessor { - @Override - protected final HttpGet doCreateRequest(String uri) { - return new HttpGet(uri); - } - } - - private static abstract class PostRequestProcessor - extends HttpRequestProcessor { - @Override - protected final HttpPost doCreateRequest(String uri) { - return new HttpPost(uri); - } - } - - /** - * There's a special type for auth messages, so that low-level - * message handlers can react to auth failures differently from everything - * else. - */ - private static final class AuthPostRequest extends HttpPost { - private AuthPostRequest(String uri) { - super(uri); - } - } - - /** - * Generate an auth message. - * @param response - */ - private static abstract class AuthRequestProcessor - extends HttpRequestProcessor { - @Override - protected final AuthPostRequest doCreateRequest(String uri) { - return new AuthPostRequest(uri); - } - } - - private static abstract class PutRequestProcessor - extends HttpRequestProcessor { - @Override - protected final HttpPut doCreateRequest(String uri) { - return new HttpPut(uri); - } - - /** - * Override point: what are the status codes that this operation supports? - * - * @return the list of status codes to accept - */ - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_CREATED, - SC_NO_CONTENT, - SC_ACCEPTED, - }; - } - } - - /** - * Create operation. 
- * - * @param result type - */ - private static abstract class CopyRequestProcessor - extends HttpRequestProcessor { - @Override - protected final CopyRequest doCreateRequest(String uri) - throws SwiftException { - CopyRequest copy = new CopyRequest(); - try { - copy.setURI(new URI(uri)); - } catch (URISyntaxException e) { - throw new SwiftException("Failed to create URI from: " + uri); - } - return copy; - } - - /** - * The only allowed status code is 201:created. - * @return an array with the permitted status code(s) - */ - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_CREATED - }; - } - } - - /** - * Delete operation. - * - * @param - */ - private static abstract class DeleteRequestProcessor - extends HttpRequestProcessor { - @Override - protected final HttpDelete doCreateRequest(String uri) { - return new HttpDelete(uri); - } - - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_ACCEPTED, - SC_NO_CONTENT, - SC_NOT_FOUND - }; - } - } - - private static abstract class HeadRequestProcessor - extends HttpRequestProcessor { - @Override - protected final HttpHead doCreateRequest(String uri) { - return new HttpHead(uri); - } - } - - - /** - * Create a Swift Rest Client instance. 
- * - * @param filesystemURI filesystem URI - * @param conf The configuration to use to extract the binding - * @throws SwiftConfigurationException the configuration is not valid for - * defining a rest client against the service - */ - private SwiftRestClient(URI filesystemURI, - Configuration conf) - throws SwiftConfigurationException { - Properties props = RestClientBindings.bind(filesystemURI, conf); - String stringAuthUri = getOption(props, SWIFT_AUTH_PROPERTY); - username = getOption(props, SWIFT_USERNAME_PROPERTY); - password = props.getProperty(SWIFT_PASSWORD_PROPERTY); - apiKey = props.getProperty(SWIFT_APIKEY_PROPERTY); - //optional - region = props.getProperty(SWIFT_REGION_PROPERTY); - //tenant is optional - tenant = props.getProperty(SWIFT_TENANT_PROPERTY); - //service is used for diagnostics - serviceProvider = props.getProperty(SWIFT_SERVICE_PROPERTY); - container = props.getProperty(SWIFT_CONTAINER_PROPERTY); - String isPubProp = props.getProperty(SWIFT_PUBLIC_PROPERTY, "false"); - usePublicURL = "true".equals(isPubProp); - - if (apiKey == null && password == null) { - throw new SwiftConfigurationException( - "Configuration for " + filesystemURI +" must contain either " - + SWIFT_PASSWORD_PROPERTY + " or " - + SWIFT_APIKEY_PROPERTY); - } - //create the (reusable) authentication request - if (password != null) { - authRequest = new PasswordAuthenticationRequest(tenant, - new PasswordCredentials( - username, - password)); - } else { - authRequest = new ApiKeyAuthenticationRequest(tenant, - new ApiKeyCredentials( - username, apiKey)); - keystoneAuthRequest = new KeyStoneAuthRequest(tenant, - new KeystoneApiKeyCredentials(username, apiKey)); - } - locationAware = "true".equals( - props.getProperty(SWIFT_LOCATION_AWARE_PROPERTY, "false")); - - //now read in properties that are shared across all connections - - //connection and retries - try { - retryCount = conf.getInt(SWIFT_RETRY_COUNT, DEFAULT_RETRY_COUNT); - connectTimeout = 
conf.getInt(SWIFT_CONNECTION_TIMEOUT, - DEFAULT_CONNECT_TIMEOUT); - socketTimeout = conf.getInt(SWIFT_SOCKET_TIMEOUT, - DEFAULT_SOCKET_TIMEOUT); - - throttleDelay = conf.getInt(SWIFT_THROTTLE_DELAY, - DEFAULT_THROTTLE_DELAY); - - //proxy options - proxyHost = conf.get(SWIFT_PROXY_HOST_PROPERTY); - proxyPort = conf.getInt(SWIFT_PROXY_PORT_PROPERTY, 8080); - - blocksizeKB = conf.getInt(SWIFT_BLOCKSIZE, - DEFAULT_SWIFT_BLOCKSIZE); - if (blocksizeKB <= 0) { - throw new SwiftConfigurationException("Invalid blocksize set in " - + SWIFT_BLOCKSIZE - + ": " + blocksizeKB); - } - partSizeKB = conf.getInt(SWIFT_PARTITION_SIZE, - DEFAULT_SWIFT_PARTITION_SIZE); - if (partSizeKB <=0) { - throw new SwiftConfigurationException("Invalid partition size set in " - + SWIFT_PARTITION_SIZE - + ": " + partSizeKB); - } - - bufferSizeKB = conf.getInt(SWIFT_REQUEST_SIZE, - DEFAULT_SWIFT_REQUEST_SIZE); - if (bufferSizeKB <=0) { - throw new SwiftConfigurationException("Invalid buffer size set in " - + SWIFT_REQUEST_SIZE - + ": " + bufferSizeKB); - } - } catch (NumberFormatException e) { - //convert exceptions raised parsing ints and longs into - // SwiftConfigurationException instances - throw new SwiftConfigurationException(e.toString(), e); - } - //everything you need for diagnostics. The password is omitted. - serviceDescription = String.format( - "Service={%s} container={%s} uri={%s}" - + " tenant={%s} user={%s} region={%s}" - + " publicURL={%b}" - + " location aware={%b}" - + " partition size={%d KB}, buffer size={%d KB}" - + " block size={%d KB}" - + " connect timeout={%d}, retry count={%d}" - + " socket timeout={%d}" - + " throttle delay={%d}" - , - serviceProvider, - container, - stringAuthUri, - tenant, - username, - region != null ? 
region : "(none)", - usePublicURL, - locationAware, - partSizeKB, - bufferSizeKB, - blocksizeKB, - connectTimeout, - retryCount, - socketTimeout, - throttleDelay - ); - if (LOG.isDebugEnabled()) { - LOG.debug(serviceDescription); - } - try { - this.authUri = new URI(stringAuthUri); - } catch (URISyntaxException e) { - throw new SwiftConfigurationException("The " + SWIFT_AUTH_PROPERTY - + " property was incorrect: " - + stringAuthUri, e); - } - } - - /** - * Get a mandatory configuration option. - * - * @param props property set - * @param key key - * @return value of the configuration - * @throws SwiftConfigurationException if there was no match for the key - */ - private static String getOption(Properties props, String key) throws - SwiftConfigurationException { - String val = props.getProperty(key); - if (val == null) { - throw new SwiftConfigurationException("Undefined property: " + key); - } - return val; - } - - /** - * Make an HTTP GET request to Swift to get a range of data in the object. - * - * @param path path to object - * @param offset offset from file beginning - * @param length file length - * @return The input stream -which must be closed afterwards. 
- * @throws IOException Problems - * @throws SwiftException swift specific error - * @throws FileNotFoundException path is not there - */ - public HttpBodyContent getData(SwiftObjectPath path, - long offset, - long length) throws IOException { - if (offset < 0) { - throw new SwiftException("Invalid offset: " + offset - + " in getDataAsInputStream( path=" + path - + ", offset=" + offset - + ", length =" + length + ")"); - } - if (length <= 0) { - throw new SwiftException("Invalid length: " + length - + " in getDataAsInputStream( path="+ path - + ", offset=" + offset - + ", length ="+ length + ")"); - } - - final String range = String.format(SWIFT_RANGE_HEADER_FORMAT_PATTERN, - offset, - offset + length - 1); - if (LOG.isDebugEnabled()) { - LOG.debug("getData:" + range); - } - - return getData(path, - new BasicHeader(HEADER_RANGE, range), - SwiftRestClient.NEWEST); - } - - /** - * Returns object length. - * - * @param uri file URI - * @return object length - * @throws SwiftException on swift-related issues - * @throws IOException on network/IO problems - */ - public long getContentLength(URI uri) throws IOException { - preRemoteCommand("getContentLength"); - return perform("getContentLength", uri, new HeadRequestProcessor() { - @Override - public Long extractResult(HttpHead req, HttpResponse resp) - throws IOException { - return HttpResponseUtils.getContentLength(resp); - } - - @Override - protected void setup(HttpHead req) throws IOException { - super.setup(req); - req.addHeader(NEWEST); - } - }); - } - - /** - * Get the length of the remote object. - * @param path object to probe - * @return the content length - * @throws IOException on any failure - */ - public long getContentLength(SwiftObjectPath path) throws IOException { - return getContentLength(pathToURI(path)); - } - - /** - * Get the path contents as an input stream. - * Warning: this input stream must be closed to avoid - * keeping Http connections open. 
- * - * @param path path to file - * @param requestHeaders http headers - * @return byte[] file data or null if the object was not found - * @throws IOException on IO Faults - * @throws FileNotFoundException if there is nothing at the path - */ - public HttpBodyContent getData(SwiftObjectPath path, - final Header... requestHeaders) - throws IOException { - preRemoteCommand("getData"); - return doGet(pathToURI(path), - requestHeaders); - } - - /** - * Returns object location as byte[]. - * - * @param path path to file - * @param requestHeaders http headers - * @return byte[] file data or null if the object was not found - * @throws IOException on IO Faults - */ - public byte[] getObjectLocation(SwiftObjectPath path, - final Header... requestHeaders) throws IOException { - if (!isLocationAware()) { - //if the filesystem is not location aware, do not ask for this information - return null; - } - preRemoteCommand("getObjectLocation"); - try { - return perform("getObjectLocation", pathToObjectLocation(path), - new GetRequestProcessor() { - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_FORBIDDEN, - SC_NO_CONTENT - }; - } - - @Override - public byte[] extractResult(HttpGet req, HttpResponse resp) throws - IOException { - - //TODO: remove SC_NO_CONTENT if it depends on Swift versions - int statusCode = resp.getStatusLine().getStatusCode(); - if (statusCode == SC_NOT_FOUND - || statusCode == SC_FORBIDDEN - || statusCode == SC_NO_CONTENT - || resp.getEntity().getContent() == null) { - return null; - } - final InputStream responseBodyAsStream = - resp.getEntity().getContent(); - final byte[] locationData = new byte[1024]; - - return responseBodyAsStream.read(locationData) > 0 ? 
- locationData : null; - } - - @Override - protected void setup(HttpGet req) - throws SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } catch (IOException e) { - LOG.warn("Failed to get the location of " + path + ": " + e, e); - return null; - } - } - - /** - * Create the URI needed to query the location of an object. - * @param path object path to retrieve information about - * @return the URI for the location operation - * @throws SwiftException if the URI could not be constructed - */ - private URI pathToObjectLocation(SwiftObjectPath path) throws SwiftException { - URI uri; - String dataLocationURI = objectLocationURI.toString(); - try { - if (path.toString().startsWith("/")) { - dataLocationURI = dataLocationURI.concat(path.toUriPath()); - } else { - dataLocationURI = dataLocationURI.concat("/").concat(path.toUriPath()); - } - - uri = new URI(dataLocationURI); - } catch (URISyntaxException e) { - throw new SwiftException(e); - } - return uri; - } - - /** - * Find objects under a prefix. - * - * @param path path prefix - * @param requestHeaders optional request headers - * @return byte[] file data or null if the object was not found - * @throws IOException on IO Faults - * @throws FileNotFoundException if nothing is at the end of the URI -that is, - * the directory is empty - */ - public byte[] findObjectsByPrefix(SwiftObjectPath path, - final Header... 
requestHeaders) throws IOException { - preRemoteCommand("findObjectsByPrefix"); - URI uri; - String dataLocationURI = getEndpointURI().toString(); - try { - String object = path.getObject(); - if (object.startsWith("/")) { - object = object.substring(1); - } - object = encodeUrl(object); - dataLocationURI = dataLocationURI.concat("/") - .concat(path.getContainer()) - .concat("/?prefix=") - .concat(object) - ; - uri = new URI(dataLocationURI); - } catch (URISyntaxException e) { - throw new SwiftException("Bad URI: " + dataLocationURI, e); - } - - return perform("findObjectsByPrefix", uri, - new GetRequestProcessor() { - @Override - public byte[] extractResult(HttpGet req, HttpResponse resp) - throws IOException { - if (resp.getStatusLine().getStatusCode() == SC_NOT_FOUND) { - //no result - throw new FileNotFoundException("Not found " + req.getURI()); - } - return HttpResponseUtils.getResponseBody(resp); - } - - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_NOT_FOUND - }; - } - - @Override - protected void setup(HttpGet req) throws SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } - - /** - * Find objects in a directory. - * - * @param path path prefix - * @param requestHeaders optional request headers - * @return byte[] file data or null if the object was not found - * @throws IOException on IO Faults - * @throws FileNotFoundException if nothing is at the end of the URI -that is, - * the directory is empty - */ - public byte[] listDeepObjectsInDirectory(SwiftObjectPath path, - boolean listDeep, - final Header... 
requestHeaders) - throws IOException { - preRemoteCommand("listDeepObjectsInDirectory"); - - String endpoint = getEndpointURI().toString(); - StringBuilder dataLocationURI = new StringBuilder(); - dataLocationURI.append(endpoint); - String object = path.getObject(); - if (object.startsWith("/")) { - object = object.substring(1); - } - if (!object.endsWith("/")) { - object = object.concat("/"); - } - - if (object.equals("/")) { - object = ""; - } - - dataLocationURI = dataLocationURI.append("/") - .append(path.getContainer()) - .append("/?prefix=") - .append(object) - .append("&format=json"); - - //in listing deep set param to false - if (listDeep == false) { - dataLocationURI.append("&delimiter=/"); - } - - return findObjects(dataLocationURI.toString(), requestHeaders); - } - - /** - * Find objects in a location. - * @param location URI - * @param requestHeaders optional request headers - * @return the body of te response - * @throws IOException IO problems - */ - private byte[] findObjects(String location, final Header[] requestHeaders) - throws IOException { - URI uri; - preRemoteCommand("findObjects"); - try { - uri = new URI(location); - } catch (URISyntaxException e) { - throw new SwiftException("Bad URI: " + location, e); - } - - return perform("findObjects", uri, - new GetRequestProcessor() { - @Override - public byte[] extractResult(HttpGet req, HttpResponse resp) - throws IOException { - if (resp.getStatusLine().getStatusCode() == SC_NOT_FOUND) { - //no result - throw new FileNotFoundException("Not found " + req.getURI()); - } - return HttpResponseUtils.getResponseBody(resp); - } - - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_NOT_FOUND - }; - } - - @Override - protected void setup(HttpGet req) throws SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } - - /** - * Copy an object. 
This is done by sending a COPY method to the filesystem - * which is required to handle this WebDAV-level extension to the - * base HTTP operations. - * - * @param src source path - * @param dst destination path - * @param headers any headers - * @return true if the status code was considered successful - * @throws IOException on IO Faults - */ - public boolean copyObject(SwiftObjectPath src, final SwiftObjectPath dst, - final Header... headers) throws IOException { - - preRemoteCommand("copyObject"); - - return perform("copy", pathToURI(src), - new CopyRequestProcessor() { - @Override - public Boolean extractResult(CopyRequest req, HttpResponse resp) - throws IOException { - return resp.getStatusLine().getStatusCode() != SC_NOT_FOUND; - } - - @Override - protected void setup(CopyRequest req) throws - SwiftInternalStateException { - setHeaders(req, headers); - req.addHeader(HEADER_DESTINATION, dst.toUriPath()); - } - }); - } - - /** - * Uploads file as Input Stream to Swift. - * The data stream will be closed after the request. - * - * @param path path to Swift - * @param data object data - * @param length length of data - * @param requestHeaders http headers - * @throws IOException on IO Faults - */ - public void upload(SwiftObjectPath path, - final InputStream data, - final long length, - final Header... requestHeaders) - throws IOException { - preRemoteCommand("upload"); - - try { - perform("upload", pathToURI(path), new PutRequestProcessor() { - @Override - public byte[] extractResult(HttpPut req, HttpResponse resp) - throws IOException { - return HttpResponseUtils.getResponseBody(resp); - } - - @Override - protected void setup(HttpPut req) throws - SwiftInternalStateException { - req.setEntity(new InputStreamEntity(data, length)); - setHeaders(req, requestHeaders); - } - }); - } finally { - data.close(); - } - - } - - - /** - * Deletes object from swift. - * The result is true if this operation did the deletion. 
- * - * @param path path to file - * @param requestHeaders http headers - * @throws IOException on IO Faults - */ - public boolean delete(SwiftObjectPath path, final Header... requestHeaders) throws IOException { - preRemoteCommand("delete"); - - return perform("", pathToURI(path), new DeleteRequestProcessor() { - @Override - public Boolean extractResult(HttpDelete req, HttpResponse resp) - throws IOException { - return resp.getStatusLine().getStatusCode() == SC_NO_CONTENT; - } - - @Override - protected void setup(HttpDelete req) throws - SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } - - /** - * Issue a head request. - * @param reason reason -used in logs - * @param path path to query - * @param requestHeaders request header - * @return the response headers. This may be an empty list - * @throws IOException IO problems - * @throws FileNotFoundException if there is nothing at the end - */ - public Header[] headRequest(String reason, - SwiftObjectPath path, - final Header... requestHeaders) - throws IOException { - - preRemoteCommand("headRequest: "+ reason); - return perform(reason, pathToURI(path), - new HeadRequestProcessor() { - @Override - public Header[] extractResult(HttpHead req, HttpResponse resp) - throws IOException { - if (resp.getStatusLine().getStatusCode() == SC_NOT_FOUND) { - throw new FileNotFoundException("Not Found " + req.getURI()); - } - return resp.getAllHeaders(); - } - - @Override - protected void setup(HttpHead req) throws - SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } - - /** - * Issue a put request. - * @param path path - * @param requestHeaders optional headers - * @return the HTTP response - * @throws IOException any problem - */ - public int putRequest(SwiftObjectPath path, final Header... 
requestHeaders) - throws IOException { - - preRemoteCommand("putRequest"); - return perform(pathToURI(path), new PutRequestProcessor() { - - @Override - public Integer extractResult(HttpPut req, HttpResponse resp) - throws IOException { - return resp.getStatusLine().getStatusCode(); - } - - @Override - protected void setup(HttpPut req) throws - SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } - - /** - * Authenticate to Openstack Keystone. - * As well as returning the access token, the member fields {@link #token}, - * {@link #endpointURI} and {@link #objectLocationURI} are set up for re-use. - *

    - * This method is re-entrant -if more than one thread attempts to authenticate - * neither will block -but the field values with have those of the last caller. - * - * @return authenticated access token - */ - public AccessToken authenticate() throws IOException { - final AuthenticationRequest authenticationRequest; - if (useKeystoneAuthentication) { - authenticationRequest = keystoneAuthRequest; - } else { - authenticationRequest = authRequest; - } - - LOG.debug("started authentication"); - return perform("authentication", - authUri, - new AuthenticationPost(authenticationRequest)); - } - - private final class AuthenticationPost extends - AuthRequestProcessor { - final AuthenticationRequest authenticationRequest; - - private AuthenticationPost(AuthenticationRequest authenticationRequest) { - this.authenticationRequest = authenticationRequest; - } - - @Override - protected void setup(AuthPostRequest req) throws IOException { - req.setEntity(getAuthenticationRequst(authenticationRequest)); - } - - /** - * specification says any of the 2xxs are OK, so list all - * the standard ones - * @return a set of 2XX status codes. 
- */ - @Override - protected int[] getAllowedStatusCodes() { - return new int[]{ - SC_OK, - SC_BAD_REQUEST, - SC_CREATED, - SC_ACCEPTED, - SC_NON_AUTHORITATIVE_INFORMATION, - SC_NO_CONTENT, - SC_RESET_CONTENT, - SC_PARTIAL_CONTENT, - SC_MULTI_STATUS, - SC_UNAUTHORIZED //if request unauthorized, try another method - }; - } - - @Override - public AccessToken extractResult(AuthPostRequest req, HttpResponse resp) - throws IOException { - //initial check for failure codes leading to authentication failures - if (resp.getStatusLine().getStatusCode() == SC_BAD_REQUEST) { - throw new SwiftAuthenticationFailedException( - authenticationRequest.toString(), "POST", authUri, resp); - } - - final AuthenticationResponse access = - JSONUtil.toObject(HttpResponseUtils.getResponseBodyAsString(resp), - AuthenticationWrapper.class).getAccess(); - final List serviceCatalog = access.getServiceCatalog(); - //locate the specific service catalog that defines Swift; variations - //in the name of this add complexity to the search - StringBuilder catList = new StringBuilder(); - StringBuilder regionList = new StringBuilder(); - - //these fields are all set together at the end of the operation - URI endpointURI = null; - URI objectLocation; - Endpoint swiftEndpoint = null; - AccessToken accessToken; - - for (Catalog catalog : serviceCatalog) { - String name = catalog.getName(); - String type = catalog.getType(); - String descr = String.format("[%s: %s]; ", name, type); - catList.append(descr); - if (LOG.isDebugEnabled()) { - LOG.debug("Catalog entry " + descr); - } - if (name.equals(SERVICE_CATALOG_SWIFT) - || name.equals(SERVICE_CATALOG_CLOUD_FILES) - || type.equals(SERVICE_CATALOG_OBJECT_STORE)) { - //swift is found - if (LOG.isDebugEnabled()) { - LOG.debug("Found swift catalog as " + name + " => " + type); - } - //now go through the endpoints - for (Endpoint endpoint : catalog.getEndpoints()) { - String endpointRegion = endpoint.getRegion(); - URI publicURL = endpoint.getPublicURL(); - URI 
internalURL = endpoint.getInternalURL(); - descr = String.format("[%s => %s / %s]; ", - endpointRegion, - publicURL, - internalURL); - regionList.append(descr); - if (LOG.isDebugEnabled()) { - LOG.debug("Endpoint " + descr); - } - if (region == null || endpointRegion.equals(region)) { - endpointURI = usePublicURL ? publicURL : internalURL; - swiftEndpoint = endpoint; - break; - } - } - } - } - if (endpointURI == null) { - String message = "Could not find swift service from auth URL " - + authUri - + " and region '" + region + "'. " - + "Categories: " + catList - + ((regionList.length() > 0) ? - ("regions: " + regionList) - : "No regions"); - throw new SwiftInvalidResponseException(message, - SC_OK, - "authenticating", - authUri); - - } - - - accessToken = access.getToken(); - String path = SWIFT_OBJECT_AUTH_ENDPOINT - + swiftEndpoint.getTenantId(); - String host = endpointURI.getHost(); - try { - objectLocation = new URI(endpointURI.getScheme(), - null, - host, - endpointURI.getPort(), - path, - null, - null); - } catch (URISyntaxException e) { - throw new SwiftException("object endpoint URI is incorrect: " - + endpointURI - + " + " + path, - e); - } - setAuthDetails(endpointURI, objectLocation, accessToken); - - if (LOG.isDebugEnabled()) { - LOG.debug("authenticated against " + endpointURI); - } - createDefaultContainer(); - return accessToken; - } - } - - private StringEntity getAuthenticationRequst( - AuthenticationRequest authenticationRequest) throws IOException { - final String data = JSONUtil.toJSON(new AuthenticationRequestWrapper( - authenticationRequest)); - if (LOG.isDebugEnabled()) { - LOG.debug("Authenticating with " + authenticationRequest); - } - return new StringEntity(data, ContentType.create("application/json", - "UTF-8")); - } - - /** - * create default container if it doesn't exist for Hadoop Swift integration. - * non-reentrant, as this should only be needed once. - * - * @throws IOException IO problems. 
- */ - private synchronized void createDefaultContainer() throws IOException { - createContainer(container); - } - - /** - * Create a container -if it already exists, do nothing. - * - * @param containerName the container name - * @throws IOException IO problems - * @throws SwiftBadRequestException invalid container name - * @throws SwiftInvalidResponseException error from the server - */ - public void createContainer(String containerName) throws IOException { - SwiftObjectPath objectPath = new SwiftObjectPath(containerName, ""); - try { - //see if the data is there - headRequest("createContainer", objectPath, NEWEST); - } catch (FileNotFoundException ex) { - int status = 0; - try { - status = putRequest(objectPath); - } catch (FileNotFoundException e) { - //triggered by a very bad container name. - //re-insert the 404 result into the status - status = SC_NOT_FOUND; - } - if (status == SC_BAD_REQUEST) { - throw new SwiftBadRequestException( - "Bad request -authentication failure or bad container name?", - status, - "PUT", - null); - } - if (!isStatusCodeExpected(status, - SC_OK, - SC_CREATED, - SC_ACCEPTED, - SC_NO_CONTENT)) { - throw new SwiftInvalidResponseException("Couldn't create container " - + containerName + - " for storing data in Swift." + - " Try to create container " + - containerName + " manually ", - status, - "PUT", - null); - } else { - throw ex; - } - } - } - - /** - * Trigger an initial auth operation if some of the needed - * fields are missing. - * - * @throws IOException on problems - */ - private void authIfNeeded() throws IOException { - if (getEndpointURI() == null) { - authenticate(); - } - } - - /** - * Pre-execution actions to be performed by methods. Currently this - *

      - *
    • Logs the operation at TRACE
    • - *
    • Authenticates the client -if needed
    • - *
    - * @throws IOException - */ - private void preRemoteCommand(String operation) throws IOException { - if (LOG.isTraceEnabled()) { - LOG.trace("Executing " + operation); - } - authIfNeeded(); - } - - - /** - * Performs the HTTP request, validates the response code and returns - * the received data. HTTP Status codes are converted into exceptions. - * - * @param uri URI to source - * @param processor HttpMethodProcessor - * @param method - * @param result type - * @return result of HTTP request - * @throws IOException IO problems - * @throws SwiftBadRequestException the status code indicated "Bad request" - * @throws SwiftInvalidResponseException the status code is out of range - * for the action (excluding 404 responses) - * @throws SwiftInternalStateException the internal state of this client - * is invalid - * @throws FileNotFoundException a 404 response was returned - */ - private R perform(URI uri, - HttpRequestProcessor processor) - throws IOException, - SwiftBadRequestException, - SwiftInternalStateException, - SwiftInvalidResponseException, - FileNotFoundException { - return perform("",uri, processor); - } - - /** - * Performs the HTTP request, validates the response code and returns - * the received data. HTTP Status codes are converted into exceptions. - * @param reason why is this operation taking place. 
Used for statistics - * @param uri URI to source - * @param processor HttpMethodProcessor - * @param method - * @param result type - * @return result of HTTP request - * @throws IOException IO problems - * @throws SwiftBadRequestException the status code indicated "Bad request" - * @throws SwiftInvalidResponseException the status code is out of range - * for the action (excluding 404 responses) - * @throws SwiftInternalStateException the internal state of this client - * is invalid - * @throws FileNotFoundException a 404 response was returned - */ - private R perform(String reason, URI uri, - HttpRequestProcessor processor) - throws IOException, SwiftBadRequestException, SwiftInternalStateException, - SwiftInvalidResponseException, FileNotFoundException { - checkNotNull(uri); - checkNotNull(processor); - - final M req = processor.createRequest(uri.toString()); - req.addHeader(HEADER_USER_AGENT, SWIFT_USER_AGENT); - //retry policy - HttpClientBuilder clientBuilder = HttpClientBuilder.create(); - clientBuilder.setRetryHandler( - new DefaultHttpRequestRetryHandler(retryCount, false)); - RequestConfig.Builder requestConfigBuilder = - RequestConfig.custom().setConnectTimeout(connectTimeout); - if (proxyHost != null) { - requestConfigBuilder.setProxy(new HttpHost(proxyHost, proxyPort)); - } - clientBuilder.setDefaultRequestConfig(requestConfigBuilder.build()); - clientBuilder.setDefaultSocketConfig( - SocketConfig.custom().setSoTimeout(socketTimeout).build()); - Duration duration = new Duration(); - boolean success = false; - HttpResponse resp; - try { - // client should not be closed in this method because - // the connection can be used later - CloseableHttpClient client = clientBuilder.build(); - int statusCode = 0; - try { - resp = exec(client, req); - statusCode = checkNotNull(resp.getStatusLine().getStatusCode()); - } catch (IOException e) { - //rethrow with extra diagnostics and wiki links - throw ExceptionDiags.wrapException(uri.toString(), req.getMethod(), e); - 
} - - //look at the response and see if it was valid or not. - //Valid is more than a simple 200; even 404 "not found" is considered - //valid -which it is for many methods. - - //validate the allowed status code for this operation - int[] allowedStatusCodes = processor.getAllowedStatusCodes(); - boolean validResponse = isStatusCodeExpected(statusCode, - allowedStatusCodes); - - if (!validResponse) { - IOException ioe = buildException(uri, req, resp, statusCode); - throw ioe; - } - - R r = processor.extractResult(req, resp); - success = true; - return r; - } catch (IOException e) { - //release the connection -always - req.releaseConnection(); - throw e; - } finally { - duration.finished(); - durationStats.add(req.getMethod() + " " + reason, duration, success); - } - } - - /** - * Build an exception from a failed operation. This can include generating - * specific exceptions (e.g. FileNotFound), as well as the default - * {@link SwiftInvalidResponseException}. - * - * @param uri URI for operation - * @param resp operation that failed - * @param statusCode status code - * @param method type - * @return an exception to throw - */ - private IOException buildException( - URI uri, M req, HttpResponse resp, int statusCode) { - IOException fault; - - //log the failure @debug level - String errorMessage = String.format("Method %s on %s failed, status code: %d," + - " status line: %s", - req.getMethod(), - uri, - statusCode, - resp.getStatusLine() - ); - if (LOG.isDebugEnabled()) { - LOG.debug(errorMessage); - } - //send the command - switch (statusCode) { - case SC_NOT_FOUND: - fault = new FileNotFoundException("Operation " + req.getMethod() - + " on " + uri); - break; - - case SC_BAD_REQUEST: - //bad HTTP request - fault = new SwiftBadRequestException("Bad request against " + uri, - req.getMethod(), uri, resp); - break; - - case SC_REQUESTED_RANGE_NOT_SATISFIABLE: - //out of range - StringBuilder errorText = new StringBuilder( - resp.getStatusLine().getReasonPhrase()); - 
//get the requested length - Header requestContentLen = req.getFirstHeader(HEADER_CONTENT_LENGTH); - if (requestContentLen != null) { - errorText.append(" requested ").append(requestContentLen.getValue()); - } - //and the result - Header availableContentRange = resp.getFirstHeader(HEADER_CONTENT_RANGE); - - if (availableContentRange != null) { - errorText.append(" available ") - .append(availableContentRange.getValue()); - } - fault = new EOFException(errorText.toString()); - break; - - case SC_UNAUTHORIZED: - //auth failure; should only happen on the second attempt - fault = new SwiftAuthenticationFailedException( - "Operation not authorized- current access token =" + getToken(), - req.getMethod(), - uri, - resp); - break; - - case SwiftProtocolConstants.SC_TOO_MANY_REQUESTS_429: - case SwiftProtocolConstants.SC_THROTTLED_498: - //response code that may mean the client is being throttled - fault = new SwiftThrottledRequestException( - "Client is being throttled: too many requests", - req.getMethod(), - uri, - resp); - break; - - default: - //return a generic invalid HTTP response - fault = new SwiftInvalidResponseException( - errorMessage, - req.getMethod(), - uri, - resp); - } - - return fault; - } - - /** - * Exec a GET request and return the input stream of the response. - * - * @param uri URI to GET - * @param requestHeaders request headers - * @return the input stream. This must be closed to avoid log errors - * @throws IOException - */ - private HttpBodyContent doGet(final URI uri, final Header... 
requestHeaders) throws IOException { - return perform("", uri, new GetRequestProcessor() { - @Override - public HttpBodyContent extractResult(HttpGet req, HttpResponse resp) - throws IOException { - return new HttpBodyContent( - new HttpInputStreamWithRelease(uri, req, resp), - HttpResponseUtils.getContentLength(resp)); - } - - @Override - protected void setup(HttpGet req) throws - SwiftInternalStateException { - setHeaders(req, requestHeaders); - } - }); - } - - /** - * Create an instance against a specific FS URI. - * - * @param filesystemURI filesystem to bond to - * @param config source of configuration data - * @return REST client instance - * @throws IOException on instantiation problems - */ - public static SwiftRestClient getInstance(URI filesystemURI, - Configuration config) throws IOException { - return new SwiftRestClient(filesystemURI, config); - } - - - /** - * Converts Swift path to URI to make request. - * This is public for unit testing - * - * @param path path to object - * @param endpointURI domain url e.g. http://domain.com - * @return valid URI for object - * @throws SwiftException - */ - public static URI pathToURI(SwiftObjectPath path, - URI endpointURI) throws SwiftException { - checkNotNull(endpointURI, "Null Endpoint -client is not authenticated"); - - String dataLocationURI = endpointURI.toString(); - try { - - dataLocationURI = SwiftUtils.joinPaths(dataLocationURI, encodeUrl(path.toUriPath())); - return new URI(dataLocationURI); - } catch (URISyntaxException e) { - throw new SwiftException("Failed to create URI from " + dataLocationURI, e); - } - } - - /** - * Encode the URL. This extends {@link URLEncoder#encode(String, String)} - * with a replacement of + with %20. 
- * @param url URL string - * @return an encoded string - * @throws SwiftException if the URL cannot be encoded - */ - private static String encodeUrl(String url) throws SwiftException { - if (url.matches(".*\\s+.*")) { - try { - url = URLEncoder.encode(url, "UTF-8"); - url = url.replace("+", "%20"); - } catch (UnsupportedEncodingException e) { - throw new SwiftException("failed to encode URI", e); - } - } - - return url; - } - - /** - * Convert a swift path to a URI relative to the current endpoint. - * - * @param path path - * @return an path off the current endpoint URI. - * @throws SwiftException - */ - private URI pathToURI(SwiftObjectPath path) throws SwiftException { - return pathToURI(path, getEndpointURI()); - } - - /** - * Add the headers to the method, and the auth token (which must be set). - * @param method method to update - * @param requestHeaders the list of headers - * @throws SwiftInternalStateException not yet authenticated - */ - private void setHeaders(HttpUriRequest method, Header[] requestHeaders) - throws SwiftInternalStateException { - for (Header header : requestHeaders) { - method.addHeader(header); - } - setAuthToken(method, getToken()); - } - - - /** - * Set the auth key header of the method to the token ID supplied. - * - * @param method method - * @param accessToken access token - * @throws SwiftInternalStateException if the client is not yet authenticated - */ - private void setAuthToken(HttpUriRequest method, AccessToken accessToken) - throws SwiftInternalStateException { - checkNotNull(accessToken,"Not authenticated"); - method.addHeader(HEADER_AUTH_KEY, accessToken.getId()); - } - - /** - * Execute a method in a new HttpClient instance. If the auth failed, - * authenticate then retry the method. 
- * - * @param req request to exec - * @param client client to use - * @param Request type - * @return the status code - * @throws IOException on any failure - */ - private HttpResponse exec(HttpClient client, M req) - throws IOException { - HttpResponse resp = execWithDebugOutput(req, client); - int statusCode = resp.getStatusLine().getStatusCode(); - if ((statusCode == HttpStatus.SC_UNAUTHORIZED - || statusCode == HttpStatus.SC_BAD_REQUEST) - && req instanceof AuthPostRequest - && !useKeystoneAuthentication) { - if (LOG.isDebugEnabled()) { - LOG.debug("Operation failed with status " + statusCode - + " attempting keystone auth"); - } - //if rackspace key authentication failed - try custom Keystone authentication - useKeystoneAuthentication = true; - final AuthPostRequest authentication = (AuthPostRequest) req; - //replace rackspace auth with keystone one - authentication.setEntity(getAuthenticationRequst(keystoneAuthRequest)); - resp = execWithDebugOutput(req, client); - } - - if (statusCode == HttpStatus.SC_UNAUTHORIZED ) { - //unauthed -or the auth uri rejected it. - - if (req instanceof AuthPostRequest) { - //unauth response from the AUTH URI itself. - throw new SwiftAuthenticationFailedException(authRequest.toString(), - "auth", - authUri, - resp); - } - //any other URL: try again - if (LOG.isDebugEnabled()) { - LOG.debug("Reauthenticating"); - } - //re-auth, this may recurse into the same dir - authenticate(); - if (LOG.isDebugEnabled()) { - LOG.debug("Retrying original request"); - } - resp = execWithDebugOutput(req, client); - } - return resp; - } - - /** - * Execute the request with the request and response logged at debug level. - * @param req request to execute - * @param client client to use - * @param method type - * @return the status code - * @throws IOException any failure reported by the HTTP client. 
- */ - private HttpResponse execWithDebugOutput(M req, - HttpClient client) throws IOException { - if (LOG.isDebugEnabled()) { - StringBuilder builder = new StringBuilder( - req.getMethod() + " " + req.getURI() + "\n"); - for (Header header : req.getAllHeaders()) { - builder.append(header.toString()); - } - LOG.debug(builder.toString()); - } - HttpResponse resp = client.execute(req); - if (LOG.isDebugEnabled()) { - LOG.debug("Status code = " + resp.getStatusLine().getStatusCode()); - } - return resp; - } - - /** - * Ensures that an object reference passed as a parameter to the calling - * method is not null. - * - * @param reference an object reference - * @return the non-null reference that was validated - * @throws NullPointerException if {@code reference} is null - */ - private static T checkNotNull(T reference) throws - SwiftInternalStateException { - return checkNotNull(reference, "Null Reference"); - } - - private static T checkNotNull(T reference, String message) throws - SwiftInternalStateException { - if (reference == null) { - throw new SwiftInternalStateException(message); - } - return reference; - } - - /** - * Check for a status code being expected -takes a list of expected values - * - * @param status received status - * @param expected expected value - * @return true if status is an element of [expected] - */ - private boolean isStatusCodeExpected(int status, int... 
expected) { - for (int code : expected) { - if (status == code) { - return true; - } - } - return false; - } - - - @Override - public String toString() { - return "Swift client: " + serviceDescription; - } - - /** - * Get the region which this client is bound to - * @return the region - */ - public String getRegion() { - return region; - } - - /** - * Get the tenant to which this client is bound - * @return the tenant - */ - public String getTenant() { - return tenant; - } - - /** - * Get the username this client identifies itself as - * @return the username - */ - public String getUsername() { - return username; - } - - /** - * Get the container to which this client is bound - * @return the container - */ - public String getContainer() { - return container; - } - - /** - * Is this client bound to a location aware Swift blobstore - * -that is, can you query for the location of partitions? - * @return true iff the location of multipart file uploads - * can be determined. - */ - public boolean isLocationAware() { - return locationAware; - } - - /** - * Get the blocksize of this filesystem - * @return a blocksize > 0 - */ - public long getBlocksizeKB() { - return blocksizeKB; - } - - /** - * Get the partition size in KB. - * @return the partition size - */ - public int getPartSizeKB() { - return partSizeKB; - } - - /** - * Get the buffer size in KB. - * @return the buffer size wanted for reads - */ - public int getBufferSizeKB() { - return bufferSizeKB; - } - - public int getProxyPort() { - return proxyPort; - } - - public String getProxyHost() { - return proxyHost; - } - - public int getRetryCount() { - return retryCount; - } - - public int getConnectTimeout() { - return connectTimeout; - } - - public boolean isUsePublicURL() { - return usePublicURL; - } - - public int getThrottleDelay() { - return throttleDelay; - } - - /** - * Get the current operation statistics. 
- * @return a snapshot of the statistics - */ - - public List getOperationStatistics() { - return durationStats.getDurationStatistics(); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/package.html b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/package.html deleted file mode 100644 index ad900f90d06f0..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/package.html +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - Swift Filesystem Client for Apache Hadoop - - - -

    - Swift Filesystem Client for Apache Hadoop -

    - -

    Introduction

    - -
    This package provides support in Apache Hadoop for the OpenStack Swift - Key-Value store, allowing client applications -including MR Jobs- to - read and write data in Swift. -
    - -
    Design Goals
    -
      -
    1. Give clients access to SwiftFS files, similar to S3n:
    2. -
    3. maybe: support a Swift Block store -- at least until Swift's - support for >5GB files has stabilized. -
    4. -
    5. Support for data-locality if the Swift FS provides file location information
    6. -
    7. Support access to multiple Swift filesystems in the same client/task.
    8. -
    9. Authenticate using the Keystone APIs.
    10. -
    11. Avoid dependency on unmaintained libraries.
    12. -
    - - -

    Supporting multiple Swift Filesystems

    - -The goal of supporting multiple swift filesystems simultaneously changes how -clusters are named and authenticated. In Hadoop's S3 and S3N filesystems, the "bucket" into -which objects are stored is directly named in the URL, such as -s3n://bucket/object1. The Hadoop configuration contains a -single set of login credentials for S3 (username and key), which are used to -authenticate the HTTP operations. - -For swift, we need to know not only which "container" name, but which credentials -to use to authenticate with it -and which URL to use for authentication. - -This has led to a different design pattern from S3, as instead of simple bucket names, -the hostname of an S3 container is two-level, the name of the service provider -being the second path: swift://bucket.service/ - -The service portion of this domain name is used as a reference into -the client settings -and so identify the service provider of that container. - - -

    Testing

    - -
    - The client code can be tested against public or private Swift instances; the - public services are (at the time of writing -January 2013-), Rackspace and - HP Cloud. Testing against both instances is how interoperability - can be verified. -
    - - - diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/StrictBufferedFSInputStream.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/StrictBufferedFSInputStream.java deleted file mode 100644 index 794219f31a4a6..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/StrictBufferedFSInputStream.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.snative; - -import org.apache.hadoop.fs.BufferedFSInputStream; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.swift.exceptions.SwiftConnectionClosedException; - -import java.io.EOFException; -import java.io.IOException; - -/** - * Add stricter compliance with the evolving FS specifications - */ -public class StrictBufferedFSInputStream extends BufferedFSInputStream { - - public StrictBufferedFSInputStream(FSInputStream in, - int size) { - super(in, size); - } - - @Override - public void seek(long pos) throws IOException { - if (pos < 0) { - throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); - } - if (in == null) { - throw new SwiftConnectionClosedException(FSExceptionMessages.STREAM_IS_CLOSED); - } - super.seek(pos); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftFileStatus.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftFileStatus.java deleted file mode 100644 index 725cae1e3b8e3..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftFileStatus.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.snative; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; - -/** - * A subclass of {@link FileStatus} that contains the - * Swift-specific rules of when a file is considered to be a directory. - */ -public class SwiftFileStatus extends FileStatus { - - public SwiftFileStatus() { - } - - public SwiftFileStatus(long length, - boolean isdir, - int block_replication, - long blocksize, long modification_time, Path path) { - super(length, isdir, block_replication, blocksize, modification_time, path); - } - - public SwiftFileStatus(long length, - boolean isdir, - int block_replication, - long blocksize, - long modification_time, - long access_time, - FsPermission permission, - String owner, String group, Path path) { - super(length, isdir, block_replication, blocksize, modification_time, - access_time, permission, owner, group, path); - } - - //HDFS2+ only - - public SwiftFileStatus(long length, - boolean isdir, - int block_replication, - long blocksize, - long modification_time, - long access_time, - FsPermission permission, - String owner, String group, Path symlink, Path path) { - super(length, isdir, block_replication, blocksize, modification_time, - access_time, permission, owner, group, symlink, path); - } - - /** - * Declare that the path represents a directory, which in the - * SwiftNativeFileSystem means "is a directory or a 0 byte file" - * - * @return true if the status is considered to be a file - */ - @Override - public boolean isDirectory() { - return super.isDirectory() || getLen() == 0; - } - - /** - * A entry is a file if it is not a directory. - * By implementing it and not marking as an override this - * subclass builds and runs in both Hadoop versions. 
- * @return the opposite value to {@link #isDirectory()} - */ - @Override - public boolean isFile() { - return !this.isDirectory(); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(getClass().getSimpleName()); - sb.append("{ "); - sb.append("path=").append(getPath()); - sb.append("; isDirectory=").append(isDirectory()); - sb.append("; length=").append(getLen()); - sb.append("; blocksize=").append(getBlockSize()); - sb.append("; modification_time=").append(getModificationTime()); - sb.append("}"); - return sb.toString(); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java deleted file mode 100644 index 560eadd930917..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java +++ /dev/null @@ -1,761 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.snative; - -import org.apache.hadoop.security.UserGroupInformation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.CreateFlag; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileAlreadyExistsException; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.ParentNotDirectoryException; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; -import org.apache.hadoop.fs.swift.exceptions.SwiftOperationFailedException; -import org.apache.hadoop.fs.swift.exceptions.SwiftUnsupportedFeatureException; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.util.DurationStats; -import org.apache.hadoop.fs.swift.util.SwiftObjectPath; -import org.apache.hadoop.fs.swift.util.SwiftUtils; -import org.apache.hadoop.util.Progressable; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.List; - -/** - * Swift file system implementation. 
Extends Hadoop FileSystem - */ -public class SwiftNativeFileSystem extends FileSystem { - - /** filesystem prefix: {@value} */ - public static final String SWIFT = "swift"; - private static final Logger LOG = - LoggerFactory.getLogger(SwiftNativeFileSystem.class); - - /** - * path to user work directory for storing temporary files - */ - private Path workingDir; - - /** - * Swift URI - */ - private URI uri; - - /** - * reference to swiftFileSystemStore - */ - private SwiftNativeFileSystemStore store; - - /** - * Default constructor for Hadoop - */ - public SwiftNativeFileSystem() { - // set client in initialize() - } - - /** - * This constructor used for testing purposes - */ - public SwiftNativeFileSystem(SwiftNativeFileSystemStore store) { - this.store = store; - } - - /** - * This is for testing - * @return the inner store class - */ - public SwiftNativeFileSystemStore getStore() { - return store; - } - - @Override - public String getScheme() { - return SWIFT; - } - - /** - * default class initialization. - * - * @param fsuri path to Swift - * @param conf Hadoop configuration - * @throws IOException - */ - @Override - public void initialize(URI fsuri, Configuration conf) throws IOException { - super.initialize(fsuri, conf); - - setConf(conf); - if (store == null) { - store = new SwiftNativeFileSystemStore(); - } - this.uri = fsuri; - String username; - try { - username = UserGroupInformation.getCurrentUser().getShortUserName(); - } catch (IOException ex) { - LOG.warn("Unable to get user name. 
Fall back to system property " + - "user.name", ex); - username = System.getProperty("user.name"); - } - this.workingDir = new Path("/user", username) - .makeQualified(uri, new Path(username)); - if (LOG.isDebugEnabled()) { - LOG.debug("Initializing SwiftNativeFileSystem against URI " + uri - + " and working dir " + workingDir); - } - store.initialize(uri, conf); - LOG.debug("SwiftFileSystem initialized"); - } - - /** - * @return path to Swift - */ - @Override - public URI getUri() { - - return uri; - } - - @Override - public String toString() { - return "Swift FileSystem " + store; - } - - /** - * Path to user working directory - * - * @return Hadoop path - */ - @Override - public Path getWorkingDirectory() { - return workingDir; - } - - /** - * @param dir user working directory - */ - @Override - public void setWorkingDirectory(Path dir) { - workingDir = makeAbsolute(dir); - if (LOG.isDebugEnabled()) { - LOG.debug("SwiftFileSystem.setWorkingDirectory to " + dir); - } - } - - /** - * Return a file status object that represents the path. - * - * @param path The path we want information from - * @return a FileStatus object - */ - @Override - public FileStatus getFileStatus(Path path) throws IOException { - Path absolutePath = makeAbsolute(path); - return store.getObjectMetadata(absolutePath); - } - - /** - * The blocksize of this filesystem is set by the property - * SwiftProtocolConstants.SWIFT_BLOCKSIZE;the default is the value of - * SwiftProtocolConstants.DEFAULT_SWIFT_BLOCKSIZE; - * @return the blocksize for this FS. - */ - @Override - public long getDefaultBlockSize() { - return store.getBlocksize(); - } - - /** - * The blocksize for this filesystem. 
- * @see #getDefaultBlockSize() - * @param f path of file - * @return the blocksize for the path - */ - @Override - public long getDefaultBlockSize(Path f) { - return store.getBlocksize(); - } - - @Override - public long getBlockSize(Path path) throws IOException { - return store.getBlocksize(); - } - - @Override - @SuppressWarnings("deprecation") - public boolean isFile(Path f) throws IOException { - try { - FileStatus fileStatus = getFileStatus(f); - return !SwiftUtils.isDirectory(fileStatus); - } catch (FileNotFoundException e) { - return false; // f does not exist - } - } - - @SuppressWarnings("deprecation") - @Override - public boolean isDirectory(Path f) throws IOException { - - try { - FileStatus fileStatus = getFileStatus(f); - return SwiftUtils.isDirectory(fileStatus); - } catch (FileNotFoundException e) { - return false; // f does not exist - } - } - - /** - * Override getCononicalServiceName because we don't support token in Swift - */ - @Override - public String getCanonicalServiceName() { - // Does not support Token - return null; - } - - /** - * Return an array containing hostnames, offset and size of - * portions of the given file. For a nonexistent - * file or regions, null will be returned. - *

    - * This call is most helpful with DFS, where it returns - * hostnames of machines that contain the given file. - *

    - * The FileSystem will simply return an elt containing 'localhost'. - */ - @Override - public BlockLocation[] getFileBlockLocations(FileStatus file, - long start, - long len) throws IOException { - //argument checks - if (file == null) { - return null; - } - - if (start < 0 || len < 0) { - throw new IllegalArgumentException("Negative start or len parameter" + - " to getFileBlockLocations"); - } - if (file.getLen() <= start) { - return new BlockLocation[0]; - } - - // Check if requested file in Swift is more than 5Gb. In this case - // each block has its own location -which may be determinable - // from the Swift client API, depending on the remote server - final FileStatus[] listOfFileBlocks = store.listSubPaths(file.getPath(), - false, - true); - List locations = new ArrayList(); - if (listOfFileBlocks.length > 1) { - for (FileStatus fileStatus : listOfFileBlocks) { - if (SwiftObjectPath.fromPath(uri, fileStatus.getPath()) - .equals(SwiftObjectPath.fromPath(uri, file.getPath()))) { - continue; - } - locations.addAll(store.getObjectLocation(fileStatus.getPath())); - } - } else { - locations = store.getObjectLocation(file.getPath()); - } - - if (locations.isEmpty()) { - LOG.debug("No locations returned for " + file.getPath()); - //no locations were returned for the object - //fall back to the superclass - - String[] name = {SwiftProtocolConstants.BLOCK_LOCATION}; - String[] host = { "localhost" }; - String[] topology={SwiftProtocolConstants.TOPOLOGY_PATH}; - return new BlockLocation[] { - new BlockLocation(name, host, topology,0, file.getLen()) - }; - } - - final String[] names = new String[locations.size()]; - final String[] hosts = new String[locations.size()]; - int i = 0; - for (URI location : locations) { - hosts[i] = location.getHost(); - names[i] = location.getAuthority(); - i++; - } - return new BlockLocation[]{ - new BlockLocation(names, hosts, 0, file.getLen()) - }; - } - - /** - * Create the parent directories. 
- * As an optimization, the entire hierarchy of parent - * directories is Not polled. Instead - * the tree is walked up from the last to the first, - * creating directories until one that exists is found. - * - * This strategy means if a file is created in an existing directory, - * one quick poll suffices. - * - * There is a big assumption here: that all parent directories of an existing - * directory also exists. - * @param path path to create. - * @param permission to apply to files - * @return true if the operation was successful - * @throws IOException on a problem - */ - @Override - public boolean mkdirs(Path path, FsPermission permission) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("SwiftFileSystem.mkdirs: " + path); - } - Path directory = makeAbsolute(path); - - //build a list of paths to create - List paths = new ArrayList(); - while (shouldCreate(directory)) { - //this directory needs creation, add to the list - paths.add(0, directory); - //now see if the parent needs to be created - directory = directory.getParent(); - } - - //go through the list of directories to create - for (Path p : paths) { - if (isNotRoot(p)) { - //perform a mkdir operation without any polling of - //the far end first - forceMkdir(p); - } - } - - //if an exception was not thrown, this operation is considered - //a success - return true; - } - - private boolean isNotRoot(Path absolutePath) { - return !isRoot(absolutePath); - } - - private boolean isRoot(Path absolutePath) { - return absolutePath.getParent() == null; - } - - /** - * internal implementation of directory creation. 
- * - * @param path path to file - * @return boolean file is created; false: no need to create - * @throws IOException if specified path is file instead of directory - */ - private boolean mkdir(Path path) throws IOException { - Path directory = makeAbsolute(path); - boolean shouldCreate = shouldCreate(directory); - if (shouldCreate) { - forceMkdir(directory); - } - return shouldCreate; - } - - /** - * Should mkdir create this directory? - * If the directory is root : false - * If the entry exists and is a directory: false - * If the entry exists and is a file: exception - * else: true - * @param directory path to query - * @return true iff the directory should be created - * @throws IOException IO problems - * @throws ParentNotDirectoryException if the path references a file - */ - private boolean shouldCreate(Path directory) throws IOException { - FileStatus fileStatus; - boolean shouldCreate; - if (isRoot(directory)) { - //its the base dir, bail out immediately - return false; - } - try { - //find out about the path - fileStatus = getFileStatus(directory); - - if (!SwiftUtils.isDirectory(fileStatus)) { - //if it's a file, raise an error - throw new ParentNotDirectoryException( - String.format("%s: can't mkdir since it exists and is not a directory: %s", - directory, fileStatus)); - } else { - //path exists, and it is a directory - if (LOG.isDebugEnabled()) { - LOG.debug("skipping mkdir(" + directory + ") as it exists already"); - } - shouldCreate = false; - } - } catch (FileNotFoundException e) { - shouldCreate = true; - } - return shouldCreate; - } - - /** - * mkdir of a directory -irrespective of what was there underneath. - * There are no checks for the directory existing, there not - * being a path there, etc. etc. 
Those are assumed to have - * taken place already - * @param absolutePath path to create - * @throws IOException IO problems - */ - private void forceMkdir(Path absolutePath) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Making dir '" + absolutePath + "' in Swift"); - } - //file is not found: it must be created - store.createDirectory(absolutePath); - } - - /** - * List the statuses of the files/directories in the given path if the path is - * a directory. - * - * @param path given path - * @return the statuses of the files/directories in the given path - * @throws IOException - */ - @Override - public FileStatus[] listStatus(Path path) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("SwiftFileSystem.listStatus for: " + path); - } - return store.listSubPaths(makeAbsolute(path), false, true); - } - - /** - * This optional operation is not supported - */ - @Override - public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) - throws IOException { - LOG.debug("SwiftFileSystem.append"); - throw new SwiftUnsupportedFeatureException("Not supported: append()"); - } - - /** - * @param permission Currently ignored. - */ - @Override - public FSDataOutputStream create(Path file, FsPermission permission, - boolean overwrite, int bufferSize, - short replication, long blockSize, - Progressable progress) - throws IOException { - LOG.debug("SwiftFileSystem.create"); - - FileStatus fileStatus = null; - Path absolutePath = makeAbsolute(file); - try { - fileStatus = getFileStatus(absolutePath); - } catch (FileNotFoundException e) { - //the file isn't there. - } - - if (fileStatus != null) { - //the path exists -action depends on whether or not it is a directory, - //and what the overwrite policy is. 
- - //What is clear at this point is that if the entry exists, there's - //no need to bother creating any parent entries - if (fileStatus.isDirectory()) { - //here someone is trying to create a file over a directory - -/* we can't throw an exception here as there is no easy way to distinguish - a file from the dir - - throw new SwiftPathExistsException("Cannot create a file over a directory:" - + file); - */ - if (LOG.isDebugEnabled()) { - LOG.debug("Overwriting either an empty file or a directory"); - } - } - if (overwrite) { - //overwrite set -> delete the object. - store.delete(absolutePath, true); - } else { - throw new FileAlreadyExistsException("Path exists: " + file); - } - } else { - // destination does not exist -trigger creation of the parent - Path parent = file.getParent(); - if (parent != null) { - if (!mkdirs(parent)) { - throw new SwiftOperationFailedException( - "Mkdirs failed to create " + parent); - } - } - } - - SwiftNativeOutputStream out = createSwiftOutputStream(file); - return new FSDataOutputStream(out, statistics); - } - - /** - * Create the swift output stream - * @param path path to write to - * @return the new file - * @throws IOException - */ - protected SwiftNativeOutputStream createSwiftOutputStream(Path path) throws - IOException { - long partSizeKB = getStore().getPartsizeKB(); - return new SwiftNativeOutputStream(getConf(), - getStore(), - path.toUri().toString(), - partSizeKB); - } - - /** - * Opens an FSDataInputStream at the indicated Path. - * - * @param path the file name to open - * @param bufferSize the size of the buffer to be used. 
- * @return the input stream - * @throws FileNotFoundException if the file is not found - * @throws IOException any IO problem - */ - @Override - public FSDataInputStream open(Path path, int bufferSize) throws IOException { - int bufferSizeKB = getStore().getBufferSizeKB(); - long readBlockSize = bufferSizeKB * 1024L; - return open(path, bufferSize, readBlockSize); - } - - /** - * Low-level operation to also set the block size for this operation - * @param path the file name to open - * @param bufferSize the size of the buffer to be used. - * @param readBlockSize how big should the read block/buffer size be? - * @return the input stream - * @throws FileNotFoundException if the file is not found - * @throws IOException any IO problem - */ - public FSDataInputStream open(Path path, - int bufferSize, - long readBlockSize) throws IOException { - if (readBlockSize <= 0) { - throw new SwiftConfigurationException("Bad remote buffer size"); - } - Path absolutePath = makeAbsolute(path); - return new FSDataInputStream( - new StrictBufferedFSInputStream( - new SwiftNativeInputStream(store, - statistics, - absolutePath, - readBlockSize), - bufferSize)); - } - - /** - * Renames Path src to Path dst. On swift this uses copy-and-delete - * and is not atomic. - * - * @param src path - * @param dst path - * @return true if directory renamed, false otherwise - * @throws IOException on problems - */ - @Override - public boolean rename(Path src, Path dst) throws IOException { - - try { - store.rename(makeAbsolute(src), makeAbsolute(dst)); - //success - return true; - } catch (SwiftOperationFailedException - | FileAlreadyExistsException - | FileNotFoundException - | ParentNotDirectoryException e) { - //downgrade to a failure - LOG.debug("rename({}, {}) failed",src, dst, e); - return false; - } - } - - - /** - * Delete a file or directory - * - * @param path the path to delete. 
- * @param recursive if path is a directory and set to - * true, the directory is deleted else throws an exception if the - * directory is not empty - * case of a file the recursive can be set to either true or false. - * @return true if the object was deleted - * @throws IOException IO problems - */ - @Override - public boolean delete(Path path, boolean recursive) throws IOException { - try { - return store.delete(path, recursive); - } catch (FileNotFoundException e) { - //base path was not found. - return false; - } - } - - /** - * Delete a file. - * This method is abstract in Hadoop 1.x; in 2.x+ it is non-abstract - * and deprecated - */ - @Override - public boolean delete(Path f) throws IOException { - return delete(f, true); - } - - /** - * Makes path absolute - * - * @param path path to file - * @return absolute path - */ - protected Path makeAbsolute(Path path) { - if (path.isAbsolute()) { - return path; - } - return new Path(workingDir, path); - } - - /** - * Get the current operation statistics - * @return a snapshot of the statistics - */ - public List getOperationStatistics() { - return store.getOperationStatistics(); - } - - /** - * Low level method to do a deep listing of all entries, not stopping - * at the next directory entry. This is to let tests be confident that - * recursive deletes really are working. - * @param path path to recurse down - * @param newest ask for the newest data, potentially slower than not. 
- * @return a potentially empty array of file status - * @throws IOException any problem - */ - @InterfaceAudience.Private - public FileStatus[] listRawFileStatus(Path path, boolean newest) throws IOException { - return store.listSubPaths(makeAbsolute(path), true, newest); - } - - /** - * Get the number of partitions written by an output stream - * This is for testing - * @param outputStream output stream - * @return the #of partitions written by that stream - */ - @InterfaceAudience.Private - public static int getPartitionsWritten(FSDataOutputStream outputStream) { - SwiftNativeOutputStream snos = getSwiftNativeOutputStream(outputStream); - return snos.getPartitionsWritten(); - } - - private static SwiftNativeOutputStream getSwiftNativeOutputStream( - FSDataOutputStream outputStream) { - OutputStream wrappedStream = outputStream.getWrappedStream(); - return (SwiftNativeOutputStream) wrappedStream; - } - - /** - * Get the size of partitions written by an output stream - * This is for testing - * - * @param outputStream output stream - * @return partition size in bytes - */ - @InterfaceAudience.Private - public static long getPartitionSize(FSDataOutputStream outputStream) { - SwiftNativeOutputStream snos = getSwiftNativeOutputStream(outputStream); - return snos.getFilePartSize(); - } - - /** - * Get the the number of bytes written to an output stream - * This is for testing - * - * @param outputStream output stream - * @return partition size in bytes - */ - @InterfaceAudience.Private - public static long getBytesWritten(FSDataOutputStream outputStream) { - SwiftNativeOutputStream snos = getSwiftNativeOutputStream(outputStream); - return snos.getBytesWritten(); - } - - /** - * Get the the number of bytes uploaded by an output stream - * to the swift cluster. 
- * This is for testing - * - * @param outputStream output stream - * @return partition size in bytes - */ - @InterfaceAudience.Private - public static long getBytesUploaded(FSDataOutputStream outputStream) { - SwiftNativeOutputStream snos = getSwiftNativeOutputStream(outputStream); - return snos.getBytesUploaded(); - } - - /** - * {@inheritDoc} - * @throws FileNotFoundException if the parent directory is not present -or - * is not a directory. - */ - @Override - public FSDataOutputStream createNonRecursive(Path path, - FsPermission permission, - EnumSet flags, - int bufferSize, - short replication, - long blockSize, - Progressable progress) throws IOException { - Path parent = path.getParent(); - if (parent != null) { - // expect this to raise an exception if there is no parent - if (!getFileStatus(parent).isDirectory()) { - throw new FileAlreadyExistsException("Not a directory: " + parent); - } - } - return create(path, permission, - flags.contains(CreateFlag.OVERWRITE), bufferSize, - replication, blockSize, progress); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java deleted file mode 100644 index 5e4800900920a..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java +++ /dev/null @@ -1,986 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.swift.snative; - -import com.fasterxml.jackson.databind.type.CollectionType; - -import org.apache.http.Header; -import org.apache.http.HttpStatus; -import org.apache.http.message.BasicHeader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileAlreadyExistsException; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.ParentNotDirectoryException; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; -import org.apache.hadoop.fs.swift.exceptions.SwiftException; -import org.apache.hadoop.fs.swift.exceptions.SwiftInvalidResponseException; -import org.apache.hadoop.fs.swift.exceptions.SwiftOperationFailedException; -import org.apache.hadoop.fs.swift.http.HttpBodyContent; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.http.SwiftRestClient; -import org.apache.hadoop.fs.swift.util.DurationStats; -import org.apache.hadoop.fs.swift.util.JSONUtil; -import org.apache.hadoop.fs.swift.util.SwiftObjectPath; -import org.apache.hadoop.fs.swift.util.SwiftUtils; - -import java.io.ByteArrayInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.InterruptedIOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.Charset; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; 
-import java.util.Collection; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * File system store implementation. - * Makes REST requests, parses data from responses - */ -public class SwiftNativeFileSystemStore { - private static final Pattern URI_PATTERN = Pattern.compile("\"\\S+?\""); - private static final String PATTERN = "EEE, d MMM yyyy hh:mm:ss zzz"; - private static final Logger LOG = - LoggerFactory.getLogger(SwiftNativeFileSystemStore.class); - private URI uri; - private SwiftRestClient swiftRestClient; - - /** - * Initalize the filesystem store -this creates the REST client binding. - * - * @param fsURI URI of the filesystem, which is used to map to the filesystem-specific - * options in the configuration file - * @param configuration configuration - * @throws IOException on any failure. - */ - public void initialize(URI fsURI, Configuration configuration) throws IOException { - this.uri = fsURI; - this.swiftRestClient = SwiftRestClient.getInstance(fsURI, configuration); - } - - @Override - public String toString() { - return "SwiftNativeFileSystemStore with " - + swiftRestClient; - } - - /** - * Get the default blocksize of this (bound) filesystem - * @return the blocksize returned for all FileStatus queries, - * which is used by the MapReduce splitter. - */ - public long getBlocksize() { - return 1024L * swiftRestClient.getBlocksizeKB(); - } - - public long getPartsizeKB() { - return swiftRestClient.getPartSizeKB(); - } - - public int getBufferSizeKB() { - return swiftRestClient.getBufferSizeKB(); - } - - public int getThrottleDelay() { - return swiftRestClient.getThrottleDelay(); - } - /** - * Upload a file/input stream of a specific length. - * - * @param path destination path in the swift filesystem - * @param inputStream input data. 
This is closed afterwards, always - * @param length length of the data - * @throws IOException on a problem - */ - public void uploadFile(Path path, InputStream inputStream, long length) - throws IOException { - swiftRestClient.upload(toObjectPath(path), inputStream, length); - } - - /** - * Upload part of a larger file. - * - * @param path destination path - * @param partNumber item number in the path - * @param inputStream input data - * @param length length of the data - * @throws IOException on a problem - */ - public void uploadFilePart(Path path, int partNumber, - InputStream inputStream, long length) - throws IOException { - - String stringPath = path.toUri().toString(); - String partitionFilename = SwiftUtils.partitionFilenameFromNumber( - partNumber); - if (stringPath.endsWith("/")) { - stringPath = stringPath.concat(partitionFilename); - } else { - stringPath = stringPath.concat("/").concat(partitionFilename); - } - - swiftRestClient.upload( - new SwiftObjectPath(toDirPath(path).getContainer(), stringPath), - inputStream, - length); - } - - /** - * Tell the Swift server to expect a multi-part upload by submitting - * a 0-byte file with the X-Object-Manifest header - * - * @param path path of final final - * @throws IOException - */ - public void createManifestForPartUpload(Path path) throws IOException { - String pathString = toObjectPath(path).toString(); - if (!pathString.endsWith("/")) { - pathString = pathString.concat("/"); - } - if (pathString.startsWith("/")) { - pathString = pathString.substring(1); - } - - swiftRestClient.upload(toObjectPath(path), - new ByteArrayInputStream(new byte[0]), - 0, - new BasicHeader(SwiftProtocolConstants.X_OBJECT_MANIFEST, pathString)); - } - - /** - * Get the metadata of an object - * - * @param path path - * @return file metadata. -or null if no headers were received back from the server. 
- * @throws IOException on a problem - * @throws FileNotFoundException if there is nothing at the end - */ - public SwiftFileStatus getObjectMetadata(Path path) throws IOException { - return getObjectMetadata(path, true); - } - - /** - * Get the HTTP headers, in case you really need the low-level - * metadata - * @param path path to probe - * @param newest newest or oldest? - * @return the header list - * @throws IOException IO problem - * @throws FileNotFoundException if there is nothing at the end - */ - public Header[] getObjectHeaders(Path path, boolean newest) - throws IOException, FileNotFoundException { - SwiftObjectPath objectPath = toObjectPath(path); - return stat(objectPath, newest); - } - - /** - * Get the metadata of an object - * - * @param path path - * @param newest flag to say "set the newest header", otherwise take any entry - * @return file metadata. -or null if no headers were received back from the server. - * @throws IOException on a problem - * @throws FileNotFoundException if there is nothing at the end - */ - public SwiftFileStatus getObjectMetadata(Path path, boolean newest) - throws IOException, FileNotFoundException { - - SwiftObjectPath objectPath = toObjectPath(path); - final Header[] headers = stat(objectPath, newest); - //no headers is treated as a missing file - if (headers.length == 0) { - throw new FileNotFoundException("Not Found " + path.toUri()); - } - - boolean isDir = false; - long length = 0; - long lastModified = 0 ; - for (Header header : headers) { - String headerName = header.getName(); - if (headerName.equals(SwiftProtocolConstants.X_CONTAINER_OBJECT_COUNT) || - headerName.equals(SwiftProtocolConstants.X_CONTAINER_BYTES_USED)) { - length = 0; - isDir = true; - } - if (SwiftProtocolConstants.HEADER_CONTENT_LENGTH.equals(headerName)) { - length = Long.parseLong(header.getValue()); - } - if (SwiftProtocolConstants.HEADER_LAST_MODIFIED.equals(headerName)) { - final SimpleDateFormat simpleDateFormat = new 
SimpleDateFormat(PATTERN); - try { - lastModified = simpleDateFormat.parse(header.getValue()).getTime(); - } catch (ParseException e) { - throw new SwiftException("Failed to parse " + header.toString(), e); - } - } - } - if (lastModified == 0) { - lastModified = System.currentTimeMillis(); - } - - Path correctSwiftPath = getCorrectSwiftPath(path); - return new SwiftFileStatus(length, - isDir, - 1, - getBlocksize(), - lastModified, - correctSwiftPath); - } - - private Header[] stat(SwiftObjectPath objectPath, boolean newest) throws - IOException { - Header[] headers; - if (newest) { - headers = swiftRestClient.headRequest("getObjectMetadata-newest", - objectPath, SwiftRestClient.NEWEST); - } else { - headers = swiftRestClient.headRequest("getObjectMetadata", - objectPath); - } - return headers; - } - - /** - * Get the object as an input stream - * - * @param path object path - * @return the input stream -this must be closed to terminate the connection - * @throws IOException IO problems - * @throws FileNotFoundException path doesn't resolve to an object - */ - public HttpBodyContent getObject(Path path) throws IOException { - return swiftRestClient.getData(toObjectPath(path), - SwiftRestClient.NEWEST); - } - - /** - * Get the input stream starting from a specific point. - * - * @param path path to object - * @param byteRangeStart starting point - * @param length no. of bytes - * @return an input stream that must be closed - * @throws IOException IO problems - */ - public HttpBodyContent getObject(Path path, long byteRangeStart, long length) - throws IOException { - return swiftRestClient.getData( - toObjectPath(path), byteRangeStart, length); - } - - /** - * List a directory. - * This is O(n) for the number of objects in this path. 
- * - * - * - * @param path working path - * @param listDeep ask for all the data - * @param newest ask for the newest data - * @return Collection of file statuses - * @throws IOException IO problems - * @throws FileNotFoundException if the path does not exist - */ - private List listDirectory(SwiftObjectPath path, - boolean listDeep, - boolean newest) throws IOException { - final byte[] bytes; - final ArrayList files = new ArrayList(); - final Path correctSwiftPath = getCorrectSwiftPath(path); - try { - bytes = swiftRestClient.listDeepObjectsInDirectory(path, listDeep); - } catch (FileNotFoundException e) { - if (LOG.isDebugEnabled()) { - LOG.debug("" + - "File/Directory not found " + path); - } - if (SwiftUtils.isRootDir(path)) { - return Collections.emptyList(); - } else { - throw e; - } - } catch (SwiftInvalidResponseException e) { - //bad HTTP error code - if (e.getStatusCode() == HttpStatus.SC_NO_CONTENT) { - //this can come back on a root list if the container is empty - if (SwiftUtils.isRootDir(path)) { - return Collections.emptyList(); - } else { - //NO_CONTENT returned on something other than the root directory; - //see if it is there, and convert to empty list or not found - //depending on whether the entry exists. - FileStatus stat = getObjectMetadata(correctSwiftPath, newest); - - if (stat.isDirectory()) { - //it's an empty directory. state that - return Collections.emptyList(); - } else { - //it's a file -return that as the status - files.add(stat); - return files; - } - } - } else { - //a different status code: rethrow immediately - throw e; - } - } - - final CollectionType collectionType = JSONUtil.getJsonMapper().getTypeFactory(). 
- constructCollectionType(List.class, SwiftObjectFileStatus.class); - - final List fileStatusList = JSONUtil.toObject( - new String(bytes, Charset.forName("UTF-8")), collectionType); - - //this can happen if user lists file /data/files/file - //in this case swift will return empty array - if (fileStatusList.isEmpty()) { - SwiftFileStatus objectMetadata = getObjectMetadata(correctSwiftPath, - newest); - if (objectMetadata.isFile()) { - files.add(objectMetadata); - } - - return files; - } - - for (SwiftObjectFileStatus status : fileStatusList) { - if (status.getName() != null) { - files.add(new SwiftFileStatus(status.getBytes(), - status.getBytes() == 0, - 1, - getBlocksize(), - status.getLast_modified().getTime(), - getCorrectSwiftPath(new Path(status.getName())))); - } - } - - return files; - } - - /** - * List all elements in this directory - * - * - * - * @param path path to work with - * @param recursive do a recursive get - * @param newest ask for the newest, or can some out of date data work? - * @return the file statuses, or an empty array if there are no children - * @throws IOException on IO problems - * @throws FileNotFoundException if the path is nonexistent - */ - public FileStatus[] listSubPaths(Path path, - boolean recursive, - boolean newest) throws IOException { - final Collection fileStatuses; - fileStatuses = listDirectory(toDirPath(path), recursive, newest); - return fileStatuses.toArray(new FileStatus[fileStatuses.size()]); - } - - /** - * Create a directory - * - * @param path path - * @throws IOException - */ - public void createDirectory(Path path) throws IOException { - innerCreateDirectory(toDirPath(path)); - } - - /** - * The inner directory creation option. This only creates - * the dir at the given path, not any parent dirs. 
- * @param swiftObjectPath swift object path at which a 0-byte blob should be - * put - * @throws IOException IO problems - */ - private void innerCreateDirectory(SwiftObjectPath swiftObjectPath) - throws IOException { - - swiftRestClient.putRequest(swiftObjectPath); - } - - private SwiftObjectPath toDirPath(Path path) throws - SwiftConfigurationException { - return SwiftObjectPath.fromPath(uri, path, false); - } - - private SwiftObjectPath toObjectPath(Path path) throws - SwiftConfigurationException { - return SwiftObjectPath.fromPath(uri, path); - } - - /** - * Try to find the specific server(s) on which the data lives - * @param path path to probe - * @return a possibly empty list of locations - * @throws IOException on problems determining the locations - */ - public List getObjectLocation(Path path) throws IOException { - final byte[] objectLocation; - objectLocation = swiftRestClient.getObjectLocation(toObjectPath(path)); - if (objectLocation == null || objectLocation.length == 0) { - //no object location, return an empty list - return new LinkedList(); - } - return extractUris(new String(objectLocation, Charset.forName("UTF-8")), path); - } - - /** - * deletes object from Swift - * - * @param path path to delete - * @return true if the path was deleted by this specific operation. - * @throws IOException on a failure - */ - public boolean deleteObject(Path path) throws IOException { - SwiftObjectPath swiftObjectPath = toObjectPath(path); - if (!SwiftUtils.isRootDir(swiftObjectPath)) { - return swiftRestClient.delete(swiftObjectPath); - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Not deleting root directory entry"); - } - return true; - } - } - - /** - * deletes a directory from Swift. This is not recursive - * - * @param path path to delete - * @return true if the path was deleted by this specific operation -or - * the path was root and not acted on. 
- * @throws IOException on a failure - */ - public boolean rmdir(Path path) throws IOException { - return deleteObject(path); - } - - /** - * Does the object exist - * - * @param path object path - * @return true if the metadata of an object could be retrieved - * @throws IOException IO problems other than FileNotFound, which - * is downgraded to an object does not exist return code - */ - public boolean objectExists(Path path) throws IOException { - return objectExists(toObjectPath(path)); - } - - /** - * Does the object exist - * - * @param path swift object path - * @return true if the metadata of an object could be retrieved - * @throws IOException IO problems other than FileNotFound, which - * is downgraded to an object does not exist return code - */ - public boolean objectExists(SwiftObjectPath path) throws IOException { - try { - Header[] headers = swiftRestClient.headRequest("objectExists", - path, - SwiftRestClient.NEWEST); - //no headers is treated as a missing file - return headers.length != 0; - } catch (FileNotFoundException e) { - return false; - } - } - - /** - * Rename through copy-and-delete. this is a consequence of the - * Swift filesystem using the path as the hash - * into the Distributed Hash Table, "the ring" of filenames. - *

    - * Because of the nature of the operation, it is not atomic. - * - * @param src source file/dir - * @param dst destination - * @throws IOException IO failure - * @throws SwiftOperationFailedException if the rename failed - * @throws FileNotFoundException if the source directory is missing, or - * the parent directory of the destination - */ - public void rename(Path src, Path dst) - throws FileNotFoundException, SwiftOperationFailedException, IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("mv " + src + " " + dst); - } - boolean renamingOnToSelf = src.equals(dst); - - SwiftObjectPath srcObject = toObjectPath(src); - SwiftObjectPath destObject = toObjectPath(dst); - - if (SwiftUtils.isRootDir(srcObject)) { - throw new SwiftOperationFailedException("cannot rename root dir"); - } - - final SwiftFileStatus srcMetadata; - srcMetadata = getObjectMetadata(src); - SwiftFileStatus dstMetadata; - try { - dstMetadata = getObjectMetadata(dst); - } catch (FileNotFoundException e) { - //destination does not exist. 
- LOG.debug("Destination does not exist"); - dstMetadata = null; - } - - //check to see if the destination parent directory exists - Path srcParent = src.getParent(); - Path dstParent = dst.getParent(); - //skip the overhead of a HEAD call if the src and dest share the same - //parent dir (in which case the dest dir exists), or the destination - //directory is root, in which case it must also exist - if (dstParent != null && !dstParent.equals(srcParent)) { - SwiftFileStatus fileStatus; - try { - fileStatus = getObjectMetadata(dstParent); - } catch (FileNotFoundException e) { - //destination parent doesn't exist; bail out - LOG.debug("destination parent directory " + dstParent + " doesn't exist"); - throw e; - } - if (!fileStatus.isDir()) { - throw new ParentNotDirectoryException(dstParent.toString()); - } - } - - boolean destExists = dstMetadata != null; - boolean destIsDir = destExists && SwiftUtils.isDirectory(dstMetadata); - //calculate the destination - SwiftObjectPath destPath; - - //enum the child entries and everything underneath - List childStats = listDirectory(srcObject, true, true); - boolean srcIsFile = !srcMetadata.isDirectory(); - if (srcIsFile) { - - //source is a simple file OR a partitioned file - // outcomes: - // #1 dest exists and is file: fail - // #2 dest exists and is dir: destination path becomes under dest dir - // #3 dest does not exist: use dest as name - if (destExists) { - - if (destIsDir) { - //outcome #2 -move to subdir of dest - destPath = toObjectPath(new Path(dst, src.getName())); - } else { - //outcome #1 dest it's a file: fail if different - if (!renamingOnToSelf) { - throw new FileAlreadyExistsException( - "cannot rename a file over one that already exists"); - } else { - //is mv self self where self is a file. 
this becomes a no-op - LOG.debug("Renaming file onto self: no-op => success"); - return; - } - } - } else { - //outcome #3 -new entry - destPath = toObjectPath(dst); - } - int childCount = childStats.size(); - //here there is one of: - // - a single object ==> standard file - // -> - if (childCount == 0) { - copyThenDeleteObject(srcObject, destPath); - } else { - //do the copy - SwiftUtils.debug(LOG, "Source file appears to be partitioned." + - " copying file and deleting children"); - - copyObject(srcObject, destPath); - for (FileStatus stat : childStats) { - SwiftUtils.debug(LOG, "Deleting partitioned file %s ", stat); - deleteObject(stat.getPath()); - } - - swiftRestClient.delete(srcObject); - } - } else { - - //here the source exists and is a directory - // outcomes (given we know the parent dir exists if we get this far) - // #1 destination is a file: fail - // #2 destination is a directory: create a new dir under that one - // #3 destination doesn't exist: create a new dir with that name - // #3 and #4 are only allowed if the dest path is not == or under src - - - if (destExists && !destIsDir) { - // #1 destination is a file: fail - throw new FileAlreadyExistsException( - "the source is a directory, but not the destination"); - } - Path targetPath; - if (destExists) { - // #2 destination is a directory: create a new dir under that one - targetPath = new Path(dst, src.getName()); - } else { - // #3 destination doesn't exist: create a new dir with that name - targetPath = dst; - } - SwiftObjectPath targetObjectPath = toObjectPath(targetPath); - //final check for any recursive operations - if (srcObject.isEqualToOrParentOf(targetObjectPath)) { - //you can't rename a directory onto itself - throw new SwiftOperationFailedException( - "cannot move a directory under itself"); - } - - - LOG.info("mv " + srcObject + " " + targetPath); - - logDirectory("Directory to copy ", srcObject, childStats); - - // iterative copy of everything under the directory. 
- // by listing all children this can be done iteratively - // rather than recursively -everything in this list is either a file - // or a 0-byte-len file pretending to be a directory. - String srcURI = src.toUri().toString(); - int prefixStripCount = srcURI.length() + 1; - for (FileStatus fileStatus : childStats) { - Path copySourcePath = fileStatus.getPath(); - String copySourceURI = copySourcePath.toUri().toString(); - - String copyDestSubPath = copySourceURI.substring(prefixStripCount); - - Path copyDestPath = new Path(targetPath, copyDestSubPath); - if (LOG.isTraceEnabled()) { - //trace to debug some low-level rename path problems; retained - //in case they ever come back. - LOG.trace("srcURI=" + srcURI - + "; copySourceURI=" + copySourceURI - + "; copyDestSubPath=" + copyDestSubPath - + "; copyDestPath=" + copyDestPath); - } - SwiftObjectPath copyDestination = toObjectPath(copyDestPath); - - try { - copyThenDeleteObject(toObjectPath(copySourcePath), - copyDestination); - } catch (FileNotFoundException e) { - LOG.info("Skipping rename of " + copySourcePath); - } - //add a throttle delay - throttle(); - } - //now rename self. 
If missing, create the dest directory and warn - if (!SwiftUtils.isRootDir(srcObject)) { - try { - copyThenDeleteObject(srcObject, - targetObjectPath); - } catch (FileNotFoundException e) { - //create the destination directory - LOG.warn("Source directory deleted during rename", e); - innerCreateDirectory(destObject); - } - } - } - } - - /** - * Debug action to dump directory statuses to the debug log - * - * @param message explanation - * @param objectPath object path (can be null) - * @param statuses listing output - */ - private void logDirectory(String message, SwiftObjectPath objectPath, - Iterable statuses) { - - if (LOG.isDebugEnabled()) { - LOG.debug(message + ": listing of " + objectPath); - for (FileStatus fileStatus : statuses) { - LOG.debug(fileStatus.getPath().toString()); - } - } - } - - public void copy(Path srcKey, Path dstKey) throws IOException { - SwiftObjectPath srcObject = toObjectPath(srcKey); - SwiftObjectPath destObject = toObjectPath(dstKey); - swiftRestClient.copyObject(srcObject, destObject); - } - - - /** - * Copy an object then, if the copy worked, delete it. - * If the copy failed, the source object is not deleted. 
- * - * @param srcObject source object path - * @param destObject destination object path - * @throws IOException IO problems - - */ - private void copyThenDeleteObject(SwiftObjectPath srcObject, - SwiftObjectPath destObject) throws - IOException { - - - //do the copy - copyObject(srcObject, destObject); - //getting here means the copy worked - swiftRestClient.delete(srcObject); - } - /** - * Copy an object - * @param srcObject source object path - * @param destObject destination object path - * @throws IOException IO problems - */ - private void copyObject(SwiftObjectPath srcObject, - SwiftObjectPath destObject) throws - IOException { - if (srcObject.isEqualToOrParentOf(destObject)) { - throw new SwiftException( - "Can't copy " + srcObject + " onto " + destObject); - } - //do the copy - boolean copySucceeded = swiftRestClient.copyObject(srcObject, destObject); - if (!copySucceeded) { - throw new SwiftException("Copy of " + srcObject + " to " - + destObject + "failed"); - } - } - - /** - * Take a Hadoop path and return one which uses the URI prefix and authority - * of this FS. It doesn't make a relative path absolute - * @param path path in - * @return path with a URI bound to this FS - * @throws SwiftException URI cannot be created. - */ - public Path getCorrectSwiftPath(Path path) throws - SwiftException { - try { - final URI fullUri = new URI(uri.getScheme(), - uri.getAuthority(), - path.toUri().getPath(), - null, - null); - - return new Path(fullUri); - } catch (URISyntaxException e) { - throw new SwiftException("Specified path " + path + " is incorrect", e); - } - } - - /** - * Builds a hadoop-Path from a swift path, inserting the URI authority - * of this FS instance - * @param path swift object path - * @return Hadoop path - * @throws SwiftException if the URI couldn't be created. 
- */ - private Path getCorrectSwiftPath(SwiftObjectPath path) throws - SwiftException { - try { - final URI fullUri = new URI(uri.getScheme(), - uri.getAuthority(), - path.getObject(), - null, - null); - - return new Path(fullUri); - } catch (URISyntaxException e) { - throw new SwiftException("Specified path " + path + " is incorrect", e); - } - } - - - /** - * extracts URIs from json - * @param json json to parse - * @param path path (used in exceptions) - * @return URIs - * @throws SwiftOperationFailedException on any problem parsing the JSON - */ - public static List extractUris(String json, Path path) throws - SwiftOperationFailedException { - final Matcher matcher = URI_PATTERN.matcher(json); - final List result = new ArrayList(); - while (matcher.find()) { - final String s = matcher.group(); - final String uri = s.substring(1, s.length() - 1); - try { - URI createdUri = URI.create(uri); - result.add(createdUri); - } catch (IllegalArgumentException e) { - //failure to create the URI, which means this is bad JSON. Convert - //to an exception with useful text - throw new SwiftOperationFailedException( - String.format( - "could not convert \"%s\" into a URI." + - " source: %s " + - " first JSON: %s", - uri, path, json.substring(0, 256))); - } - } - return result; - } - - /** - * Insert a throttled wait if the throttle delay > 0 - * @throws InterruptedIOException if interrupted during sleep - */ - public void throttle() throws InterruptedIOException { - int throttleDelay = getThrottleDelay(); - if (throttleDelay > 0) { - try { - Thread.sleep(throttleDelay); - } catch (InterruptedException e) { - //convert to an IOE - throw (InterruptedIOException) new InterruptedIOException(e.toString()) - .initCause(e); - } - } - } - - /** - * Get the current operation statistics - * @return a snapshot of the statistics - */ - public List getOperationStatistics() { - return swiftRestClient.getOperationStatistics(); - } - - - /** - * Delete the entire tree. 
This is an internal one with slightly different - * behavior: if an entry is missing, a {@link FileNotFoundException} is - * raised. This lets the caller distinguish a file not found with - * other reasons for failure, so handles race conditions in recursive - * directory deletes better. - *

    - * The problem being addressed is: caller A requests a recursive directory - * of directory /dir ; caller B requests a delete of a file /dir/file, - * between caller A enumerating the files contents, and requesting a delete - * of /dir/file. We want to recognise the special case - * "directed file is no longer there" and not convert that into a failure - * - * @param absolutePath the path to delete. - * @param recursive if path is a directory and set to - * true, the directory is deleted else throws an exception if the - * directory is not empty - * case of a file the recursive can be set to either true or false. - * @return true if the object was deleted - * @throws IOException IO problems - * @throws FileNotFoundException if a file/dir being deleted is not there - - * this includes entries below the specified path, (if the path is a dir - * and recursive is true) - */ - public boolean delete(Path absolutePath, boolean recursive) throws IOException { - Path swiftPath = getCorrectSwiftPath(absolutePath); - SwiftUtils.debug(LOG, "Deleting path '%s' recursive=%b", - absolutePath, - recursive); - boolean askForNewest = true; - SwiftFileStatus fileStatus = getObjectMetadata(swiftPath, askForNewest); - - //ask for the file/dir status, but don't demand the newest, as we - //don't mind if the directory has changed - //list all entries under this directory. - //this will throw FileNotFoundException if the file isn't there - FileStatus[] statuses = listSubPaths(absolutePath, true, askForNewest); - if (statuses == null) { - //the directory went away during the non-atomic stages of the operation. - // Return false as it was not this thread doing the deletion. 
- SwiftUtils.debug(LOG, "Path '%s' has no status -it has 'gone away'", - absolutePath, - recursive); - return false; - } - int filecount = statuses.length; - SwiftUtils.debug(LOG, "Path '%s' %d status entries'", - absolutePath, - filecount); - - if (filecount == 0) { - //it's an empty directory or a path - rmdir(absolutePath); - return true; - } - - if (LOG.isDebugEnabled()) { - SwiftUtils.debug(LOG, "%s", SwiftUtils.fileStatsToString(statuses, "\n")); - } - - if (filecount == 1 && swiftPath.equals(statuses[0].getPath())) { - // 1 entry => simple file and it is the target - //simple file: delete it - SwiftUtils.debug(LOG, "Deleting simple file %s", absolutePath); - deleteObject(absolutePath); - return true; - } - - //>1 entry implies directory with children. Run through them, - // but first check for the recursive flag and reject it *unless it looks - // like a partitioned file (len > 0 && has children) - if (!fileStatus.isDirectory()) { - LOG.debug("Multiple child entries but entry has data: assume partitioned"); - } else if (!recursive) { - //if there are children, unless this is a recursive operation, fail immediately - throw new SwiftOperationFailedException("Directory " + fileStatus - + " is not empty: " - + SwiftUtils.fileStatsToString( - statuses, "; ")); - } - - //delete the entries. including ourselves. - for (FileStatus entryStatus : statuses) { - Path entryPath = entryStatus.getPath(); - try { - boolean deleted = deleteObject(entryPath); - if (!deleted) { - SwiftUtils.debug(LOG, "Failed to delete entry '%s'; continuing", - entryPath); - } - } catch (FileNotFoundException e) { - //the path went away -race conditions. - //do not fail, as the outcome is still OK. 
- SwiftUtils.debug(LOG, "Path '%s' is no longer present; continuing", - entryPath); - } - throttle(); - } - //now delete self - SwiftUtils.debug(LOG, "Deleting base entry %s", absolutePath); - deleteObject(absolutePath); - - return true; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java deleted file mode 100644 index bce7325c980e7..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java +++ /dev/null @@ -1,385 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.snative; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftConnectionClosedException; -import org.apache.hadoop.fs.swift.exceptions.SwiftException; -import org.apache.hadoop.fs.swift.http.HttpBodyContent; -import org.apache.hadoop.fs.swift.http.HttpInputStreamWithRelease; -import org.apache.hadoop.fs.swift.util.SwiftUtils; - -import java.io.EOFException; -import java.io.IOException; - -/** - * The input stream from remote Swift blobs. - * The class attempts to be buffer aware, and react to a forward seek operation - * by trying to scan ahead through the current block of data to find it. - * This accelerates some operations that do a lot of seek()/read() actions, - * including work (such as in the MR engine) that do a seek() immediately after - * an open(). - */ -class SwiftNativeInputStream extends FSInputStream { - - private static final Logger LOG = - LoggerFactory.getLogger(SwiftNativeInputStream.class); - - /** - * range requested off the server: {@value} - */ - private final long bufferSize; - - /** - * File nativeStore instance - */ - private final SwiftNativeFileSystemStore nativeStore; - - /** - * Hadoop statistics. Used to get info about number of reads, writes, etc. 
- */ - private final FileSystem.Statistics statistics; - - /** - * Data input stream - */ - private HttpInputStreamWithRelease httpStream; - - /** - * File path - */ - private final Path path; - - /** - * Current position - */ - private long pos = 0; - - /** - * Length of the file picked up at start time - */ - private long contentLength = -1; - - /** - * Why the stream is closed - */ - private String reasonClosed = "unopened"; - - /** - * Offset in the range requested last - */ - private long rangeOffset = 0; - - public SwiftNativeInputStream(SwiftNativeFileSystemStore storeNative, - FileSystem.Statistics statistics, Path path, long bufferSize) - throws IOException { - this.nativeStore = storeNative; - this.statistics = statistics; - this.path = path; - if (bufferSize <= 0) { - throw new IllegalArgumentException("Invalid buffer size"); - } - this.bufferSize = bufferSize; - //initial buffer fill - this.httpStream = storeNative.getObject(path).getInputStream(); - //fillBuffer(0); - } - - /** - * Move to a new position within the file relative to where the pointer is now. - * Always call from a synchronized clause - * @param offset offset - */ - private synchronized void incPos(int offset) { - pos += offset; - rangeOffset += offset; - SwiftUtils.trace(LOG, "Inc: pos=%d bufferOffset=%d", pos, rangeOffset); - } - - /** - * Update the start of the buffer; always call from a sync'd clause - * @param seekPos position sought. 
- * @param contentLength content length provided by response (may be -1) - */ - private synchronized void updateStartOfBufferPosition(long seekPos, - long contentLength) { - //reset the seek pointer - pos = seekPos; - //and put the buffer offset to 0 - rangeOffset = 0; - this.contentLength = contentLength; - SwiftUtils.trace(LOG, "Move: pos=%d; bufferOffset=%d; contentLength=%d", - pos, - rangeOffset, - contentLength); - } - - @Override - public synchronized int read() throws IOException { - verifyOpen(); - int result = -1; - try { - result = httpStream.read(); - } catch (IOException e) { - String msg = "IOException while reading " + path - + ": " +e + ", attempting to reopen."; - LOG.debug(msg, e); - if (reopenBuffer()) { - result = httpStream.read(); - } - } - if (result != -1) { - incPos(1); - } - if (statistics != null && result != -1) { - statistics.incrementBytesRead(1); - } - return result; - } - - @Override - public synchronized int read(byte[] b, int off, int len) throws IOException { - SwiftUtils.debug(LOG, "read(buffer, %d, %d)", off, len); - SwiftUtils.validateReadArgs(b, off, len); - if (len == 0) { - return 0; - } - int result = -1; - try { - verifyOpen(); - result = httpStream.read(b, off, len); - } catch (IOException e) { - //other IO problems are viewed as transient and re-attempted - LOG.info("Received IOException while reading '" + path + - "', attempting to reopen: " + e); - LOG.debug("IOE on read()" + e, e); - if (reopenBuffer()) { - result = httpStream.read(b, off, len); - } - } - if (result > 0) { - incPos(result); - if (statistics != null) { - statistics.incrementBytesRead(result); - } - } - - return result; - } - - /** - * Re-open the buffer - * @return true iff more data could be added to the buffer - * @throws IOException if not - */ - private boolean reopenBuffer() throws IOException { - innerClose("reopening buffer to trigger refresh"); - boolean success = false; - try { - fillBuffer(pos); - success = true; - } catch (EOFException eof) 
{ - //the EOF has been reached - this.reasonClosed = "End of file"; - } - return success; - } - - /** - * close the stream. After this the stream is not usable -unless and until - * it is re-opened (which can happen on some of the buffer ops) - * This method is thread-safe and idempotent. - * - * @throws IOException on IO problems. - */ - @Override - public synchronized void close() throws IOException { - innerClose("closed"); - } - - private void innerClose(String reason) throws IOException { - try { - if (httpStream != null) { - reasonClosed = reason; - if (LOG.isDebugEnabled()) { - LOG.debug("Closing HTTP input stream : " + reason); - } - httpStream.close(); - } - } finally { - httpStream = null; - } - } - - /** - * Assume that the connection is not closed: throws an exception if it is - * @throws SwiftConnectionClosedException - */ - private void verifyOpen() throws SwiftConnectionClosedException { - if (httpStream == null) { - throw new SwiftConnectionClosedException(reasonClosed); - } - } - - @Override - public synchronized String toString() { - return "SwiftNativeInputStream" + - " position=" + pos - + " buffer size = " + bufferSize - + " " - + (httpStream != null ? httpStream.toString() - : (" no input stream: " + reasonClosed)); - } - - /** - * Treats any finalize() call without the input stream being closed - * as a serious problem, logging at error level - * @throws Throwable n/a - */ - @Override - protected void finalize() throws Throwable { - if (httpStream != null) { - LOG.error( - "Input stream is leaking handles by not being closed() properly: " - + httpStream.toString()); - } - } - - /** - * Read through the specified number of bytes. - * The implementation iterates a byte a time, which may seem inefficient - * compared to the read(bytes[]) method offered by input streams. - * However, if you look at the code that implements that method, it comes - * down to read() one char at a time -only here the return value is discarded. - * - *

    - * This is a no-op if the stream is closed - * @param bytes number of bytes to read. - * @throws IOException IO problems - * @throws SwiftException if a read returned -1. - */ - private int chompBytes(long bytes) throws IOException { - int count = 0; - if (httpStream != null) { - int result; - for (long i = 0; i < bytes; i++) { - result = httpStream.read(); - if (result < 0) { - throw new SwiftException("Received error code while chomping input"); - } - count ++; - incPos(1); - } - } - return count; - } - - /** - * Seek to an offset. If the data is already in the buffer, move to it - * @param targetPos target position - * @throws IOException on any problem - */ - @Override - public synchronized void seek(long targetPos) throws IOException { - if (targetPos < 0) { - throw new EOFException( - FSExceptionMessages.NEGATIVE_SEEK); - } - //there's some special handling of near-local data - //as the seek can be omitted if it is in/adjacent - long offset = targetPos - pos; - if (LOG.isDebugEnabled()) { - LOG.debug("Seek to " + targetPos + "; current pos =" + pos - + "; offset="+offset); - } - if (offset == 0) { - LOG.debug("seek is no-op"); - return; - } - - if (offset < 0) { - LOG.debug("seek is backwards"); - } else if ((rangeOffset + offset < bufferSize)) { - //if the seek is in range of that requested, scan forwards - //instead of closing and re-opening a new HTTP connection - SwiftUtils.debug(LOG, - "seek is within current stream" - + "; pos= %d ; targetPos=%d; " - + "offset= %d ; bufferOffset=%d", - pos, targetPos, offset, rangeOffset); - try { - LOG.debug("chomping "); - chompBytes(offset); - } catch (IOException e) { - //this is assumed to be recoverable with a seek -or more likely to fail - LOG.debug("while chomping ",e); - } - if (targetPos - pos == 0) { - LOG.trace("chomping successful"); - return; - } - LOG.trace("chomping failed"); - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Seek is beyond buffer size of " + bufferSize); - } - } - - 
innerClose("seeking to " + targetPos); - fillBuffer(targetPos); - } - - /** - * Fill the buffer from the target position - * If the target position == current position, the - * read still goes ahead; this is a way of handling partial read failures - * @param targetPos target position - * @throws IOException IO problems on the read - */ - private void fillBuffer(long targetPos) throws IOException { - long length = targetPos + bufferSize; - SwiftUtils.debug(LOG, "Fetching %d bytes starting at %d", length, targetPos); - HttpBodyContent blob = nativeStore.getObject(path, targetPos, length); - httpStream = blob.getInputStream(); - updateStartOfBufferPosition(targetPos, blob.getContentLength()); - } - - @Override - public synchronized long getPos() throws IOException { - return pos; - } - - /** - * This FS doesn't explicitly support multiple data sources, so - * return false here. - * @param targetPos the desired target position - * @return true if a new source of the data has been set up - * as the source of future reads - * @throws IOException IO problems - */ - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeOutputStream.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeOutputStream.java deleted file mode 100644 index ac49a8a649535..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeOutputStream.java +++ /dev/null @@ -1,389 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.snative; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftConnectionClosedException; -import org.apache.hadoop.fs.swift.exceptions.SwiftException; -import org.apache.hadoop.fs.swift.exceptions.SwiftInternalStateException; -import org.apache.hadoop.fs.swift.util.SwiftUtils; - -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -/** - * Output stream, buffers data on local disk. - * Writes to Swift on the close() method, unless the - * file is significantly large that it is being written as partitions. - * In this case, the first partition is written on the first write that puts - * data over the partition, as may later writes. The close() then causes - * the final partition to be written, along with a partition manifest. 
- */ -class SwiftNativeOutputStream extends OutputStream { - public static final int ATTEMPT_LIMIT = 3; - private long filePartSize; - private static final Logger LOG = - LoggerFactory.getLogger(SwiftNativeOutputStream.class); - private Configuration conf; - private String key; - private File backupFile; - private OutputStream backupStream; - private SwiftNativeFileSystemStore nativeStore; - private boolean closed; - private int partNumber; - private long blockOffset; - private long bytesWritten; - private long bytesUploaded; - private boolean partUpload = false; - final byte[] oneByte = new byte[1]; - - /** - * Create an output stream - * @param conf configuration to use - * @param nativeStore native store to write through - * @param key the key to write - * @param partSizeKB the partition size - * @throws IOException - */ - @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") - public SwiftNativeOutputStream(Configuration conf, - SwiftNativeFileSystemStore nativeStore, - String key, - long partSizeKB) throws IOException { - this.conf = conf; - this.key = key; - this.backupFile = newBackupFile(); - this.nativeStore = nativeStore; - this.backupStream = new BufferedOutputStream(new FileOutputStream(backupFile)); - this.partNumber = 1; - this.blockOffset = 0; - this.filePartSize = 1024L * partSizeKB; - } - - private File newBackupFile() throws IOException { - File dir = new File(conf.get("hadoop.tmp.dir")); - if (!dir.mkdirs() && !dir.exists()) { - throw new SwiftException("Cannot create Swift buffer directory: " + dir); - } - File result = File.createTempFile("output-", ".tmp", dir); - result.deleteOnExit(); - return result; - } - - /** - * Flush the local backing stream. - * This does not trigger a flush of data to the remote blobstore. 
- * @throws IOException - */ - @Override - public void flush() throws IOException { - backupStream.flush(); - } - - /** - * check that the output stream is open - * - * @throws SwiftException if it is not - */ - private synchronized void verifyOpen() throws SwiftException { - if (closed) { - throw new SwiftConnectionClosedException(); - } - } - - /** - * Close the stream. This will trigger the upload of all locally cached - * data to the remote blobstore. - * @throws IOException IO problems uploading the data. - */ - @Override - public synchronized void close() throws IOException { - if (closed) { - return; - } - - try { - closed = true; - //formally declare as closed. - backupStream.close(); - backupStream = null; - Path keypath = new Path(key); - if (partUpload) { - partUpload(true); - nativeStore.createManifestForPartUpload(keypath); - } else { - uploadOnClose(keypath); - } - } finally { - delete(backupFile); - backupFile = null; - } - assert backupStream == null: "backup stream has been reopened"; - } - - /** - * Upload a file when closed, either in one go, or, if the file is - * already partitioned, by uploading the remaining partition and a manifest. 
- * @param keypath key as a path - * @throws IOException IO Problems - */ - private void uploadOnClose(Path keypath) throws IOException { - boolean uploadSuccess = false; - int attempt = 0; - while (!uploadSuccess) { - try { - ++attempt; - bytesUploaded += uploadFileAttempt(keypath, attempt); - uploadSuccess = true; - } catch (IOException e) { - LOG.info("Upload failed " + e, e); - if (attempt > ATTEMPT_LIMIT) { - throw e; - } - } - } -} - - @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") - private long uploadFileAttempt(Path keypath, int attempt) throws IOException { - long uploadLen = backupFile.length(); - SwiftUtils.debug(LOG, "Closing write of file %s;" + - " localfile=%s of length %d - attempt %d", - key, - backupFile, - uploadLen, - attempt); - - nativeStore.uploadFile(keypath, - new FileInputStream(backupFile), - uploadLen); - return uploadLen; - } - - @Override - protected void finalize() throws Throwable { - if(!closed) { - LOG.warn("stream not closed"); - } - if (backupFile != null) { - LOG.warn("Leaking backing file " + backupFile); - } - } - - private void delete(File file) { - if (file != null) { - SwiftUtils.debug(LOG, "deleting %s", file); - if (!file.delete()) { - LOG.warn("Could not delete " + file); - } - } - } - - @Override - public void write(int b) throws IOException { - //insert to a one byte array - oneByte[0] = (byte) b; - //then delegate to the array writing routine - write(oneByte, 0, 1); - } - - @Override - public synchronized void write(byte[] buffer, int offset, int len) throws - IOException { - //validate args - if (offset < 0 || len < 0 || (offset + len) > buffer.length) { - throw new IndexOutOfBoundsException("Invalid offset/length for write"); - } - //validate the output stream - verifyOpen(); - SwiftUtils.debug(LOG, " write(offset=%d, len=%d)", offset, len); - - // if the size of file is greater than the partition limit - while (blockOffset + len >= filePartSize) { - // - then partition the blob and upload as many 
partitions - // are needed. - //how many bytes to write for this partition. - int subWriteLen = (int) (filePartSize - blockOffset); - if (subWriteLen < 0 || subWriteLen > len) { - throw new SwiftInternalStateException("Invalid subwrite len: " - + subWriteLen - + " -buffer len: " + len); - } - writeToBackupStream(buffer, offset, subWriteLen); - //move the offset along and length down - offset += subWriteLen; - len -= subWriteLen; - //now upload the partition that has just been filled up - // (this also sets blockOffset=0) - partUpload(false); - } - //any remaining data is now written - writeToBackupStream(buffer, offset, len); - } - - /** - * Write to the backup stream. - * Guarantees: - *

      - *
    1. backupStream is open
    2. - *
    3. blockOffset + len < filePartSize
    4. - *
    - * @param buffer buffer to write - * @param offset offset in buffer - * @param len length of write. - * @throws IOException backup stream write failing - */ - private void writeToBackupStream(byte[] buffer, int offset, int len) throws - IOException { - assert len >= 0 : "remainder to write is negative"; - SwiftUtils.debug(LOG," writeToBackupStream(offset=%d, len=%d)", offset, len); - if (len == 0) { - //no remainder -downgrade to no-op - return; - } - - //write the new data out to the backup stream - backupStream.write(buffer, offset, len); - //increment the counters - blockOffset += len; - bytesWritten += len; - } - - /** - * Upload a single partition. This deletes the local backing-file, - * and re-opens it to create a new one. - * @param closingUpload is this the final upload of an upload - * @throws IOException on IO problems - */ - @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") - private void partUpload(boolean closingUpload) throws IOException { - if (backupStream != null) { - backupStream.close(); - } - - if (closingUpload && partUpload && backupFile.length() == 0) { - //skipping the upload if - // - it is close time - // - the final partition is 0 bytes long - // - one part has already been written - SwiftUtils.debug(LOG, "skipping upload of 0 byte final partition"); - delete(backupFile); - } else { - partUpload = true; - boolean uploadSuccess = false; - int attempt = 0; - while(!uploadSuccess) { - try { - ++attempt; - bytesUploaded += uploadFilePartAttempt(attempt); - uploadSuccess = true; - } catch (IOException e) { - LOG.info("Upload failed " + e, e); - if (attempt > ATTEMPT_LIMIT) { - throw e; - } - } - } - delete(backupFile); - partNumber++; - blockOffset = 0; - if (!closingUpload) { - //if not the final upload, create a new output stream - backupFile = newBackupFile(); - backupStream = - new BufferedOutputStream(new FileOutputStream(backupFile)); - } - } - } - - @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") - private long 
uploadFilePartAttempt(int attempt) throws IOException { - long uploadLen = backupFile.length(); - SwiftUtils.debug(LOG, "Uploading part %d of file %s;" + - " localfile=%s of length %d - attempt %d", - partNumber, - key, - backupFile, - uploadLen, - attempt); - nativeStore.uploadFilePart(new Path(key), - partNumber, - new FileInputStream(backupFile), - uploadLen); - return uploadLen; - } - - /** - * Get the file partition size - * @return the partition size - */ - long getFilePartSize() { - return filePartSize; - } - - /** - * Query the number of partitions written - * This is intended for testing - * @return the of partitions already written to the remote FS - */ - synchronized int getPartitionsWritten() { - return partNumber - 1; - } - - /** - * Get the number of bytes written to the output stream. - * This should always be less than or equal to bytesUploaded. - * @return the number of bytes written to this stream - */ - long getBytesWritten() { - return bytesWritten; - } - - /** - * Get the number of bytes uploaded to remote Swift cluster. 
- * bytesUploaded -bytesWritten = the number of bytes left to upload - * @return the number of bytes written to the remote endpoint - */ - long getBytesUploaded() { - return bytesUploaded; - } - - @Override - public String toString() { - return "SwiftNativeOutputStream{" + - ", key='" + key + '\'' + - ", backupFile=" + backupFile + - ", closed=" + closed + - ", filePartSize=" + filePartSize + - ", partNumber=" + partNumber + - ", blockOffset=" + blockOffset + - ", partUpload=" + partUpload + - ", nativeStore=" + nativeStore + - ", bytesWritten=" + bytesWritten + - ", bytesUploaded=" + bytesUploaded + - '}'; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftObjectFileStatus.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftObjectFileStatus.java deleted file mode 100644 index ca8adc6244c73..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftObjectFileStatus.java +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.snative; - -import java.util.Date; - -/** - * Java mapping of Swift JSON file status. 
- * THIS FILE IS MAPPED BY JACKSON TO AND FROM JSON. - * DO NOT RENAME OR MODIFY FIELDS AND THEIR ACCESSORS. - */ - -class SwiftObjectFileStatus { - private long bytes; - private String content_type; - private String hash; - private Date last_modified; - private String name; - private String subdir; - - SwiftObjectFileStatus() { - } - - SwiftObjectFileStatus(long bytes, String content_type, String hash, - Date last_modified, String name) { - this.bytes = bytes; - this.content_type = content_type; - this.hash = hash; - this.last_modified = last_modified; - this.name = name; - } - - public long getBytes() { - return bytes; - } - - public void setBytes(long bytes) { - this.bytes = bytes; - } - - public String getContent_type() { - return content_type; - } - - public void setContent_type(String content_type) { - this.content_type = content_type; - } - - public String getHash() { - return hash; - } - - public void setHash(String hash) { - this.hash = hash; - } - - public Date getLast_modified() { - return last_modified; - } - - public void setLast_modified(Date last_modified) { - this.last_modified = last_modified; - } - - public String getName() { - return pathToRootPath(name); - } - - public void setName(String name) { - this.name = name; - } - - public String getSubdir() { - return pathToRootPath(subdir); - } - - public void setSubdir(String subdir) { - this.subdir = subdir; - } - - /** - * If path doesn't starts with '/' - * method will concat '/' - * - * @param path specified path - * @return root path string - */ - private String pathToRootPath(String path) { - if (path == null) { - return null; - } - - if (path.startsWith("/")) { - return path; - } - - return "/".concat(path); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/Duration.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/Duration.java deleted file mode 100644 index 3071f946824c6..0000000000000 --- 
a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/Duration.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.util; - -public class Duration { - - private final long started; - private long finished; - - public Duration() { - started = time(); - finished = started; - } - - private long time() { - return System.currentTimeMillis(); - } - - public void finished() { - finished = time(); - } - - public String getDurationString() { - return humanTime(value()); - } - - public static String humanTime(long time) { - long seconds = (time / 1000); - long minutes = (seconds / 60); - return String.format("%d:%02d:%03d", minutes, seconds % 60, time % 1000); - } - - @Override - public String toString() { - return getDurationString(); - } - - public long value() { - return finished -started; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/DurationStats.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/DurationStats.java deleted file mode 100644 index 734cf8b6dc100..0000000000000 --- 
a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/DurationStats.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.util; - -/** - * Build ongoing statistics from duration data - */ -public class DurationStats { - - final String operation; - int n; - long sum; - long min; - long max; - double mean, m2; - - /** - * Construct statistics for a given operation. - * @param operation operation - */ - public DurationStats(String operation) { - this.operation = operation; - reset(); - } - - /** - * construct from another stats entry; - * all value are copied. 
- * @param that the source statistics - */ - public DurationStats(DurationStats that) { - operation = that.operation; - n = that.n; - sum = that.sum; - min = that.min; - max = that.max; - mean = that.mean; - m2 = that.m2; - } - - /** - * Add a duration - * @param duration the new duration - */ - public void add(Duration duration) { - add(duration.value()); - } - - /** - * Add a number - * @param x the number - */ - public void add(long x) { - n++; - sum += x; - double delta = x - mean; - mean += delta / n; - m2 += delta * (x - mean); - if (x < min) { - min = x; - } - if (x > max) { - max = x; - } - } - - /** - * Reset the data - */ - public void reset() { - n = 0; - sum = 0; - sum = 0; - min = 10000000; - max = 0; - mean = 0; - m2 = 0; - } - - /** - * Get the number of entries sampled - * @return the number of durations added - */ - public int getCount() { - return n; - } - - /** - * Get the sum of all durations - * @return all the durations - */ - public long getSum() { - return sum; - } - - /** - * Get the arithmetic mean of the aggregate statistics - * @return the arithmetic mean - */ - public double getArithmeticMean() { - return mean; - } - - /** - * Variance, sigma^2 - * @return variance, or, if no samples are there, 0. - */ - public double getVariance() { - return n > 0 ? (m2 / (n - 1)) : 0; - } - - /** - * Get the std deviation, sigma - * @return the stddev, 0 may mean there are no samples. - */ - public double getDeviation() { - double variance = getVariance(); - return (variance > 0) ? 
Math.sqrt(variance) : 0; - } - - /** - * Covert to a useful string - * @return a human readable summary - */ - @Override - public String toString() { - return String.format( - "%s count=%d total=%.3fs mean=%.3fs stddev=%.3fs min=%.3fs max=%.3fs", - operation, - n, - sum / 1000.0, - mean / 1000.0, - getDeviation() / 1000000.0, - min / 1000.0, - max / 1000.0); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/DurationStatsTable.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/DurationStatsTable.java deleted file mode 100644 index 58f8f0b641de5..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/DurationStatsTable.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.swift.util; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Build a duration stats table to which you can add statistics. 
- * Designed to be multithreaded - */ -public class DurationStatsTable { - - private Map statsTable - = new HashMap(6); - - /** - * Add an operation - * @param operation operation name - * @param duration duration - */ - public void add(String operation, Duration duration, boolean success) { - DurationStats durationStats; - String key = operation; - if (!success) { - key += "-FAIL"; - } - synchronized (this) { - durationStats = statsTable.get(key); - if (durationStats == null) { - durationStats = new DurationStats(key); - statsTable.put(key, durationStats); - } - } - synchronized (durationStats) { - durationStats.add(duration); - } - } - - /** - * Get the current duration statistics - * @return a snapshot of the statistics - */ - public synchronized List getDurationStatistics() { - List results = new ArrayList(statsTable.size()); - for (DurationStats stat: statsTable.values()) { - results.add(new DurationStats(stat)); - } - return results; - } - - /** - * reset the values of the statistics. This doesn't delete them, merely zeroes them. - */ - public synchronized void reset() { - for (DurationStats stat : statsTable.values()) { - stat.reset(); - } - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/HttpResponseUtils.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/HttpResponseUtils.java deleted file mode 100644 index 1cc340d83d9b4..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/HttpResponseUtils.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.util; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; - -import org.apache.http.Header; -import org.apache.http.HttpResponse; -import org.apache.http.util.EncodingUtils; - -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.HEADER_CONTENT_LENGTH; - -/** - * Utility class for parsing HttpResponse. This class is implemented like - * {@code org.apache.commons.httpclient.HttpMethodBase.java} in httpclient 3.x. - */ -public abstract class HttpResponseUtils { - - /** - * Returns the response body of the HTTPResponse, if any, as an array of bytes. - * If response body is not available or cannot be read, returns null - * - * Note: This will cause the entire response body to be buffered in memory. A - * malicious server may easily exhaust all the VM memory. It is strongly - * recommended, to use getResponseAsStream if the content length of the - * response is unknown or reasonably large. - * - * @param resp HttpResponse - * @return The response body - * @throws IOException If an I/O (transport) problem occurs while obtaining - * the response body. 
- */ - public static byte[] getResponseBody(HttpResponse resp) throws IOException { - try(InputStream instream = resp.getEntity().getContent()) { - if (instream != null) { - long contentLength = resp.getEntity().getContentLength(); - if (contentLength > Integer.MAX_VALUE) { - //guard integer cast from overflow - throw new IOException("Content too large to be buffered: " - + contentLength +" bytes"); - } - ByteArrayOutputStream outstream = new ByteArrayOutputStream( - contentLength > 0 ? (int) contentLength : 4*1024); - byte[] buffer = new byte[4096]; - int len; - while ((len = instream.read(buffer)) > 0) { - outstream.write(buffer, 0, len); - } - outstream.close(); - return outstream.toByteArray(); - } - } - return null; - } - - /** - * Returns the response body of the HTTPResponse, if any, as a {@link String}. - * If response body is not available or cannot be read, returns null - * The string conversion on the data is done using UTF-8. - * - * Note: This will cause the entire response body to be buffered in memory. A - * malicious server may easily exhaust all the VM memory. It is strongly - * recommended, to use getResponseAsStream if the content length of the - * response is unknown or reasonably large. - * - * @param resp HttpResponse - * @return The response body. - * @throws IOException If an I/O (transport) problem occurs while obtaining - * the response body. - */ - public static String getResponseBodyAsString(HttpResponse resp) - throws IOException { - byte[] rawdata = getResponseBody(resp); - if (rawdata != null) { - return EncodingUtils.getString(rawdata, "UTF-8"); - } else { - return null; - } - } - - /** - * Return the length (in bytes) of the response body, as specified in a - * Content-Length header. - * - *

    - * Return -1 when the content-length is unknown. - *

    - * - * @param resp HttpResponse - * @return content length, if Content-Length header is available. - * 0 indicates that the request has no body. - * If Content-Length header is not present, the method - * returns -1. - */ - public static long getContentLength(HttpResponse resp) { - Header header = resp.getFirstHeader(HEADER_CONTENT_LENGTH); - if (header == null) { - return -1; - } else { - return Long.parseLong(header.getValue()); - } - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/JSONUtil.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/JSONUtil.java deleted file mode 100644 index fee7e7f5697e3..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/JSONUtil.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.util; - -import com.fasterxml.jackson.core.JsonGenerationException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.type.CollectionType; -import org.apache.hadoop.fs.swift.exceptions.SwiftJsonMarshallingException; - -import java.io.IOException; -import java.io.StringWriter; -import java.io.Writer; - - -public class JSONUtil { - private static ObjectMapper jsonMapper = new ObjectMapper(); - - /** - * Private constructor. - */ - private JSONUtil() { - } - - /** - * Converting object to JSON string. If errors appears throw - * MeshinException runtime exception. - * - * @param object The object to convert. - * @return The JSON string representation. - * @throws IOException IO issues - * @throws SwiftJsonMarshallingException failure to generate JSON - */ - public static String toJSON(Object object) throws - IOException { - Writer json = new StringWriter(); - try { - jsonMapper.writeValue(json, object); - return json.toString(); - } catch (JsonGenerationException | JsonMappingException e) { - throw new SwiftJsonMarshallingException(e.toString(), e); - } - } - - /** - * Convert string representation to object. If errors appears throw - * Exception runtime exception. - * - * @param value The JSON string. - * @param klazz The class to convert. - * @return The Object of the given class. 
- */ - public static T toObject(String value, Class klazz) throws - IOException { - try { - return jsonMapper.readValue(value, klazz); - } catch (JsonGenerationException e) { - throw new SwiftJsonMarshallingException(e.toString() - + " source: " + value, - e); - } catch (JsonMappingException e) { - throw new SwiftJsonMarshallingException(e.toString() - + " source: " + value, - e); - } - } - - /** - * @param value json string - * @param typeReference class type reference - * @param type - * @return deserialized T object - */ - @SuppressWarnings("unchecked") - public static T toObject(String value, - final TypeReference typeReference) - throws IOException { - try { - return (T)jsonMapper.readValue(value, typeReference); - } catch (JsonGenerationException | JsonMappingException e) { - throw new SwiftJsonMarshallingException("Error generating response", e); - } - } - - /** - * @param value json string - * @param collectionType class describing how to deserialize collection of objects - * @param type - * @return deserialized T object - */ - @SuppressWarnings("unchecked") - public static T toObject(String value, - final CollectionType collectionType) - throws IOException { - try { - return (T)jsonMapper.readValue(value, collectionType); - } catch (JsonGenerationException | JsonMappingException e) { - throw new SwiftJsonMarshallingException(e.toString() - + " source: " + value, - e); - } - } - - public static ObjectMapper getJsonMapper() { - return jsonMapper; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftObjectPath.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftObjectPath.java deleted file mode 100644 index 791509a9e0372..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftObjectPath.java +++ /dev/null @@ -1,187 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.util; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; -import org.apache.hadoop.fs.swift.http.RestClientBindings; - -import java.net.URI; -import java.util.regex.Pattern; - -/** - * Swift hierarchy mapping of (container, path) - */ -public final class SwiftObjectPath { - private static final Pattern PATH_PART_PATTERN = Pattern.compile(".*/AUTH_\\w*/"); - - /** - * Swift container - */ - private final String container; - - /** - * swift object - */ - private final String object; - - private final String uriPath; - - /** - * Build an instance from a (host, object) pair - * - * @param container container name - * @param object object ref underneath the container - */ - public SwiftObjectPath(String container, String object) { - - if (object == null) { - throw new IllegalArgumentException("object name can't be null"); - } - - this.container = container; - this.object = URI.create(object).getPath(); - uriPath = buildUriPath(); - } - - public String getContainer() { - return container; - } - - public String getObject() { - return object; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof SwiftObjectPath)) return 
false; - final SwiftObjectPath that = (SwiftObjectPath) o; - return this.toUriPath().equals(that.toUriPath()); - } - - @Override - public int hashCode() { - int result = container.hashCode(); - result = 31 * result + object.hashCode(); - return result; - } - - private String buildUriPath() { - return SwiftUtils.joinPaths(container, object); - } - - public String toUriPath() { - return uriPath; - } - - @Override - public String toString() { - return toUriPath(); - } - - /** - * Test for the object matching a path, ignoring the container - * value. - * - * @param path path string - * @return true iff the object's name matches the path - */ - public boolean objectMatches(String path) { - return object.equals(path); - } - - - /** - * Query to see if the possibleChild object is a child path of this. - * object. - * - * The test is done by probing for the path of the this object being - * at the start of the second -with a trailing slash, and both - * containers being equal - * - * @param possibleChild possible child dir - * @return true iff the possibleChild is under this object - */ - public boolean isEqualToOrParentOf(SwiftObjectPath possibleChild) { - String origPath = toUriPath(); - String path = origPath; - if (!path.endsWith("/")) { - path = path + "/"; - } - String childPath = possibleChild.toUriPath(); - return childPath.equals(origPath) || childPath.startsWith(path); - } - - /** - * Create a path tuple of (container, path), where the container is - * chosen from the host of the URI. - * - * @param uri uri to start from - * @param path path underneath - * @return a new instance. - * @throws SwiftConfigurationException if the URI host doesn't parse into - * container.service - */ - public static SwiftObjectPath fromPath(URI uri, - Path path) - throws SwiftConfigurationException { - return fromPath(uri, path, false); - } - - /** - * Create a path tuple of (container, path), where the container is - * chosen from the host of the URI. 
- * A trailing slash can be added to the path. This is the point where - * these /-es need to be appended, because when you construct a {@link Path} - * instance, {@link Path#normalizePath(String, String)} is called - * -which strips off any trailing slash. - * - * @param uri uri to start from - * @param path path underneath - * @param addTrailingSlash should a trailing slash be added if there isn't one. - * @return a new instance. - * @throws SwiftConfigurationException if the URI host doesn't parse into - * container.service - */ - public static SwiftObjectPath fromPath(URI uri, - Path path, - boolean addTrailingSlash) - throws SwiftConfigurationException { - - String url = - path.toUri().getPath().replaceAll(PATH_PART_PATTERN.pattern(), ""); - //add a trailing slash if needed - if (addTrailingSlash && !url.endsWith("/")) { - url += "/"; - } - - String container = uri.getHost(); - if (container == null) { - //no container, not good: replace with "" - container = ""; - } else if (container.contains(".")) { - //its a container.service URI. Strip the container - container = RestClientBindings.extractContainerName(container); - } - return new SwiftObjectPath(container, url); - } - - -} diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java deleted file mode 100644 index 2e3abce251a2d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.util; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; -import org.junit.internal.AssumptionViolatedException; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Properties; - -/** - * Utilities used across test cases - */ -public class SwiftTestUtils extends org.junit.Assert { - - private static final Logger LOG = - LoggerFactory.getLogger(SwiftTestUtils.class); - - public static final String TEST_FS_SWIFT = "test.fs.swift.name"; - public static final String IO_FILE_BUFFER_SIZE = "io.file.buffer.size"; - - /** - * Get the test URI - * @param conf configuration - * @throws SwiftConfigurationException missing parameter or bad URI - */ - public static URI getServiceURI(Configuration conf) throws - SwiftConfigurationException { - String instance = conf.get(TEST_FS_SWIFT); - if (instance == null) { - throw new SwiftConfigurationException( - "Missing configuration entry " + TEST_FS_SWIFT); - } - try { - return new URI(instance); - } catch (URISyntaxException e) { - throw new SwiftConfigurationException("Bad URI: " + instance); - } - } - - public static boolean 
hasServiceURI(Configuration conf) { - String instance = conf.get(TEST_FS_SWIFT); - return instance != null; - } - - /** - * Assert that a property in the property set matches the expected value - * @param props property set - * @param key property name - * @param expected expected value. If null, the property must not be in the set - */ - public static void assertPropertyEquals(Properties props, - String key, - String expected) { - String val = props.getProperty(key); - if (expected == null) { - assertNull("Non null property " + key + " = " + val, val); - } else { - assertEquals("property " + key + " = " + val, - expected, - val); - } - } - - /** - * - * Write a file and read it in, validating the result. Optional flags control - * whether file overwrite operations should be enabled, and whether the - * file should be deleted afterwards. - * - * If there is a mismatch between what was written and what was expected, - * a small range of bytes either side of the first error are logged to aid - * diagnosing what problem occurred -whether it was a previous file - * or a corrupting of the current file. This assumes that two - * sequential runs to the same path use datasets with different character - * moduli. - * - * @param fs filesystem - * @param path path to write to - * @param len length of data - * @param overwrite should the create option allow overwrites? - * @param delete should the file be deleted afterwards? -with a verification - * that it worked. Deletion is not attempted if an assertion has failed - * earlier -it is not in a finally{} block. 
- * @throws IOException IO problems - */ - public static void writeAndRead(FileSystem fs, - Path path, - byte[] src, - int len, - int blocksize, - boolean overwrite, - boolean delete) throws IOException { - fs.mkdirs(path.getParent()); - - writeDataset(fs, path, src, len, blocksize, overwrite); - - byte[] dest = readDataset(fs, path, len); - - compareByteArrays(src, dest, len); - - if (delete) { - boolean deleted = fs.delete(path, false); - assertTrue("Deleted", deleted); - assertPathDoesNotExist(fs, "Cleanup failed", path); - } - } - - /** - * Write a file. - * Optional flags control - * whether file overwrite operations should be enabled - * @param fs filesystem - * @param path path to write to - * @param len length of data - * @param overwrite should the create option allow overwrites? - * @throws IOException IO problems - */ - public static void writeDataset(FileSystem fs, - Path path, - byte[] src, - int len, - int blocksize, - boolean overwrite) throws IOException { - assertTrue( - "Not enough data in source array to write " + len + " bytes", - src.length >= len); - FSDataOutputStream out = fs.create(path, - overwrite, - fs.getConf() - .getInt(IO_FILE_BUFFER_SIZE, - 4096), - (short) 1, - blocksize); - out.write(src, 0, len); - out.close(); - assertFileHasLength(fs, path, len); - } - - /** - * Read the file and convert to a byte dataset - * @param fs filesystem - * @param path path to read from - * @param len length of data to read - * @return the bytes - * @throws IOException IO problems - */ - public static byte[] readDataset(FileSystem fs, Path path, int len) - throws IOException { - FSDataInputStream in = fs.open(path); - byte[] dest = new byte[len]; - try { - in.readFully(0, dest); - } finally { - in.close(); - } - return dest; - } - - /** - * Assert that the array src[0..len] and dest[] are equal - * @param src source data - * @param dest actual - * @param len length of bytes to compare - */ - public static void compareByteArrays(byte[] src, - byte[] 
dest, - int len) { - assertEquals("Number of bytes read != number written", - len, dest.length); - int errors = 0; - int first_error_byte = -1; - for (int i = 0; i < len; i++) { - if (src[i] != dest[i]) { - if (errors == 0) { - first_error_byte = i; - } - errors++; - } - } - - if (errors > 0) { - String message = String.format(" %d errors in file of length %d", - errors, len); - LOG.warn(message); - // the range either side of the first error to print - // this is a purely arbitrary number, to aid user debugging - final int overlap = 10; - for (int i = Math.max(0, first_error_byte - overlap); - i < Math.min(first_error_byte + overlap, len); - i++) { - byte actual = dest[i]; - byte expected = src[i]; - String letter = toChar(actual); - String line = String.format("[%04d] %2x %s%n", i, actual, letter); - if (expected != actual) { - line = String.format("[%04d] %2x %s -expected %2x %s%n", - i, - actual, - letter, - expected, - toChar(expected)); - } - LOG.warn(line); - } - fail(message); - } - } - - /** - * Convert a byte to a character for printing. 
If the - * byte value is < 32 -and hence unprintable- the byte is - * returned as a two digit hex value - * @param b byte - * @return the printable character string - */ - public static String toChar(byte b) { - if (b >= 0x20) { - return Character.toString((char) b); - } else { - return String.format("%02x", b); - } - } - - public static String toChar(byte[] buffer) { - StringBuilder builder = new StringBuilder(buffer.length); - for (byte b : buffer) { - builder.append(toChar(b)); - } - return builder.toString(); - } - - public static byte[] toAsciiByteArray(String s) { - char[] chars = s.toCharArray(); - int len = chars.length; - byte[] buffer = new byte[len]; - for (int i = 0; i < len; i++) { - buffer[i] = (byte) (chars[i] & 0xff); - } - return buffer; - } - - public static void cleanupInTeardown(FileSystem fileSystem, - String cleanupPath) { - cleanup("TEARDOWN", fileSystem, cleanupPath); - } - - public static void cleanup(String action, - FileSystem fileSystem, - String cleanupPath) { - noteAction(action); - try { - if (fileSystem != null) { - fileSystem.delete(fileSystem.makeQualified(new Path(cleanupPath)), - true); - } - } catch (Exception e) { - LOG.error("Error deleting in "+ action + " - " + cleanupPath + ": " + e, e); - } - } - - public static void noteAction(String action) { - if (LOG.isDebugEnabled()) { - LOG.debug("============== "+ action +" ============="); - } - } - - /** - * downgrade a failure to a message and a warning, then an - * exception for the Junit test runner to mark as failed - * @param message text message - * @param failure what failed - * @throws AssumptionViolatedException always - */ - public static void downgrade(String message, Throwable failure) { - LOG.warn("Downgrading test " + message, failure); - AssumptionViolatedException ave = - new AssumptionViolatedException(failure, null); - throw ave; - } - - /** - * report an overridden test as unsupported - * @param message message to use in the text - * @throws 
AssumptionViolatedException always - */ - public static void unsupported(String message) { - throw new AssumptionViolatedException(message); - } - - /** - * report a test has been skipped for some reason - * @param message message to use in the text - * @throws AssumptionViolatedException always - */ - public static void skip(String message) { - throw new AssumptionViolatedException(message); - } - - - /** - * Make an assertion about the length of a file - * @param fs filesystem - * @param path path of the file - * @param expected expected length - * @throws IOException on File IO problems - */ - public static void assertFileHasLength(FileSystem fs, Path path, - int expected) throws IOException { - FileStatus status = fs.getFileStatus(path); - assertEquals( - "Wrong file length of file " + path + " status: " + status, - expected, - status.getLen()); - } - - /** - * Assert that a path refers to a directory - * @param fs filesystem - * @param path path of the directory - * @throws IOException on File IO problems - */ - public static void assertIsDirectory(FileSystem fs, - Path path) throws IOException { - FileStatus fileStatus = fs.getFileStatus(path); - assertIsDirectory(fileStatus); - } - - /** - * Assert that a path refers to a directory - * @param fileStatus stats to check - */ - public static void assertIsDirectory(FileStatus fileStatus) { - assertTrue("Should be a dir -but isn't: " + fileStatus, - fileStatus.isDirectory()); - } - - /** - * Write the text to a file, returning the converted byte array - * for use in validating the round trip - * @param fs filesystem - * @param path path of file - * @param text text to write - * @param overwrite should the operation overwrite any existing file? 
- * @return the read bytes - * @throws IOException on IO problems - */ - public static byte[] writeTextFile(FileSystem fs, - Path path, - String text, - boolean overwrite) throws IOException { - FSDataOutputStream stream = fs.create(path, overwrite); - byte[] bytes = new byte[0]; - if (text != null) { - bytes = toAsciiByteArray(text); - stream.write(bytes); - } - stream.close(); - return bytes; - } - - /** - * Touch a file: fails if it is already there - * @param fs filesystem - * @param path path - * @throws IOException IO problems - */ - public static void touch(FileSystem fs, - Path path) throws IOException { - fs.delete(path, true); - writeTextFile(fs, path, null, false); - } - - public static void assertDeleted(FileSystem fs, - Path file, - boolean recursive) throws IOException { - assertPathExists(fs, "about to be deleted file", file); - boolean deleted = fs.delete(file, recursive); - String dir = ls(fs, file.getParent()); - assertTrue("Delete failed on " + file + ": " + dir, deleted); - assertPathDoesNotExist(fs, "Deleted file", file); - } - - /** - * Read in "length" bytes, convert to an ascii string - * @param fs filesystem - * @param path path to read - * @param length #of bytes to read. 
- * @return the bytes read and converted to a string - * @throws IOException - */ - public static String readBytesToString(FileSystem fs, - Path path, - int length) throws IOException { - FSDataInputStream in = fs.open(path); - try { - byte[] buf = new byte[length]; - in.readFully(0, buf); - return toChar(buf); - } finally { - in.close(); - } - } - - public static String getDefaultWorkingDirectory() { - return "/user/" + System.getProperty("user.name"); - } - - public static String ls(FileSystem fileSystem, Path path) throws IOException { - return SwiftUtils.ls(fileSystem, path); - } - - public static String dumpStats(String pathname, FileStatus[] stats) { - return pathname + SwiftUtils.fileStatsToString(stats,"\n"); - } - - /** - /** - * Assert that a file exists and whose {@link FileStatus} entry - * declares that this is a file and not a symlink or directory. - * @param fileSystem filesystem to resolve path against - * @param filename name of the file - * @throws IOException IO problems during file operations - */ - public static void assertIsFile(FileSystem fileSystem, Path filename) throws - IOException { - assertPathExists(fileSystem, "Expected file", filename); - FileStatus status = fileSystem.getFileStatus(filename); - String fileInfo = filename + " " + status; - assertFalse("File claims to be a directory " + fileInfo, - status.isDirectory()); -/* disabled for Hadoop v1 compatibility - assertFalse("File claims to be a symlink " + fileInfo, - status.isSymlink()); -*/ - } - - /** - * Create a dataset for use in the tests; all data is in the range - * base to (base+modulo-1) inclusive - * @param len length of data - * @param base base of the data - * @param modulo the modulo - * @return the newly generated dataset - */ - public static byte[] dataset(int len, int base, int modulo) { - byte[] dataset = new byte[len]; - for (int i = 0; i < len; i++) { - dataset[i] = (byte) (base + (i % modulo)); - } - return dataset; - } - - /** - * Assert that a path exists -but 
make no assertions as to the - * type of that entry - * - * @param fileSystem filesystem to examine - * @param message message to include in the assertion failure message - * @param path path in the filesystem - * @throws IOException IO problems - */ - public static void assertPathExists(FileSystem fileSystem, String message, - Path path) throws IOException { - try { - fileSystem.getFileStatus(path); - } catch (FileNotFoundException e) { - //failure, report it - throw (IOException)new FileNotFoundException(message + ": not found " - + path + " in " + path.getParent() + ": " + e + " -- " - + ls(fileSystem, path.getParent())).initCause(e); - } - } - - /** - * Assert that a path does not exist - * - * @param fileSystem filesystem to examine - * @param message message to include in the assertion failure message - * @param path path in the filesystem - * @throws IOException IO problems - */ - public static void assertPathDoesNotExist(FileSystem fileSystem, - String message, - Path path) throws IOException { - try { - FileStatus status = fileSystem.getFileStatus(path); - fail(message + ": unexpectedly found " + path + " as " + status); - } catch (FileNotFoundException expected) { - //this is expected - - } - } - - - /** - * Assert that a FileSystem.listStatus on a dir finds the subdir/child entry - * @param fs filesystem - * @param dir directory to scan - * @param subdir full path to look for - * @throws IOException IO problems - */ - public static void assertListStatusFinds(FileSystem fs, - Path dir, - Path subdir) throws IOException { - FileStatus[] stats = fs.listStatus(dir); - boolean found = false; - StringBuilder builder = new StringBuilder(); - for (FileStatus stat : stats) { - builder.append(stat.toString()).append('\n'); - if (stat.getPath().equals(subdir)) { - found = true; - } - } - assertTrue("Path " + subdir - + " not found in directory " + dir + ":" + builder, - found); - } - -} diff --git 
a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftUtils.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftUtils.java deleted file mode 100644 index f218a80595a8a..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftUtils.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.util; - -import org.slf4j.Logger; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.FileNotFoundException; -import java.io.IOException; - -/** - * Various utility classes for SwiftFS support - */ -public final class SwiftUtils { - - public static final String READ = "read(buffer, offset, length)"; - - /** - * Join two (non null) paths, inserting a forward slash between them - * if needed - * - * @param path1 first path - * @param path2 second path - * @return the combined path - */ - public static String joinPaths(String path1, String path2) { - StringBuilder result = - new StringBuilder(path1.length() + path2.length() + 1); - result.append(path1); - boolean insertSlash = true; - if (path1.endsWith("/")) { - insertSlash = false; - } else if (path2.startsWith("/")) { - insertSlash = false; - } - if (insertSlash) { - result.append("/"); - } - result.append(path2); - return result.toString(); - } - - /** - * This test contains the is-directory logic for Swift, so if - * changed there is only one place for it. - * - * @param fileStatus status to examine - * @return true if we consider this status to be representative of a - * directory. - */ - public static boolean isDirectory(FileStatus fileStatus) { - return fileStatus.isDirectory() || isFilePretendingToBeDirectory(fileStatus); - } - - /** - * Test for the entry being a file that is treated as if it is a - * directory - * - * @param fileStatus status - * @return true if it meets the rules for being a directory - */ - public static boolean isFilePretendingToBeDirectory(FileStatus fileStatus) { - return fileStatus.getLen() == 0; - } - - /** - * Predicate: Is a swift object referring to the root directory? 
- * @param swiftObject object to probe - * @return true iff the object refers to the root - */ - public static boolean isRootDir(SwiftObjectPath swiftObject) { - return swiftObject.objectMatches("") || swiftObject.objectMatches("/"); - } - - /** - * Sprintf() to the log iff the log is at debug level. If the log - * is not at debug level, the printf operation is skipped, so - * no time is spent generating the string. - * @param log log to use - * @param text text message - * @param args args arguments to the print statement - */ - public static void debug(Logger log, String text, Object... args) { - if (log.isDebugEnabled()) { - log.debug(String.format(text, args)); - } - } - - /** - * Log an exception (in text and trace) iff the log is at debug - * @param log Log to use - * @param text text message - * @param ex exception - */ - public static void debugEx(Logger log, String text, Exception ex) { - if (log.isDebugEnabled()) { - log.debug(text + ex, ex); - } - } - - /** - * Sprintf() to the log iff the log is at trace level. If the log - * is not at trace level, the printf operation is skipped, so - * no time is spent generating the string. - * @param log log to use - * @param text text message - * @param args args arguments to the print statement - */ - public static void trace(Logger log, String text, Object... args) { - if (log.isTraceEnabled()) { - log.trace(String.format(text, args)); - } - } - - /** - * Given a partition number, calculate the partition value. - * This is used in the SwiftNativeOutputStream, and is placed - * here for tests to be able to calculate the filename of - * a partition. 
- * @param partNumber part number - * @return a string to use as the filename - */ - public static String partitionFilenameFromNumber(int partNumber) { - return String.format("%06d", partNumber); - } - - /** - * List a a path to string - * @param fileSystem filesystem - * @param path directory - * @return a listing of the filestatuses of elements in the directory, one - * to a line, preceded by the full path of the directory - * @throws IOException connectivity problems - */ - public static String ls(FileSystem fileSystem, Path path) throws - IOException { - if (path == null) { - //surfaces when someone calls getParent() on something at the top of the path - return "/"; - } - FileStatus[] stats; - String pathtext = "ls " + path; - try { - stats = fileSystem.listStatus(path); - } catch (FileNotFoundException e) { - return pathtext + " -file not found"; - } catch (IOException e) { - return pathtext + " -failed: " + e; - } - return pathtext + fileStatsToString(stats, "\n"); - } - - /** - * Take an array of filestatus and convert to a string (prefixed w/ a [01] counter - * @param stats array of stats - * @param separator separator after every entry - * @return a stringified set - */ - public static String fileStatsToString(FileStatus[] stats, String separator) { - StringBuilder buf = new StringBuilder(stats.length * 128); - for (int i = 0; i < stats.length; i++) { - buf.append(String.format("[%02d] %s", i, stats[i])).append(separator); - } - return buf.toString(); - } - - /** - * Verify that the basic args to a read operation are valid; - * throws an exception if not -with meaningful text including - * @param buffer destination buffer - * @param off offset - * @param len number of bytes to read - * @throws NullPointerException null buffer - * @throws IndexOutOfBoundsException on any invalid range. 
- */ - public static void validateReadArgs(byte[] buffer, int off, int len) { - if (buffer == null) { - throw new NullPointerException("Null byte array in"+ READ); - } - if (off < 0 ) { - throw new IndexOutOfBoundsException("Negative buffer offset " - + off - + " in " + READ); - } - if (len < 0 ) { - throw new IndexOutOfBoundsException("Negative read length " - + len - + " in " + READ); - } - if (off > buffer.length) { - throw new IndexOutOfBoundsException("Buffer offset of " - + off - + "beyond buffer size of " - + buffer.length - + " in " + READ); - } - } -} diff --git a/hadoop-tools/hadoop-openstack/src/site/markdown/index.md b/hadoop-tools/hadoop-openstack/src/site/markdown/index.md deleted file mode 100644 index 1815f60c613a9..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/site/markdown/index.md +++ /dev/null @@ -1,549 +0,0 @@ - - -* [Hadoop OpenStack Support: Swift Object Store](#Hadoop_OpenStack_Support:_Swift_Object_Store) - * [Introduction](#Introduction) - * [Features](#Features) - * [Using the Hadoop Swift Filesystem Client](#Using_the_Hadoop_Swift_Filesystem_Client) - * [Concepts: services and containers](#Concepts:_services_and_containers) - * [Containers and Objects](#Containers_and_Objects) - * [Eventual Consistency](#Eventual_Consistency) - * [Non-atomic "directory" operations.](#Non-atomic_directory_operations.) 
- * [Working with Swift Object Stores in Hadoop](#Working_with_Swift_Object_Stores_in_Hadoop) - * [Swift Filesystem URIs](#Swift_Filesystem_URIs) - * [Installing](#Installing) - * [Configuring](#Configuring) - * [Example: Rackspace US, in-cluster access using API key](#Example:_Rackspace_US_in-cluster_access_using_API_key) - * [Example: Rackspace UK: remote access with password authentication](#Example:_Rackspace_UK:_remote_access_with_password_authentication) - * [Example: HP cloud service definition](#Example:_HP_cloud_service_definition) - * [General Swift Filesystem configuration options](#General_Swift_Filesystem_configuration_options) - * [Blocksize fs.swift.blocksize](#Blocksize_fs.swift.blocksize) - * [Partition size fs.swift.partsize](#Partition_size_fs.swift.partsize) - * [Request size fs.swift.requestsize](#Request_size_fs.swift.requestsize) - * [Connection timeout fs.swift.connect.timeout](#Connection_timeout_fs.swift.connect.timeout) - * [Connection timeout fs.swift.socket.timeout](#Connection_timeout_fs.swift.socket.timeout) - * [Connection Retry Count fs.swift.connect.retry.count](#Connection_Retry_Count_fs.swift.connect.retry.count) - * [Connection Throttle Delay fs.swift.connect.throttle.delay](#Connection_Throttle_Delay_fs.swift.connect.throttle.delay) - * [HTTP Proxy](#HTTP_Proxy) - * [Troubleshooting](#Troubleshooting) - * [ClassNotFoundException](#ClassNotFoundException) - * [Failure to Authenticate](#Failure_to_Authenticate) - * [Timeout connecting to the Swift Service](#Timeout_connecting_to_the_Swift_Service) - * [Warnings](#Warnings) - * [Limits](#Limits) - * [Testing the hadoop-openstack module](#Testing_the_hadoop-openstack_module) - -Hadoop OpenStack Support: Swift Object Store -============================================ - -Introduction ------------- - -[OpenStack](http://www.openstack.org/) is an open source cloud infrastructure which can be accessed from multiple public IaaS providers, and deployed privately. 
It offers infrastructure services such as VM hosting (Nova), authentication (Keystone) and storage of binary objects (Swift). - -This module enables Apache Hadoop applications -including MapReduce jobs, read and write data to and from instances of the [OpenStack Swift object store](http://www.openstack.org/software/openstack-storage/). - -To make it part of Apache Hadoop's default classpath, simply make sure that -HADOOP_OPTIONAL_TOOLS in hadoop-env.sh has 'hadoop-openstack' in the list. - -Features --------- - -* Read and write of data stored in a Swift object store - -* Support of a pseudo-hierachical file system (directories, subdirectories and - files) - -* Standard filesystem operations: `create`, `delete`, `mkdir`, - `ls`, `mv`, `stat`. - -* Can act as a source of data in a MapReduce job, or a sink. - -* Support for multiple OpenStack services, and multiple containers from a - single service. - -* Supports in-cluster and remote access to Swift data. - -* Supports OpenStack Keystone authentication with password or token. - -* Released under the Apache Software License - -* Tested against the Hadoop 3.x and 1.x branches, against multiple public - OpenStack clusters: Rackspace US, Rackspace UK, HP Cloud. - -* Tested against private OpenStack clusters, including scalability tests of - large file uploads. - -Using the Hadoop Swift Filesystem Client ----------------------------------------- - -### Concepts: services and containers - -OpenStack swift is an *Object Store*; also known as a *blobstore*. It stores arbitrary binary objects by name in a *container*. - -The Hadoop Swift filesystem library adds another concept, the *service*, which defines which Swift blobstore hosts a container -and how to connect to it. - -### Containers and Objects - -* Containers are created by users with accounts on the Swift filestore, and hold - *objects*. - -* Objects can be zero bytes long, or they can contain data. 
- -* Objects in the container can be up to 5GB; there is a special support for - larger files than this, which merges multiple objects in to one. - -* Each object is referenced by it's *name*; there is no notion of directories. - -* You can use any characters in an object name that can be 'URL-encoded'; the - maximum length of a name is 1034 characters -after URL encoding. - -* Names can have `/` characters in them, which are used to create the illusion of - a directory structure. For example `dir/dir2/name`. Even though this looks - like a directory, *it is still just a name*. There is no requirement to have - any entries in the container called `dir` or `dir/dir2` - -* That said. if the container has zero-byte objects that look like directory - names above other objects, they can pretend to be directories. Continuing the - example, a 0-byte object called `dir` would tell clients that it is a - directory while `dir/dir2` or `dir/dir2/name` were present. This creates an - illusion of containers holding a filesystem. - -Client applications talk to Swift over HTTP or HTTPS, reading, writing and deleting objects using standard HTTP operations (GET, PUT and DELETE, respectively). There is also a COPY operation, that creates a new object in the container, with a new name, containing the old data. There is no rename operation itself, objects need to be copied -then the original entry deleted. - -### Eventual Consistency - -The Swift Filesystem is \*eventually consistent\*: an operation on an object may not be immediately visible to that client, or other clients. This is a consequence of the goal of the filesystem: to span a set of machines, across multiple datacenters, in such a way that the data can still be available when many of them fail. (In contrast, the Hadoop HDFS filesystem is \*immediately consistent\*, but it does not span datacenters.) 
- -Eventual consistency can cause surprises for client applications that expect immediate consistency: after an object is deleted or overwritten, the object may still be visible -or the old data still retrievable. The Swift Filesystem client for Apache Hadoop attempts to handle this, in conjunction with the MapReduce engine, but there may be still be occasions when eventual consistency causes surprises. - -### Non-atomic "directory" operations. - -Hadoop expects some operations to be atomic, especially `rename()`, which is something the MapReduce layer relies on to commit the output of a job, renaming data from a temp directory to the final path. Because a rename is implemented as a copy of every blob under the directory's path, followed by a delete of the originals, the intermediate state of the operation will be visible to other clients. If two Reducer tasks to rename their temp directory to the final path, both operations may succeed, with the result that output directory contains mixed data. This can happen if MapReduce jobs are being run with *speculation* enabled and Swift used as the direct output of the MR job (it can also happen against Amazon S3). - -Other consequences of the non-atomic operations are: - -1. If a program is looking for the presence of the directory before acting - on the data -it may start prematurely. This can be avoided by using - other mechanisms to co-ordinate the programs, such as the presence of a file - that is written *after* any bulk directory operations. - -2. A `rename()` or `delete()` operation may include files added under - the source directory tree during the operation, may unintentionally delete - it, or delete the 0-byte swift entries that mimic directories and act - as parents for the files. Try to avoid doing this. - -The best ways to avoid all these problems is not using Swift as the filesystem between MapReduce jobs or other Hadoop workflows. 
It can act as a source of data, and a final destination, but it doesn't meet all of Hadoop's expectations of what a filesystem is -it's a *blobstore*. - -Working with Swift Object Stores in Hadoop ------------------------------------------- - -Once installed, the Swift FileSystem client can be used by any Hadoop application to read from or write to data stored in a Swift container. - -Data stored in Swift can be used as the direct input to a MapReduce job -simply use the `swift:` URL (see below) to declare the source of the data. - -This Swift Filesystem client is designed to work with multiple Swift object stores, both public and private. This allows the client to work with different clusters, reading and writing data to and from either of them. - -It can also work with the same object stores using multiple login details. - -These features are achieved by one basic concept: using a service name in the URI referring to a swift filesystem, and looking up all the connection and login details for that specific service. Different service names can be defined in the Hadoop XML configuration file, so defining different clusters, or providing different login details for the same object store(s). - -### Swift Filesystem URIs - -Hadoop uses URIs to refer to files within a filesystem. Some common examples are: - - local://etc/hosts - hdfs://cluster1/users/example/data/set1 - hdfs://cluster2.example.org:8020/users/example/data/set1 - -The Swift Filesystem Client adds a new URL type `swift`. In a Swift Filesystem URL, the hostname part of a URL identifies the container and the service to work with; the path the name of the object. Here are some examples - - swift://container.rackspace/my-object.csv - swift://data.hpcloud/data/set1 - swift://dmitry.privatecloud/out/results - -In the last two examples, the paths look like directories: it is not, they are simply the objects named `data/set1` and `out/results` respectively. 
- -### Installing - -The `hadoop-openstack` JAR must be on the classpath of the Hadoop program trying to talk to the Swift service. If installed in the classpath of the Hadoop MapReduce service, then all programs started by the MR engine will pick up the JAR automatically. This is the easiest way to give all Hadoop jobs access to Swift. - -Alternatively, the JAR can be included as one of the JAR files that an application uses. This lets the Hadoop jobs work with a Swift object store even if the Hadoop cluster is not pre-configured for this. - -The library also depends upon the Apache HttpComponents library, which must also be on the classpath. - -### Configuring - -To talk to a swift service, the user must must provide: - -1. The URL defining the container and the service. - -2. In the cluster/job configuration, the login details of that service. - -Multiple service definitions can co-exist in the same configuration file: just use different names for them. - -#### Example: Rackspace US, in-cluster access using API key - -This service definition is for use in a Hadoop cluster deployed within Rackspace's US infrastructure. - - - fs.swift.service.rackspace.auth.url - https://auth.api.rackspacecloud.com/v2.0/tokens - Rackspace US (multiregion) - - - - fs.swift.service.rackspace.username - user4 - - - - fs.swift.service.rackspace.region - DFW - - - - fs.swift.service.rackspace.apikey - fe806aa86dfffe2f6ed8 - - -Here the API key visible in the account settings API keys page is used to log in. No property for public/private access -the default is to use the private endpoint for Swift operations. - -This configuration also selects one of the regions, DFW, for its data. - -A reference to this service would use the `rackspace` service name: - - swift://hadoop-container.rackspace/ - -#### Example: Rackspace UK: remote access with password authentication - -This connects to Rackspace's UK ("LON") datacenter. 
- - - fs.swift.service.rackspaceuk.auth.url - https://lon.identity.api.rackspacecloud.com/v2.0/tokens - Rackspace UK - - - - fs.swift.service.rackspaceuk.username - user4 - - - - fs.swift.service.rackspaceuk.password - insert-password-here/value> - - - - fs.swift.service.rackspace.public - true - - -This is a public access point connection, using a password over an API key. - -A reference to this service would use the `rackspaceuk` service name: - - swift://hadoop-container.rackspaceuk/ - -Because the public endpoint is used, if this service definition is used within the London datacenter, all accesses will be billed at the public upload/download rates, *irrespective of where the Hadoop cluster is*. - -#### Example: HP cloud service definition - -Here is an example that connects to the HP Cloud object store. - - - fs.swift.service.hpcloud.auth.url - https://region-a.geo-1.identity.hpcloudsvc.com:35357/v2.0/tokens - - HP Cloud - - - - fs.swift.service.hpcloud.tenant - FE806AA86 - - - - fs.swift.service.hpcloud.username - FE806AA86DFFFE2F6ED8 - - - - fs.swift.service.hpcloud.password - secret-password-goes-here - - - - fs.swift.service.hpcloud.public - true - - -A reference to this service would use the `hpcloud` service name: - - swift://hadoop-container.hpcloud/ - -### General Swift Filesystem configuration options - -Some configuration options apply to the Swift client, independent of the specific Swift filesystem chosen. - -#### Blocksize fs.swift.blocksize - -Swift does not break up files into blocks, except in the special case of files over 5GB in length. Accordingly, there isn't a notion of a "block size" to define where the data is kept. - -Hadoop's MapReduce layer depends on files declaring their block size, so that it knows how to partition work. Too small a blocksize means that many mappers work on small pieces of data; too large a block size means that only a few mappers get started. 
- -The block size value reported by Swift, therefore, controls the basic workload partioning of the MapReduce engine -and can be an important parameter to tune for performance of the cluster. - -The property has a unit of kilobytes; the default value is `32*1024`: 32 MB - - - fs.swift.blocksize - 32768 - - -This blocksize has no influence on how files are stored in Swift; it only controls what the reported size of blocks are - a value used in Hadoop MapReduce to divide work. - -Note that the MapReduce engine's split logic can be tuned independently by setting the `mapred.min.split.size` and `mapred.max.split.size` properties, which can be done in specific job configurations. - - - mapred.min.split.size - 524288 - - - - mapred.max.split.size - 1048576 - - -In an Apache Pig script, these properties would be set as: - - mapred.min.split.size 524288 - mapred.max.split.size 1048576 - -#### Partition size fs.swift.partsize - -The Swift filesystem client breaks very large files into partitioned files, uploading each as it progresses, and writing any remaning data and an XML manifest when a partitioned file is closed. - -The partition size defaults to 4608 MB; 4.5GB, the maximum filesize that Swift can support. - -It is possible to set a smaller partition size, in the `fs.swift.partsize` option. This takes a value in KB. - - - fs.swift.partsize - 1024 - upload every MB - - -When should this value be changed from its default? - -While there is no need to ever change it for basic operation of the Swift filesystem client, it can be tuned - -* If a Swift filesystem is location aware, then breaking a file up into - smaller partitions scatters the data round the cluster. For best performance, - the property `fs.swift.blocksize` should be set to a smaller value than the - partition size of files. - -* When writing to an unpartitioned file, the entire write is done in the - `close()` operation. 
When a file is partitioned, the outstanding data to - be written whenever the outstanding amount of data is greater than the - partition size. This means that data will be written more incrementally - -#### Request size fs.swift.requestsize - -The Swift filesystem client reads files in HTTP GET operations, asking for a block of data at a time. - -The default value is 64KB. A larger value may be more efficient over faster networks, as it reduces the overhead of setting up the HTTP operation. - -However, if the file is read with many random accesses, requests for data will be made from different parts of the file -discarding some of the previously requested data. The benefits of larger request sizes may be wasted. - -The property `fs.swift.requestsize` sets the request size in KB. - - - fs.swift.requestsize - 128 - - -#### Connection timeout fs.swift.connect.timeout - -This sets the timeout in milliseconds to connect to a Swift service. - - - fs.swift.connect.timeout - 15000 - - -A shorter timeout means that connection failures are raised faster -but may trigger more false alarms. A longer timeout is more resilient to network problems -and may be needed when talking to remote filesystems. - -#### Connection timeout fs.swift.socket.timeout - -This sets the timeout in milliseconds to wait for data from a connected socket. - - - fs.swift.socket.timeout - 60000 - - -A shorter timeout means that connection failures are raised faster -but may trigger more false alarms. A longer timeout is more resilient to network problems -and may be needed when talking to remote filesystems. - -#### Connection Retry Count fs.swift.connect.retry.count - -This sets the number of times to try to connect to a service whenever an HTTP request is made. - - - fs.swift.connect.retry.count - 3 - - -The more retries, the more resilient it is to transient outages -and the less rapid it is at detecting and reporting server connectivity problems. 
- -#### Connection Throttle Delay fs.swift.connect.throttle.delay - -This property adds a delay between bulk file copy and delete operations, to prevent requests being throttled or blocked by the remote service - - - fs.swift.connect.throttle.delay - 0 - - -It is measured in milliseconds; "0" means do not add any delay. - -Throttling is enabled on the public endpoints of some Swift services. If `rename()` or `delete()` operations fail with `SwiftThrottledRequestException` exceptions, try setting this property. - -#### HTTP Proxy - -If the client can only access the Swift filesystem via a web proxy server, the client configuration must specify the proxy via the `fs.swift.connect.proxy.host` and `fs.swift.connect.proxy.port` properties. - - - fs.swift.proxy.host - web-proxy - - - - fs.swift.proxy.port - 8088 - - -If the host is declared, the proxy port must be set to a valid integer value. - -### Troubleshooting - -#### ClassNotFoundException - -The `hadoop-openstack` JAR -or any dependencies- may not be on your classpath. - -Make sure that the: -* JAR is installed on the servers in the cluster. -* 'hadoop-openstack' is on the HADOOP_OPTIONAL_TOOLS entry in hadoop-env.sh or that the job submission process uploads the JAR file to the distributed cache. - -#### Failure to Authenticate - -A `SwiftAuthenticationFailedException` is thrown when the client cannot authenticate with the OpenStack keystone server. This could be because the URL in the service definition is wrong, or because the supplied credentials are invalid. - -1. Check the authentication URL through `curl` or your browser - -2. Use a Swift client such as CyberDuck to validate your credentials - -3. If you have included a tenant ID, try leaving it out. Similarly, - try adding it if you had not included it. - -4. Try switching from API key authentication to password-based authentication, - by setting the password. - -5. Change your credentials. 
As with Amazon AWS clients, some credentials - don't seem to like going over the network. - -#### Timeout connecting to the Swift Service - -This happens if the client application is running outside an OpenStack cluster, where it does not have access to the private hostname/IP address for filesystem operations. Set the `public` flag to true -but remember to set it to false for use in-cluster. - -### Warnings - -1. Do not share your login details with anyone, which means do not log the - details, or check the XML configuration files into any revision control system - to which you do not have exclusive access. - -2. Similarly, do not use your real account details in any - documentation \*or any bug reports submitted online\* - -3. Prefer the apikey authentication over passwords as it is easier - to revoke a key -and some service providers allow you to set - an automatic expiry date on a key when issued. - -4. Do not use the public service endpoint from within a public OpenStack - cluster, as it will run up large bills. - -5. Remember: it's not a real filesystem or hierarchical directory structure. - Some operations (directory rename and delete) take time and are not atomic or - isolated from other operations taking place. - -6. Append is not supported. - -7. Unix-style permissions are not supported. All accounts with write access to - a repository have unlimited access; the same goes for those with read access. - -8. In the public clouds, do not make the containers public unless you are happy - with anyone reading your data, and are prepared to pay the costs of their - downloads. - -### Limits - -* Maximum length of an object path: 1024 characters - -* Maximum size of a binary object: no absolute limit. Files \> 5GB are - partitioned into separate files in the native filesystem, and merged during - retrieval. 
*Warning:* the partitioned/large file support is the - most complex part of the Hadoop/Swift FS integration, and, along with - authentication, the most troublesome to support. - -### Testing the hadoop-openstack module - -The `hadoop-openstack` can be remotely tested against any public or private cloud infrastructure which supports the OpenStack Keystone authentication mechanism. It can also be tested against private OpenStack clusters. OpenStack Development teams are strongly encouraged to test the Hadoop swift filesystem client against any version of Swift that they are developing or deploying, to stress their cluster and to identify bugs early. - -The module comes with a large suite of JUnit tests -tests that are only executed if the source tree includes credentials to test against a specific cluster. - -After checking out the Hadoop source tree, create the file: - - hadoop-tools/hadoop-openstack/src/test/resources/auth-keys.xml - -Into this file, insert the credentials needed to bond to the test filesystem, as decribed above. - -Next set the property `test.fs.swift.name` to the URL of a swift container to test against. The tests expect exclusive access to this container -do not keep any other data on it, or expect it to be preserved. - - - test.fs.swift.name - swift://test.myswift/ - - -In the base hadoop directory, run: - - mvn clean install -DskipTests - -This builds a set of Hadoop JARs consistent with the `hadoop-openstack` module that is about to be tested. - -In the `hadoop-tools/hadoop-openstack` directory run - - mvn test -Dtest=TestSwiftRestClient - -This runs some simple tests which include authenticating against the remote swift service. If these tests fail, so will all the rest. If it does fail: check your authentication. - -Once this test succeeds, you can run the full test suite - - mvn test - -Be advised that these tests can take an hour or more, especially against a remote Swift service -or one that throttles bulk operations. 
- -Once the `auth-keys.xml` file is in place, the `mvn test` runs from the Hadoop source base directory will automatically run these OpenStack tests While this ensures that no regressions have occurred, it can also add significant time to test runs, and may run up bills, depending on who is providingthe Swift storage service. We recommend having a separate source tree set up purely for the Swift tests, and running it manually or by the CI tooling at a lower frequency than normal test runs. - -Finally: Apache Hadoop is an open source project. Contributions of code -including more tests- are very welcome. diff --git a/hadoop-tools/hadoop-openstack/src/site/resources/css/site.css b/hadoop-tools/hadoop-openstack/src/site/resources/css/site.css deleted file mode 100644 index f830baafa8cc8..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/site/resources/css/site.css +++ /dev/null @@ -1,30 +0,0 @@ -/* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ -#banner { - height: 93px; - background: none; -} - -#bannerLeft img { - margin-left: 30px; - margin-top: 10px; -} - -#bannerRight img { - margin: 17px; -} - diff --git a/hadoop-tools/hadoop-openstack/src/site/site.xml b/hadoop-tools/hadoop-openstack/src/site/site.xml deleted file mode 100644 index e2941380e2acf..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/site/site.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - org.apache.maven.skins - maven-stylus-skin - ${maven-stylus-skin.version} - - - - - - - - - diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/AcceptAllFilter.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/AcceptAllFilter.java deleted file mode 100644 index 16c9da25777d2..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/AcceptAllFilter.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; - -/** - * A path filter that accepts everything - */ -public class AcceptAllFilter implements PathFilter { - @Override - public boolean accept(Path file) { - return true; - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/SwiftFileSystemBaseTest.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/SwiftFileSystemBaseTest.java deleted file mode 100644 index 4361a06a9498f..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/SwiftFileSystemBaseTest.java +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftOperationFailedException; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystemStore; -import org.apache.hadoop.fs.swift.util.DurationStats; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Before; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.List; - -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.assertPathExists; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.cleanupInTeardown; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.getServiceURI; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.noteAction; - -/** - * This is the base class for most of the Swift tests - */ -public class SwiftFileSystemBaseTest extends Assert implements - SwiftTestConstants { - - protected static final Logger LOG = - LoggerFactory.getLogger(SwiftFileSystemBaseTest.class); - protected SwiftNativeFileSystem fs; - protected static SwiftNativeFileSystem lastFs; - protected byte[] data = SwiftTestUtils.dataset(getBlockSize() * 2, 0, 255); - private Configuration conf; - - @Before - public void setUp() throws Exception { - noteAction("setup"); - final URI uri = getFilesystemURI(); - conf = createConfiguration(); - - fs = createSwiftFS(); - try { - fs.initialize(uri, conf); - } catch (IOException e) { - //FS init failed, set it to null so that teardown doesn't - //attempt to use it - 
fs = null; - throw e; - } - //remember the last FS - lastFs = fs; - noteAction("setup complete"); - } - - /** - * Configuration generator. May be overridden to inject - * some custom options - * @return a configuration with which to create FS instances - */ - protected Configuration createConfiguration() { - return new Configuration(); - } - - @After - public void tearDown() throws Exception { - cleanupInTeardown(fs, "/test"); - } - - @AfterClass - public static void classTearDown() throws Exception { - if (lastFs != null) { - List statistics = lastFs.getOperationStatistics(); - for (DurationStats stat : statistics) { - LOG.info(stat.toString()); - } - } - } - - /** - * Get the configuration used to set up the FS - * @return the configuration - */ - public Configuration getConf() { - return conf; - } - - /** - * Describe the test, combining some logging with details - * for people reading the code - * - * @param description test description - */ - protected void describe(String description) { - noteAction(description); - } - - protected URI getFilesystemURI() throws URISyntaxException, IOException { - return getServiceURI(createConfiguration()); - } - - protected SwiftNativeFileSystem createSwiftFS() throws IOException { - SwiftNativeFileSystem swiftNativeFileSystem = - new SwiftNativeFileSystem(); - return swiftNativeFileSystem; - } - - protected int getBlockSize() { - return 1024; - } - - /** - * Is rename supported? 
- * @return true - */ - protected boolean renameSupported() { - return true; - } - - /** - * assume in a test that rename is supported; - * skip it if not - */ - protected void assumeRenameSupported() { - Assume.assumeTrue(renameSupported()); - } - - /** - * Take an unqualified path, and qualify it w.r.t the - * current filesystem - * @param pathString source path - * @return a qualified path instance - */ - protected Path path(String pathString) { - return fs.makeQualified(new Path(pathString)); - } - - /** - * Get the filesystem - * @return the current FS - */ - public SwiftNativeFileSystem getFs() { - return fs; - } - - /** - * Create a file using the standard {@link #data} bytes. - * - * @param path path to write - * @throws IOException on any problem - */ - protected void createFile(Path path) throws IOException { - createFile(path, data); - } - - /** - * Create a file with the given data. - * - * @param path path to write - * @param sourceData source dataset - * @throws IOException on any problem - */ - protected void createFile(Path path, byte[] sourceData) throws IOException { - FSDataOutputStream out = fs.create(path); - out.write(sourceData, 0, sourceData.length); - out.close(); - } - - /** - * Create and then close a file - * @param path path to create - * @throws IOException on a failure - */ - protected void createEmptyFile(Path path) throws IOException { - FSDataOutputStream out = fs.create(path); - out.close(); - } - - /** - * Get the inner store -useful for lower level operations - * - * @return the store - */ - protected SwiftNativeFileSystemStore getStore() { - return fs.getStore(); - } - - /** - * Rename a path - * @param src source - * @param dst dest - * @param renameMustSucceed flag to say "this rename must exist" - * @param srcExists add assert that the source exists afterwards - * @param dstExists add assert the dest exists afterwards - * @throws IOException IO trouble - */ - protected void rename(Path src, Path dst, boolean 
renameMustSucceed, - boolean srcExists, boolean dstExists) throws IOException { - if (renameMustSucceed) { - renameToSuccess(src, dst, srcExists, dstExists); - } else { - renameToFailure(src, dst); - } - } - - /** - * Get a string describing the outcome of a rename, by listing the dest - * path and its parent along with some covering text - * @param src source path - * @param dst dest path - * @return a string for logs and exceptions - * @throws IOException IO problems - */ - private String getRenameOutcome(Path src, Path dst) throws IOException { - String lsDst = ls(dst); - Path parent = dst.getParent(); - String lsParent = parent != null ? ls(parent) : ""; - return " result of " + src + " => " + dst - + " - " + lsDst - + " \n" + lsParent; - } - - /** - * Rename, expecting an exception to be thrown - * - * @param src source - * @param dst dest - * @throws IOException a failure other than an - * expected SwiftRenameException or FileNotFoundException - */ - protected void renameToFailure(Path src, Path dst) throws IOException { - try { - getStore().rename(src, dst); - fail("Expected failure renaming " + src + " to " + dst - + "- but got success"); - } catch (SwiftOperationFailedException e) { - LOG.debug("Rename failed (expected):" + e); - } catch (FileNotFoundException e) { - LOG.debug("Rename failed (expected):" + e); - } - } - - /** - * Rename to success - * - * @param src source - * @param dst dest - * @param srcExists add assert that the source exists afterwards - * @param dstExists add assert the dest exists afterwards - * @throws SwiftOperationFailedException operation failure - * @throws IOException IO problems - */ - protected void renameToSuccess(Path src, Path dst, - boolean srcExists, boolean dstExists) - throws SwiftOperationFailedException, IOException { - getStore().rename(src, dst); - String outcome = getRenameOutcome(src, dst); - assertEquals("Source " + src + "exists: " + outcome, - srcExists, fs.exists(src)); - assertEquals("Destination " + 
dstExists + " exists" + outcome, - dstExists, fs.exists(dst)); - } - - /** - * List a path in the test FS - * @param path path to list - * @return the contents of the path/dir - * @throws IOException IO problems - */ - protected String ls(Path path) throws IOException { - return SwiftTestUtils.ls(fs, path); - } - - /** - * assert that a path exists - * @param message message to use in an assertion - * @param path path to probe - * @throws IOException IO problems - */ - public void assertExists(String message, Path path) throws IOException { - assertPathExists(fs, message, path); - } - - /** - * assert that a path does not - * @param message message to use in an assertion - * @param path path to probe - * @throws IOException IO problems - */ - public void assertPathDoesNotExist(String message, Path path) throws - IOException { - SwiftTestUtils.assertPathDoesNotExist(fs, message, path); - } - - /** - * Assert that a file exists and whose {@link FileStatus} entry - * declares that this is a file and not a symlink or directory. - * - * @param filename name of the file - * @throws IOException IO problems during file operations - */ - protected void assertIsFile(Path filename) throws IOException { - SwiftTestUtils.assertIsFile(fs, filename); - } - - /** - * Assert that a file exists and whose {@link FileStatus} entry - * declares that this is a file and not a symlink or directory. 
- * - * @throws IOException IO problems during file operations - */ - protected void mkdirs(Path path) throws IOException { - assertTrue("Failed to mkdir" + path, fs.mkdirs(path)); - } - - /** - * Assert that a delete succeeded - * @param path path to delete - * @param recursive recursive flag - * @throws IOException IO problems - */ - protected void assertDeleted(Path path, boolean recursive) throws IOException { - SwiftTestUtils.assertDeleted(fs, path, recursive); - } - - /** - * Assert that a value is not equal to the expected value - * @param message message if the two values are equal - * @param expected expected value - * @param actual actual value - */ - protected void assertNotEqual(String message, int expected, int actual) { - assertTrue(message, - actual != expected); - } - - /** - * Get the number of partitions written from the Swift Native FS APIs - * @param out output stream - * @return the number of partitioned files written by the stream - */ - protected int getPartitionsWritten(FSDataOutputStream out) { - return SwiftNativeFileSystem.getPartitionsWritten(out); - } - - /** - * Assert that the no. of partitions written matches expectations - * @param action operation (for use in the assertions) - * @param out output stream - * @param expected expected no. 
of partitions - */ - protected void assertPartitionsWritten(String action, FSDataOutputStream out, - long expected) { - OutputStream nativeStream = out.getWrappedStream(); - int written = getPartitionsWritten(out); - if(written !=expected) { - Assert.fail(action + ": " + - TestSwiftFileSystemPartitionedUploads.WRONG_PARTITION_COUNT - + " + expected: " + expected + " actual: " + written - + " -- " + nativeStream); - } - } - - /** - * Assert that the result value == -1; which implies - * that a read was successful - * @param text text to include in a message (usually the operation) - * @param result read result to validate - */ - protected void assertMinusOne(String text, int result) { - assertEquals(text + " wrong read result " + result, -1, result); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/SwiftTestConstants.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/SwiftTestConstants.java deleted file mode 100644 index 6948cf92fa6c0..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/SwiftTestConstants.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift; - -/** - * Hard coded constants for the test timeouts - */ -public interface SwiftTestConstants { - /** - * Timeout for swift tests: {@value} - */ - int SWIFT_TEST_TIMEOUT = 5 * 60 * 1000; - - /** - * Timeout for tests performing bulk operations: {@value} - */ - int SWIFT_BULK_IO_TEST_TIMEOUT = 12 * 60 * 1000; -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestFSMainOperationsSwift.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestFSMainOperationsSwift.java deleted file mode 100644 index b595f1c2d14ab..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestFSMainOperationsSwift.java +++ /dev/null @@ -1,372 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.swift; - - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSMainOperationsBaseTest; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import static org.apache.hadoop.fs.swift.SwiftTestConstants.SWIFT_TEST_TIMEOUT; -import java.io.IOException; -import java.net.URI; - -public class TestFSMainOperationsSwift extends FSMainOperationsBaseTest { - - @Override - @Before - public void setUp() throws Exception { - Configuration conf = new Configuration(); - //small blocksize for faster remote tests - conf.setInt(SwiftProtocolConstants.SWIFT_BLOCKSIZE, 2); - URI serviceURI = SwiftTestUtils.getServiceURI(conf); - fSys = FileSystem.get(serviceURI, conf); - super.setUp(); - } - - private Path wd = null; - - @Override - protected FileSystem createFileSystem() throws Exception { - return fSys; - } - - @Override - protected Path getDefaultWorkingDirectory() throws IOException { - if (wd == null) { - wd = fSys.getWorkingDirectory(); - } - return wd; - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWDAbsolute() throws IOException { - Path absoluteDir = getTestRootPath(fSys, "test/existingDir"); - fSys.mkdirs(absoluteDir); - fSys.setWorkingDirectory(absoluteDir); - Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testListStatusThrowsExceptionForUnreadableDir() { - SwiftTestUtils.skip("unsupported"); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusThrowsExceptionForUnreadableDir() { - SwiftTestUtils.skip("unsupported"); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testFsStatus() throws Exception { - super.testFsStatus(); - } - - 
@Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWorkingDirectory() throws Exception { - super.testWorkingDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testMkdirs() throws Exception { - super.testMkdirs(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception { - super.testMkdirsFailsForSubdirectoryOfExistingFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGetFileStatusThrowsExceptionForNonExistentFile() throws - Exception { - super.testGetFileStatusThrowsExceptionForNonExistentFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testListStatusThrowsExceptionForNonExistentFile() throws - Exception { - super.testListStatusThrowsExceptionForNonExistentFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testListStatus() throws Exception { - super.testListStatus(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testListStatusFilterWithNoMatches() throws Exception { - super.testListStatusFilterWithNoMatches(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testListStatusFilterWithSomeMatches() throws Exception { - super.testListStatusFilterWithSomeMatches(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusNonExistentFile() throws Exception { - super.testGlobStatusNonExistentFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusWithNoMatchesInPath() throws Exception { - super.testGlobStatusWithNoMatchesInPath(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusSomeMatchesInDirectories() throws Exception { - super.testGlobStatusSomeMatchesInDirectories(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusWithMultipleWildCardMatches() throws Exception { - 
super.testGlobStatusWithMultipleWildCardMatches(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusWithMultipleMatchesOfSingleChar() throws Exception { - super.testGlobStatusWithMultipleMatchesOfSingleChar(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusFilterWithEmptyPathResults() throws Exception { - super.testGlobStatusFilterWithEmptyPathResults(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusFilterWithSomePathMatchesAndTrivialFilter() throws - Exception { - super.testGlobStatusFilterWithSomePathMatchesAndTrivialFilter(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusFilterWithMultipleWildCardMatchesAndTrivialFilter() throws - Exception { - super.testGlobStatusFilterWithMultipleWildCardMatchesAndTrivialFilter(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusFilterWithMultiplePathMatchesAndNonTrivialFilter() throws - Exception { - super.testGlobStatusFilterWithMultiplePathMatchesAndNonTrivialFilter(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusFilterWithNoMatchingPathsAndNonTrivialFilter() throws - Exception { - super.testGlobStatusFilterWithNoMatchingPathsAndNonTrivialFilter(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGlobStatusFilterWithMultiplePathWildcardsAndNonTrivialFilter() throws - Exception { - super.testGlobStatusFilterWithMultiplePathWildcardsAndNonTrivialFilter(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWriteReadAndDeleteEmptyFile() throws Exception { - super.testWriteReadAndDeleteEmptyFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWriteReadAndDeleteHalfABlock() throws Exception { - super.testWriteReadAndDeleteHalfABlock(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWriteReadAndDeleteOneBlock() 
throws Exception { - super.testWriteReadAndDeleteOneBlock(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWriteReadAndDeleteOneAndAHalfBlocks() throws Exception { - super.testWriteReadAndDeleteOneAndAHalfBlocks(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWriteReadAndDeleteTwoBlocks() throws Exception { - super.testWriteReadAndDeleteTwoBlocks(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testOverwrite() throws IOException { - super.testOverwrite(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testWriteInNonExistentDirectory() throws IOException { - super.testWriteInNonExistentDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testDeleteNonExistentFile() throws IOException { - super.testDeleteNonExistentFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testDeleteRecursively() throws IOException { - super.testDeleteRecursively(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testDeleteEmptyDirectory() throws IOException { - super.testDeleteEmptyDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameNonExistentPath() throws Exception { - super.testRenameNonExistentPath(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameFileToNonExistentDirectory() throws Exception { - super.testRenameFileToNonExistentDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameFileToDestinationWithParentFile() throws Exception { - super.testRenameFileToDestinationWithParentFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameFileToExistingParent() throws Exception { - super.testRenameFileToExistingParent(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameFileToItself() throws Exception { - super.testRenameFileToItself(); - } 
- - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameFileAsExistingFile() throws Exception { - super.testRenameFileAsExistingFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameFileAsExistingDirectory() throws Exception { - super.testRenameFileAsExistingDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameDirectoryToItself() throws Exception { - super.testRenameDirectoryToItself(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameDirectoryToNonExistentParent() throws Exception { - super.testRenameDirectoryToNonExistentParent(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameDirectoryAsNonExistentDirectory() throws Exception { - super.testRenameDirectoryAsNonExistentDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameDirectoryAsEmptyDirectory() throws Exception { - super.testRenameDirectoryAsEmptyDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameDirectoryAsNonEmptyDirectory() throws Exception { - super.testRenameDirectoryAsNonEmptyDirectory(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testRenameDirectoryAsFile() throws Exception { - super.testRenameDirectoryAsFile(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testInputStreamClosedTwice() throws IOException { - super.testInputStreamClosedTwice(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testOutputStreamClosedTwice() throws IOException { - super.testOutputStreamClosedTwice(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testGetWrappedInputStream() throws IOException { - super.testGetWrappedInputStream(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - @Override - public void testCopyToLocalWithUseRawLocalFileSystemOption() throws - Exception { - 
super.testCopyToLocalWithUseRawLocalFileSystemOption(); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestLogResources.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestLogResources.java deleted file mode 100644 index 99c6962cb48e5..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestLogResources.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.junit.Test; - -import java.net.URL; - -/** - * This test just debugs which log resources are being picked up - */ -public class TestLogResources implements SwiftTestConstants { - protected static final Logger LOG = - LoggerFactory.getLogger(TestLogResources.class); - - private void printf(String format, Object... 
args) { - String msg = String.format(format, args); - System.out.printf(msg + "\n"); - LOG.info(msg); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testWhichLog4JPropsFile() throws Throwable { - locateResource("log4j.properties"); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testWhichLog4JXMLFile() throws Throwable { - locateResource("log4j.XML"); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testCommonsLoggingProps() throws Throwable { - locateResource("commons-logging.properties"); - } - - private void locateResource(String resource) { - URL url = this.getClass().getClassLoader().getResource(resource); - if (url != null) { - printf("resource %s is at %s", resource, url); - } else { - printf("resource %s is not on the classpath", resource); - } - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestReadPastBuffer.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestReadPastBuffer.java deleted file mode 100644 index c195bffc5137f..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestReadPastBuffer.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.apache.hadoop.io.IOUtils; -import org.junit.After; -import org.junit.Test; - -/** - * Seek tests verify that - *
      - *
    1. When you seek on a 0 byte file to byte (0), it's not an error.
    2. - *
    3. When you seek past the end of a file, it's an error that should - * raise -what- EOFException?
    4. - *
    5. when you seek forwards, you get new data
    6. - *
    7. when you seek backwards, you get the previous data
    8. - *
    9. That this works for big multi-MB files as well as small ones.
    10. - *
    - * These may seem "obvious", but the more the input streams try to be clever - * about offsets and buffering, the more likely it is that seek() will start - * to get confused. - */ -public class TestReadPastBuffer extends SwiftFileSystemBaseTest { - protected static final Logger LOG = - LoggerFactory.getLogger(TestReadPastBuffer.class); - public static final int SWIFT_READ_BLOCKSIZE = 4096; - public static final int SEEK_FILE_LEN = SWIFT_READ_BLOCKSIZE * 2; - - private Path testPath; - private Path readFile; - private Path zeroByteFile; - private FSDataInputStream instream; - - - /** - * Get a configuration which a small blocksize reported to callers - * @return a configuration for this test - */ - @Override - public Configuration getConf() { - Configuration conf = super.getConf(); - /* - * set to 4KB - */ - conf.setInt(SwiftProtocolConstants.SWIFT_BLOCKSIZE, SWIFT_READ_BLOCKSIZE); - return conf; - } - - /** - * Setup creates dirs under test/hadoop - * - * @throws Exception - */ - @Override - public void setUp() throws Exception { - super.setUp(); - byte[] block = SwiftTestUtils.dataset(SEEK_FILE_LEN, 0, 255); - - //delete the test directory - testPath = path("/test"); - readFile = new Path(testPath, "TestReadPastBuffer.txt"); - createFile(readFile, block); - } - - @After - public void cleanFile() { - IOUtils.closeStream(instream); - instream = null; - } - - /** - * Create a config with a 1KB request size - * @return a config - */ - @Override - protected Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - conf.set(SwiftProtocolConstants.SWIFT_REQUEST_SIZE, "1"); - return conf; - } - - /** - * Seek past the buffer then read - * @throws Throwable problems - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekAndReadPastEndOfFile() throws Throwable { - instream = fs.open(readFile); - assertEquals(0, instream.getPos()); - //expect that seek to 0 works - //go just before the end - instream.seek(SEEK_FILE_LEN - 2); 
- assertTrue("Premature EOF", instream.read() != -1); - assertTrue("Premature EOF", instream.read() != -1); - assertMinusOne("read past end of file", instream.read()); - } - - /** - * Seek past the buffer and attempt a read(buffer) - * @throws Throwable failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekBulkReadPastEndOfFile() throws Throwable { - instream = fs.open(readFile); - assertEquals(0, instream.getPos()); - //go just before the end - instream.seek(SEEK_FILE_LEN - 1); - byte[] buffer = new byte[1]; - int result = instream.read(buffer, 0, 1); - //next byte is expected to fail - result = instream.read(buffer, 0, 1); - assertMinusOne("read past end of file", result); - //and this one - result = instream.read(buffer, 0, 1); - assertMinusOne("read past end of file", result); - - //now do an 0-byte read and expect it to - //to be checked first - result = instream.read(buffer, 0, 0); - assertEquals("EOF checks coming before read range check", 0, result); - - } - - - - /** - * Read past the buffer size byte by byte and verify that it refreshed - * @throws Throwable - */ - @Test - public void testReadPastBufferSize() throws Throwable { - instream = fs.open(readFile); - - while (instream.read() != -1); - //here we have gone past the end of a file and its buffer. Now try again - assertMinusOne("reading after the (large) file was read: "+ instream, - instream.read()); - } -} - diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSeek.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSeek.java deleted file mode 100644 index 51fa92a2eb3a1..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSeek.java +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftConnectionClosedException; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.apache.hadoop.io.IOUtils; -import org.junit.After; -import org.junit.Test; - -import java.io.EOFException; -import java.io.IOException; - -/** - * Seek tests verify that - *
      - *
    1. When you seek on a 0 byte file to byte (0), it's not an error.
    2. - *
    3. When you seek past the end of a file, it's an error that should - * raise -what- EOFException?
    4. - *
    5. when you seek forwards, you get new data
    6. - *
    7. when you seek backwards, you get the previous data
    8. - *
    9. That this works for big multi-MB files as well as small ones.
    10. - *
    - * These may seem "obvious", but the more the input streams try to be clever - * about offsets and buffering, the more likely it is that seek() will start - * to get confused. - */ -public class TestSeek extends SwiftFileSystemBaseTest { - protected static final Logger LOG = - LoggerFactory.getLogger(TestSeek.class); - public static final int SMALL_SEEK_FILE_LEN = 256; - - private Path testPath; - private Path smallSeekFile; - private Path zeroByteFile; - private FSDataInputStream instream; - - /** - * Setup creates dirs under test/hadoop - * - * @throws Exception - */ - @Override - public void setUp() throws Exception { - super.setUp(); - //delete the test directory - testPath = path("/test"); - smallSeekFile = new Path(testPath, "seekfile.txt"); - zeroByteFile = new Path(testPath, "zero.txt"); - byte[] block = SwiftTestUtils.dataset(SMALL_SEEK_FILE_LEN, 0, 255); - //this file now has a simple rule: offset => value - createFile(smallSeekFile, block); - createEmptyFile(zeroByteFile); - } - - @After - public void cleanFile() { - IOUtils.closeStream(instream); - instream = null; - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekZeroByteFile() throws Throwable { - instream = fs.open(zeroByteFile); - assertEquals(0, instream.getPos()); - //expect initial read to fai; - int result = instream.read(); - assertMinusOne("initial byte read", result); - byte[] buffer = new byte[1]; - //expect that seek to 0 works - instream.seek(0); - //reread, expect same exception - result = instream.read(); - assertMinusOne("post-seek byte read", result); - result = instream.read(buffer, 0, 1); - assertMinusOne("post-seek buffer read", result); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testBlockReadZeroByteFile() throws Throwable { - instream = fs.open(zeroByteFile); - assertEquals(0, instream.getPos()); - //expect that seek to 0 works - byte[] buffer = new byte[1]; - int result = instream.read(buffer, 0, 1); - assertMinusOne("block read zero byte 
file", result); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekReadClosedFile() throws Throwable { - instream = fs.open(smallSeekFile); - instream.close(); - try { - instream.seek(0); - } catch (SwiftConnectionClosedException e) { - //expected a closed file - } - try { - instream.read(); - } catch (IOException e) { - //expected a closed file - } - try { - byte[] buffer = new byte[1]; - int result = instream.read(buffer, 0, 1); - } catch (IOException e) { - //expected a closed file - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testNegativeSeek() throws Throwable { - instream = fs.open(smallSeekFile); - assertEquals(0, instream.getPos()); - try { - instream.seek(-1); - long p = instream.getPos(); - LOG.warn("Seek to -1 returned a position of " + p); - int result = instream.read(); - fail( - "expected an exception, got data " + result + " at a position of " + p); - } catch (IOException e) { - //bad seek -expected - } - assertEquals(0, instream.getPos()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekFile() throws Throwable { - instream = fs.open(smallSeekFile); - assertEquals(0, instream.getPos()); - //expect that seek to 0 works - instream.seek(0); - int result = instream.read(); - assertEquals(0, result); - assertEquals(1, instream.read()); - assertEquals(2, instream.getPos()); - assertEquals(2, instream.read()); - assertEquals(3, instream.getPos()); - instream.seek(128); - assertEquals(128, instream.getPos()); - assertEquals(128, instream.read()); - instream.seek(63); - assertEquals(63, instream.read()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekAndReadPastEndOfFile() throws Throwable { - instream = fs.open(smallSeekFile); - assertEquals(0, instream.getPos()); - //expect that seek to 0 works - //go just before the end - instream.seek(SMALL_SEEK_FILE_LEN - 2); - assertTrue("Premature EOF", instream.read() != -1); - assertTrue("Premature EOF", instream.read() != -1); - assertMinusOne("read 
past end of file", instream.read()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekAndPastEndOfFileThenReseekAndRead() throws Throwable { - instream = fs.open(smallSeekFile); - //go just before the end. This may or may not fail; it may be delayed until the - //read - try { - instream.seek(SMALL_SEEK_FILE_LEN); - //if this doesn't trigger, then read() is expected to fail - assertMinusOne("read after seeking past EOF", instream.read()); - } catch (EOFException expected) { - //here an exception was raised in seek - } - instream.seek(1); - assertTrue("Premature EOF", instream.read() != -1); - } - - @Override - protected Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - conf.set(SwiftProtocolConstants.SWIFT_REQUEST_SIZE, "1"); - return conf; - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSeekBigFile() throws Throwable { - Path testSeekFile = new Path(testPath, "bigseekfile.txt"); - byte[] block = SwiftTestUtils.dataset(65536, 0, 255); - createFile(testSeekFile, block); - instream = fs.open(testSeekFile); - assertEquals(0, instream.getPos()); - //expect that seek to 0 works - instream.seek(0); - int result = instream.read(); - assertEquals(0, result); - assertEquals(1, instream.read()); - assertEquals(2, instream.read()); - - //do seek 32KB ahead - instream.seek(32768); - assertEquals("@32768", block[32768], (byte) instream.read()); - instream.seek(40000); - assertEquals("@40000", block[40000], (byte) instream.read()); - instream.seek(8191); - assertEquals("@8191", block[8191], (byte) instream.read()); - instream.seek(0); - assertEquals("@0", 0, (byte) instream.read()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testPositionedBulkReadDoesntChangePosition() throws Throwable { - Path testSeekFile = new Path(testPath, "bigseekfile.txt"); - byte[] block = SwiftTestUtils.dataset(65536, 0, 255); - createFile(testSeekFile, block); - instream = fs.open(testSeekFile); - 
instream.seek(39999); - assertTrue(-1 != instream.read()); - assertEquals (40000, instream.getPos()); - - byte[] readBuffer = new byte[256]; - instream.read(128, readBuffer, 0, readBuffer.length); - //have gone back - assertEquals(40000, instream.getPos()); - //content is the same too - assertEquals("@40000", block[40000], (byte) instream.read()); - //now verify the picked up data - for (int i = 0; i < 256; i++) { - assertEquals("@" + i, block[i + 128], readBuffer[i]); - } - } - - /** - * work out the expected byte from a specific offset - * @param offset offset in the file - * @return the value - */ - int expectedByte(int offset) { - return offset & 0xff; - } -} - diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftConfig.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftConfig.java deleted file mode 100644 index 0212b4d9c65dc..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftConfig.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.swift.http.SwiftRestClient; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_AUTH_URL; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_LOCATION_AWARE; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_PASSWORD; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_TENANT; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_USERNAME; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_BLOCKSIZE; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_CONNECTION_TIMEOUT; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_PARTITION_SIZE; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_PROXY_HOST_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_PROXY_PORT_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_RETRY_COUNT; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_SERVICE_PREFIX; - -/** - * Test the swift service-specific configuration binding features - */ -public class TestSwiftConfig extends Assert { - - - public static final String SERVICE = "openstack"; - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testEmptyUrl() throws Exception { - final Configuration configuration = new Configuration(); - - set(configuration, DOT_TENANT, "tenant"); - set(configuration, DOT_USERNAME, "username"); - set(configuration, DOT_PASSWORD, "password"); - mkInstance(configuration); - } - -@Test - public void testEmptyTenant() throws Exception { - final 
Configuration configuration = new Configuration(); - set(configuration, DOT_AUTH_URL, "http://localhost:8080"); - set(configuration, DOT_USERNAME, "username"); - set(configuration, DOT_PASSWORD, "password"); - mkInstance(configuration); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testEmptyUsername() throws Exception { - final Configuration configuration = new Configuration(); - set(configuration, DOT_AUTH_URL, "http://localhost:8080"); - set(configuration, DOT_TENANT, "tenant"); - set(configuration, DOT_PASSWORD, "password"); - mkInstance(configuration); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testEmptyPassword() throws Exception { - final Configuration configuration = new Configuration(); - set(configuration, DOT_AUTH_URL, "http://localhost:8080"); - set(configuration, DOT_TENANT, "tenant"); - set(configuration, DOT_USERNAME, "username"); - mkInstance(configuration); - } - - @Test - public void testGoodRetryCount() throws Exception { - final Configuration configuration = createCoreConfig(); - configuration.set(SWIFT_RETRY_COUNT, "3"); - mkInstance(configuration); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testBadRetryCount() throws Exception { - final Configuration configuration = createCoreConfig(); - configuration.set(SWIFT_RETRY_COUNT, "three"); - mkInstance(configuration); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testBadConnectTimeout() throws Exception { - final Configuration configuration = createCoreConfig(); - configuration.set(SWIFT_CONNECTION_TIMEOUT, "three"); - mkInstance(configuration); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testZeroBlocksize() throws Exception { - final Configuration configuration = 
createCoreConfig(); - configuration.set(SWIFT_BLOCKSIZE, "0"); - mkInstance(configuration); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testNegativeBlocksize() throws Exception { - final Configuration configuration = createCoreConfig(); - configuration.set(SWIFT_BLOCKSIZE, "-1"); - mkInstance(configuration); - } - - @Test - public void testPositiveBlocksize() throws Exception { - final Configuration configuration = createCoreConfig(); - int size = 127; - configuration.set(SWIFT_BLOCKSIZE, Integer.toString(size)); - SwiftRestClient restClient = mkInstance(configuration); - assertEquals(size, restClient.getBlocksizeKB()); - } - - @Test - public void testLocationAwareTruePropagates() throws Exception { - final Configuration configuration = createCoreConfig(); - set(configuration, DOT_LOCATION_AWARE, "true"); - SwiftRestClient restClient = mkInstance(configuration); - assertTrue(restClient.isLocationAware()); - } - - @Test - public void testLocationAwareFalsePropagates() throws Exception { - final Configuration configuration = createCoreConfig(); - set(configuration, DOT_LOCATION_AWARE, "false"); - SwiftRestClient restClient = mkInstance(configuration); - assertFalse(restClient.isLocationAware()); - } - - @Test(expected = org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException.class) - public void testNegativePartsize() throws Exception { - final Configuration configuration = createCoreConfig(); - configuration.set(SWIFT_PARTITION_SIZE, "-1"); - SwiftRestClient restClient = mkInstance(configuration); - } - - @Test - public void testPositivePartsize() throws Exception { - final Configuration configuration = createCoreConfig(); - int size = 127; - configuration.set(SWIFT_PARTITION_SIZE, Integer.toString(size)); - SwiftRestClient restClient = mkInstance(configuration); - assertEquals(size, restClient.getPartSizeKB()); - } - - @Test - public void testProxyData() throws Exception { - final 
Configuration configuration = createCoreConfig(); - String proxy="web-proxy"; - int port = 8088; - configuration.set(SWIFT_PROXY_HOST_PROPERTY, proxy); - configuration.set(SWIFT_PROXY_PORT_PROPERTY, Integer.toString(port)); - SwiftRestClient restClient = mkInstance(configuration); - assertEquals(proxy, restClient.getProxyHost()); - assertEquals(port, restClient.getProxyPort()); - } - - private Configuration createCoreConfig() { - final Configuration configuration = new Configuration(); - set(configuration, DOT_AUTH_URL, "http://localhost:8080"); - set(configuration, DOT_TENANT, "tenant"); - set(configuration, DOT_USERNAME, "username"); - set(configuration, DOT_PASSWORD, "password"); - return configuration; - } - - private void set(Configuration configuration, String field, String value) { - configuration.set(SWIFT_SERVICE_PREFIX + SERVICE + field, value); - } - - private SwiftRestClient mkInstance(Configuration configuration) throws - IOException, - URISyntaxException { - URI uri = new URI("swift://container.openstack/"); - return SwiftRestClient.getInstance(uri, configuration); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBasicOps.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBasicOps.java deleted file mode 100644 index 516dc99fab02d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBasicOps.java +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.junit.Assert; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.ParentNotDirectoryException; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftBadRequestException; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; - -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.assertFileHasLength; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.assertIsDirectory; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.readBytesToString; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.writeTextFile; - - -/** - * Test basic filesystem operations. - * Many of these are similar to those in {@link TestSwiftFileSystemContract} - * -this is a JUnit4 test suite used to initially test the Swift - * component. Once written, there's no reason not to retain these tests. 
- */ -public class TestSwiftFileSystemBasicOps extends SwiftFileSystemBaseTest { - - private static final Logger LOG = - LoggerFactory.getLogger(TestSwiftFileSystemBasicOps.class); - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLsRoot() throws Throwable { - Path path = new Path("/"); - FileStatus[] statuses = fs.listStatus(path); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testMkDir() throws Throwable { - Path path = new Path("/test/MkDir"); - fs.mkdirs(path); - //success then -so try a recursive operation - fs.delete(path, true); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDeleteNonexistentFile() throws Throwable { - Path path = new Path("/test/DeleteNonexistentFile"); - assertFalse("delete returned true", fs.delete(path, false)); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testPutFile() throws Throwable { - Path path = new Path("/test/PutFile"); - Exception caught = null; - writeTextFile(fs, path, "Testing a put to a file", false); - assertDeleted(path, false); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testPutGetFile() throws Throwable { - Path path = new Path("/test/PutGetFile"); - try { - String text = "Testing a put and get to a file " - + System.currentTimeMillis(); - writeTextFile(fs, path, text, false); - - String result = readBytesToString(fs, path, text.length()); - assertEquals(text, result); - } finally { - delete(fs, path); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testPutDeleteFileInSubdir() throws Throwable { - Path path = - new Path("/test/PutDeleteFileInSubdir/testPutDeleteFileInSubdir"); - String text = "Testing a put and get to a file in a subdir " - + System.currentTimeMillis(); - writeTextFile(fs, path, text, false); - assertDeleted(path, false); - //now delete the parent that should have no children - assertDeleted(new Path("/test/PutDeleteFileInSubdir"), false); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRecursiveDelete() throws 
Throwable { - Path childpath = - new Path("/test/testRecursiveDelete"); - String text = "Testing a put and get to a file in a subdir " - + System.currentTimeMillis(); - writeTextFile(fs, childpath, text, false); - //now delete the parent that should have no children - assertDeleted(new Path("/test"), true); - assertFalse("child entry still present " + childpath, fs.exists(childpath)); - } - - private void delete(SwiftNativeFileSystem fs, Path path) { - try { - if (!fs.delete(path, false)) { - LOG.warn("Failed to delete " + path); - } - } catch (IOException e) { - LOG.warn("deleting " + path, e); - } - } - - private void deleteR(SwiftNativeFileSystem fs, Path path) { - try { - if (!fs.delete(path, true)) { - LOG.warn("Failed to delete " + path); - } - } catch (IOException e) { - LOG.warn("deleting " + path, e); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testOverwrite() throws Throwable { - Path path = new Path("/test/Overwrite"); - try { - String text = "Testing a put to a file " - + System.currentTimeMillis(); - writeTextFile(fs, path, text, false); - assertFileHasLength(fs, path, text.length()); - String text2 = "Overwriting a file " - + System.currentTimeMillis(); - writeTextFile(fs, path, text2, true); - assertFileHasLength(fs, path, text2.length()); - String result = readBytesToString(fs, path, text2.length()); - assertEquals(text2, result); - } finally { - delete(fs, path); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testOverwriteDirectory() throws Throwable { - Path path = new Path("/test/testOverwriteDirectory"); - try { - fs.mkdirs(path.getParent()); - String text = "Testing a put to a file " - + System.currentTimeMillis(); - writeTextFile(fs, path, text, false); - assertFileHasLength(fs, path, text.length()); - } finally { - delete(fs, path); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testFileStatus() throws Throwable { - Path path = new Path("/test/FileStatus"); - try { - String text = "Testing File 
Status " - + System.currentTimeMillis(); - writeTextFile(fs, path, text, false); - SwiftTestUtils.assertIsFile(fs, path); - } finally { - delete(fs, path); - } - } - - /** - * Assert that a newly created directory is a directory - * - * @throws Throwable if not, or if something else failed - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDirStatus() throws Throwable { - Path path = new Path("/test/DirStatus"); - try { - fs.mkdirs(path); - assertIsDirectory(fs, path); - } finally { - delete(fs, path); - } - } - - /** - * Assert that if a directory that has children is deleted, it is still - * a directory - * - * @throws Throwable if not, or if something else failed - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDirStaysADir() throws Throwable { - Path path = new Path("/test/dirStaysADir"); - Path child = new Path(path, "child"); - try { - //create the dir - fs.mkdirs(path); - //assert the parent has the directory nature - assertIsDirectory(fs, path); - //create the child dir - writeTextFile(fs, child, "child file", true); - //assert the parent has the directory nature - assertIsDirectory(fs, path); - //now rm the child - delete(fs, child); - } finally { - deleteR(fs, path); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testCreateMultilevelDir() throws Throwable { - Path base = new Path("/test/CreateMultilevelDir"); - Path path = new Path(base, "1/2/3"); - fs.mkdirs(path); - assertExists("deep multilevel dir not created", path); - fs.delete(base, true); - assertPathDoesNotExist("Multilevel delete failed", path); - assertPathDoesNotExist("Multilevel delete failed", base); - - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testCreateDirWithFileParent() throws Throwable { - Path path = new Path("/test/CreateDirWithFileParent"); - Path child = new Path(path, "subdir/child"); - fs.mkdirs(path.getParent()); - try { - //create the child dir - writeTextFile(fs, path, "parent", true); - try { - fs.mkdirs(child); - } catch 
(ParentNotDirectoryException expected) { - LOG.debug("Expected Exception", expected); - } - } finally { - fs.delete(path, true); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLongObjectNamesForbidden() throws Throwable { - StringBuilder buffer = new StringBuilder(1200); - buffer.append("/"); - for (int i = 0; i < (1200 / 4); i++) { - buffer.append(String.format("%04x", i)); - } - String pathString = buffer.toString(); - Path path = new Path(pathString); - try { - writeTextFile(fs, path, pathString, true); - //if we get here, problems. - fs.delete(path, false); - fail("Managed to create an object with a name of length " - + pathString.length()); - } catch (SwiftBadRequestException e) { - //expected - //LOG.debug("Caught exception " + e, e); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLsNonExistentFile() throws Exception { - try { - Path path = new Path("/test/hadoop/file"); - FileStatus[] statuses = fs.listStatus(path); - fail("Should throw FileNotFoundException on " + path - + " but got list of length " + statuses.length); - } catch (FileNotFoundException fnfe) { - // expected - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testGetCanonicalServiceName() { - Assert.assertNull(fs.getCanonicalServiceName()); - } - - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBlockLocation.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBlockLocation.java deleted file mode 100644 index 1ad28a6cc4521..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBlockLocation.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.IOException; - -/** - * Test block location logic. - * The endpoint may or may not be location-aware - */ -public class TestSwiftFileSystemBlockLocation extends SwiftFileSystemBaseTest { - - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateSingleFileBlocks() throws Throwable { - describe("verify that a file returns 1+ blocks"); - FileStatus fileStatus = createFileAndGetStatus(); - BlockLocation[] locations = - getFs().getFileBlockLocations(fileStatus, 0, 1); - assertNotEqual("No block locations supplied for " + fileStatus, 0, - locations.length); - for (BlockLocation location : locations) { - assertLocationValid(location); - } - } - - private void assertLocationValid(BlockLocation location) throws - IOException { - LOG.info("{}", location); - String[] hosts = location.getHosts(); - String[] names = location.getNames(); - assertNotEqual("No hosts supplied for " + location, 0, hosts.length); - //for every host, there's a name. 
- assertEquals("Unequal names and hosts in " + location, - hosts.length, names.length); - assertEquals(SwiftProtocolConstants.BLOCK_LOCATION, - location.getNames()[0]); - assertEquals(SwiftProtocolConstants.TOPOLOGY_PATH, - location.getTopologyPaths()[0]); - } - - private FileStatus createFileAndGetStatus() throws IOException { - Path path = path("/test/locatedFile"); - createFile(path); - return fs.getFileStatus(path); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateNullStatus() throws Throwable { - describe("verify that a null filestatus maps to a null location array"); - BlockLocation[] locations = - getFs().getFileBlockLocations((FileStatus) null, 0, 1); - assertNull(locations); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateNegativeSeek() throws Throwable { - describe("verify that a negative offset is illegal"); - try { - BlockLocation[] locations = - getFs().getFileBlockLocations(createFileAndGetStatus(), - -1, - 1); - fail("Expected an exception, got " + locations.length + " locations"); - } catch (IllegalArgumentException e) { - //expected - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateNegativeLen() throws Throwable { - describe("verify that a negative length is illegal"); - try { - BlockLocation[] locations = - getFs().getFileBlockLocations(createFileAndGetStatus(), - 0, - -1); - fail("Expected an exception, got " + locations.length + " locations"); - } catch (IllegalArgumentException e) { - //expected - } - } - - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateOutOfRangeLen() throws Throwable { - describe("overshooting the length is legal, as long as the" + - " origin location is valid"); - - BlockLocation[] locations = - getFs().getFileBlockLocations(createFileAndGetStatus(), - 0, - data.length + 100); - assertNotNull(locations); - assertTrue(locations.length > 0); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateOutOfRangeSrc() throws Throwable { - 
describe("Seeking out of the file length returns an empty array"); - - BlockLocation[] locations = - getFs().getFileBlockLocations(createFileAndGetStatus(), - data.length + 100, - 1); - assertEmptyBlockLocations(locations); - } - - private void assertEmptyBlockLocations(BlockLocation[] locations) { - assertNotNull(locations); - if (locations.length!=0) { - fail("non empty locations[] with first entry of " + locations[0]); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateDirectory() throws Throwable { - describe("verify that locating a directory is an error"); - createFile(path("/test/filename")); - FileStatus status = fs.getFileStatus(path("/test")); - LOG.info("Filesystem is " + fs + "; target is " + status); - SwiftTestUtils.assertIsDirectory(status); - BlockLocation[] locations; - locations = getFs().getFileBlockLocations(status, - 0, - 1); - assertEmptyBlockLocations(locations); - } - - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLocateRootDirectory() throws Throwable { - describe("verify that locating the root directory is an error"); - FileStatus status = fs.getFileStatus(path("/")); - SwiftTestUtils.assertIsDirectory(status); - BlockLocation[] locations; - locations = getFs().getFileBlockLocations(status, - 0, - 1); - assertEmptyBlockLocations(locations); - } - - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBlocksize.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBlocksize.java deleted file mode 100644 index 0211163248681..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemBlocksize.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -/** - * Tests that blocksize is never zero for a file, either in the FS default - * or the FileStatus value of a queried file - */ -public class TestSwiftFileSystemBlocksize extends SwiftFileSystemBaseTest { - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDefaultBlocksizeNonZero() throws Throwable { - assertTrue("Zero default blocksize", 0L != getFs().getDefaultBlockSize()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDefaultBlocksizeRootPathNonZero() throws Throwable { - assertTrue("Zero default blocksize", - 0L != getFs().getDefaultBlockSize(new Path("/"))); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDefaultBlocksizeOtherPathNonZero() throws Throwable { - assertTrue("Zero default blocksize", - 0L != getFs().getDefaultBlockSize(new Path("/test"))); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testBlocksizeNonZeroForFile() throws Throwable { - Path smallfile = new Path("/test/smallfile"); - SwiftTestUtils.writeTextFile(fs, smallfile, "blocksize", true); - createFile(smallfile); - FileStatus status = getFs().getFileStatus(smallfile); - assertTrue("Zero blocksize in " + status, - status.getBlockSize() != 0L); - assertTrue("Zero 
replication in " + status, - status.getReplication() != 0L); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemConcurrency.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemConcurrency.java deleted file mode 100644 index c447919efa461..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemConcurrency.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -/** - * Test Swift FS concurrency logic. This isn't a very accurate test, - * because it is hard to consistently generate race conditions. 
- * Consider it "best effort" - */ -public class TestSwiftFileSystemConcurrency extends SwiftFileSystemBaseTest { - protected static final Logger LOG = - LoggerFactory.getLogger(TestSwiftFileSystemConcurrency.class); - private Exception thread1Ex, thread2Ex; - public static final String TEST_RACE_CONDITION_ON_DELETE_DIR = - "/test/testraceconditionondirdeletetest"; - - /** - * test on concurrent file system changes - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRaceConditionOnDirDeleteTest() throws Exception { - SwiftTestUtils.skip("Skipping unreliable test"); - - final String message = "message"; - final Path fileToRead = new Path( - TEST_RACE_CONDITION_ON_DELETE_DIR +"/files/many-files/file"); - final ExecutorService executorService = Executors.newFixedThreadPool(2); - fs.create(new Path(TEST_RACE_CONDITION_ON_DELETE_DIR +"/file/test/file1")); - fs.create(new Path(TEST_RACE_CONDITION_ON_DELETE_DIR + "/documents/doc1")); - fs.create(new Path( - TEST_RACE_CONDITION_ON_DELETE_DIR + "/pictures/picture")); - - - executorService.execute(new Runnable() { - @Override - public void run() { - try { - assertDeleted(new Path(TEST_RACE_CONDITION_ON_DELETE_DIR), true); - } catch (IOException e) { - LOG.warn("deletion thread:" + e, e); - thread1Ex = e; - throw new RuntimeException(e); - } - } - }); - executorService.execute(new Runnable() { - @Override - public void run() { - try { - final FSDataOutputStream outputStream = fs.create(fileToRead); - outputStream.write(message.getBytes()); - outputStream.close(); - } catch (IOException e) { - LOG.warn("writer thread:" + e, e); - thread2Ex = e; - throw new RuntimeException(e); - } - } - }); - - executorService.awaitTermination(1, TimeUnit.MINUTES); - if (thread1Ex != null) { - throw thread1Ex; - } - if (thread2Ex != null) { - throw thread2Ex; - } - try { - fs.open(fileToRead); - LOG.info("concurrency test failed to trigger a failure"); - } catch (FileNotFoundException expected) { - - } - - } -} diff --git 
a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemContract.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemContract.java deleted file mode 100644 index 1655b95231c1d..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemContract.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.ParentNotDirectoryException; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Before; -import org.junit.Test; - -import static org.junit.Assert.*; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -/** - * This is the full filesystem contract test -which requires the - * Default config set up to point to a filesystem. 
- * - * Some of the tests override the base class tests -these - * are where SwiftFS does not implement those features, or - * when the behavior of SwiftFS does not match the normal - * contract -which normally means that directories and equal files - * are being treated as equal. - */ -public class TestSwiftFileSystemContract - extends FileSystemContractBaseTest { - private static final Logger LOG = - LoggerFactory.getLogger(TestSwiftFileSystemContract.class); - - /** - * Override this if the filesystem is not case sensitive - * @return true if the case detection/preservation tests should run - */ - protected boolean filesystemIsCaseSensitive() { - return false; - } - - @Before - public void setUp() throws Exception { - final URI uri = getFilesystemURI(); - final Configuration conf = new Configuration(); - fs = createSwiftFS(); - try { - fs.initialize(uri, conf); - } catch (IOException e) { - //FS init failed, set it to null so that teardown doesn't - //attempt to use it - fs = null; - throw e; - } - } - - protected URI getFilesystemURI() throws URISyntaxException, IOException { - return SwiftTestUtils.getServiceURI(new Configuration()); - } - - protected SwiftNativeFileSystem createSwiftFS() throws IOException { - SwiftNativeFileSystem swiftNativeFileSystem = - new SwiftNativeFileSystem(); - return swiftNativeFileSystem; - } - - @Test - public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception { - Path testDir = path("/test/hadoop"); - assertFalse(fs.exists(testDir)); - assertTrue(fs.mkdirs(testDir)); - assertTrue(fs.exists(testDir)); - - Path filepath = path("/test/hadoop/file"); - SwiftTestUtils.writeTextFile(fs, filepath, "hello, world", false); - - Path testSubDir = new Path(filepath, "subdir"); - SwiftTestUtils.assertPathDoesNotExist(fs, "subdir before mkdir", testSubDir); - - try { - fs.mkdirs(testSubDir); - fail("Should throw IOException."); - } catch (ParentNotDirectoryException e) { - // expected - } - //now verify that the subdir path 
does not exist - SwiftTestUtils.assertPathDoesNotExist(fs, "subdir after mkdir", testSubDir); - - Path testDeepSubDir = path("/test/hadoop/file/deep/sub/dir"); - try { - fs.mkdirs(testDeepSubDir); - fail("Should throw IOException."); - } catch (ParentNotDirectoryException e) { - // expected - } - SwiftTestUtils.assertPathDoesNotExist(fs, "testDeepSubDir after mkdir", - testDeepSubDir); - - } - - @Test - public void testWriteReadAndDeleteEmptyFile() throws Exception { - try { - super.testWriteReadAndDeleteEmptyFile(); - } catch (AssertionError e) { - SwiftTestUtils.downgrade("empty files get mistaken for directories", e); - } - } - - @Test - public void testMkdirsWithUmask() throws Exception { - //unsupported - } - - @Test - public void testZeroByteFilesAreFiles() throws Exception { -// SwiftTestUtils.unsupported("testZeroByteFilesAreFiles"); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemDelete.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemDelete.java deleted file mode 100644 index 81af49c2a34e3..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemDelete.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.IOException; -/** - * Test deletion operations - */ -public class TestSwiftFileSystemDelete extends SwiftFileSystemBaseTest { - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDeleteEmptyFile() throws IOException { - final Path file = new Path("/test/testDeleteEmptyFile"); - createEmptyFile(file); - SwiftTestUtils.noteAction("about to delete"); - assertDeleted(file, true); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDeleteEmptyFileTwice() throws IOException { - final Path file = new Path("/test/testDeleteEmptyFileTwice"); - createEmptyFile(file); - assertDeleted(file, true); - SwiftTestUtils.noteAction("multiple creates, and deletes"); - assertFalse("Delete returned true", fs.delete(file, false)); - createEmptyFile(file); - assertDeleted(file, true); - assertFalse("Delete returned true", fs.delete(file, false)); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDeleteNonEmptyFile() throws IOException { - final Path file = new Path("/test/testDeleteNonEmptyFile"); - createFile(file); - assertDeleted(file, true); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDeleteNonEmptyFileTwice() throws IOException { - final Path file = new Path("/test/testDeleteNonEmptyFileTwice"); - createFile(file); - assertDeleted(file, true); - assertFalse("Delete returned true", fs.delete(file, false)); - createFile(file); - assertDeleted(file, true); - assertFalse("Delete returned true", fs.delete(file, false)); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDeleteTestDir() throws IOException { - final Path file = new Path("/test/"); - fs.delete(file, true); - assertPathDoesNotExist("Test dir found", file); - } - - /** - * Test recursive 
root directory deletion fails if there is an entry underneath - * @throws Throwable - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRmRootDirRecursiveIsForbidden() throws Throwable { - Path root = path("/"); - Path testFile = path("/test"); - createFile(testFile); - assertTrue("rm(/) returned false", fs.delete(root, true)); - assertExists("Root dir is missing", root); - assertPathDoesNotExist("test file not deleted", testFile); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemDirectories.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemDirectories.java deleted file mode 100644 index 9b4ba5e8c6f90..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemDirectories.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.snative.SwiftFileStatus; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.FileNotFoundException; - -/** - * Test swift-specific directory logic. - * This class is HDFS-1 compatible; its designed to be subclasses by something - * with HDFS2 extensions - */ -public class TestSwiftFileSystemDirectories extends SwiftFileSystemBaseTest { - - /** - * Asserts that a zero byte file has a status of file and not - * directory or symlink - * - * @throws Exception on failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testZeroByteFilesAreDirectories() throws Exception { - Path src = path("/test/testZeroByteFilesAreFiles"); - //create a zero byte file - SwiftTestUtils.touch(fs, src); - SwiftTestUtils.assertIsDirectory(fs, src); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testNoStatusForMissingDirectories() throws Throwable { - Path missing = path("/test/testNoStatusForMissingDirectories"); - assertPathDoesNotExist("leftover?", missing); - try { - FileStatus[] statuses = fs.listStatus(missing); - //not expected - fail("Expected a FileNotFoundException, got the status " + statuses); - } catch (FileNotFoundException expected) { - //expected - } - } - - /** - * test that a dir off root has a listStatus() call that - * works as expected. and that when a child is added. 
it changes - * - * @throws Exception on failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDirectoriesOffRootHaveMatchingFileStatus() throws Exception { - Path test = path("/test"); - fs.delete(test, true); - mkdirs(test); - assertExists("created test directory", test); - FileStatus[] statuses = fs.listStatus(test); - String statusString = statusToString(test.toString(), statuses); - assertEquals("Wrong number of elements in file status " + statusString, 0, - statuses.length); - - Path src = path("/test/file"); - - //create a zero byte file - SwiftTestUtils.touch(fs, src); - //stat it - statuses = fs.listStatus(test); - statusString = statusToString(test.toString(), statuses); - assertEquals("Wrong number of elements in file status " + statusString, 1, - statuses.length); - SwiftFileStatus stat = (SwiftFileStatus) statuses[0]; - assertTrue("isDir(): Not a directory: " + stat, stat.isDirectory()); - extraStatusAssertions(stat); - } - - /** - * test that a dir two levels down has a listStatus() call that - * works as expected. 
- * - * @throws Exception on failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDirectoriesLowerDownHaveMatchingFileStatus() throws Exception { - Path test = path("/test/testDirectoriesLowerDownHaveMatchingFileStatus"); - fs.delete(test, true); - mkdirs(test); - assertExists("created test sub directory", test); - FileStatus[] statuses = fs.listStatus(test); - String statusString = statusToString(test.toString(), statuses); - assertEquals("Wrong number of elements in file status " + statusString,0, - statuses.length); - } - - private String statusToString(String pathname, - FileStatus[] statuses) { - assertNotNull(statuses); - return SwiftTestUtils.dumpStats(pathname,statuses); - } - - /** - * method for subclasses to add extra assertions - * @param stat status to look at - */ - protected void extraStatusAssertions(SwiftFileStatus stat) { - - } - - /** - * Asserts that a zero byte file has a status of file and not - * directory or symlink - * - * @throws Exception on failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testMultiByteFilesAreFiles() throws Exception { - Path src = path("/test/testMultiByteFilesAreFiles"); - SwiftTestUtils.writeTextFile(fs, src, "testMultiByteFilesAreFiles", false); - assertIsFile(src); - FileStatus status = fs.getFileStatus(src); - assertFalse(status.isDirectory()); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemExtendedContract.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemExtendedContract.java deleted file mode 100644 index 844463db6d979..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemExtendedContract.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.http.RestClientBindings; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.StringUtils; -import org.junit.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URI; - -public class TestSwiftFileSystemExtendedContract extends SwiftFileSystemBaseTest { - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testOpenNonExistingFile() throws IOException { - final Path p = new Path("/test/testOpenNonExistingFile"); - //open it as a file, should get FileNotFoundException - try { - final FSDataInputStream in = fs.open(p); - in.close(); - fail("didn't expect to get here"); - } catch (FileNotFoundException fnfe) { - LOG.debug("Expected: " + fnfe, fnfe); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testFilesystemHasURI() throws Throwable { - assertNotNull(fs.getUri()); - } - - @Test(timeout = 
SWIFT_TEST_TIMEOUT) - public void testCreateFile() throws Exception { - final Path f = new Path("/test/testCreateFile"); - final FSDataOutputStream fsDataOutputStream = fs.create(f); - fsDataOutputStream.close(); - assertExists("created file", f); - } - - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testWriteReadFile() throws Exception { - final Path f = new Path("/test/test"); - final FSDataOutputStream fsDataOutputStream = fs.create(f); - final String message = "Test string"; - fsDataOutputStream.write(message.getBytes()); - fsDataOutputStream.close(); - assertExists("created file", f); - FSDataInputStream open = null; - try { - open = fs.open(f); - final byte[] bytes = new byte[512]; - final int read = open.read(bytes); - final byte[] buffer = new byte[read]; - System.arraycopy(bytes, 0, buffer, 0, read); - assertEquals(message, new String(buffer)); - } finally { - fs.delete(f, false); - IOUtils.closeStream(open); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testConfDefinesFilesystem() throws Throwable { - Configuration conf = new Configuration(); - SwiftTestUtils.getServiceURI(conf); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testConfIsValid() throws Throwable { - Configuration conf = new Configuration(); - URI fsURI = SwiftTestUtils.getServiceURI(conf); - RestClientBindings.bind(fsURI, conf); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testGetSchemeImplemented() throws Throwable { - String scheme = fs.getScheme(); - assertEquals(SwiftNativeFileSystem.SWIFT,scheme); - } - - /** - * Assert that a filesystem is case sensitive. - * This is done by creating a mixed-case filename and asserting that - * its lower case version is not there. 
- * - * @throws Exception failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testFilesystemIsCaseSensitive() throws Exception { - String mixedCaseFilename = "/test/UPPER.TXT"; - Path upper = path(mixedCaseFilename); - Path lower = path(StringUtils.toLowerCase(mixedCaseFilename)); - assertFalse("File exists" + upper, fs.exists(upper)); - assertFalse("File exists" + lower, fs.exists(lower)); - FSDataOutputStream out = fs.create(upper); - out.writeUTF("UPPER"); - out.close(); - FileStatus upperStatus = fs.getFileStatus(upper); - assertExists("Original upper case file" + upper, upper); - //verify the lower-case version of the filename doesn't exist - assertPathDoesNotExist("lower case file", lower); - //now overwrite the lower case version of the filename with a - //new version. - out = fs.create(lower); - out.writeUTF("l"); - out.close(); - assertExists("lower case file", lower); - //verify the length of the upper file hasn't changed - assertExists("Original upper case file " + upper, upper); - FileStatus newStatus = fs.getFileStatus(upper); - assertEquals("Expected status:" + upperStatus - + " actual status " + newStatus, - upperStatus.getLen(), - newStatus.getLen()); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemLsOperations.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemLsOperations.java deleted file mode 100644 index 5e2b1b7231910..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemLsOperations.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.junit.Test; - -import java.io.IOException; - -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.assertListStatusFinds; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.cleanup; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.dumpStats; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.touch; - -/** - * Test the FileSystem#listStatus() operations - */ -public class TestSwiftFileSystemLsOperations extends SwiftFileSystemBaseTest { - - private Path[] testDirs; - - /** - * Setup creates dirs under test/hadoop - * - * @throws Exception - */ - @Override - public void setUp() throws Exception { - super.setUp(); - //delete the test directory - Path test = path("/test"); - fs.delete(test, true); - mkdirs(test); - } - - /** - * Create subdirectories and files under test/ for those tests - * that want them. Doing so adds overhead to setup and teardown, - * so should only be done for those tests that need them. 
- * @throws IOException on an IO problem - */ - private void createTestSubdirs() throws IOException { - testDirs = new Path[]{ - path("/test/hadoop/a"), - path("/test/hadoop/b"), - path("/test/hadoop/c/1"), - }; - - assertPathDoesNotExist("test directory setup", testDirs[0]); - for (Path path : testDirs) { - mkdirs(path); - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListLevelTest() throws Exception { - createTestSubdirs(); - FileStatus[] paths = fs.listStatus(path("/test")); - assertEquals(dumpStats("/test", paths), 1, paths.length); - assertEquals(path("/test/hadoop"), paths[0].getPath()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListLevelTestHadoop() throws Exception { - createTestSubdirs(); - FileStatus[] paths; - paths = fs.listStatus(path("/test/hadoop")); - String stats = dumpStats("/test/hadoop", paths); - assertEquals("Paths.length wrong in " + stats, 3, paths.length); - assertEquals("Path element[0] wrong: " + stats, path("/test/hadoop/a"), - paths[0].getPath()); - assertEquals("Path element[1] wrong: " + stats, path("/test/hadoop/b"), - paths[1].getPath()); - assertEquals("Path element[2] wrong: " + stats, path("/test/hadoop/c"), - paths[2].getPath()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListStatusEmptyDirectory() throws Exception { - createTestSubdirs(); - FileStatus[] paths; - paths = fs.listStatus(path("/test/hadoop/a")); - assertEquals(dumpStats("/test/hadoop/a", paths), 0, - paths.length); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListStatusFile() throws Exception { - describe("Create a single file under /test;" + - " assert that listStatus(/test) finds it"); - Path file = path("/test/filename"); - createFile(file); - FileStatus[] pathStats = fs.listStatus(file); - assertEquals(dumpStats("/test/", pathStats), - 1, - pathStats.length); - //and assert that the len of that ls'd path is the same as the original - FileStatus lsStat = pathStats[0]; - assertEquals("Wrong 
file len in listing of " + lsStat, - data.length, lsStat.getLen()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListEmptyRoot() throws Throwable { - describe("Empty the root dir and verify that an LS / returns {}"); - cleanup("testListEmptyRoot", fs, "/test"); - cleanup("testListEmptyRoot", fs, "/user"); - FileStatus[] fileStatuses = fs.listStatus(path("/")); - assertEquals("Non-empty root" + dumpStats("/", fileStatuses), - 0, - fileStatuses.length); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListNonEmptyRoot() throws Throwable { - Path test = path("/test"); - touch(fs, test); - FileStatus[] fileStatuses = fs.listStatus(path("/")); - String stats = dumpStats("/", fileStatuses); - assertEquals("Wrong #of root children" + stats, 1, fileStatuses.length); - FileStatus status = fileStatuses[0]; - assertEquals("Wrong path value" + stats,test, status.getPath()); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListStatusRootDir() throws Throwable { - Path dir = path("/"); - Path child = path("/test"); - touch(fs, child); - assertListStatusFinds(fs, dir, child); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListStatusFiltered() throws Throwable { - Path dir = path("/"); - Path child = path("/test"); - touch(fs, child); - FileStatus[] stats = fs.listStatus(dir, new AcceptAllFilter()); - boolean found = false; - StringBuilder builder = new StringBuilder(); - for (FileStatus stat : stats) { - builder.append(stat.toString()).append('\n'); - if (stat.getPath().equals(child)) { - found = true; - } - } - assertTrue("Path " + child - + " not found in directory " + dir + ":" + builder, - found); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemPartitionedUploads.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemPartitionedUploads.java deleted file mode 100644 index 419d0303a0402..0000000000000 --- 
a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemPartitionedUploads.java +++ /dev/null @@ -1,442 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.http.SwiftProtocolConstants; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.apache.hadoop.fs.swift.util.SwiftUtils; -import org.apache.hadoop.io.IOUtils; -import org.apache.http.Header; -import org.junit.Test; -import org.junit.internal.AssumptionViolatedException; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.assertPathExists; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.readDataset; - -/** - * Test partitioned uploads. - * This is done by forcing a very small partition size and verifying that it - * is picked up. 
- */ -public class TestSwiftFileSystemPartitionedUploads extends - SwiftFileSystemBaseTest { - - public static final String WRONG_PARTITION_COUNT = - "wrong number of partitions written into "; - public static final int PART_SIZE = 1; - public static final int PART_SIZE_BYTES = PART_SIZE * 1024; - public static final int BLOCK_SIZE = 1024; - private URI uri; - - @Override - protected Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - //set the partition size to 1 KB - conf.setInt(SwiftProtocolConstants.SWIFT_PARTITION_SIZE, PART_SIZE); - return conf; - } - - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testPartitionPropertyPropagatesToConf() throws Throwable { - assertEquals(1, - getConf().getInt(SwiftProtocolConstants.SWIFT_PARTITION_SIZE, - 0)); - } - - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testPartionPropertyPropagatesToStore() throws Throwable { - assertEquals(1, fs.getStore().getPartsizeKB()); - } - - /** - * tests functionality for big files ( > 5Gb) upload - */ - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testFilePartUpload() throws Throwable { - - final Path path = new Path("/test/testFilePartUpload"); - - int len = 8192; - final byte[] src = SwiftTestUtils.dataset(len, 32, 144); - FSDataOutputStream out = fs.create(path, - false, - getBufferSize(), - (short) 1, - BLOCK_SIZE); - - try { - int totalPartitionsToWrite = len / PART_SIZE_BYTES; - assertPartitionsWritten("Startup", out, 0); - //write 2048 - int firstWriteLen = 2048; - out.write(src, 0, firstWriteLen); - //assert - long expected = getExpectedPartitionsWritten(firstWriteLen, - PART_SIZE_BYTES, - false); - SwiftUtils.debug(LOG, "First write: predict %d partitions written", - expected); - assertPartitionsWritten("First write completed", out, expected); - //write the rest - int remainder = len - firstWriteLen; - SwiftUtils.debug(LOG, "remainder: writing: %d bytes", remainder); - - out.write(src, firstWriteLen, 
remainder); - expected = - getExpectedPartitionsWritten(len, PART_SIZE_BYTES, false); - assertPartitionsWritten("Remaining data", out, expected); - out.close(); - expected = - getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true); - assertPartitionsWritten("Stream closed", out, expected); - - Header[] headers = fs.getStore().getObjectHeaders(path, true); - for (Header header : headers) { - LOG.info(header.toString()); - } - - byte[] dest = readDataset(fs, path, len); - LOG.info("Read dataset from " + path + ": data length =" + len); - //compare data - SwiftTestUtils.compareByteArrays(src, dest, len); - FileStatus status; - - final Path qualifiedPath = fs.makeQualified(path); - status = fs.getFileStatus(qualifiedPath); - //now see what block location info comes back. - //This will vary depending on the Swift version, so the results - //aren't checked -merely that the test actually worked - BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len); - assertNotNull("Null getFileBlockLocations()", locations); - assertTrue("empty array returned for getFileBlockLocations()", - locations.length > 0); - - //last bit of test -which seems to play up on partitions, which we download - //to a skip - try { - validatePathLen(path, len); - } catch (AssertionError e) { - //downgrade to a skip - throw new AssumptionViolatedException(e, null); - } - - } finally { - IOUtils.closeStream(out); - } - } - /** - * tests functionality for big files ( > 5Gb) upload - */ - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testFilePartUploadNoLengthCheck() throws IOException, URISyntaxException { - - final Path path = new Path("/test/testFilePartUploadLengthCheck"); - - int len = 8192; - final byte[] src = SwiftTestUtils.dataset(len, 32, 144); - FSDataOutputStream out = fs.create(path, - false, - getBufferSize(), - (short) 1, - BLOCK_SIZE); - - try { - int totalPartitionsToWrite = len / PART_SIZE_BYTES; - assertPartitionsWritten("Startup", out, 0); - //write 2048 - int 
firstWriteLen = 2048; - out.write(src, 0, firstWriteLen); - //assert - long expected = getExpectedPartitionsWritten(firstWriteLen, - PART_SIZE_BYTES, - false); - SwiftUtils.debug(LOG, "First write: predict %d partitions written", - expected); - assertPartitionsWritten("First write completed", out, expected); - //write the rest - int remainder = len - firstWriteLen; - SwiftUtils.debug(LOG, "remainder: writing: %d bytes", remainder); - - out.write(src, firstWriteLen, remainder); - expected = - getExpectedPartitionsWritten(len, PART_SIZE_BYTES, false); - assertPartitionsWritten("Remaining data", out, expected); - out.close(); - expected = - getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true); - assertPartitionsWritten("Stream closed", out, expected); - - Header[] headers = fs.getStore().getObjectHeaders(path, true); - for (Header header : headers) { - LOG.info(header.toString()); - } - - byte[] dest = readDataset(fs, path, len); - LOG.info("Read dataset from " + path + ": data length =" + len); - //compare data - SwiftTestUtils.compareByteArrays(src, dest, len); - FileStatus status = fs.getFileStatus(path); - - //now see what block location info comes back. 
- //This will vary depending on the Swift version, so the results - //aren't checked -merely that the test actually worked - BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len); - assertNotNull("Null getFileBlockLocations()", locations); - assertTrue("empty array returned for getFileBlockLocations()", - locations.length > 0); - } finally { - IOUtils.closeStream(out); - } - } - - private FileStatus validatePathLen(Path path, int len) throws IOException { - //verify that the length is what was written in a direct status check - final Path qualifiedPath = fs.makeQualified(path); - FileStatus[] parentDirListing = fs.listStatus(qualifiedPath.getParent()); - StringBuilder listing = lsToString(parentDirListing); - String parentDirLS = listing.toString(); - FileStatus status = fs.getFileStatus(qualifiedPath); - assertEquals("Length of written file " + qualifiedPath - + " from status check " + status - + " in dir " + listing, - len, - status.getLen()); - String fileInfo = qualifiedPath + " " + status; - assertFalse("File claims to be a directory " + fileInfo, - status.isDirectory()); - - FileStatus listedFileStat = resolveChild(parentDirListing, qualifiedPath); - assertNotNull("Did not find " + path + " in " + parentDirLS, - listedFileStat); - //file is in the parent dir. 
Now validate it's stats - assertEquals("Wrong len for " + path + " in listing " + parentDirLS, - len, - listedFileStat.getLen()); - listedFileStat.toString(); - return status; - } - - private FileStatus resolveChild(FileStatus[] parentDirListing, - Path childPath) { - FileStatus listedFileStat = null; - for (FileStatus stat : parentDirListing) { - if (stat.getPath().equals(childPath)) { - listedFileStat = stat; - } - } - return listedFileStat; - } - - private StringBuilder lsToString(FileStatus[] parentDirListing) { - StringBuilder listing = new StringBuilder(); - for (FileStatus stat : parentDirListing) { - listing.append(stat).append("\n"); - } - return listing; - } - - /** - * Calculate the #of partitions expected from the upload - * @param uploaded number of bytes uploaded - * @param partSizeBytes the partition size - * @param closed whether or not the stream has closed - * @return the expected number of partitions, for use in assertions. - */ - private int getExpectedPartitionsWritten(long uploaded, - int partSizeBytes, - boolean closed) { - //#of partitions in total - int partitions = (int) (uploaded / partSizeBytes); - //#of bytes past the last partition - int remainder = (int) (uploaded % partSizeBytes); - if (closed) { - //all data is written, so if there was any remainder, it went up - //too - return partitions + ((remainder > 0) ? 1 : 0); - } else { - //not closed. All the remainder is buffered, - return partitions; - } - } - - private int getBufferSize() { - return fs.getConf().getInt("io.file.buffer.size", 4096); - } - - /** - * Test sticks up a very large partitioned file and verifies that - * it comes back unchanged. 
- * @throws Throwable - */ - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testManyPartitionedFile() throws Throwable { - final Path path = new Path("/test/testManyPartitionedFile"); - - int len = PART_SIZE_BYTES * 15; - final byte[] src = SwiftTestUtils.dataset(len, 32, 144); - FSDataOutputStream out = fs.create(path, - false, - getBufferSize(), - (short) 1, - BLOCK_SIZE); - - out.write(src, 0, src.length); - int expected = - getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true); - out.close(); - assertPartitionsWritten("write completed", out, expected); - assertEquals("too few bytes written", len, - SwiftNativeFileSystem.getBytesWritten(out)); - assertEquals("too few bytes uploaded", len, - SwiftNativeFileSystem.getBytesUploaded(out)); - //now we verify that the data comes back. If it - //doesn't, it means that the ordering of the partitions - //isn't right - byte[] dest = readDataset(fs, path, len); - //compare data - SwiftTestUtils.compareByteArrays(src, dest, len); - //finally, check the data - FileStatus[] stats = fs.listStatus(path); - assertEquals("wrong entry count in " - + SwiftTestUtils.dumpStats(path.toString(), stats), - expected, stats.length); - } - - /** - * Test that when a partitioned file is overwritten by a smaller one, - * all the old partitioned files go away - * @throws Throwable - */ - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testOverwritePartitionedFile() throws Throwable { - final Path path = new Path("/test/testOverwritePartitionedFile"); - - final int len1 = 8192; - final byte[] src1 = SwiftTestUtils.dataset(len1, 'A', 'Z'); - FSDataOutputStream out = fs.create(path, - false, - getBufferSize(), - (short) 1, - 1024); - out.write(src1, 0, len1); - out.close(); - long expected = getExpectedPartitionsWritten(len1, - PART_SIZE_BYTES, - false); - assertPartitionsWritten("initial upload", out, expected); - assertExists("Exists", path); - FileStatus status = fs.getFileStatus(path); - assertEquals("Length", len1, 
status.getLen()); - //now write a shorter file with a different dataset - final int len2 = 4095; - final byte[] src2 = SwiftTestUtils.dataset(len2, 'a', 'z'); - out = fs.create(path, - true, - getBufferSize(), - (short) 1, - 1024); - out.write(src2, 0, len2); - out.close(); - status = fs.getFileStatus(path); - assertEquals("Length", len2, status.getLen()); - byte[] dest = readDataset(fs, path, len2); - //compare data - SwiftTestUtils.compareByteArrays(src2, dest, len2); - } - - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testDeleteSmallPartitionedFile() throws Throwable { - final Path path = new Path("/test/testDeleteSmallPartitionedFile"); - - final int len1 = 1024; - final byte[] src1 = SwiftTestUtils.dataset(len1, 'A', 'Z'); - SwiftTestUtils.writeDataset(fs, path, src1, len1, 1024, false); - assertExists("Exists", path); - - Path part_0001 = new Path(path, SwiftUtils.partitionFilenameFromNumber(1)); - Path part_0002 = new Path(path, SwiftUtils.partitionFilenameFromNumber(2)); - String ls = SwiftTestUtils.ls(fs, path); - assertExists("Partition 0001 Exists in " + ls, part_0001); - assertPathDoesNotExist("partition 0002 found under " + ls, part_0002); - assertExists("Partition 0002 Exists in " + ls, part_0001); - fs.delete(path, false); - assertPathDoesNotExist("deleted file still there", path); - ls = SwiftTestUtils.ls(fs, path); - assertPathDoesNotExist("partition 0001 file still under " + ls, part_0001); - } - - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testDeletePartitionedFile() throws Throwable { - final Path path = new Path("/test/testDeletePartitionedFile"); - - SwiftTestUtils.writeDataset(fs, path, data, data.length, 1024, false); - assertExists("Exists", path); - - Path part_0001 = new Path(path, SwiftUtils.partitionFilenameFromNumber(1)); - Path part_0002 = new Path(path, SwiftUtils.partitionFilenameFromNumber(2)); - String ls = SwiftTestUtils.ls(fs, path); - assertExists("Partition 0001 Exists in " + ls, part_0001); - 
assertExists("Partition 0002 Exists in " + ls, part_0001); - fs.delete(path, false); - assertPathDoesNotExist("deleted file still there", path); - ls = SwiftTestUtils.ls(fs, path); - assertPathDoesNotExist("partition 0001 file still under " + ls, part_0001); - assertPathDoesNotExist("partition 0002 file still under " + ls, part_0002); - } - - - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testRenamePartitionedFile() throws Throwable { - Path src = new Path("/test/testRenamePartitionedFileSrc"); - - int len = data.length; - SwiftTestUtils.writeDataset(fs, src, data, len, 1024, false); - assertExists("Exists", src); - - String partOneName = SwiftUtils.partitionFilenameFromNumber(1); - Path srcPart = new Path(src, partOneName); - Path dest = new Path("/test/testRenamePartitionedFileDest"); - Path destPart = new Path(src, partOneName); - assertExists("Partition Exists", srcPart); - fs.rename(src, dest); - assertPathExists(fs, "dest file missing", dest); - FileStatus status = fs.getFileStatus(dest); - assertEquals("Length of renamed file is wrong", len, status.getLen()); - byte[] destData = readDataset(fs, dest, len); - //compare data - SwiftTestUtils.compareByteArrays(data, destData, len); - String srcLs = SwiftTestUtils.ls(fs, src); - String destLs = SwiftTestUtils.ls(fs, dest); - - assertPathDoesNotExist("deleted file still found in " + srcLs, src); - - assertPathDoesNotExist("partition file still found in " + srcLs, srcPart); - } - - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemRead.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemRead.java deleted file mode 100644 index 84794cb725074..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemRead.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.junit.Test; - -import java.io.EOFException; -import java.io.IOException; - -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.readBytesToString; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.writeTextFile; - -/** - * Test filesystem read operations - */ -public class TestSwiftFileSystemRead extends SwiftFileSystemBaseTest { - - - /** - * Read past the end of a file: expect the operation to fail - * @throws IOException - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testOverRead() throws IOException { - final String message = "message"; - final Path filePath = new Path("/test/file.txt"); - - writeTextFile(fs, filePath, message, false); - - try { - readBytesToString(fs, filePath, 20); - fail("expected an exception"); - } catch (EOFException e) { - //expected - } - } - - /** - * Read and write some JSON - * @throws IOException - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRWJson() throws IOException { - final String message = "{" + - " 'json': { 'i':43, 'b':true}," + - " 's':'string'" + - "}"; - final Path filePath = new Path("/test/file.json"); - - 
writeTextFile(fs, filePath, message, false); - String readJson = readBytesToString(fs, filePath, message.length()); - assertEquals(message,readJson); - //now find out where it is - FileStatus status = fs.getFileStatus(filePath); - BlockLocation[] locations = fs.getFileBlockLocations(status, 0, 10); - } - - /** - * Read and write some XML - * @throws IOException - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRWXML() throws IOException { - final String message = "" + - " " + - " string" + - ""; - final Path filePath = new Path("/test/file.xml"); - - writeTextFile(fs, filePath, message, false); - String read = readBytesToString(fs, filePath, message.length()); - assertEquals(message,read); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemRename.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemRename.java deleted file mode 100644 index f5ad155ffe3d7..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftFileSystemRename.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift; - -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.exceptions.SwiftOperationFailedException; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.IOException; - -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.compareByteArrays; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.dataset; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.readBytesToString; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.readDataset; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.writeDataset; - -public class TestSwiftFileSystemRename extends SwiftFileSystemBaseTest { - - /** - * Rename a file into a directory - * - * @throws Exception - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameFileIntoExistingDirectory() throws Exception { - assumeRenameSupported(); - - Path src = path("/test/olddir/file"); - createFile(src); - Path dst = path("/test/new/newdir"); - fs.mkdirs(dst); - rename(src, dst, true, false, true); - Path newFile = path("/test/new/newdir/file"); - if (!fs.exists(newFile)) { - String ls = ls(dst); - LOG.info(ls(path("/test/new"))); - LOG.info(ls(path("/test/hadoop"))); - fail("did not find " + newFile + " - directory: " + ls); - } - assertTrue("Destination changed", - fs.exists(path("/test/new/newdir/file"))); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameFile() throws Exception { - assumeRenameSupported(); - - final Path old = new Path("/test/alice/file"); - final Path newPath = new Path("/test/bob/file"); - fs.mkdirs(newPath.getParent()); - final FSDataOutputStream fsDataOutputStream = fs.create(old); - final byte[] message = "Some data".getBytes(); - fsDataOutputStream.write(message); - fsDataOutputStream.close(); - - assertTrue(fs.exists(old)); - rename(old, 
newPath, true, false, true); - - final FSDataInputStream bobStream = fs.open(newPath); - final byte[] bytes = new byte[512]; - final int read = bobStream.read(bytes); - bobStream.close(); - final byte[] buffer = new byte[read]; - System.arraycopy(bytes, 0, buffer, 0, read); - assertEquals(new String(message), new String(buffer)); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameDirectory() throws Exception { - assumeRenameSupported(); - - final Path old = new Path("/test/data/logs"); - final Path newPath = new Path("/test/var/logs"); - fs.mkdirs(old); - fs.mkdirs(newPath.getParent()); - assertTrue(fs.exists(old)); - rename(old, newPath, true, false, true); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameTheSameDirectory() throws Exception { - assumeRenameSupported(); - - final Path old = new Path("/test/usr/data"); - fs.mkdirs(old); - rename(old, old, false, true, true); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameDirectoryIntoExistingDirectory() throws Exception { - assumeRenameSupported(); - - Path src = path("/test/olddir/dir"); - fs.mkdirs(src); - createFile(path("/test/olddir/dir/file1")); - createFile(path("/test/olddir/dir/subdir/file2")); - - Path dst = path("/test/new/newdir"); - fs.mkdirs(dst); - //this renames into a child - rename(src, dst, true, false, true); - assertExists("new dir", path("/test/new/newdir/dir")); - assertExists("Renamed nested file1", path("/test/new/newdir/dir/file1")); - assertPathDoesNotExist("Nested file1 should have been deleted", - path("/test/olddir/dir/file1")); - assertExists("Renamed nested subdir", - path("/test/new/newdir/dir/subdir/")); - assertExists("file under subdir", - path("/test/new/newdir/dir/subdir/file2")); - - assertPathDoesNotExist("Nested /test/hadoop/dir/subdir/file2 still exists", - path("/test/olddir/dir/subdir/file2")); - } - - /** - * trying to rename a directory onto itself should fail, - * preserving everything underneath. 
- */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameDirToSelf() throws Throwable { - assumeRenameSupported(); - Path parentdir = path("/test/parentdir"); - fs.mkdirs(parentdir); - Path child = new Path(parentdir, "child"); - createFile(child); - - rename(parentdir, parentdir, false, true, true); - //verify the child is still there - assertIsFile(child); - } - - /** - * Assert that root directory renames are not allowed - * - * @throws Exception on failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameRootDirForbidden() throws Exception { - assumeRenameSupported(); - rename(path("/"), - path("/test/newRootDir"), - false, true, false); - } - - /** - * Assert that renaming a parent directory to be a child - * of itself is forbidden - * - * @throws Exception on failures - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameChildDirForbidden() throws Exception { - assumeRenameSupported(); - - Path parentdir = path("/test/parentdir"); - fs.mkdirs(parentdir); - Path childFile = new Path(parentdir, "childfile"); - createFile(childFile); - //verify one level down - Path childdir = new Path(parentdir, "childdir"); - rename(parentdir, childdir, false, true, false); - //now another level - fs.mkdirs(childdir); - Path childchilddir = new Path(childdir, "childdir"); - rename(parentdir, childchilddir, false, true, false); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameFileAndVerifyContents() throws IOException { - assumeRenameSupported(); - - final Path filePath = new Path("/test/home/user/documents/file.txt"); - final Path newFilePath = new Path("/test/home/user/files/file.txt"); - mkdirs(newFilePath.getParent()); - int len = 1024; - byte[] dataset = dataset(len, 'A', 26); - writeDataset(fs, filePath, dataset, len, len, false); - rename(filePath, newFilePath, true, false, true); - byte[] dest = readDataset(fs, newFilePath, len); - compareByteArrays(dataset, dest, len); - String reread = readBytesToString(fs, 
newFilePath, 20); - } - - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testMoveFileUnderParent() throws Throwable { - if (!renameSupported()) return; - Path filepath = path("test/file"); - createFile(filepath); - //HDFS expects rename src, src -> true - rename(filepath, filepath, true, true, true); - //verify the file is still there - assertIsFile(filepath); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testMoveDirUnderParent() throws Throwable { - if (!renameSupported()) { - return; - } - Path testdir = path("test/dir"); - fs.mkdirs(testdir); - Path parent = testdir.getParent(); - //the outcome here is ambiguous, so is not checked - try { - fs.rename(testdir, parent); - } catch (SwiftOperationFailedException e) { - // allowed - } - assertExists("Source directory has been deleted ", testdir); - } - - /** - * trying to rename a file onto itself should succeed (it's a no-op) - */ - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameFileToSelf() throws Throwable { - if (!renameSupported()) return; - Path filepath = path("test/file"); - createFile(filepath); - //HDFS expects rename src, src -> true - rename(filepath, filepath, true, true, true); - //verify the file is still there - assertIsFile(filepath); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenamedConsistence() throws IOException { - assumeRenameSupported(); - describe("verify that overwriting a file with new data doesn't impact" + - " the existing content"); - - final Path filePath = new Path("/test/home/user/documents/file.txt"); - final Path newFilePath = new Path("/test/home/user/files/file.txt"); - mkdirs(newFilePath.getParent()); - int len = 1024; - byte[] dataset = dataset(len, 'A', 26); - byte[] dataset2 = dataset(len, 'a', 26); - writeDataset(fs, filePath, dataset, len, len, false); - rename(filePath, newFilePath, true, false, true); - SwiftTestUtils.writeAndRead(fs, filePath, dataset2, len, len, false, true); - byte[] dest = readDataset(fs, newFilePath, 
len); - compareByteArrays(dataset, dest, len); - String reread = readBytesToString(fs, newFilePath, 20); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRenameMissingFile() throws Throwable { - assumeRenameSupported(); - Path path = path("/test/RenameMissingFile"); - Path path2 = path("/test/RenameMissingFileDest"); - mkdirs(path("test")); - rename(path, path2, false, false, false); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftObjectPath.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftObjectPath.java deleted file mode 100644 index 5692b48f116be..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/TestSwiftObjectPath.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.swift; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.http.RestClientBindings; -import org.apache.hadoop.fs.swift.http.SwiftRestClient; -import org.apache.hadoop.fs.swift.util.SwiftObjectPath; -import org.apache.hadoop.fs.swift.util.SwiftUtils; -import org.junit.Test; - -import java.net.URI; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -/** - * Unit tests for SwiftObjectPath class. - */ -public class TestSwiftObjectPath implements SwiftTestConstants { - private static final Logger LOG = - LoggerFactory.getLogger(TestSwiftObjectPath.class); - - /** - * What an endpoint looks like. This is derived from a (valid) - * rackspace endpoint address - */ - private static final String ENDPOINT = - "https://storage101.region1.example.org/v1/MossoCloudFS_9fb40cc0-1234-5678-9abc-def000c9a66"; - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testParsePath() throws Exception { - final String pathString = "/home/user/files/file1"; - final Path path = new Path(pathString); - final URI uri = new URI("http://container.localhost"); - final SwiftObjectPath expected = SwiftObjectPath.fromPath(uri, path); - final SwiftObjectPath actual = new SwiftObjectPath( - RestClientBindings.extractContainerName(uri), - pathString); - - assertEquals(expected, actual); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testParseUrlPath() throws Exception { - final String pathString = "swift://container.service1/home/user/files/file1"; - final URI uri = new URI(pathString); - final Path path = new Path(pathString); - final SwiftObjectPath expected = SwiftObjectPath.fromPath(uri, path); - final SwiftObjectPath actual = new SwiftObjectPath( - RestClientBindings.extractContainerName(uri), - "/home/user/files/file1"); - - assertEquals(expected, actual); - } - - @Test(timeout = 
SWIFT_TEST_TIMEOUT) - public void testHandleUrlAsPath() throws Exception { - final String hostPart = "swift://container.service1"; - final String pathPart = "/home/user/files/file1"; - final String uriString = hostPart + pathPart; - - final SwiftObjectPath expected = new SwiftObjectPath(uriString, pathPart); - final SwiftObjectPath actual = new SwiftObjectPath(uriString, uriString); - - assertEquals(expected, actual); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testParseAuthenticatedUrl() throws Exception { - final String pathString = "swift://container.service1/v2/AUTH_00345h34l93459y4/home/tom/documents/finance.docx"; - final URI uri = new URI(pathString); - final Path path = new Path(pathString); - final SwiftObjectPath expected = SwiftObjectPath.fromPath(uri, path); - final SwiftObjectPath actual = new SwiftObjectPath( - RestClientBindings.extractContainerName(uri), - "/home/tom/documents/finance.docx"); - - assertEquals(expected, actual); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testConvertToPath() throws Throwable { - String initialpath = "/dir/file1"; - Path ipath = new Path(initialpath); - SwiftObjectPath objectPath = SwiftObjectPath.fromPath(new URI(initialpath), - ipath); - URI endpoint = new URI(ENDPOINT); - URI uri = SwiftRestClient.pathToURI(objectPath, endpoint); - LOG.info("Inital Hadoop Path =" + initialpath); - LOG.info("Merged URI=" + uri); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRootDirProbeEmptyPath() throws Throwable { - SwiftObjectPath object=new SwiftObjectPath("container",""); - assertTrue(SwiftUtils.isRootDir(object)); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testRootDirProbeRootPath() throws Throwable { - SwiftObjectPath object=new SwiftObjectPath("container","/"); - assertTrue(SwiftUtils.isRootDir(object)); - } - - private void assertParentOf(SwiftObjectPath p1, SwiftObjectPath p2) { - assertTrue(p1.toString() + " is not a parent of " + p2 ,p1.isEqualToOrParentOf( - 
p2)); - } - - private void assertNotParentOf(SwiftObjectPath p1, SwiftObjectPath p2) { - assertFalse(p1.toString() + " is a parent of " + p2, p1.isEqualToOrParentOf( - p2)); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testChildOfProbe() throws Throwable { - SwiftObjectPath parent = new SwiftObjectPath("container", - "/parent"); - SwiftObjectPath parent2 = new SwiftObjectPath("container", - "/parent2"); - SwiftObjectPath child = new SwiftObjectPath("container", - "/parent/child"); - SwiftObjectPath sibling = new SwiftObjectPath("container", - "/parent/sibling"); - SwiftObjectPath grandchild = new SwiftObjectPath("container", - "/parent/child/grandchild"); - assertParentOf(parent, child); - assertParentOf(parent, grandchild); - assertParentOf(child, grandchild); - assertParentOf(parent, parent); - assertNotParentOf(child, parent); - assertParentOf(child, child); - assertNotParentOf(parent, parent2); - assertNotParentOf(grandchild, parent); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testChildOfRoot() throws Throwable { - SwiftObjectPath root = new SwiftObjectPath("container", "/"); - SwiftObjectPath child = new SwiftObjectPath("container", "child"); - SwiftObjectPath grandchild = new SwiftObjectPath("container", - "/child/grandchild"); - assertParentOf(root, child); - assertParentOf(root, grandchild); - assertParentOf(child, grandchild); - assertParentOf(root, root); - assertNotParentOf(child, root); - assertParentOf(child, child); - assertNotParentOf(grandchild, root); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/SwiftContract.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/SwiftContract.java deleted file mode 100644 index 99f72b7be981a..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/SwiftContract.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractBondedFSContract; -import org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem; - -/** - * The contract of OpenStack Swift: only enabled if the test binding data is provided - */ -public class SwiftContract extends AbstractBondedFSContract { - - public static final String CONTRACT_XML = "contract/swift.xml"; - - public SwiftContract(Configuration conf) { - super(conf); - //insert the base features - addConfResource(CONTRACT_XML); - } - - - @Override - public String getScheme() { - return SwiftNativeFileSystem.SWIFT; - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractCreate.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractCreate.java deleted file mode 100644 index df15a0a84c31e..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractCreate.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractCreateTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -public class TestSwiftContractCreate extends AbstractContractCreateTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } - - @Override - public void testOverwriteEmptyDirectory() throws Throwable { - ContractTestUtils.skip("blobstores can't distinguish empty directories from files"); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractDelete.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractDelete.java deleted file mode 100644 index 65d031cd3980e..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractDelete.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -public class TestSwiftContractDelete extends AbstractContractDeleteTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractMkdir.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractMkdir.java deleted file mode 100644 index b82ba776386e9..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractMkdir.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Test dir operations on S3 - */ -public class TestSwiftContractMkdir extends AbstractContractMkdirTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractOpen.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractOpen.java deleted file mode 100644 index 0f91b6f823e13..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractOpen.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractOpenTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -public class TestSwiftContractOpen extends AbstractContractOpenTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } - - @Override - public void testOpenReadDir() throws Throwable { - ContractTestUtils.skip("Skipping object-store quirk"); - } - - @Override - public void testOpenReadDirWithChild() throws Throwable { - ContractTestUtils.skip("Skipping object-store quirk"); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractRename.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractRename.java deleted file mode 100644 index 8f1edb9b6a3bd..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractRename.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractRenameTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -public class TestSwiftContractRename extends AbstractContractRenameTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractRootDir.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractRootDir.java deleted file mode 100644 index c7b766edd49ff..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractRootDir.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * root dir operations against an S3 bucket - */ -public class TestSwiftContractRootDir extends - AbstractContractRootDirectoryTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractSeek.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractSeek.java deleted file mode 100644 index d045980e698cc..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/contract/TestSwiftContractSeek.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractSeekTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -public class TestSwiftContractSeek extends AbstractContractSeekTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new SwiftContract(conf); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/hdfs2/TestSwiftFileSystemDirectoriesHdfs2.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/hdfs2/TestSwiftFileSystemDirectoriesHdfs2.java deleted file mode 100644 index cb64bef6c5cd7..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/hdfs2/TestSwiftFileSystemDirectoriesHdfs2.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.swift.hdfs2; - -import org.apache.hadoop.fs.swift.TestSwiftFileSystemDirectories; -import org.apache.hadoop.fs.swift.snative.SwiftFileStatus; - -/** - * Add some HDFS-2 only assertions to {@link TestSwiftFileSystemDirectories} - */ -public class TestSwiftFileSystemDirectoriesHdfs2 extends - TestSwiftFileSystemDirectories { - - - /** - * make assertions about fields that only appear in - * FileStatus in HDFS2 - * @param stat status to look at - */ - protected void extraStatusAssertions(SwiftFileStatus stat) { - //HDFS2 - assertTrue("isDirectory(): Not a directory: " + stat, stat.isDirectory()); - assertFalse("isFile(): declares itself a file: " + stat, stat.isFile()); - assertFalse("isFile(): declares itself a file: " + stat, stat.isSymlink()); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/hdfs2/TestV2LsOperations.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/hdfs2/TestV2LsOperations.java deleted file mode 100644 index 833b91d57f2c5..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/hdfs2/TestV2LsOperations.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.hdfs2; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.swift.SwiftFileSystemBaseTest; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -import java.io.IOException; - -public class TestV2LsOperations extends SwiftFileSystemBaseTest { - - private Path[] testDirs; - - /** - * Setup creates dirs under test/hadoop - * @throws Exception - */ - @Override - public void setUp() throws Exception { - super.setUp(); - //delete the test directory - Path test = path("/test"); - fs.delete(test, true); - mkdirs(test); - } - - /** - * Create subdirectories and files under test/ for those tests - * that want them. Doing so adds overhead to setup and teardown, - * so should only be done for those tests that need them. - * @throws IOException on an IO problem - */ - private void createTestSubdirs() throws IOException { - testDirs = new Path[]{ - path("/test/hadoop/a"), - path("/test/hadoop/b"), - path("/test/hadoop/c/1"), - }; - assertPathDoesNotExist("test directory setup", testDirs[0]); - for (Path path : testDirs) { - mkdirs(path); - } - } - - /** - * To get this project to compile under Hadoop 1, this code needs to be - * commented out - * - * - * @param fs filesystem - * @param dir dir - * @param subdir subdir - * @param recursive recurse? 
- * @throws IOException IO problems - */ - public static void assertListFilesFinds(FileSystem fs, - Path dir, - Path subdir, - boolean recursive) throws IOException { - RemoteIterator iterator = - fs.listFiles(dir, recursive); - boolean found = false; - int entries = 0; - StringBuilder builder = new StringBuilder(); - while (iterator.hasNext()) { - LocatedFileStatus next = iterator.next(); - entries++; - builder.append(next.toString()).append('\n'); - if (next.getPath().equals(subdir)) { - found = true; - } - } - assertTrue("Path " + subdir - + " not found in directory " + dir + " : " - + " entries=" + entries - + " content" - + builder.toString(), - found); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListFilesRootDir() throws Throwable { - Path dir = path("/"); - Path child = new Path(dir, "test"); - fs.delete(child, true); - SwiftTestUtils.writeTextFile(fs, child, "text", false); - assertListFilesFinds(fs, dir, child, false); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListFilesSubDir() throws Throwable { - createTestSubdirs(); - Path dir = path("/test/subdir"); - Path child = new Path(dir, "text.txt"); - SwiftTestUtils.writeTextFile(fs, child, "text", false); - assertListFilesFinds(fs, dir, child, false); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testListFilesRecursive() throws Throwable { - createTestSubdirs(); - Path dir = path("/test/recursive"); - Path child = new Path(dir, "hadoop/a/a.txt"); - SwiftTestUtils.writeTextFile(fs, child, "text", false); - assertListFilesFinds(fs, dir, child, true); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/http/TestRestClientBindings.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/http/TestRestClientBindings.java deleted file mode 100644 index 8075e08404ae8..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/http/TestRestClientBindings.java +++ /dev/null @@ 
-1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.swift.SwiftTestConstants; -import org.apache.hadoop.fs.swift.exceptions.SwiftConfigurationException; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Properties; - -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_AUTH_URL; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_PASSWORD; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.DOT_USERNAME; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_AUTH_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_CONTAINER_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_HTTPS_PORT_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_HTTP_PORT_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_PASSWORD_PROPERTY; -import static 
org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_REGION_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_SERVICE_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_TENANT_PROPERTY; -import static org.apache.hadoop.fs.swift.http.SwiftProtocolConstants.SWIFT_USERNAME_PROPERTY; -import static org.apache.hadoop.fs.swift.util.SwiftTestUtils.assertPropertyEquals; - -public class TestRestClientBindings extends Assert - implements SwiftTestConstants { - - private static final String SERVICE = "sname"; - private static final String CONTAINER = "cname"; - private static final String FS_URI = "swift://" - + CONTAINER + "." + SERVICE + "/"; - private static final String AUTH_URL = "http://localhost:8080/auth"; - private static final String USER = "user"; - private static final String PASS = "pass"; - private static final String TENANT = "tenant"; - private URI filesysURI; - private Configuration conf; - - @Before - public void setup() throws URISyntaxException { - filesysURI = new URI(FS_URI); - conf = new Configuration(true); - setInstanceVal(conf, SERVICE, DOT_AUTH_URL, AUTH_URL); - setInstanceVal(conf, SERVICE, DOT_USERNAME, USER); - setInstanceVal(conf, SERVICE, DOT_PASSWORD, PASS); - } - - private void setInstanceVal(Configuration conf, - String host, - String key, - String val) { - String instance = RestClientBindings.buildSwiftInstancePrefix(host); - String confkey = instance - + key; - conf.set(confkey, val); - } - - public void testPrefixBuilder() throws Throwable { - String built = RestClientBindings.buildSwiftInstancePrefix(SERVICE); - assertEquals("fs.swift.service." 
+ SERVICE, built); - } - - public void testBindAgainstConf() throws Exception { - Properties props = RestClientBindings.bind(filesysURI, conf); - assertPropertyEquals(props, SWIFT_CONTAINER_PROPERTY, CONTAINER); - assertPropertyEquals(props, SWIFT_SERVICE_PROPERTY, SERVICE); - assertPropertyEquals(props, SWIFT_AUTH_PROPERTY, AUTH_URL); - assertPropertyEquals(props, SWIFT_AUTH_PROPERTY, AUTH_URL); - assertPropertyEquals(props, SWIFT_USERNAME_PROPERTY, USER); - assertPropertyEquals(props, SWIFT_PASSWORD_PROPERTY, PASS); - - assertPropertyEquals(props, SWIFT_TENANT_PROPERTY, null); - assertPropertyEquals(props, SWIFT_REGION_PROPERTY, null); - assertPropertyEquals(props, SWIFT_HTTP_PORT_PROPERTY, null); - assertPropertyEquals(props, SWIFT_HTTPS_PORT_PROPERTY, null); - } - - public void expectBindingFailure(URI fsURI, Configuration config) { - try { - Properties binding = RestClientBindings.bind(fsURI, config); - //if we get here, binding didn't fail- there is something else. - //list the properties but not the values. 
- StringBuilder details = new StringBuilder() ; - for (Object key: binding.keySet()) { - details.append(key.toString()).append(" "); - } - fail("Expected a failure, got the binding [ "+ details+"]"); - } catch (SwiftConfigurationException expected) { - - } - } - - public void testBindAgainstConfMissingInstance() throws Exception { - Configuration badConf = new Configuration(); - expectBindingFailure(filesysURI, badConf); - } - - -/* Hadoop 2.x+ only, as conf.unset() isn't a v1 feature - public void testBindAgainstConfIncompleteInstance() throws Exception { - String instance = RestClientBindings.buildSwiftInstancePrefix(SERVICE); - conf.unset(instance + DOT_PASSWORD); - expectBindingFailure(filesysURI, conf); - } -*/ - - @Test(expected = SwiftConfigurationException.class) - public void testDottedServiceURL() throws Exception { - RestClientBindings.bind(new URI("swift://hadoop.apache.org/"), conf); - } - - @Test(expected = SwiftConfigurationException.class) - public void testMissingServiceURL() throws Exception { - RestClientBindings.bind(new URI("swift:///"), conf); - } - - /** - * inner test method that expects container extraction to fail - * -if not prints a meaningful error message. - * - * @param hostname hostname to parse - */ - private static void expectExtractContainerFail(String hostname) { - try { - String container = RestClientBindings.extractContainerName(hostname); - fail("Expected an error -got a container of '" + container - + "' from " + hostname); - } catch (SwiftConfigurationException expected) { - //expected - } - } - - /** - * inner test method that expects service extraction to fail - * -if not prints a meaningful error message. 
- * - * @param hostname hostname to parse - */ - public static void expectExtractServiceFail(String hostname) { - try { - String service = RestClientBindings.extractServiceName(hostname); - fail("Expected an error -got a service of '" + service - + "' from " + hostname); - } catch (SwiftConfigurationException expected) { - //expected - } - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testEmptyHostname() throws Throwable { - expectExtractContainerFail(""); - expectExtractServiceFail(""); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testDot() throws Throwable { - expectExtractContainerFail("."); - expectExtractServiceFail("."); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testSimple() throws Throwable { - expectExtractContainerFail("simple"); - expectExtractServiceFail("simple"); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testTrailingDot() throws Throwable { - expectExtractServiceFail("simple."); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testLeadingDot() throws Throwable { - expectExtractServiceFail(".leading"); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/http/TestSwiftRestClient.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/http/TestSwiftRestClient.java deleted file mode 100644 index 7568c11c562c3..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/http/TestSwiftRestClient.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.http; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.SwiftTestConstants; -import org.apache.hadoop.fs.swift.util.Duration; -import org.apache.hadoop.fs.swift.util.DurationStats; -import org.apache.hadoop.fs.swift.util.SwiftObjectPath; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.apache.http.Header; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Test; - -import java.io.ByteArrayInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URI; - -public class TestSwiftRestClient implements SwiftTestConstants { - private static final Logger LOG = - LoggerFactory.getLogger(TestSwiftRestClient.class); - - private Configuration conf; - private boolean runTests; - private URI serviceURI; - - @Before - public void setup() throws IOException { - conf = new Configuration(); - runTests = SwiftTestUtils.hasServiceURI(conf); - if (runTests) { - serviceURI = SwiftTestUtils.getServiceURI(conf); - } - } - - protected void assumeEnabled() { - Assume.assumeTrue(runTests); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testCreate() throws Throwable { - assumeEnabled(); - SwiftRestClient client = createClient(); - } - - private SwiftRestClient createClient() throws IOException { - return SwiftRestClient.getInstance(serviceURI, conf); - } - - - @Test(timeout = SWIFT_TEST_TIMEOUT) - 
public void testAuthenticate() throws Throwable { - assumeEnabled(); - SwiftRestClient client = createClient(); - client.authenticate(); - } - - @Test(timeout = SWIFT_TEST_TIMEOUT) - public void testPutAndDelete() throws Throwable { - assumeEnabled(); - SwiftRestClient client = createClient(); - client.authenticate(); - Path path = new Path("restTestPutAndDelete"); - SwiftObjectPath sobject = SwiftObjectPath.fromPath(serviceURI, path); - byte[] stuff = new byte[1]; - stuff[0] = 'a'; - client.upload(sobject, new ByteArrayInputStream(stuff), stuff.length); - //check file exists - Duration head = new Duration(); - Header[] responseHeaders = client.headRequest("expect success", - sobject, - SwiftRestClient.NEWEST); - head.finished(); - LOG.info("head request duration " + head); - for (Header header: responseHeaders) { - LOG.info(header.toString()); - } - //delete the file - client.delete(sobject); - //check file is gone - try { - Header[] headers = client.headRequest("expect fail", - sobject, - SwiftRestClient.NEWEST); - Assert.fail("Expected deleted file, but object is still present: " - + sobject); - } catch (FileNotFoundException e) { - //expected - } - for (DurationStats stats: client.getOperationStatistics()) { - LOG.info(stats.toString()); - } - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/scale/SwiftScaleTestBase.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/scale/SwiftScaleTestBase.java deleted file mode 100644 index 314e7a1dfb88e..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/scale/SwiftScaleTestBase.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.scale; - -import org.apache.hadoop.fs.swift.SwiftFileSystemBaseTest; - -/** - * Base class for scale tests; here is where the common scale configuration - * keys are defined - */ - -public class SwiftScaleTestBase extends SwiftFileSystemBaseTest { - - public static final String SCALE_TEST = "scale.test."; - public static final String KEY_OPERATION_COUNT = SCALE_TEST + "operation.count"; - public static final long DEFAULT_OPERATION_COUNT = 10; - - protected long getOperationCount() { - return getConf().getLong(KEY_OPERATION_COUNT, DEFAULT_OPERATION_COUNT); - } -} diff --git a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/scale/TestWriteManySmallFiles.java b/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/scale/TestWriteManySmallFiles.java deleted file mode 100644 index 1d6cfa2e8668e..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/java/org/apache/hadoop/fs/swift/scale/TestWriteManySmallFiles.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.swift.scale; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.swift.util.Duration; -import org.apache.hadoop.fs.swift.util.DurationStats; -import org.apache.hadoop.fs.swift.util.SwiftTestUtils; -import org.junit.Test; - -public class TestWriteManySmallFiles extends SwiftScaleTestBase { - - public static final Logger LOG = - LoggerFactory.getLogger(TestWriteManySmallFiles.class); - - @Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT) - public void testScaledWriteThenRead() throws Throwable { - Path dir = new Path("/test/manysmallfiles"); - Duration rm1 = new Duration(); - fs.delete(dir, true); - rm1.finished(); - fs.mkdirs(dir); - Duration ls1 = new Duration(); - fs.listStatus(dir); - ls1.finished(); - long count = getOperationCount(); - SwiftTestUtils.noteAction("Beginning Write of "+ count + " files "); - DurationStats writeStats = new DurationStats("write"); - DurationStats readStats = new DurationStats("read"); - String format = "%08d"; - for (long l = 0; l < count; l++) { - String name = String.format(format, l); - Path p = new Path(dir, "part-" + name); - Duration d = new Duration(); - SwiftTestUtils.writeTextFile(fs, p, name, false); - d.finished(); - writeStats.add(d); - Thread.sleep(1000); - } - //at this point, the directory is full. 
- SwiftTestUtils.noteAction("Beginning ls"); - - Duration ls2 = new Duration(); - FileStatus[] status2 = (FileStatus[]) fs.listStatus(dir); - ls2.finished(); - assertEquals("Not enough entries in the directory", count, status2.length); - - SwiftTestUtils.noteAction("Beginning read"); - - for (long l = 0; l < count; l++) { - String name = String.format(format, l); - Path p = new Path(dir, "part-" + name); - Duration d = new Duration(); - String result = SwiftTestUtils.readBytesToString(fs, p, name.length()); - assertEquals(name, result); - d.finished(); - readStats.add(d); - } - //do a recursive delete - SwiftTestUtils.noteAction("Beginning delete"); - Duration rm2 = new Duration(); - fs.delete(dir, true); - rm2.finished(); - //print the stats - LOG.info(String.format("'filesystem','%s'",fs.getUri())); - LOG.info(writeStats.toString()); - LOG.info(readStats.toString()); - LOG.info(String.format( - "'rm1',%d,'ls1',%d", - rm1.value(), - ls1.value())); - LOG.info(String.format( - "'rm2',%d,'ls2',%d", - rm2.value(), - ls2.value())); - } - -} diff --git a/hadoop-tools/hadoop-openstack/src/test/resources/contract/swift.xml b/hadoop-tools/hadoop-openstack/src/test/resources/contract/swift.xml deleted file mode 100644 index fbf3a177c91f0..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/resources/contract/swift.xml +++ /dev/null @@ -1,105 +0,0 @@ - - - - - - - fs.contract.test.root-tests-enabled - true - - - - fs.contract.test.random-seek-count - 10 - - - - fs.contract.is-blobstore - true - - - - fs.contract.create-overwrites-directory - true - - - - fs.contract.create-visibility-delayed - true - - - - fs.contract.is-case-sensitive - true - - - - fs.contract.supports-append - false - - - - fs.contract.supports-atomic-directory-delete - false - - - - fs.contract.supports-atomic-rename - false - - - - fs.contract.supports-block-locality - false - - - - fs.contract.supports-concat - false - - - - fs.contract.supports-seek - true - - - - 
fs.contract.rejects-seek-past-eof - true - - - - fs.contract.supports-strict-exceptions - true - - - - fs.contract.supports-unix-permissions - false - - - - fs.contract.rename-returns-false-if-source-missing - true - - - diff --git a/hadoop-tools/hadoop-openstack/src/test/resources/core-site.xml b/hadoop-tools/hadoop-openstack/src/test/resources/core-site.xml deleted file mode 100644 index 9252e885871e4..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/resources/core-site.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - - - - - hadoop.tmp.dir - target/build/test - A base for other temporary directories. - true - - - - - hadoop.security.authentication - simple - - - - - - diff --git a/hadoop-tools/hadoop-openstack/src/test/resources/log4j.properties b/hadoop-tools/hadoop-openstack/src/test/resources/log4j.properties deleted file mode 100644 index a3bb8204f0df8..0000000000000 --- a/hadoop-tools/hadoop-openstack/src/test/resources/log4j.properties +++ /dev/null @@ -1,39 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# log4j configuration used during build and unit tests - -log4j.rootLogger=INFO,stdout -log4j.threshold=ALL -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.target=System.out -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n -#log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c %x - %m%n" -#log4j.logger.org.apache.hadoop.fs.swift=DEBUG diff --git a/hadoop-tools/hadoop-pipes/pom.xml b/hadoop-tools/hadoop-pipes/pom.xml index bda7fb35bef85..78963c5911e47 100644 --- a/hadoop-tools/hadoop-pipes/pom.xml +++ b/hadoop-tools/hadoop-pipes/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-pipes - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Pipes Apache Hadoop Pipes pom diff --git a/hadoop-tools/hadoop-resourceestimator/pom.xml b/hadoop-tools/hadoop-resourceestimator/pom.xml index bd82cc8c8ccc5..fc2565935d8db 100644 --- a/hadoop-tools/hadoop-resourceestimator/pom.xml +++ b/hadoop-tools/hadoop-resourceestimator/pom.xml @@ -25,7 +25,7 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-resourceestimator @@ -79,8 +79,22 @@ jersey-server - com.sun.jersey + com.github.pjfanning jersey-json + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + + junit diff --git a/hadoop-tools/hadoop-rumen/pom.xml 
b/hadoop-tools/hadoop-rumen/pom.xml index d4b7d64d3bfb1..f1f4daf96164e 100644 --- a/hadoop-tools/hadoop-rumen/pom.xml +++ b/hadoop-tools/hadoop-rumen/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-rumen - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Rumen Apache Hadoop Rumen jar @@ -102,10 +102,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${basedir}/dev-support/findbugs-exclude.xml Max diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java index 424405aa7e647..683bcc80f1fa9 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java @@ -470,7 +470,7 @@ public int compare(Pair p1, next = heap.poll(); } } finally { - IOUtils.cleanup(null, reader); + IOUtils.cleanupWithLogger(null, reader); if (outGen != null) { outGen.close(); } diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java index 7e79179721f8a..9cd2f4778fc7f 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java @@ -25,6 +25,8 @@ import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import org.apache.hadoop.util.XMLUtils; + import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -55,7 +57,7 @@ static Properties parse(InputStream input) throws IOException { Properties result = new Properties(); try { - DocumentBuilderFactory dbf = 
DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder db = dbf.newDocumentBuilder(); diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java index 1d85872c08d7b..a6c8bdad87d04 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java @@ -17,28 +17,27 @@ */ package org.apache.hadoop.tools.rumen; +import java.io.IOException; +import java.io.StringReader; import java.util.Properties; import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.io.InputStream; -import java.io.ByteArrayInputStream; -import java.io.IOException; - -import java.nio.charset.Charset; - import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.util.XMLUtils; + import org.w3c.dom.Document; import org.w3c.dom.NodeList; import org.w3c.dom.Node; import org.w3c.dom.Element; import org.w3c.dom.Text; +import org.xml.sax.InputSource; import org.xml.sax.SAXException; class ParsedConfigFile { @@ -46,7 +45,6 @@ class ParsedConfigFile { Pattern.compile("_(job_[0-9]+_[0-9]+)_"); private static final Pattern heapPattern = Pattern.compile("-Xmx([0-9]+)([mMgG])"); - private static final Charset UTF_8 = Charset.forName("UTF-8"); final int heapMegabytes; @@ -103,13 +101,11 @@ private int maybeGetIntValue(String propName, String attr, String value, } try { - InputStream is = new ByteArrayInputStream(xmlString.getBytes(UTF_8)); - - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = 
XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(is); + Document doc = db.parse(new InputSource(new StringReader(xmlString))); Element root = doc.getDocumentElement(); diff --git a/hadoop-tools/hadoop-sls/pom.xml b/hadoop-tools/hadoop-sls/pom.xml index 848b8c509fd6f..87f3e98d92036 100644 --- a/hadoop-tools/hadoop-sls/pom.xml +++ b/hadoop-tools/hadoop-sls/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.3.0-SNAPSHOT + 3.3.6 ../../hadoop-project hadoop-sls - 3.3.0-SNAPSHOT + 3.3.6 Apache Hadoop Scheduler Load Simulator Apache Hadoop Scheduler Load Simulator jar @@ -108,10 +108,9 @@ - org.codehaus.mojo - findbugs-maven-plugin + com.github.spotbugs + spotbugs-maven-plugin - true true ${basedir}/dev-support/findbugs-exclude.xml Max diff --git a/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json b/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json index 8c2e43e1e53ba..8e9af7549cd33 100644 --- a/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json +++ b/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json @@ -4559,7 +4559,6 @@ "hadoop.hdfs.configuration.version" : "1", "dfs.datanode.balance.bandwidthPerSec" : "1048576", "mapreduce.reduce.shuffle.connect.timeout" : "180000", - "hadoop.ssl.enabled" : "false", "dfs.journalnode.rpc-address" : "0.0.0.0:8485", "yarn.nodemanager.aux-services" : "mapreduce.shuffle", "mapreduce.job.counters.max" : "120", @@ -9626,7 +9625,6 @@ "hadoop.hdfs.configuration.version" : "1", "dfs.datanode.balance.bandwidthPerSec" : "1048576", "mapreduce.reduce.shuffle.connect.timeout" : "180000", - "hadoop.ssl.enabled" : "false", "dfs.journalnode.rpc-address" : "0.0.0.0:8485", "yarn.nodemanager.aux-services" : "mapreduce.shuffle", "mapreduce.job.counters.max" : "120", diff --git a/hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js b/hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js index 
9b5206bcc6078..50937333b99a5 100644 --- a/hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js +++ b/hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js @@ -1,5 +1,5 @@ /*! - * jQuery JavaScript Library v3.3.1 + * jQuery JavaScript Library v3.5.1 * https://jquery.com/ * * Includes Sizzle.js @@ -9,7 +9,7 @@ * Released under the MIT license * https://jquery.org/license * - * Date: 2018-01-20T17:24Z + * Date: 2020-05-04T22:49Z */ ( function( global, factory ) { @@ -47,13 +47,16 @@ var arr = []; -var document = window.document; - var getProto = Object.getPrototypeOf; var slice = arr.slice; -var concat = arr.concat; +var flat = arr.flat ? function( array ) { + return arr.flat.call( array ); +} : function( array ) { + return arr.concat.apply( [], array ); +}; + var push = arr.push; @@ -86,25 +89,40 @@ var isWindow = function isWindow( obj ) { }; +var document = window.document; + var preservedScriptAttributes = { type: true, src: true, + nonce: true, noModule: true }; - function DOMEval( code, doc, node ) { + function DOMEval( code, node, doc ) { doc = doc || document; - var i, + var i, val, script = doc.createElement( "script" ); script.text = code; if ( node ) { for ( i in preservedScriptAttributes ) { - if ( node[ i ] ) { - script[ i ] = node[ i ]; + + // Support: Firefox 64+, Edge 18+ + // Some browsers don't support the "nonce" property on scripts. + // On the other hand, just using `getAttribute` is not enough as + // the `nonce` attribute is reset to an empty string whenever it + // becomes browsing-context connected. + // See https://github.com/whatwg/html/issues/2369 + // See https://html.spec.whatwg.org/#nonce-attributes + // The `node.getAttribute` check was added for the sake of + // `jQuery.globalEval` so that it can fake a nonce-containing node + // via an object. 
+ val = node[ i ] || node.getAttribute && node.getAttribute( i ); + if ( val ) { + script.setAttribute( i, val ); } } } @@ -129,7 +147,7 @@ function toType( obj ) { var - version = "3.3.1", + version = "3.5.1", // Define a local copy of jQuery jQuery = function( selector, context ) { @@ -137,11 +155,7 @@ var // The jQuery object is actually just the init constructor 'enhanced' // Need init if jQuery is called (just allow error to be thrown if not included) return new jQuery.fn.init( selector, context ); - }, - - // Support: Android <=4.0 only - // Make sure we trim BOM and NBSP - rtrim = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g; + }; jQuery.fn = jQuery.prototype = { @@ -207,6 +221,18 @@ jQuery.fn = jQuery.prototype = { return this.eq( -1 ); }, + even: function() { + return this.pushStack( jQuery.grep( this, function( _elem, i ) { + return ( i + 1 ) % 2; + } ) ); + }, + + odd: function() { + return this.pushStack( jQuery.grep( this, function( _elem, i ) { + return i % 2; + } ) ); + }, + eq: function( i ) { var len = this.length, j = +i + ( i < 0 ? len : 0 ); @@ -258,25 +284,28 @@ jQuery.extend = jQuery.fn.extend = function() { // Extend the base object for ( name in options ) { - src = target[ name ]; copy = options[ name ]; + // Prevent Object.prototype pollution // Prevent never-ending loop - if ( target === copy ) { + if ( name === "__proto__" || target === copy ) { continue; } // Recurse if we're merging plain objects or arrays if ( deep && copy && ( jQuery.isPlainObject( copy ) || ( copyIsArray = Array.isArray( copy ) ) ) ) { + src = target[ name ]; - if ( copyIsArray ) { - copyIsArray = false; - clone = src && Array.isArray( src ) ? src : []; - + // Ensure proper type for the source value + if ( copyIsArray && !Array.isArray( src ) ) { + clone = []; + } else if ( !copyIsArray && !jQuery.isPlainObject( src ) ) { + clone = {}; } else { - clone = src && jQuery.isPlainObject( src ) ? 
src : {}; + clone = src; } + copyIsArray = false; // Never move original objects, clone them target[ name ] = jQuery.extend( deep, clone, copy ); @@ -329,9 +358,6 @@ jQuery.extend( { }, isEmptyObject: function( obj ) { - - /* eslint-disable no-unused-vars */ - // See https://github.com/eslint/eslint/issues/6125 var name; for ( name in obj ) { @@ -340,9 +366,10 @@ jQuery.extend( { return true; }, - // Evaluates a script in a global context - globalEval: function( code ) { - DOMEval( code ); + // Evaluates a script in a provided context; falls back to the global one + // if not specified. + globalEval: function( code, options, doc ) { + DOMEval( code, { nonce: options && options.nonce }, doc ); }, each: function( obj, callback ) { @@ -366,13 +393,6 @@ jQuery.extend( { return obj; }, - // Support: Android <=4.0 only - trim: function( text ) { - return text == null ? - "" : - ( text + "" ).replace( rtrim, "" ); - }, - // results is for internal usage only makeArray: function( arr, results ) { var ret = results || []; @@ -459,7 +479,7 @@ jQuery.extend( { } // Flatten any nested arrays - return concat.apply( [], ret ); + return flat( ret ); }, // A global GUID counter for objects @@ -476,7 +496,7 @@ if ( typeof Symbol === "function" ) { // Populate the class2type map jQuery.each( "Boolean Number String Function Array Date RegExp Object Error Symbol".split( " " ), -function( i, name ) { +function( _i, name ) { class2type[ "[object " + name + "]" ] = name.toLowerCase(); } ); @@ -498,17 +518,16 @@ function isArrayLike( obj ) { } var Sizzle = /*! 
- * Sizzle CSS Selector Engine v2.3.3 + * Sizzle CSS Selector Engine v2.3.5 * https://sizzlejs.com/ * - * Copyright jQuery Foundation and other contributors + * Copyright JS Foundation and other contributors * Released under the MIT license - * http://jquery.org/license + * https://js.foundation/ * - * Date: 2016-08-08 + * Date: 2020-03-14 */ -(function( window ) { - +( function( window ) { var i, support, Expr, @@ -539,6 +558,7 @@ var i, classCache = createCache(), tokenCache = createCache(), compilerCache = createCache(), + nonnativeSelectorCache = createCache(), sortOrder = function( a, b ) { if ( a === b ) { hasDuplicate = true; @@ -547,61 +567,71 @@ var i, }, // Instance methods - hasOwn = ({}).hasOwnProperty, + hasOwn = ( {} ).hasOwnProperty, arr = [], pop = arr.pop, - push_native = arr.push, + pushNative = arr.push, push = arr.push, slice = arr.slice, + // Use a stripped-down indexOf as it's faster than native // https://jsperf.com/thor-indexof-vs-for/5 indexOf = function( list, elem ) { var i = 0, len = list.length; for ( ; i < len; i++ ) { - if ( list[i] === elem ) { + if ( list[ i ] === elem ) { return i; } } return -1; }, - booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped", + booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|" + + "ismap|loop|multiple|open|readonly|required|scoped", // Regular expressions // http://www.w3.org/TR/css3-selectors/#whitespace whitespace = "[\\x20\\t\\r\\n\\f]", - // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier - identifier = "(?:\\\\.|[\\w-]|[^\0-\\xa0])+", + // https://www.w3.org/TR/css-syntax-3/#ident-token-diagram + identifier = "(?:\\\\[\\da-fA-F]{1,6}" + whitespace + + "?|\\\\[^\\r\\n\\f]|[\\w-]|[^\0-\\x7f])+", // Attribute selectors: http://www.w3.org/TR/selectors/#attribute-selectors attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace + + // Operator (capture 2) 
"*([*^$|!~]?=)" + whitespace + - // "Attribute values must be CSS identifiers [capture 5] or strings [capture 3 or capture 4]" - "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" + whitespace + - "*\\]", + + // "Attribute values must be CSS identifiers [capture 5] + // or strings [capture 3 or capture 4]" + "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" + + whitespace + "*\\]", pseudos = ":(" + identifier + ")(?:\\((" + + // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments: // 1. quoted (capture 3; capture 4 or capture 5) "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" + + // 2. simple (capture 6) "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" + + // 3. anything else (capture 2) ".*" + ")\\)|)", // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter rwhitespace = new RegExp( whitespace + "+", "g" ), - rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ), + rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + + whitespace + "+$", "g" ), rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), - rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + "*" ), - - rattributeQuotes = new RegExp( "=" + whitespace + "*([^\\]'\"]*?)" + whitespace + "*\\]", "g" ), + rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + + "*" ), + rdescend = new RegExp( whitespace + "|>" ), rpseudo = new RegExp( pseudos ), ridentifier = new RegExp( "^" + identifier + "$" ), @@ -612,16 +642,19 @@ var i, "TAG": new RegExp( "^(" + identifier + "|[*])" ), "ATTR": new RegExp( "^" + attributes ), "PSEUDO": new RegExp( "^" + pseudos ), - "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + whitespace + - "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + 
whitespace + - "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), + "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + + whitespace + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + + whitespace + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), "bool": new RegExp( "^(?:" + booleans + ")$", "i" ), + // For use in libraries implementing .is() // We use this for POS matching in `select` - "needsContext": new RegExp( "^" + whitespace + "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + - whitespace + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) + "needsContext": new RegExp( "^" + whitespace + + "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace + + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) }, + rhtml = /HTML$/i, rinputs = /^(?:input|select|textarea|button)$/i, rheader = /^h\d$/i, @@ -634,18 +667,21 @@ var i, // CSS escapes // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters - runescape = new RegExp( "\\\\([\\da-f]{1,6}" + whitespace + "?|(" + whitespace + ")|.)", "ig" ), - funescape = function( _, escaped, escapedWhitespace ) { - var high = "0x" + escaped - 0x10000; - // NaN means non-codepoint - // Support: Firefox<24 - // Workaround erroneous numeric interpretation of +"0x" - return high !== high || escapedWhitespace ? - escaped : + runescape = new RegExp( "\\\\[\\da-fA-F]{1,6}" + whitespace + "?|\\\\([^\\r\\n\\f])", "g" ), + funescape = function( escape, nonHex ) { + var high = "0x" + escape.slice( 1 ) - 0x10000; + + return nonHex ? + + // Strip the backslash prefix from a non-hex escape sequence + nonHex : + + // Replace a hexadecimal escape sequence with the encoded Unicode code point + // Support: IE <=11+ + // For values outside the Basic Multilingual Plane (BMP), manually construct a + // surrogate pair high < 0 ? 
- // BMP codepoint String.fromCharCode( high + 0x10000 ) : - // Supplemental Plane codepoint (surrogate pair) String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 ); }, @@ -661,7 +697,8 @@ var i, } // Control characters and (dependent upon position) numbers get escaped as code points - return ch.slice( 0, -1 ) + "\\" + ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; + return ch.slice( 0, -1 ) + "\\" + + ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; } // Other potentially-special ASCII characters get backslash-escaped @@ -676,9 +713,9 @@ var i, setDocument(); }, - disabledAncestor = addCombinator( + inDisabledFieldset = addCombinator( function( elem ) { - return elem.disabled === true && ("form" in elem || "label" in elem); + return elem.disabled === true && elem.nodeName.toLowerCase() === "fieldset"; }, { dir: "parentNode", next: "legend" } ); @@ -686,18 +723,20 @@ var i, // Optimize for push.apply( _, NodeList ) try { push.apply( - (arr = slice.call( preferredDoc.childNodes )), + ( arr = slice.call( preferredDoc.childNodes ) ), preferredDoc.childNodes ); + // Support: Android<4.0 // Detect silently failing push.apply + // eslint-disable-next-line no-unused-expressions arr[ preferredDoc.childNodes.length ].nodeType; } catch ( e ) { push = { apply: arr.length ? // Leverage slice if possible function( target, els ) { - push_native.apply( target, slice.call(els) ); + pushNative.apply( target, slice.call( els ) ); } : // Support: IE<9 @@ -705,8 +744,9 @@ try { function( target, els ) { var j = target.length, i = 0; + // Can't trust NodeList.length - while ( (target[j++] = els[i++]) ) {} + while ( ( target[ j++ ] = els[ i++ ] ) ) {} target.length = j - 1; } }; @@ -730,24 +770,21 @@ function Sizzle( selector, context, results, seed ) { // Try to shortcut find operations (as opposed to filters) in HTML documents if ( !seed ) { - - if ( ( context ? 
context.ownerDocument || context : preferredDoc ) !== document ) { - setDocument( context ); - } + setDocument( context ); context = context || document; if ( documentIsHTML ) { // If the selector is sufficiently simple, try using a "get*By*" DOM method // (excepting DocumentFragment context, where the methods don't exist) - if ( nodeType !== 11 && (match = rquickExpr.exec( selector )) ) { + if ( nodeType !== 11 && ( match = rquickExpr.exec( selector ) ) ) { // ID selector - if ( (m = match[1]) ) { + if ( ( m = match[ 1 ] ) ) { // Document context if ( nodeType === 9 ) { - if ( (elem = context.getElementById( m )) ) { + if ( ( elem = context.getElementById( m ) ) ) { // Support: IE, Opera, Webkit // TODO: identify versions @@ -766,7 +803,7 @@ function Sizzle( selector, context, results, seed ) { // Support: IE, Opera, Webkit // TODO: identify versions // getElementById can match elements by name instead of ID - if ( newContext && (elem = newContext.getElementById( m )) && + if ( newContext && ( elem = newContext.getElementById( m ) ) && contains( context, elem ) && elem.id === m ) { @@ -776,12 +813,12 @@ function Sizzle( selector, context, results, seed ) { } // Type selector - } else if ( match[2] ) { + } else if ( match[ 2 ] ) { push.apply( results, context.getElementsByTagName( selector ) ); return results; // Class selector - } else if ( (m = match[3]) && support.getElementsByClassName && + } else if ( ( m = match[ 3 ] ) && support.getElementsByClassName && context.getElementsByClassName ) { push.apply( results, context.getElementsByClassName( m ) ); @@ -791,50 +828,62 @@ function Sizzle( selector, context, results, seed ) { // Take advantage of querySelectorAll if ( support.qsa && - !compilerCache[ selector + " " ] && - (!rbuggyQSA || !rbuggyQSA.test( selector )) ) { - - if ( nodeType !== 1 ) { - newContext = context; - newSelector = selector; + !nonnativeSelectorCache[ selector + " " ] && + ( !rbuggyQSA || !rbuggyQSA.test( selector ) ) && - // qSA looks 
outside Element context, which is not what we want - // Thanks to Andrew Dupont for this workaround technique - // Support: IE <=8 + // Support: IE 8 only // Exclude object elements - } else if ( context.nodeName.toLowerCase() !== "object" ) { + ( nodeType !== 1 || context.nodeName.toLowerCase() !== "object" ) ) { - // Capture the context ID, setting it first if necessary - if ( (nid = context.getAttribute( "id" )) ) { - nid = nid.replace( rcssescape, fcssescape ); - } else { - context.setAttribute( "id", (nid = expando) ); + newSelector = selector; + newContext = context; + + // qSA considers elements outside a scoping root when evaluating child or + // descendant combinators, which is not what we want. + // In such cases, we work around the behavior by prefixing every selector in the + // list with an ID selector referencing the scope context. + // The technique has to be used as well when a leading combinator is used + // as such selectors are not recognized by querySelectorAll. + // Thanks to Andrew Dupont for this technique. + if ( nodeType === 1 && + ( rdescend.test( selector ) || rcombinators.test( selector ) ) ) { + + // Expand context for sibling selectors + newContext = rsibling.test( selector ) && testContext( context.parentNode ) || + context; + + // We can use :scope instead of the ID hack if the browser + // supports it & if we're not changing the context. + if ( newContext !== context || !support.scope ) { + + // Capture the context ID, setting it first if necessary + if ( ( nid = context.getAttribute( "id" ) ) ) { + nid = nid.replace( rcssescape, fcssescape ); + } else { + context.setAttribute( "id", ( nid = expando ) ); + } } // Prefix every selector in the list groups = tokenize( selector ); i = groups.length; while ( i-- ) { - groups[i] = "#" + nid + " " + toSelector( groups[i] ); + groups[ i ] = ( nid ? 
"#" + nid : ":scope" ) + " " + + toSelector( groups[ i ] ); } newSelector = groups.join( "," ); - - // Expand context for sibling selectors - newContext = rsibling.test( selector ) && testContext( context.parentNode ) || - context; } - if ( newSelector ) { - try { - push.apply( results, - newContext.querySelectorAll( newSelector ) - ); - return results; - } catch ( qsaError ) { - } finally { - if ( nid === expando ) { - context.removeAttribute( "id" ); - } + try { + push.apply( results, + newContext.querySelectorAll( newSelector ) + ); + return results; + } catch ( qsaError ) { + nonnativeSelectorCache( selector, true ); + } finally { + if ( nid === expando ) { + context.removeAttribute( "id" ); } } } @@ -855,12 +904,14 @@ function createCache() { var keys = []; function cache( key, value ) { + // Use (key + " ") to avoid collision with native prototype properties (see Issue #157) if ( keys.push( key + " " ) > Expr.cacheLength ) { + // Only keep the most recent entries delete cache[ keys.shift() ]; } - return (cache[ key + " " ] = value); + return ( cache[ key + " " ] = value ); } return cache; } @@ -879,17 +930,19 @@ function markFunction( fn ) { * @param {Function} fn Passed the created element and returns a boolean result */ function assert( fn ) { - var el = document.createElement("fieldset"); + var el = document.createElement( "fieldset" ); try { return !!fn( el ); - } catch (e) { + } catch ( e ) { return false; } finally { + // Remove from its parent by default if ( el.parentNode ) { el.parentNode.removeChild( el ); } + // release memory in IE el = null; } @@ -901,11 +954,11 @@ function assert( fn ) { * @param {Function} handler The method that will be applied */ function addHandle( attrs, handler ) { - var arr = attrs.split("|"), + var arr = attrs.split( "|" ), i = arr.length; while ( i-- ) { - Expr.attrHandle[ arr[i] ] = handler; + Expr.attrHandle[ arr[ i ] ] = handler; } } @@ -927,7 +980,7 @@ function siblingCheck( a, b ) { // Check if b follows a if ( cur 
) { - while ( (cur = cur.nextSibling) ) { + while ( ( cur = cur.nextSibling ) ) { if ( cur === b ) { return -1; } @@ -955,7 +1008,7 @@ function createInputPseudo( type ) { function createButtonPseudo( type ) { return function( elem ) { var name = elem.nodeName.toLowerCase(); - return (name === "input" || name === "button") && elem.type === type; + return ( name === "input" || name === "button" ) && elem.type === type; }; } @@ -998,7 +1051,7 @@ function createDisabledPseudo( disabled ) { // Where there is no isDisabled, check manually /* jshint -W018 */ elem.isDisabled !== !disabled && - disabledAncestor( elem ) === disabled; + inDisabledFieldset( elem ) === disabled; } return elem.disabled === disabled; @@ -1020,21 +1073,21 @@ function createDisabledPseudo( disabled ) { * @param {Function} fn */ function createPositionalPseudo( fn ) { - return markFunction(function( argument ) { + return markFunction( function( argument ) { argument = +argument; - return markFunction(function( seed, matches ) { + return markFunction( function( seed, matches ) { var j, matchIndexes = fn( [], seed.length, argument ), i = matchIndexes.length; // Match elements found at the specified indexes while ( i-- ) { - if ( seed[ (j = matchIndexes[i]) ] ) { - seed[j] = !(matches[j] = seed[j]); + if ( seed[ ( j = matchIndexes[ i ] ) ] ) { + seed[ j ] = !( matches[ j ] = seed[ j ] ); } } - }); - }); + } ); + } ); } /** @@ -1055,10 +1108,13 @@ support = Sizzle.support = {}; * @returns {Boolean} True iff elem is a non-HTML XML node */ isXML = Sizzle.isXML = function( elem ) { - // documentElement is verified for cases where it doesn't yet exist - // (such as loading iframes in IE - #4833) - var documentElement = elem && (elem.ownerDocument || elem).documentElement; - return documentElement ? 
documentElement.nodeName !== "HTML" : false; + var namespace = elem.namespaceURI, + docElem = ( elem.ownerDocument || elem ).documentElement; + + // Support: IE <=8 + // Assume HTML when documentElement doesn't yet exist, such as inside loading iframes + // https://bugs.jquery.com/ticket/4833 + return !rhtml.test( namespace || docElem && docElem.nodeName || "HTML" ); }; /** @@ -1071,7 +1127,11 @@ setDocument = Sizzle.setDocument = function( node ) { doc = node ? node.ownerDocument || node : preferredDoc; // Return early if doc is invalid or already selected - if ( doc === document || doc.nodeType !== 9 || !doc.documentElement ) { + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( doc == document || doc.nodeType !== 9 || !doc.documentElement ) { return document; } @@ -1080,10 +1140,14 @@ setDocument = Sizzle.setDocument = function( node ) { docElem = document.documentElement; documentIsHTML = !isXML( document ); - // Support: IE 9-11, Edge + // Support: IE 9 - 11+, Edge 12 - 18+ // Accessing iframe documents after unload throws "permission denied" errors (jQuery #13936) - if ( preferredDoc !== document && - (subWindow = document.defaultView) && subWindow.top !== subWindow ) { + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. 
+ // eslint-disable-next-line eqeqeq + if ( preferredDoc != document && + ( subWindow = document.defaultView ) && subWindow.top !== subWindow ) { // Support: IE 11, Edge if ( subWindow.addEventListener ) { @@ -1095,25 +1159,36 @@ setDocument = Sizzle.setDocument = function( node ) { } } + // Support: IE 8 - 11+, Edge 12 - 18+, Chrome <=16 - 25 only, Firefox <=3.6 - 31 only, + // Safari 4 - 5 only, Opera <=11.6 - 12.x only + // IE/Edge & older browsers don't support the :scope pseudo-class. + // Support: Safari 6.0 only + // Safari 6.0 supports :scope but it's an alias of :root there. + support.scope = assert( function( el ) { + docElem.appendChild( el ).appendChild( document.createElement( "div" ) ); + return typeof el.querySelectorAll !== "undefined" && + !el.querySelectorAll( ":scope fieldset div" ).length; + } ); + /* Attributes ---------------------------------------------------------------------- */ // Support: IE<8 // Verify that getAttribute really returns attributes and not properties // (excepting IE8 booleans) - support.attributes = assert(function( el ) { + support.attributes = assert( function( el ) { el.className = "i"; - return !el.getAttribute("className"); - }); + return !el.getAttribute( "className" ); + } ); /* getElement(s)By* ---------------------------------------------------------------------- */ // Check if getElementsByTagName("*") returns only elements - support.getElementsByTagName = assert(function( el ) { - el.appendChild( document.createComment("") ); - return !el.getElementsByTagName("*").length; - }); + support.getElementsByTagName = assert( function( el ) { + el.appendChild( document.createComment( "" ) ); + return !el.getElementsByTagName( "*" ).length; + } ); // Support: IE<9 support.getElementsByClassName = rnative.test( document.getElementsByClassName ); @@ -1122,38 +1197,38 @@ setDocument = Sizzle.setDocument = function( node ) { // Check if getElementById returns elements by name // The broken getElementById methods don't pick 
up programmatically-set names, // so use a roundabout getElementsByName test - support.getById = assert(function( el ) { + support.getById = assert( function( el ) { docElem.appendChild( el ).id = expando; return !document.getElementsByName || !document.getElementsByName( expando ).length; - }); + } ); // ID filter and find if ( support.getById ) { - Expr.filter["ID"] = function( id ) { + Expr.filter[ "ID" ] = function( id ) { var attrId = id.replace( runescape, funescape ); return function( elem ) { - return elem.getAttribute("id") === attrId; + return elem.getAttribute( "id" ) === attrId; }; }; - Expr.find["ID"] = function( id, context ) { + Expr.find[ "ID" ] = function( id, context ) { if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { var elem = context.getElementById( id ); return elem ? [ elem ] : []; } }; } else { - Expr.filter["ID"] = function( id ) { + Expr.filter[ "ID" ] = function( id ) { var attrId = id.replace( runescape, funescape ); return function( elem ) { var node = typeof elem.getAttributeNode !== "undefined" && - elem.getAttributeNode("id"); + elem.getAttributeNode( "id" ); return node && node.value === attrId; }; }; // Support: IE 6 - 7 only // getElementById is not reliable as a find shortcut - Expr.find["ID"] = function( id, context ) { + Expr.find[ "ID" ] = function( id, context ) { if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { var node, i, elems, elem = context.getElementById( id ); @@ -1161,7 +1236,7 @@ setDocument = Sizzle.setDocument = function( node ) { if ( elem ) { // Verify the id attribute - node = elem.getAttributeNode("id"); + node = elem.getAttributeNode( "id" ); if ( node && node.value === id ) { return [ elem ]; } @@ -1169,8 +1244,8 @@ setDocument = Sizzle.setDocument = function( node ) { // Fall back on getElementsByName elems = context.getElementsByName( id ); i = 0; - while ( (elem = elems[i++]) ) { - node = elem.getAttributeNode("id"); + while ( ( elem = elems[ i++ ] ) ) { + 
node = elem.getAttributeNode( "id" ); if ( node && node.value === id ) { return [ elem ]; } @@ -1183,7 +1258,7 @@ setDocument = Sizzle.setDocument = function( node ) { } // Tag - Expr.find["TAG"] = support.getElementsByTagName ? + Expr.find[ "TAG" ] = support.getElementsByTagName ? function( tag, context ) { if ( typeof context.getElementsByTagName !== "undefined" ) { return context.getElementsByTagName( tag ); @@ -1198,12 +1273,13 @@ setDocument = Sizzle.setDocument = function( node ) { var elem, tmp = [], i = 0, + // By happy coincidence, a (broken) gEBTN appears on DocumentFragment nodes too results = context.getElementsByTagName( tag ); // Filter out possible comments if ( tag === "*" ) { - while ( (elem = results[i++]) ) { + while ( ( elem = results[ i++ ] ) ) { if ( elem.nodeType === 1 ) { tmp.push( elem ); } @@ -1215,7 +1291,7 @@ setDocument = Sizzle.setDocument = function( node ) { }; // Class - Expr.find["CLASS"] = support.getElementsByClassName && function( className, context ) { + Expr.find[ "CLASS" ] = support.getElementsByClassName && function( className, context ) { if ( typeof context.getElementsByClassName !== "undefined" && documentIsHTML ) { return context.getElementsByClassName( className ); } @@ -1236,10 +1312,14 @@ setDocument = Sizzle.setDocument = function( node ) { // See https://bugs.jquery.com/ticket/13378 rbuggyQSA = []; - if ( (support.qsa = rnative.test( document.querySelectorAll )) ) { + if ( ( support.qsa = rnative.test( document.querySelectorAll ) ) ) { + // Build QSA regex // Regex strategy adopted from Diego Perini - assert(function( el ) { + assert( function( el ) { + + var input; + // Select is set to empty string on purpose // This is to test IE's treatment of not explicitly // setting a boolean content attribute, @@ -1253,78 +1333,98 @@ setDocument = Sizzle.setDocument = function( node ) { // Nothing should be selected when empty strings follow ^= or $= or *= // The test attribute must be unknown in Opera but "safe" for WinRT 
// https://msdn.microsoft.com/en-us/library/ie/hh465388.aspx#attribute_section - if ( el.querySelectorAll("[msallowcapture^='']").length ) { + if ( el.querySelectorAll( "[msallowcapture^='']" ).length ) { rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:''|\"\")" ); } // Support: IE8 // Boolean attributes and "value" are not treated correctly - if ( !el.querySelectorAll("[selected]").length ) { + if ( !el.querySelectorAll( "[selected]" ).length ) { rbuggyQSA.push( "\\[" + whitespace + "*(?:value|" + booleans + ")" ); } // Support: Chrome<29, Android<4.4, Safari<7.0+, iOS<7.0+, PhantomJS<1.9.8+ if ( !el.querySelectorAll( "[id~=" + expando + "-]" ).length ) { - rbuggyQSA.push("~="); + rbuggyQSA.push( "~=" ); + } + + // Support: IE 11+, Edge 15 - 18+ + // IE 11/Edge don't find elements on a `[name='']` query in some cases. + // Adding a temporary attribute to the document before the selection works + // around the issue. + // Interestingly, IE 10 & older don't seem to have the issue. + input = document.createElement( "input" ); + input.setAttribute( "name", "" ); + el.appendChild( input ); + if ( !el.querySelectorAll( "[name='']" ).length ) { + rbuggyQSA.push( "\\[" + whitespace + "*name" + whitespace + "*=" + + whitespace + "*(?:''|\"\")" ); } // Webkit/Opera - :checked should return selected option elements // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked // IE8 throws error here and will not see later tests - if ( !el.querySelectorAll(":checked").length ) { - rbuggyQSA.push(":checked"); + if ( !el.querySelectorAll( ":checked" ).length ) { + rbuggyQSA.push( ":checked" ); } // Support: Safari 8+, iOS 8+ // https://bugs.webkit.org/show_bug.cgi?id=136851 // In-page `selector#id sibling-combinator selector` fails if ( !el.querySelectorAll( "a#" + expando + "+*" ).length ) { - rbuggyQSA.push(".#.+[+~]"); + rbuggyQSA.push( ".#.+[+~]" ); } - }); - assert(function( el ) { + // Support: Firefox <=3.6 - 5 only + // Old Firefox doesn't throw on a badly-escaped 
identifier. + el.querySelectorAll( "\\\f" ); + rbuggyQSA.push( "[\\r\\n\\f]" ); + } ); + + assert( function( el ) { el.innerHTML = "" + ""; // Support: Windows 8 Native Apps // The type and name attributes are restricted during .innerHTML assignment - var input = document.createElement("input"); + var input = document.createElement( "input" ); input.setAttribute( "type", "hidden" ); el.appendChild( input ).setAttribute( "name", "D" ); // Support: IE8 // Enforce case-sensitivity of name attribute - if ( el.querySelectorAll("[name=d]").length ) { + if ( el.querySelectorAll( "[name=d]" ).length ) { rbuggyQSA.push( "name" + whitespace + "*[*^$|!~]?=" ); } // FF 3.5 - :enabled/:disabled and hidden elements (hidden elements are still enabled) // IE8 throws error here and will not see later tests - if ( el.querySelectorAll(":enabled").length !== 2 ) { + if ( el.querySelectorAll( ":enabled" ).length !== 2 ) { rbuggyQSA.push( ":enabled", ":disabled" ); } // Support: IE9-11+ // IE's :disabled selector does not pick up the children of disabled fieldsets docElem.appendChild( el ).disabled = true; - if ( el.querySelectorAll(":disabled").length !== 2 ) { + if ( el.querySelectorAll( ":disabled" ).length !== 2 ) { rbuggyQSA.push( ":enabled", ":disabled" ); } + // Support: Opera 10 - 11 only // Opera 10-11 does not throw on post-comma invalid pseudos - el.querySelectorAll("*,:x"); - rbuggyQSA.push(",.*:"); - }); + el.querySelectorAll( "*,:x" ); + rbuggyQSA.push( ",.*:" ); + } ); } - if ( (support.matchesSelector = rnative.test( (matches = docElem.matches || + if ( ( support.matchesSelector = rnative.test( ( matches = docElem.matches || docElem.webkitMatchesSelector || docElem.mozMatchesSelector || docElem.oMatchesSelector || - docElem.msMatchesSelector) )) ) { + docElem.msMatchesSelector ) ) ) ) { + + assert( function( el ) { - assert(function( el ) { // Check to see if it's possible to do matchesSelector // on a disconnected node (IE 9) support.disconnectedMatch = matches.call( 
el, "*" ); @@ -1333,11 +1433,11 @@ setDocument = Sizzle.setDocument = function( node ) { // Gecko does not error, returns false instead matches.call( el, "[s!='']:x" ); rbuggyMatches.push( "!=", pseudos ); - }); + } ); } - rbuggyQSA = rbuggyQSA.length && new RegExp( rbuggyQSA.join("|") ); - rbuggyMatches = rbuggyMatches.length && new RegExp( rbuggyMatches.join("|") ); + rbuggyQSA = rbuggyQSA.length && new RegExp( rbuggyQSA.join( "|" ) ); + rbuggyMatches = rbuggyMatches.length && new RegExp( rbuggyMatches.join( "|" ) ); /* Contains ---------------------------------------------------------------------- */ @@ -1354,11 +1454,11 @@ setDocument = Sizzle.setDocument = function( node ) { adown.contains ? adown.contains( bup ) : a.compareDocumentPosition && a.compareDocumentPosition( bup ) & 16 - )); + ) ); } : function( a, b ) { if ( b ) { - while ( (b = b.parentNode) ) { + while ( ( b = b.parentNode ) ) { if ( b === a ) { return true; } @@ -1387,7 +1487,11 @@ setDocument = Sizzle.setDocument = function( node ) { } // Calculate position if both inputs belong to the same document - compare = ( a.ownerDocument || a ) === ( b.ownerDocument || b ) ? + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + compare = ( a.ownerDocument || a ) == ( b.ownerDocument || b ) ? 
a.compareDocumentPosition( b ) : // Otherwise we know they are disconnected @@ -1395,13 +1499,24 @@ setDocument = Sizzle.setDocument = function( node ) { // Disconnected nodes if ( compare & 1 || - (!support.sortDetached && b.compareDocumentPosition( a ) === compare) ) { + ( !support.sortDetached && b.compareDocumentPosition( a ) === compare ) ) { // Choose the first element that is related to our preferred document - if ( a === document || a.ownerDocument === preferredDoc && contains(preferredDoc, a) ) { + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( a == document || a.ownerDocument == preferredDoc && + contains( preferredDoc, a ) ) { return -1; } - if ( b === document || b.ownerDocument === preferredDoc && contains(preferredDoc, b) ) { + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( b == document || b.ownerDocument == preferredDoc && + contains( preferredDoc, b ) ) { return 1; } @@ -1414,6 +1529,7 @@ setDocument = Sizzle.setDocument = function( node ) { return compare & 4 ? -1 : 1; } : function( a, b ) { + // Exit early if the nodes are identical if ( a === b ) { hasDuplicate = true; @@ -1429,8 +1545,14 @@ setDocument = Sizzle.setDocument = function( node ) { // Parentless nodes are either documents or disconnected if ( !aup || !bup ) { - return a === document ? -1 : - b === document ? 1 : + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + /* eslint-disable eqeqeq */ + return a == document ? -1 : + b == document ? 1 : + /* eslint-enable eqeqeq */ aup ? -1 : bup ? 1 : sortInput ? 
@@ -1444,26 +1566,32 @@ setDocument = Sizzle.setDocument = function( node ) { // Otherwise we need full lists of their ancestors for comparison cur = a; - while ( (cur = cur.parentNode) ) { + while ( ( cur = cur.parentNode ) ) { ap.unshift( cur ); } cur = b; - while ( (cur = cur.parentNode) ) { + while ( ( cur = cur.parentNode ) ) { bp.unshift( cur ); } // Walk down the tree looking for a discrepancy - while ( ap[i] === bp[i] ) { + while ( ap[ i ] === bp[ i ] ) { i++; } return i ? + // Do a sibling check if the nodes have a common ancestor - siblingCheck( ap[i], bp[i] ) : + siblingCheck( ap[ i ], bp[ i ] ) : // Otherwise nodes in our document sort first - ap[i] === preferredDoc ? -1 : - bp[i] === preferredDoc ? 1 : + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + /* eslint-disable eqeqeq */ + ap[ i ] == preferredDoc ? -1 : + bp[ i ] == preferredDoc ? 1 : + /* eslint-enable eqeqeq */ 0; }; @@ -1475,16 +1603,10 @@ Sizzle.matches = function( expr, elements ) { }; Sizzle.matchesSelector = function( elem, expr ) { - // Set document vars if needed - if ( ( elem.ownerDocument || elem ) !== document ) { - setDocument( elem ); - } - - // Make sure that attribute selectors are quoted - expr = expr.replace( rattributeQuotes, "='$1']" ); + setDocument( elem ); if ( support.matchesSelector && documentIsHTML && - !compilerCache[ expr + " " ] && + !nonnativeSelectorCache[ expr + " " ] && ( !rbuggyMatches || !rbuggyMatches.test( expr ) ) && ( !rbuggyQSA || !rbuggyQSA.test( expr ) ) ) { @@ -1493,32 +1615,46 @@ Sizzle.matchesSelector = function( elem, expr ) { // IE 9's matchesSelector returns false on disconnected nodes if ( ret || support.disconnectedMatch || - // As well, disconnected nodes are said to be in a document - // fragment in IE 9 - elem.document && elem.document.nodeType !== 11 ) { + + // As well, disconnected nodes are said to be in a document + // 
fragment in IE 9 + elem.document && elem.document.nodeType !== 11 ) { return ret; } - } catch (e) {} + } catch ( e ) { + nonnativeSelectorCache( expr, true ); + } } return Sizzle( expr, document, null, [ elem ] ).length > 0; }; Sizzle.contains = function( context, elem ) { + // Set document vars if needed - if ( ( context.ownerDocument || context ) !== document ) { + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( ( context.ownerDocument || context ) != document ) { setDocument( context ); } return contains( context, elem ); }; Sizzle.attr = function( elem, name ) { + // Set document vars if needed - if ( ( elem.ownerDocument || elem ) !== document ) { + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( ( elem.ownerDocument || elem ) != document ) { setDocument( elem ); } var fn = Expr.attrHandle[ name.toLowerCase() ], + // Don't get fooled by Object.prototype properties (jQuery #13807) val = fn && hasOwn.call( Expr.attrHandle, name.toLowerCase() ) ? fn( elem, name, !documentIsHTML ) : @@ -1528,13 +1664,13 @@ Sizzle.attr = function( elem, name ) { val : support.attributes || !documentIsHTML ? elem.getAttribute( name ) : - (val = elem.getAttributeNode(name)) && val.specified ? + ( val = elem.getAttributeNode( name ) ) && val.specified ? 
val.value : null; }; Sizzle.escape = function( sel ) { - return (sel + "").replace( rcssescape, fcssescape ); + return ( sel + "" ).replace( rcssescape, fcssescape ); }; Sizzle.error = function( msg ) { @@ -1557,7 +1693,7 @@ Sizzle.uniqueSort = function( results ) { results.sort( sortOrder ); if ( hasDuplicate ) { - while ( (elem = results[i++]) ) { + while ( ( elem = results[ i++ ] ) ) { if ( elem === results[ i ] ) { j = duplicates.push( i ); } @@ -1585,17 +1721,21 @@ getText = Sizzle.getText = function( elem ) { nodeType = elem.nodeType; if ( !nodeType ) { + // If no nodeType, this is expected to be an array - while ( (node = elem[i++]) ) { + while ( ( node = elem[ i++ ] ) ) { + // Do not traverse comment nodes ret += getText( node ); } } else if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) { + // Use textContent for elements // innerText usage removed for consistency of new lines (jQuery #11153) if ( typeof elem.textContent === "string" ) { return elem.textContent; } else { + // Traverse its children for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { ret += getText( elem ); @@ -1604,6 +1744,7 @@ getText = Sizzle.getText = function( elem ) { } else if ( nodeType === 3 || nodeType === 4 ) { return elem.nodeValue; } + // Do not include comment or processing instruction nodes return ret; @@ -1631,19 +1772,21 @@ Expr = Sizzle.selectors = { preFilter: { "ATTR": function( match ) { - match[1] = match[1].replace( runescape, funescape ); + match[ 1 ] = match[ 1 ].replace( runescape, funescape ); // Move the given value to match[3] whether quoted or unquoted - match[3] = ( match[3] || match[4] || match[5] || "" ).replace( runescape, funescape ); + match[ 3 ] = ( match[ 3 ] || match[ 4 ] || + match[ 5 ] || "" ).replace( runescape, funescape ); - if ( match[2] === "~=" ) { - match[3] = " " + match[3] + " "; + if ( match[ 2 ] === "~=" ) { + match[ 3 ] = " " + match[ 3 ] + " "; } return match.slice( 0, 4 ); }, "CHILD": function( match ) { + /* matches 
from matchExpr["CHILD"] 1 type (only|nth|...) 2 what (child|of-type) @@ -1654,22 +1797,25 @@ Expr = Sizzle.selectors = { 7 sign of y-component 8 y of y-component */ - match[1] = match[1].toLowerCase(); + match[ 1 ] = match[ 1 ].toLowerCase(); + + if ( match[ 1 ].slice( 0, 3 ) === "nth" ) { - if ( match[1].slice( 0, 3 ) === "nth" ) { // nth-* requires argument - if ( !match[3] ) { - Sizzle.error( match[0] ); + if ( !match[ 3 ] ) { + Sizzle.error( match[ 0 ] ); } // numeric x and y parameters for Expr.filter.CHILD // remember that false/true cast respectively to 0/1 - match[4] = +( match[4] ? match[5] + (match[6] || 1) : 2 * ( match[3] === "even" || match[3] === "odd" ) ); - match[5] = +( ( match[7] + match[8] ) || match[3] === "odd" ); + match[ 4 ] = +( match[ 4 ] ? + match[ 5 ] + ( match[ 6 ] || 1 ) : + 2 * ( match[ 3 ] === "even" || match[ 3 ] === "odd" ) ); + match[ 5 ] = +( ( match[ 7 ] + match[ 8 ] ) || match[ 3 ] === "odd" ); - // other types prohibit arguments - } else if ( match[3] ) { - Sizzle.error( match[0] ); + // other types prohibit arguments + } else if ( match[ 3 ] ) { + Sizzle.error( match[ 0 ] ); } return match; @@ -1677,26 +1823,28 @@ Expr = Sizzle.selectors = { "PSEUDO": function( match ) { var excess, - unquoted = !match[6] && match[2]; + unquoted = !match[ 6 ] && match[ 2 ]; - if ( matchExpr["CHILD"].test( match[0] ) ) { + if ( matchExpr[ "CHILD" ].test( match[ 0 ] ) ) { return null; } // Accept quoted arguments as-is - if ( match[3] ) { - match[2] = match[4] || match[5] || ""; + if ( match[ 3 ] ) { + match[ 2 ] = match[ 4 ] || match[ 5 ] || ""; // Strip excess characters from unquoted arguments } else if ( unquoted && rpseudo.test( unquoted ) && + // Get excess from tokenize (recursively) - (excess = tokenize( unquoted, true )) && + ( excess = tokenize( unquoted, true ) ) && + // advance to the next closing parenthesis - (excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length) ) { + ( excess = unquoted.indexOf( ")", 
unquoted.length - excess ) - unquoted.length ) ) { // excess is a negative index - match[0] = match[0].slice( 0, excess ); - match[2] = unquoted.slice( 0, excess ); + match[ 0 ] = match[ 0 ].slice( 0, excess ); + match[ 2 ] = unquoted.slice( 0, excess ); } // Return only captures needed by the pseudo filter method (type and argument) @@ -1709,7 +1857,9 @@ Expr = Sizzle.selectors = { "TAG": function( nodeNameSelector ) { var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase(); return nodeNameSelector === "*" ? - function() { return true; } : + function() { + return true; + } : function( elem ) { return elem.nodeName && elem.nodeName.toLowerCase() === nodeName; }; @@ -1719,10 +1869,16 @@ Expr = Sizzle.selectors = { var pattern = classCache[ className + " " ]; return pattern || - (pattern = new RegExp( "(^|" + whitespace + ")" + className + "(" + whitespace + "|$)" )) && - classCache( className, function( elem ) { - return pattern.test( typeof elem.className === "string" && elem.className || typeof elem.getAttribute !== "undefined" && elem.getAttribute("class") || "" ); - }); + ( pattern = new RegExp( "(^|" + whitespace + + ")" + className + "(" + whitespace + "|$)" ) ) && classCache( + className, function( elem ) { + return pattern.test( + typeof elem.className === "string" && elem.className || + typeof elem.getAttribute !== "undefined" && + elem.getAttribute( "class" ) || + "" + ); + } ); }, "ATTR": function( name, operator, check ) { @@ -1738,6 +1894,8 @@ Expr = Sizzle.selectors = { result += ""; + /* eslint-disable max-len */ + return operator === "=" ? result === check : operator === "!=" ? result !== check : operator === "^=" ? check && result.indexOf( check ) === 0 : @@ -1746,10 +1904,12 @@ Expr = Sizzle.selectors = { operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 : operator === "|=" ? 
result === check || result.slice( 0, check.length + 1 ) === check + "-" : false; + /* eslint-enable max-len */ + }; }, - "CHILD": function( type, what, argument, first, last ) { + "CHILD": function( type, what, _argument, first, last ) { var simple = type.slice( 0, 3 ) !== "nth", forward = type.slice( -4 ) !== "last", ofType = what === "of-type"; @@ -1761,7 +1921,7 @@ Expr = Sizzle.selectors = { return !!elem.parentNode; } : - function( elem, context, xml ) { + function( elem, _context, xml ) { var cache, uniqueCache, outerCache, node, nodeIndex, start, dir = simple !== forward ? "nextSibling" : "previousSibling", parent = elem.parentNode, @@ -1775,7 +1935,7 @@ Expr = Sizzle.selectors = { if ( simple ) { while ( dir ) { node = elem; - while ( (node = node[ dir ]) ) { + while ( ( node = node[ dir ] ) ) { if ( ofType ? node.nodeName.toLowerCase() === name : node.nodeType === 1 ) { @@ -1783,6 +1943,7 @@ Expr = Sizzle.selectors = { return false; } } + // Reverse direction for :only-* (if we haven't yet done so) start = dir = type === "only" && !start && "nextSibling"; } @@ -1798,22 +1959,22 @@ Expr = Sizzle.selectors = { // ...in a gzip-friendly way node = parent; - outerCache = node[ expando ] || (node[ expando ] = {}); + outerCache = node[ expando ] || ( node[ expando ] = {} ); // Support: IE <9 only // Defend against cloned attroperties (jQuery gh-1709) uniqueCache = outerCache[ node.uniqueID ] || - (outerCache[ node.uniqueID ] = {}); + ( outerCache[ node.uniqueID ] = {} ); cache = uniqueCache[ type ] || []; nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ]; diff = nodeIndex && cache[ 2 ]; node = nodeIndex && parent.childNodes[ nodeIndex ]; - while ( (node = ++nodeIndex && node && node[ dir ] || + while ( ( node = ++nodeIndex && node && node[ dir ] || // Fallback to seeking `elem` from the start - (diff = nodeIndex = 0) || start.pop()) ) { + ( diff = nodeIndex = 0 ) || start.pop() ) ) { // When found, cache indexes on `parent` and break if ( node.nodeType === 1 && 
++diff && node === elem ) { @@ -1823,16 +1984,18 @@ Expr = Sizzle.selectors = { } } else { + // Use previously-cached element index if available if ( useCache ) { + // ...in a gzip-friendly way node = elem; - outerCache = node[ expando ] || (node[ expando ] = {}); + outerCache = node[ expando ] || ( node[ expando ] = {} ); // Support: IE <9 only // Defend against cloned attroperties (jQuery gh-1709) uniqueCache = outerCache[ node.uniqueID ] || - (outerCache[ node.uniqueID ] = {}); + ( outerCache[ node.uniqueID ] = {} ); cache = uniqueCache[ type ] || []; nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ]; @@ -1842,9 +2005,10 @@ Expr = Sizzle.selectors = { // xml :nth-child(...) // or :nth-last-child(...) or :nth(-last)?-of-type(...) if ( diff === false ) { + // Use the same loop as above to seek `elem` from the start - while ( (node = ++nodeIndex && node && node[ dir ] || - (diff = nodeIndex = 0) || start.pop()) ) { + while ( ( node = ++nodeIndex && node && node[ dir ] || + ( diff = nodeIndex = 0 ) || start.pop() ) ) { if ( ( ofType ? 
node.nodeName.toLowerCase() === name : @@ -1853,12 +2017,13 @@ Expr = Sizzle.selectors = { // Cache the index of each encountered element if ( useCache ) { - outerCache = node[ expando ] || (node[ expando ] = {}); + outerCache = node[ expando ] || + ( node[ expando ] = {} ); // Support: IE <9 only // Defend against cloned attroperties (jQuery gh-1709) uniqueCache = outerCache[ node.uniqueID ] || - (outerCache[ node.uniqueID ] = {}); + ( outerCache[ node.uniqueID ] = {} ); uniqueCache[ type ] = [ dirruns, diff ]; } @@ -1879,6 +2044,7 @@ Expr = Sizzle.selectors = { }, "PSEUDO": function( pseudo, argument ) { + // pseudo-class names are case-insensitive // http://www.w3.org/TR/selectors/#pseudo-classes // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters @@ -1898,15 +2064,15 @@ Expr = Sizzle.selectors = { if ( fn.length > 1 ) { args = [ pseudo, pseudo, "", argument ]; return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ? - markFunction(function( seed, matches ) { + markFunction( function( seed, matches ) { var idx, matched = fn( seed, argument ), i = matched.length; while ( i-- ) { - idx = indexOf( seed, matched[i] ); - seed[ idx ] = !( matches[ idx ] = matched[i] ); + idx = indexOf( seed, matched[ i ] ); + seed[ idx ] = !( matches[ idx ] = matched[ i ] ); } - }) : + } ) : function( elem ) { return fn( elem, 0, args ); }; @@ -1917,8 +2083,10 @@ Expr = Sizzle.selectors = { }, pseudos: { + // Potentially complex pseudos - "not": markFunction(function( selector ) { + "not": markFunction( function( selector ) { + // Trim the selector passed to compile // to avoid treating leading and trailing // spaces as combinators @@ -1927,39 +2095,40 @@ Expr = Sizzle.selectors = { matcher = compile( selector.replace( rtrim, "$1" ) ); return matcher[ expando ] ? 
- markFunction(function( seed, matches, context, xml ) { + markFunction( function( seed, matches, _context, xml ) { var elem, unmatched = matcher( seed, null, xml, [] ), i = seed.length; // Match elements unmatched by `matcher` while ( i-- ) { - if ( (elem = unmatched[i]) ) { - seed[i] = !(matches[i] = elem); + if ( ( elem = unmatched[ i ] ) ) { + seed[ i ] = !( matches[ i ] = elem ); } } - }) : - function( elem, context, xml ) { - input[0] = elem; + } ) : + function( elem, _context, xml ) { + input[ 0 ] = elem; matcher( input, null, xml, results ); + // Don't keep the element (issue #299) - input[0] = null; + input[ 0 ] = null; return !results.pop(); }; - }), + } ), - "has": markFunction(function( selector ) { + "has": markFunction( function( selector ) { return function( elem ) { return Sizzle( selector, elem ).length > 0; }; - }), + } ), - "contains": markFunction(function( text ) { + "contains": markFunction( function( text ) { text = text.replace( runescape, funescape ); return function( elem ) { - return ( elem.textContent || elem.innerText || getText( elem ) ).indexOf( text ) > -1; + return ( elem.textContent || getText( elem ) ).indexOf( text ) > -1; }; - }), + } ), // "Whether an element is represented by a :lang() selector // is based solely on the element's language value @@ -1969,25 +2138,26 @@ Expr = Sizzle.selectors = { // The identifier C does not have to be a valid language name." // http://www.w3.org/TR/selectors/#lang-pseudo "lang": markFunction( function( lang ) { + // lang value must be a valid identifier - if ( !ridentifier.test(lang || "") ) { + if ( !ridentifier.test( lang || "" ) ) { Sizzle.error( "unsupported lang: " + lang ); } lang = lang.replace( runescape, funescape ).toLowerCase(); return function( elem ) { var elemLang; do { - if ( (elemLang = documentIsHTML ? + if ( ( elemLang = documentIsHTML ? 
elem.lang : - elem.getAttribute("xml:lang") || elem.getAttribute("lang")) ) { + elem.getAttribute( "xml:lang" ) || elem.getAttribute( "lang" ) ) ) { elemLang = elemLang.toLowerCase(); return elemLang === lang || elemLang.indexOf( lang + "-" ) === 0; } - } while ( (elem = elem.parentNode) && elem.nodeType === 1 ); + } while ( ( elem = elem.parentNode ) && elem.nodeType === 1 ); return false; }; - }), + } ), // Miscellaneous "target": function( elem ) { @@ -2000,7 +2170,9 @@ Expr = Sizzle.selectors = { }, "focus": function( elem ) { - return elem === document.activeElement && (!document.hasFocus || document.hasFocus()) && !!(elem.type || elem.href || ~elem.tabIndex); + return elem === document.activeElement && + ( !document.hasFocus || document.hasFocus() ) && + !!( elem.type || elem.href || ~elem.tabIndex ); }, // Boolean properties @@ -2008,16 +2180,20 @@ Expr = Sizzle.selectors = { "disabled": createDisabledPseudo( true ), "checked": function( elem ) { + // In CSS3, :checked should return both checked and selected elements // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked var nodeName = elem.nodeName.toLowerCase(); - return (nodeName === "input" && !!elem.checked) || (nodeName === "option" && !!elem.selected); + return ( nodeName === "input" && !!elem.checked ) || + ( nodeName === "option" && !!elem.selected ); }, "selected": function( elem ) { + // Accessing this property makes selected-by-default // options in Safari work properly if ( elem.parentNode ) { + // eslint-disable-next-line no-unused-expressions elem.parentNode.selectedIndex; } @@ -2026,6 +2202,7 @@ Expr = Sizzle.selectors = { // Contents "empty": function( elem ) { + // http://www.w3.org/TR/selectors/#empty-pseudo // :empty is negated by element (1) or content nodes (text: 3; cdata: 4; entity ref: 5), // but not by others (comment: 8; processing instruction: 7; etc.) 
@@ -2039,7 +2216,7 @@ Expr = Sizzle.selectors = { }, "parent": function( elem ) { - return !Expr.pseudos["empty"]( elem ); + return !Expr.pseudos[ "empty" ]( elem ); }, // Element/input types @@ -2063,57 +2240,62 @@ Expr = Sizzle.selectors = { // Support: IE<8 // New HTML5 attribute values (e.g., "search") appear with elem.type === "text" - ( (attr = elem.getAttribute("type")) == null || attr.toLowerCase() === "text" ); + ( ( attr = elem.getAttribute( "type" ) ) == null || + attr.toLowerCase() === "text" ); }, // Position-in-collection - "first": createPositionalPseudo(function() { + "first": createPositionalPseudo( function() { return [ 0 ]; - }), + } ), - "last": createPositionalPseudo(function( matchIndexes, length ) { + "last": createPositionalPseudo( function( _matchIndexes, length ) { return [ length - 1 ]; - }), + } ), - "eq": createPositionalPseudo(function( matchIndexes, length, argument ) { + "eq": createPositionalPseudo( function( _matchIndexes, length, argument ) { return [ argument < 0 ? argument + length : argument ]; - }), + } ), - "even": createPositionalPseudo(function( matchIndexes, length ) { + "even": createPositionalPseudo( function( matchIndexes, length ) { var i = 0; for ( ; i < length; i += 2 ) { matchIndexes.push( i ); } return matchIndexes; - }), + } ), - "odd": createPositionalPseudo(function( matchIndexes, length ) { + "odd": createPositionalPseudo( function( matchIndexes, length ) { var i = 1; for ( ; i < length; i += 2 ) { matchIndexes.push( i ); } return matchIndexes; - }), + } ), - "lt": createPositionalPseudo(function( matchIndexes, length, argument ) { - var i = argument < 0 ? argument + length : argument; + "lt": createPositionalPseudo( function( matchIndexes, length, argument ) { + var i = argument < 0 ? + argument + length : + argument > length ? 
+ length : + argument; for ( ; --i >= 0; ) { matchIndexes.push( i ); } return matchIndexes; - }), + } ), - "gt": createPositionalPseudo(function( matchIndexes, length, argument ) { + "gt": createPositionalPseudo( function( matchIndexes, length, argument ) { var i = argument < 0 ? argument + length : argument; for ( ; ++i < length; ) { matchIndexes.push( i ); } return matchIndexes; - }) + } ) } }; -Expr.pseudos["nth"] = Expr.pseudos["eq"]; +Expr.pseudos[ "nth" ] = Expr.pseudos[ "eq" ]; // Add button/input type pseudos for ( i in { radio: true, checkbox: true, file: true, password: true, image: true } ) { @@ -2144,37 +2326,39 @@ tokenize = Sizzle.tokenize = function( selector, parseOnly ) { while ( soFar ) { // Comma and first run - if ( !matched || (match = rcomma.exec( soFar )) ) { + if ( !matched || ( match = rcomma.exec( soFar ) ) ) { if ( match ) { + // Don't consume trailing commas as valid - soFar = soFar.slice( match[0].length ) || soFar; + soFar = soFar.slice( match[ 0 ].length ) || soFar; } - groups.push( (tokens = []) ); + groups.push( ( tokens = [] ) ); } matched = false; // Combinators - if ( (match = rcombinators.exec( soFar )) ) { + if ( ( match = rcombinators.exec( soFar ) ) ) { matched = match.shift(); - tokens.push({ + tokens.push( { value: matched, + // Cast descendant combinators to space - type: match[0].replace( rtrim, " " ) - }); + type: match[ 0 ].replace( rtrim, " " ) + } ); soFar = soFar.slice( matched.length ); } // Filters for ( type in Expr.filter ) { - if ( (match = matchExpr[ type ].exec( soFar )) && (!preFilters[ type ] || - (match = preFilters[ type ]( match ))) ) { + if ( ( match = matchExpr[ type ].exec( soFar ) ) && ( !preFilters[ type ] || + ( match = preFilters[ type ]( match ) ) ) ) { matched = match.shift(); - tokens.push({ + tokens.push( { value: matched, type: type, matches: match - }); + } ); soFar = soFar.slice( matched.length ); } } @@ -2191,6 +2375,7 @@ tokenize = Sizzle.tokenize = function( selector, parseOnly ) { 
soFar.length : soFar ? Sizzle.error( selector ) : + // Cache the tokens tokenCache( selector, groups ).slice( 0 ); }; @@ -2200,7 +2385,7 @@ function toSelector( tokens ) { len = tokens.length, selector = ""; for ( ; i < len; i++ ) { - selector += tokens[i].value; + selector += tokens[ i ].value; } return selector; } @@ -2213,9 +2398,10 @@ function addCombinator( matcher, combinator, base ) { doneName = done++; return combinator.first ? + // Check against closest ancestor/preceding element function( elem, context, xml ) { - while ( (elem = elem[ dir ]) ) { + while ( ( elem = elem[ dir ] ) ) { if ( elem.nodeType === 1 || checkNonElements ) { return matcher( elem, context, xml ); } @@ -2230,7 +2416,7 @@ function addCombinator( matcher, combinator, base ) { // We can't set arbitrary data on XML nodes, so they don't benefit from combinator caching if ( xml ) { - while ( (elem = elem[ dir ]) ) { + while ( ( elem = elem[ dir ] ) ) { if ( elem.nodeType === 1 || checkNonElements ) { if ( matcher( elem, context, xml ) ) { return true; @@ -2238,27 +2424,29 @@ function addCombinator( matcher, combinator, base ) { } } } else { - while ( (elem = elem[ dir ]) ) { + while ( ( elem = elem[ dir ] ) ) { if ( elem.nodeType === 1 || checkNonElements ) { - outerCache = elem[ expando ] || (elem[ expando ] = {}); + outerCache = elem[ expando ] || ( elem[ expando ] = {} ); // Support: IE <9 only // Defend against cloned attroperties (jQuery gh-1709) - uniqueCache = outerCache[ elem.uniqueID ] || (outerCache[ elem.uniqueID ] = {}); + uniqueCache = outerCache[ elem.uniqueID ] || + ( outerCache[ elem.uniqueID ] = {} ); if ( skip && skip === elem.nodeName.toLowerCase() ) { elem = elem[ dir ] || elem; - } else if ( (oldCache = uniqueCache[ key ]) && + } else if ( ( oldCache = uniqueCache[ key ] ) && oldCache[ 0 ] === dirruns && oldCache[ 1 ] === doneName ) { // Assign to newCache so results back-propagate to previous elements - return (newCache[ 2 ] = oldCache[ 2 ]); + return ( newCache[ 2 ] = 
oldCache[ 2 ] ); } else { + // Reuse newcache so results back-propagate to previous elements uniqueCache[ key ] = newCache; // A match means we're done; a fail means we have to keep checking - if ( (newCache[ 2 ] = matcher( elem, context, xml )) ) { + if ( ( newCache[ 2 ] = matcher( elem, context, xml ) ) ) { return true; } } @@ -2274,20 +2462,20 @@ function elementMatcher( matchers ) { function( elem, context, xml ) { var i = matchers.length; while ( i-- ) { - if ( !matchers[i]( elem, context, xml ) ) { + if ( !matchers[ i ]( elem, context, xml ) ) { return false; } } return true; } : - matchers[0]; + matchers[ 0 ]; } function multipleContexts( selector, contexts, results ) { var i = 0, len = contexts.length; for ( ; i < len; i++ ) { - Sizzle( selector, contexts[i], results ); + Sizzle( selector, contexts[ i ], results ); } return results; } @@ -2300,7 +2488,7 @@ function condense( unmatched, map, filter, context, xml ) { mapped = map != null; for ( ; i < len; i++ ) { - if ( (elem = unmatched[i]) ) { + if ( ( elem = unmatched[ i ] ) ) { if ( !filter || filter( elem, context, xml ) ) { newUnmatched.push( elem ); if ( mapped ) { @@ -2320,14 +2508,18 @@ function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postS if ( postFinder && !postFinder[ expando ] ) { postFinder = setMatcher( postFinder, postSelector ); } - return markFunction(function( seed, results, context, xml ) { + return markFunction( function( seed, results, context, xml ) { var temp, i, elem, preMap = [], postMap = [], preexisting = results.length, // Get initial elements from seed or context - elems = seed || multipleContexts( selector || "*", context.nodeType ? [ context ] : context, [] ), + elems = seed || multipleContexts( + selector || "*", + context.nodeType ? [ context ] : context, + [] + ), // Prefilter to get matcher input, preserving a map for seed-results synchronization matcherIn = preFilter && ( seed || !selector ) ? 
@@ -2335,6 +2527,7 @@ function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postS elems, matcherOut = matcher ? + // If we have a postFinder, or filtered seed, or non-seed postFilter or preexisting results, postFinder || ( seed ? preFilter : preexisting || postFilter ) ? @@ -2358,8 +2551,8 @@ function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postS // Un-match failing elements by moving them back to matcherIn i = temp.length; while ( i-- ) { - if ( (elem = temp[i]) ) { - matcherOut[ postMap[i] ] = !(matcherIn[ postMap[i] ] = elem); + if ( ( elem = temp[ i ] ) ) { + matcherOut[ postMap[ i ] ] = !( matcherIn[ postMap[ i ] ] = elem ); } } } @@ -2367,25 +2560,27 @@ function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postS if ( seed ) { if ( postFinder || preFilter ) { if ( postFinder ) { + // Get the final matcherOut by condensing this intermediate into postFinder contexts temp = []; i = matcherOut.length; while ( i-- ) { - if ( (elem = matcherOut[i]) ) { + if ( ( elem = matcherOut[ i ] ) ) { + // Restore matcherIn since elem is not yet a final match - temp.push( (matcherIn[i] = elem) ); + temp.push( ( matcherIn[ i ] = elem ) ); } } - postFinder( null, (matcherOut = []), temp, xml ); + postFinder( null, ( matcherOut = [] ), temp, xml ); } // Move matched elements from seed to results to keep them synchronized i = matcherOut.length; while ( i-- ) { - if ( (elem = matcherOut[i]) && - (temp = postFinder ? indexOf( seed, elem ) : preMap[i]) > -1 ) { + if ( ( elem = matcherOut[ i ] ) && + ( temp = postFinder ? 
indexOf( seed, elem ) : preMap[ i ] ) > -1 ) { - seed[temp] = !(results[temp] = elem); + seed[ temp ] = !( results[ temp ] = elem ); } } } @@ -2403,14 +2598,14 @@ function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postS push.apply( results, matcherOut ); } } - }); + } ); } function matcherFromTokens( tokens ) { var checkContext, matcher, j, len = tokens.length, - leadingRelative = Expr.relative[ tokens[0].type ], - implicitRelative = leadingRelative || Expr.relative[" "], + leadingRelative = Expr.relative[ tokens[ 0 ].type ], + implicitRelative = leadingRelative || Expr.relative[ " " ], i = leadingRelative ? 1 : 0, // The foundational matcher ensures that elements are reachable from top-level context(s) @@ -2422,38 +2617,43 @@ function matcherFromTokens( tokens ) { }, implicitRelative, true ), matchers = [ function( elem, context, xml ) { var ret = ( !leadingRelative && ( xml || context !== outermostContext ) ) || ( - (checkContext = context).nodeType ? + ( checkContext = context ).nodeType ? 
matchContext( elem, context, xml ) : matchAnyContext( elem, context, xml ) ); + // Avoid hanging onto element (issue #299) checkContext = null; return ret; } ]; for ( ; i < len; i++ ) { - if ( (matcher = Expr.relative[ tokens[i].type ]) ) { - matchers = [ addCombinator(elementMatcher( matchers ), matcher) ]; + if ( ( matcher = Expr.relative[ tokens[ i ].type ] ) ) { + matchers = [ addCombinator( elementMatcher( matchers ), matcher ) ]; } else { - matcher = Expr.filter[ tokens[i].type ].apply( null, tokens[i].matches ); + matcher = Expr.filter[ tokens[ i ].type ].apply( null, tokens[ i ].matches ); // Return special upon seeing a positional matcher if ( matcher[ expando ] ) { + // Find the next relative operator (if any) for proper handling j = ++i; for ( ; j < len; j++ ) { - if ( Expr.relative[ tokens[j].type ] ) { + if ( Expr.relative[ tokens[ j ].type ] ) { break; } } return setMatcher( i > 1 && elementMatcher( matchers ), i > 1 && toSelector( - // If the preceding token was a descendant combinator, insert an implicit any-element `*` - tokens.slice( 0, i - 1 ).concat({ value: tokens[ i - 2 ].type === " " ? "*" : "" }) + + // If the preceding token was a descendant combinator, insert an implicit any-element `*` + tokens + .slice( 0, i - 1 ) + .concat( { value: tokens[ i - 2 ].type === " " ? 
"*" : "" } ) ).replace( rtrim, "$1" ), matcher, i < j && matcherFromTokens( tokens.slice( i, j ) ), - j < len && matcherFromTokens( (tokens = tokens.slice( j )) ), + j < len && matcherFromTokens( ( tokens = tokens.slice( j ) ) ), j < len && toSelector( tokens ) ); } @@ -2474,28 +2674,40 @@ function matcherFromGroupMatchers( elementMatchers, setMatchers ) { unmatched = seed && [], setMatched = [], contextBackup = outermostContext, + // We must always have either seed elements or outermost context - elems = seed || byElement && Expr.find["TAG"]( "*", outermost ), + elems = seed || byElement && Expr.find[ "TAG" ]( "*", outermost ), + // Use integer dirruns iff this is the outermost matcher - dirrunsUnique = (dirruns += contextBackup == null ? 1 : Math.random() || 0.1), + dirrunsUnique = ( dirruns += contextBackup == null ? 1 : Math.random() || 0.1 ), len = elems.length; if ( outermost ) { - outermostContext = context === document || context || outermost; + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + outermostContext = context == document || context || outermost; } // Add elements passing elementMatchers directly to results // Support: IE<9, Safari // Tolerate NodeList properties (IE: "length"; Safari: ) matching elements by id - for ( ; i !== len && (elem = elems[i]) != null; i++ ) { + for ( ; i !== len && ( elem = elems[ i ] ) != null; i++ ) { if ( byElement && elem ) { j = 0; - if ( !context && elem.ownerDocument !== document ) { + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. 
+ // eslint-disable-next-line eqeqeq + if ( !context && elem.ownerDocument != document ) { setDocument( elem ); xml = !documentIsHTML; } - while ( (matcher = elementMatchers[j++]) ) { - if ( matcher( elem, context || document, xml) ) { + while ( ( matcher = elementMatchers[ j++ ] ) ) { + if ( matcher( elem, context || document, xml ) ) { results.push( elem ); break; } @@ -2507,8 +2719,9 @@ function matcherFromGroupMatchers( elementMatchers, setMatchers ) { // Track unmatched elements for set filters if ( bySet ) { + // They will have gone through all possible matchers - if ( (elem = !matcher && elem) ) { + if ( ( elem = !matcher && elem ) ) { matchedCount--; } @@ -2532,16 +2745,17 @@ function matcherFromGroupMatchers( elementMatchers, setMatchers ) { // numerically zero. if ( bySet && i !== matchedCount ) { j = 0; - while ( (matcher = setMatchers[j++]) ) { + while ( ( matcher = setMatchers[ j++ ] ) ) { matcher( unmatched, setMatched, context, xml ); } if ( seed ) { + // Reintegrate element matches to eliminate the need for sorting if ( matchedCount > 0 ) { while ( i-- ) { - if ( !(unmatched[i] || setMatched[i]) ) { - setMatched[i] = pop.call( results ); + if ( !( unmatched[ i ] || setMatched[ i ] ) ) { + setMatched[ i ] = pop.call( results ); } } } @@ -2582,13 +2796,14 @@ compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) { cached = compilerCache[ selector + " " ]; if ( !cached ) { + // Generate a function of recursive functions that can be used to check each element if ( !match ) { match = tokenize( selector ); } i = match.length; while ( i-- ) { - cached = matcherFromTokens( match[i] ); + cached = matcherFromTokens( match[ i ] ); if ( cached[ expando ] ) { setMatchers.push( cached ); } else { @@ -2597,7 +2812,10 @@ compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) { } // Cache the compiled function - cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) ); + cached = 
compilerCache( + selector, + matcherFromGroupMatchers( elementMatchers, setMatchers ) + ); // Save selector and tokenization cached.selector = selector; @@ -2617,7 +2835,7 @@ compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) { select = Sizzle.select = function( selector, context, results, seed ) { var i, tokens, token, type, find, compiled = typeof selector === "function" && selector, - match = !seed && tokenize( (selector = compiled.selector || selector) ); + match = !seed && tokenize( ( selector = compiled.selector || selector ) ); results = results || []; @@ -2626,11 +2844,12 @@ select = Sizzle.select = function( selector, context, results, seed ) { if ( match.length === 1 ) { // Reduce context if the leading compound selector is an ID - tokens = match[0] = match[0].slice( 0 ); - if ( tokens.length > 2 && (token = tokens[0]).type === "ID" && - context.nodeType === 9 && documentIsHTML && Expr.relative[ tokens[1].type ] ) { + tokens = match[ 0 ] = match[ 0 ].slice( 0 ); + if ( tokens.length > 2 && ( token = tokens[ 0 ] ).type === "ID" && + context.nodeType === 9 && documentIsHTML && Expr.relative[ tokens[ 1 ].type ] ) { - context = ( Expr.find["ID"]( token.matches[0].replace(runescape, funescape), context ) || [] )[0]; + context = ( Expr.find[ "ID" ]( token.matches[ 0 ] + .replace( runescape, funescape ), context ) || [] )[ 0 ]; if ( !context ) { return results; @@ -2643,20 +2862,22 @@ select = Sizzle.select = function( selector, context, results, seed ) { } // Fetch a seed set for right-to-left matching - i = matchExpr["needsContext"].test( selector ) ? 0 : tokens.length; + i = matchExpr[ "needsContext" ].test( selector ) ? 
0 : tokens.length; while ( i-- ) { - token = tokens[i]; + token = tokens[ i ]; // Abort if we hit a combinator - if ( Expr.relative[ (type = token.type) ] ) { + if ( Expr.relative[ ( type = token.type ) ] ) { break; } - if ( (find = Expr.find[ type ]) ) { + if ( ( find = Expr.find[ type ] ) ) { + // Search, expanding context for leading sibling combinators - if ( (seed = find( - token.matches[0].replace( runescape, funescape ), - rsibling.test( tokens[0].type ) && testContext( context.parentNode ) || context - )) ) { + if ( ( seed = find( + token.matches[ 0 ].replace( runescape, funescape ), + rsibling.test( tokens[ 0 ].type ) && testContext( context.parentNode ) || + context + ) ) ) { // If seed is empty or no tokens remain, we can return early tokens.splice( i, 1 ); @@ -2687,7 +2908,7 @@ select = Sizzle.select = function( selector, context, results, seed ) { // One-time assignments // Sort stability -support.sortStable = expando.split("").sort( sortOrder ).join("") === expando; +support.sortStable = expando.split( "" ).sort( sortOrder ).join( "" ) === expando; // Support: Chrome 14-35+ // Always assume duplicates if they aren't passed to the comparison function @@ -2698,58 +2919,59 @@ setDocument(); // Support: Webkit<537.32 - Safari 6.0.3/Chrome 25 (fixed in Chrome 27) // Detached nodes confoundingly follow *each other* -support.sortDetached = assert(function( el ) { +support.sortDetached = assert( function( el ) { + // Should return 1, but returns 4 (following) - return el.compareDocumentPosition( document.createElement("fieldset") ) & 1; -}); + return el.compareDocumentPosition( document.createElement( "fieldset" ) ) & 1; +} ); // Support: IE<8 // Prevent attribute/property "interpolation" // https://msdn.microsoft.com/en-us/library/ms536429%28VS.85%29.aspx -if ( !assert(function( el ) { +if ( !assert( function( el ) { el.innerHTML = ""; - return el.firstChild.getAttribute("href") === "#" ; -}) ) { + return el.firstChild.getAttribute( "href" ) === "#"; +} ) ) 
{ addHandle( "type|href|height|width", function( elem, name, isXML ) { if ( !isXML ) { return elem.getAttribute( name, name.toLowerCase() === "type" ? 1 : 2 ); } - }); + } ); } // Support: IE<9 // Use defaultValue in place of getAttribute("value") -if ( !support.attributes || !assert(function( el ) { +if ( !support.attributes || !assert( function( el ) { el.innerHTML = ""; el.firstChild.setAttribute( "value", "" ); return el.firstChild.getAttribute( "value" ) === ""; -}) ) { - addHandle( "value", function( elem, name, isXML ) { +} ) ) { + addHandle( "value", function( elem, _name, isXML ) { if ( !isXML && elem.nodeName.toLowerCase() === "input" ) { return elem.defaultValue; } - }); + } ); } // Support: IE<9 // Use getAttributeNode to fetch booleans when getAttribute lies -if ( !assert(function( el ) { - return el.getAttribute("disabled") == null; -}) ) { +if ( !assert( function( el ) { + return el.getAttribute( "disabled" ) == null; +} ) ) { addHandle( booleans, function( elem, name, isXML ) { var val; if ( !isXML ) { return elem[ name ] === true ? name.toLowerCase() : - (val = elem.getAttributeNode( name )) && val.specified ? + ( val = elem.getAttributeNode( name ) ) && val.specified ? 
val.value : - null; + null; } - }); + } ); } return Sizzle; -})( window ); +} )( window ); @@ -3118,7 +3340,7 @@ jQuery.each( { parents: function( elem ) { return dir( elem, "parentNode" ); }, - parentsUntil: function( elem, i, until ) { + parentsUntil: function( elem, _i, until ) { return dir( elem, "parentNode", until ); }, next: function( elem ) { @@ -3133,10 +3355,10 @@ jQuery.each( { prevAll: function( elem ) { return dir( elem, "previousSibling" ); }, - nextUntil: function( elem, i, until ) { + nextUntil: function( elem, _i, until ) { return dir( elem, "nextSibling", until ); }, - prevUntil: function( elem, i, until ) { + prevUntil: function( elem, _i, until ) { return dir( elem, "previousSibling", until ); }, siblings: function( elem ) { @@ -3146,18 +3368,24 @@ jQuery.each( { return siblings( elem.firstChild ); }, contents: function( elem ) { - if ( nodeName( elem, "iframe" ) ) { - return elem.contentDocument; - } + if ( elem.contentDocument != null && + + // Support: IE 11+ + // elements with no `data` attribute has an object + // `contentDocument` with a `null` prototype. + getProto( elem.contentDocument ) ) { - // Support: IE 9 - 11 only, iOS 7 only, Android Browser <=4.3 only - // Treat the template element as a regular one in browsers that - // don't support it. - if ( nodeName( elem, "template" ) ) { - elem = elem.content || elem; - } + return elem.contentDocument; + } - return jQuery.merge( [], elem.childNodes ); + // Support: IE 9 - 11 only, iOS 7 only, Android Browser <=4.3 only + // Treat the template element as a regular one in browsers that + // don't support it. 
+ if ( nodeName( elem, "template" ) ) { + elem = elem.content || elem; + } + + return jQuery.merge( [], elem.childNodes ); } }, function( name, fn ) { jQuery.fn[ name ] = function( until, selector ) { @@ -3489,7 +3717,7 @@ jQuery.extend( { var fns = arguments; return jQuery.Deferred( function( newDefer ) { - jQuery.each( tuples, function( i, tuple ) { + jQuery.each( tuples, function( _i, tuple ) { // Map tuples (progress, done, fail) to arguments (done, fail, progress) var fn = isFunction( fns[ tuple[ 4 ] ] ) && fns[ tuple[ 4 ] ]; @@ -3942,7 +4170,7 @@ var access = function( elems, fn, key, value, chainable, emptyGet, raw ) { // ...except when executing function values } else { bulk = fn; - fn = function( elem, key, value ) { + fn = function( elem, _key, value ) { return bulk.call( jQuery( elem ), value ); }; } @@ -3977,7 +4205,7 @@ var rmsPrefix = /^-ms-/, rdashAlpha = /-([a-z])/g; // Used by camelCase as callback to replace() -function fcamelCase( all, letter ) { +function fcamelCase( _all, letter ) { return letter.toUpperCase(); } @@ -4466,6 +4694,26 @@ var rcssNum = new RegExp( "^(?:([+-])=|)(" + pnum + ")([a-z%]*)$", "i" ); var cssExpand = [ "Top", "Right", "Bottom", "Left" ]; +var documentElement = document.documentElement; + + + + var isAttached = function( elem ) { + return jQuery.contains( elem.ownerDocument, elem ); + }, + composed = { composed: true }; + + // Support: IE 9 - 11+, Edge 12 - 18+, iOS 10.0 - 10.2 only + // Check attachment across shadow DOM boundaries when possible (gh-3504) + // Support: iOS 10.0-10.2 only + // Early iOS 10 versions support `attachShadow` but not `getRootNode`, + // leading to errors. We need to check for `getRootNode`. 
+ if ( documentElement.getRootNode ) { + isAttached = function( elem ) { + return jQuery.contains( elem.ownerDocument, elem ) || + elem.getRootNode( composed ) === elem.ownerDocument; + }; + } var isHiddenWithinTree = function( elem, el ) { // isHiddenWithinTree might be called from jQuery#filter function; @@ -4480,32 +4728,11 @@ var isHiddenWithinTree = function( elem, el ) { // Support: Firefox <=43 - 45 // Disconnected elements can have computed display: none, so first confirm that elem is // in the document. - jQuery.contains( elem.ownerDocument, elem ) && + isAttached( elem ) && jQuery.css( elem, "display" ) === "none"; }; -var swap = function( elem, options, callback, args ) { - var ret, name, - old = {}; - - // Remember the old values, and insert the new ones - for ( name in options ) { - old[ name ] = elem.style[ name ]; - elem.style[ name ] = options[ name ]; - } - - ret = callback.apply( elem, args || [] ); - - // Revert the old values - for ( name in options ) { - elem.style[ name ] = old[ name ]; - } - - return ret; -}; - - function adjustCSS( elem, prop, valueParts, tween ) { @@ -4522,7 +4749,8 @@ function adjustCSS( elem, prop, valueParts, tween ) { unit = valueParts && valueParts[ 3 ] || ( jQuery.cssNumber[ prop ] ? 
"" : "px" ), // Starting value computation is required for potential unit mismatches - initialInUnit = ( jQuery.cssNumber[ prop ] || unit !== "px" && +initial ) && + initialInUnit = elem.nodeType && + ( jQuery.cssNumber[ prop ] || unit !== "px" && +initial ) && rcssNum.exec( jQuery.css( elem, prop ) ); if ( initialInUnit && initialInUnit[ 3 ] !== unit ) { @@ -4669,17 +4897,46 @@ jQuery.fn.extend( { } ); var rcheckableType = ( /^(?:checkbox|radio)$/i ); -var rtagName = ( /<([a-z][^\/\0>\x20\t\r\n\f]+)/i ); +var rtagName = ( /<([a-z][^\/\0>\x20\t\r\n\f]*)/i ); var rscriptType = ( /^$|^module$|\/(?:java|ecma)script/i ); -// We have to close these tags to support XHTML (#13200) -var wrapMap = { +( function() { + var fragment = document.createDocumentFragment(), + div = fragment.appendChild( document.createElement( "div" ) ), + input = document.createElement( "input" ); + + // Support: Android 4.0 - 4.3 only + // Check state lost if the name is set (#11217) + // Support: Windows Web Apps (WWA) + // `name` and `type` must use .setAttribute for WWA (#14901) + input.setAttribute( "type", "radio" ); + input.setAttribute( "checked", "checked" ); + input.setAttribute( "name", "t" ); + + div.appendChild( input ); + + // Support: Android <=4.1 only + // Older WebKit doesn't clone checked state correctly in fragments + support.checkClone = div.cloneNode( true ).cloneNode( true ).lastChild.checked; + + // Support: IE <=11 only + // Make sure textarea (and checkbox) defaultValue is properly cloned + div.innerHTML = ""; + support.noCloneChecked = !!div.cloneNode( true ).lastChild.defaultValue; // Support: IE <=9 only - option: [ 1, "" ], + // IE <=9 replaces "; + support.option = !!div.lastChild; +} )(); + + +// We have to close these tags to support XHTML (#13200) +var wrapMap = { // XHTML parsers do not magically insert elements in the // same way that tag soup parsers do. 
So we cannot shorten @@ -4692,12 +4949,14 @@ var wrapMap = { _default: [ 0, "", "" ] }; -// Support: IE <=9 only -wrapMap.optgroup = wrapMap.option; - wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; wrapMap.th = wrapMap.td; +// Support: IE <=9 only +if ( !support.option ) { + wrapMap.optgroup = wrapMap.option = [ 1, "" ]; +} + function getAll( context, tag ) { @@ -4741,7 +5000,7 @@ function setGlobalEval( elems, refElements ) { var rhtml = /<|&#?\w+;/; function buildFragment( elems, context, scripts, selection, ignored ) { - var elem, tmp, tag, wrap, contains, j, + var elem, tmp, tag, wrap, attached, j, fragment = context.createDocumentFragment(), nodes = [], i = 0, @@ -4805,13 +5064,13 @@ function buildFragment( elems, context, scripts, selection, ignored ) { continue; } - contains = jQuery.contains( elem.ownerDocument, elem ); + attached = isAttached( elem ); // Append to fragment tmp = getAll( fragment.appendChild( elem ), "script" ); // Preserve script evaluation history - if ( contains ) { + if ( attached ) { setGlobalEval( tmp ); } @@ -4830,34 +5089,6 @@ function buildFragment( elems, context, scripts, selection, ignored ) { } -( function() { - var fragment = document.createDocumentFragment(), - div = fragment.appendChild( document.createElement( "div" ) ), - input = document.createElement( "input" ); - - // Support: Android 4.0 - 4.3 only - // Check state lost if the name is set (#11217) - // Support: Windows Web Apps (WWA) - // `name` and `type` must use .setAttribute for WWA (#14901) - input.setAttribute( "type", "radio" ); - input.setAttribute( "checked", "checked" ); - input.setAttribute( "name", "t" ); - - div.appendChild( input ); - - // Support: Android <=4.1 only - // Older WebKit doesn't clone checked state correctly in fragments - support.checkClone = div.cloneNode( true ).cloneNode( true ).lastChild.checked; - - // Support: IE <=11 only - // Make sure textarea (and checkbox) defaultValue is properly cloned - 
div.innerHTML = ""; - support.noCloneChecked = !!div.cloneNode( true ).lastChild.defaultValue; -} )(); -var documentElement = document.documentElement; - - - var rkeyEvent = /^key/, rmouseEvent = /^(?:mouse|pointer|contextmenu|drag|drop)|click/, @@ -4871,8 +5102,19 @@ function returnFalse() { return false; } +// Support: IE <=9 - 11+ +// focus() and blur() are asynchronous, except when they are no-op. +// So expect focus to be synchronous when the element is already active, +// and blur to be synchronous when the element is not already active. +// (focus and blur are always synchronous in other supported browsers, +// this just defines when we can count on it). +function expectSync( elem, type ) { + return ( elem === safeActiveElement() ) === ( type === "focus" ); +} + // Support: IE <=9 only -// See #13393 for more info +// Accessing document.activeElement can throw unexpectedly +// https://bugs.jquery.com/ticket/13393 function safeActiveElement() { try { return document.activeElement; @@ -4955,8 +5197,8 @@ jQuery.event = { special, handlers, type, namespaces, origType, elemData = dataPriv.get( elem ); - // Don't attach events to noData or text/comment nodes (but allow plain objects) - if ( !elemData ) { + // Only attach events to objects that accept data + if ( !acceptData( elem ) ) { return; } @@ -4980,7 +5222,7 @@ jQuery.event = { // Init the element's event structure and main handler, if this is the first if ( !( events = elemData.events ) ) { - events = elemData.events = {}; + events = elemData.events = Object.create( null ); } if ( !( eventHandle = elemData.handle ) ) { eventHandle = elemData.handle = function( e ) { @@ -5138,12 +5380,15 @@ jQuery.event = { dispatch: function( nativeEvent ) { - // Make a writable jQuery.Event from the native event object - var event = jQuery.event.fix( nativeEvent ); - var i, j, ret, matched, handleObj, handlerQueue, args = new Array( arguments.length ), - handlers = ( dataPriv.get( this, "events" ) || {} )[ event.type ] || 
[], + + // Make a writable jQuery.Event from the native event object + event = jQuery.event.fix( nativeEvent ), + + handlers = ( + dataPriv.get( this, "events" ) || Object.create( null ) + )[ event.type ] || [], special = jQuery.event.special[ event.type ] || {}; // Use the fix-ed jQuery.Event rather than the (read-only) native event @@ -5172,9 +5417,10 @@ jQuery.event = { while ( ( handleObj = matched.handlers[ j++ ] ) && !event.isImmediatePropagationStopped() ) { - // Triggered event must either 1) have no namespace, or 2) have namespace(s) - // a subset or equal to those in the bound event (both can have no namespace). - if ( !event.rnamespace || event.rnamespace.test( handleObj.namespace ) ) { + // If the event is namespaced, then each handler is only invoked if it is + // specially universal or its namespaces are a superset of the event's. + if ( !event.rnamespace || handleObj.namespace === false || + event.rnamespace.test( handleObj.namespace ) ) { event.handleObj = handleObj; event.data = handleObj.data; @@ -5298,39 +5544,51 @@ jQuery.event = { // Prevent triggered image.load events from bubbling to window.load noBubble: true }, - focus: { + click: { - // Fire native event if possible so blur/focus sequence is correct - trigger: function() { - if ( this !== safeActiveElement() && this.focus ) { - this.focus(); - return false; - } - }, - delegateType: "focusin" - }, - blur: { - trigger: function() { - if ( this === safeActiveElement() && this.blur ) { - this.blur(); - return false; + // Utilize native event to ensure correct state for checkable inputs + setup: function( data ) { + + // For mutual compressibility with _default, replace `this` access with a local var. + // `|| data` is dead code meant only to preserve the variable through minification. + var el = this || data; + + // Claim the first handler + if ( rcheckableType.test( el.type ) && + el.click && nodeName( el, "input" ) ) { + + // dataPriv.set( el, "click", ... 
) + leverageNative( el, "click", returnTrue ); } + + // Return false to allow normal processing in the caller + return false; }, - delegateType: "focusout" - }, - click: { + trigger: function( data ) { - // For checkbox, fire native event so checked state will be right - trigger: function() { - if ( this.type === "checkbox" && this.click && nodeName( this, "input" ) ) { - this.click(); - return false; + // For mutual compressibility with _default, replace `this` access with a local var. + // `|| data` is dead code meant only to preserve the variable through minification. + var el = this || data; + + // Force setup before triggering a click + if ( rcheckableType.test( el.type ) && + el.click && nodeName( el, "input" ) ) { + + leverageNative( el, "click" ); } + + // Return non-false to allow normal event-path propagation + return true; }, - // For cross-browser consistency, don't fire native .click() on links + // For cross-browser consistency, suppress native .click() on links + // Also prevent it if we're currently inside a leveraged native-event stack _default: function( event ) { - return nodeName( event.target, "a" ); + var target = event.target; + return rcheckableType.test( target.type ) && + target.click && nodeName( target, "input" ) && + dataPriv.get( target, "click" ) || + nodeName( target, "a" ); } }, @@ -5347,6 +5605,93 @@ jQuery.event = { } }; +// Ensure the presence of an event listener that handles manually-triggered +// synthetic events by interrupting progress until reinvoked in response to +// *native* events that it fires directly, ensuring that state changes have +// already occurred before other listeners are invoked. 
+function leverageNative( el, type, expectSync ) { + + // Missing expectSync indicates a trigger call, which must force setup through jQuery.event.add + if ( !expectSync ) { + if ( dataPriv.get( el, type ) === undefined ) { + jQuery.event.add( el, type, returnTrue ); + } + return; + } + + // Register the controller as a special universal handler for all event namespaces + dataPriv.set( el, type, false ); + jQuery.event.add( el, type, { + namespace: false, + handler: function( event ) { + var notAsync, result, + saved = dataPriv.get( this, type ); + + if ( ( event.isTrigger & 1 ) && this[ type ] ) { + + // Interrupt processing of the outer synthetic .trigger()ed event + // Saved data should be false in such cases, but might be a leftover capture object + // from an async native handler (gh-4350) + if ( !saved.length ) { + + // Store arguments for use when handling the inner native event + // There will always be at least one argument (an event object), so this array + // will not be confused with a leftover capture object. + saved = slice.call( arguments ); + dataPriv.set( this, type, saved ); + + // Trigger the native event and capture its result + // Support: IE <=9 - 11+ + // focus() and blur() are asynchronous + notAsync = expectSync( this, type ); + this[ type ](); + result = dataPriv.get( this, type ); + if ( saved !== result || notAsync ) { + dataPriv.set( this, type, false ); + } else { + result = {}; + } + if ( saved !== result ) { + + // Cancel the outer synthetic event + event.stopImmediatePropagation(); + event.preventDefault(); + return result.value; + } + + // If this is an inner synthetic event for an event with a bubbling surrogate + // (focus or blur), assume that the surrogate already propagated from triggering the + // native event and prevent that from happening again here. + // This technically gets the ordering wrong w.r.t. 
to `.trigger()` (in which the + // bubbling surrogate propagates *after* the non-bubbling base), but that seems + // less bad than duplication. + } else if ( ( jQuery.event.special[ type ] || {} ).delegateType ) { + event.stopPropagation(); + } + + // If this is a native event triggered above, everything is now in order + // Fire an inner synthetic event with the original arguments + } else if ( saved.length ) { + + // ...and capture the result + dataPriv.set( this, type, { + value: jQuery.event.trigger( + + // Support: IE <=9 - 11+ + // Extend with the prototype to reset the above stopImmediatePropagation() + jQuery.extend( saved[ 0 ], jQuery.Event.prototype ), + saved.slice( 1 ), + this + ) + } ); + + // Abort handling of the native event + event.stopImmediatePropagation(); + } + } + } ); +} + jQuery.removeEvent = function( elem, type, handle ) { // This "if" is needed for plain objects @@ -5459,6 +5804,7 @@ jQuery.each( { shiftKey: true, view: true, "char": true, + code: true, charCode: true, key: true, keyCode: true, @@ -5505,6 +5851,33 @@ jQuery.each( { } }, jQuery.event.addProp ); +jQuery.each( { focus: "focusin", blur: "focusout" }, function( type, delegateType ) { + jQuery.event.special[ type ] = { + + // Utilize native event if possible so blur/focus sequence is correct + setup: function() { + + // Claim the first handler + // dataPriv.set( this, "focus", ... ) + // dataPriv.set( this, "blur", ... ) + leverageNative( this, type, expectSync ); + + // Return false to allow normal processing in the caller + return false; + }, + trigger: function() { + + // Force setup before trigger + leverageNative( this, type ); + + // Return non-false to allow normal event-path propagation + return true; + }, + + delegateType: delegateType + }; +} ); + // Create mouseenter/leave events using mouseover/out and event-time checks // so that event delegation works in jQuery. 
// Do the same for pointerenter/pointerleave and pointerover/pointerout @@ -5590,13 +5963,6 @@ jQuery.fn.extend( { var - /* eslint-disable max-len */ - - // See https://github.com/eslint/eslint/issues/3229 - rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([a-z][^\/\0>\x20\t\r\n\f]*)[^>]*)\/>/gi, - - /* eslint-enable */ - // Support: IE <=10 - 11, Edge 12 - 13 only // In IE/Edge using regex groups here causes severe slowdowns. // See https://connect.microsoft.com/IE/feedback/details/1736512/ @@ -5633,7 +5999,7 @@ function restoreScript( elem ) { } function cloneCopyEvent( src, dest ) { - var i, l, type, pdataOld, pdataCur, udataOld, udataCur, events; + var i, l, type, pdataOld, udataOld, udataCur, events; if ( dest.nodeType !== 1 ) { return; @@ -5641,13 +6007,11 @@ function cloneCopyEvent( src, dest ) { // 1. Copy private data: events, handlers, etc. if ( dataPriv.hasData( src ) ) { - pdataOld = dataPriv.access( src ); - pdataCur = dataPriv.set( dest, pdataOld ); + pdataOld = dataPriv.get( src ); events = pdataOld.events; if ( events ) { - delete pdataCur.handle; - pdataCur.events = {}; + dataPriv.remove( dest, "handle events" ); for ( type in events ) { for ( i = 0, l = events[ type ].length; i < l; i++ ) { @@ -5683,7 +6047,7 @@ function fixInput( src, dest ) { function domManip( collection, args, callback, ignored ) { // Flatten any nested arrays - args = concat.apply( [], args ); + args = flat( args ); var fragment, first, scripts, hasScripts, node, doc, i = 0, @@ -5755,11 +6119,13 @@ function domManip( collection, args, callback, ignored ) { if ( node.src && ( node.type || "" ).toLowerCase() !== "module" ) { // Optional AJAX dependency, but won't run scripts if not present - if ( jQuery._evalUrl ) { - jQuery._evalUrl( node.src ); + if ( jQuery._evalUrl && !node.noModule ) { + jQuery._evalUrl( node.src, { + nonce: node.nonce || node.getAttribute( "nonce" ) + }, doc ); } } else { - DOMEval( node.textContent.replace( rcleanScript, "" ), doc, node 
); + DOMEval( node.textContent.replace( rcleanScript, "" ), node, doc ); } } } @@ -5781,7 +6147,7 @@ function remove( elem, selector, keepData ) { } if ( node.parentNode ) { - if ( keepData && jQuery.contains( node.ownerDocument, node ) ) { + if ( keepData && isAttached( node ) ) { setGlobalEval( getAll( node, "script" ) ); } node.parentNode.removeChild( node ); @@ -5793,13 +6159,13 @@ function remove( elem, selector, keepData ) { jQuery.extend( { htmlPrefilter: function( html ) { - return html.replace( rxhtmlTag, "<$1>" ); + return html; }, clone: function( elem, dataAndEvents, deepDataAndEvents ) { var i, l, srcElements, destElements, clone = elem.cloneNode( true ), - inPage = jQuery.contains( elem.ownerDocument, elem ); + inPage = isAttached( elem ); // Fix IE cloning issues if ( !support.noCloneChecked && ( elem.nodeType === 1 || elem.nodeType === 11 ) && @@ -6055,6 +6421,27 @@ var getStyles = function( elem ) { return view.getComputedStyle( elem ); }; +var swap = function( elem, options, callback ) { + var ret, name, + old = {}; + + // Remember the old values, and insert the new ones + for ( name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + ret = callback.call( elem ); + + // Revert the old values + for ( name in options ) { + elem.style[ name ] = old[ name ]; + } + + return ret; +}; + + var rboxStyle = new RegExp( cssExpand.join( "|" ), "i" ); @@ -6095,8 +6482,10 @@ var rboxStyle = new RegExp( cssExpand.join( "|" ), "i" ); // Support: IE 9 only // Detect overflow:scroll screwiness (gh-3699) + // Support: Chrome <=64 + // Don't get tricked when zoom affects offsetWidth (gh-4029) div.style.position = "absolute"; - scrollboxSizeVal = div.offsetWidth === 36 || "absolute"; + scrollboxSizeVal = roundPixelMeasures( div.offsetWidth / 3 ) === 12; documentElement.removeChild( container ); @@ -6110,7 +6499,7 @@ var rboxStyle = new RegExp( cssExpand.join( "|" ), "i" ); } var pixelPositionVal, boxSizingReliableVal, 
scrollboxSizeVal, pixelBoxStylesVal, - reliableMarginLeftVal, + reliableTrDimensionsVal, reliableMarginLeftVal, container = document.createElement( "div" ), div = document.createElement( "div" ); @@ -6145,6 +6534,35 @@ var rboxStyle = new RegExp( cssExpand.join( "|" ), "i" ); scrollboxSize: function() { computeStyleTests(); return scrollboxSizeVal; + }, + + // Support: IE 9 - 11+, Edge 15 - 18+ + // IE/Edge misreport `getComputedStyle` of table rows with width/height + // set in CSS while `offset*` properties report correct values. + // Behavior in IE 9 is more subtle than in newer versions & it passes + // some versions of this test; make sure not to make it pass there! + reliableTrDimensions: function() { + var table, tr, trChild, trStyle; + if ( reliableTrDimensionsVal == null ) { + table = document.createElement( "table" ); + tr = document.createElement( "tr" ); + trChild = document.createElement( "div" ); + + table.style.cssText = "position:absolute;left:-11111px"; + tr.style.height = "1px"; + trChild.style.height = "9px"; + + documentElement + .appendChild( table ) + .appendChild( tr ) + .appendChild( trChild ); + + trStyle = window.getComputedStyle( tr ); + reliableTrDimensionsVal = parseInt( trStyle.height ) > 3; + + documentElement.removeChild( table ); + } + return reliableTrDimensionsVal; } } ); } )(); @@ -6167,7 +6585,7 @@ function curCSS( elem, name, computed ) { if ( computed ) { ret = computed.getPropertyValue( name ) || computed[ name ]; - if ( ret === "" && !jQuery.contains( elem.ownerDocument, elem ) ) { + if ( ret === "" && !isAttached( elem ) ) { ret = jQuery.style( elem, name ); } @@ -6223,30 +6641,13 @@ function addGetHookIf( conditionFn, hookFn ) { } -var - - // Swappable if display is none or starts with table - // except "table", "table-cell", or "table-caption" - // See here for display values: https://developer.mozilla.org/en-US/docs/CSS/display - rdisplayswap = /^(none|table(?!-c[ea]).+)/, - rcustomProp = /^--/, - cssShow = { position: 
"absolute", visibility: "hidden", display: "block" }, - cssNormalTransform = { - letterSpacing: "0", - fontWeight: "400" - }, - - cssPrefixes = [ "Webkit", "Moz", "ms" ], - emptyStyle = document.createElement( "div" ).style; +var cssPrefixes = [ "Webkit", "Moz", "ms" ], + emptyStyle = document.createElement( "div" ).style, + vendorProps = {}; -// Return a css property mapped to a potentially vendor prefixed property +// Return a vendor-prefixed property or undefined function vendorPropName( name ) { - // Shortcut for names that are not vendor prefixed - if ( name in emptyStyle ) { - return name; - } - // Check for vendor prefixed names var capName = name[ 0 ].toUpperCase() + name.slice( 1 ), i = cssPrefixes.length; @@ -6259,17 +6660,34 @@ function vendorPropName( name ) { } } -// Return a property mapped along what jQuery.cssProps suggests or to -// a vendor prefixed property. +// Return a potentially-mapped jQuery.cssProps or vendor prefixed property function finalPropName( name ) { - var ret = jQuery.cssProps[ name ]; - if ( !ret ) { - ret = jQuery.cssProps[ name ] = vendorPropName( name ) || name; + var final = jQuery.cssProps[ name ] || vendorProps[ name ]; + + if ( final ) { + return final; } - return ret; + if ( name in emptyStyle ) { + return name; + } + return vendorProps[ name ] = vendorPropName( name ) || name; } -function setPositiveNumber( elem, value, subtract ) { + +var + + // Swappable if display is none or starts with table + // except "table", "table-cell", or "table-caption" + // See here for display values: https://developer.mozilla.org/en-US/docs/CSS/display + rdisplayswap = /^(none|table(?!-c[ea]).+)/, + rcustomProp = /^--/, + cssShow = { position: "absolute", visibility: "hidden", display: "block" }, + cssNormalTransform = { + letterSpacing: "0", + fontWeight: "400" + }; + +function setPositiveNumber( _elem, value, subtract ) { // Any relative (+/-) values have already been // normalized at this point @@ -6340,7 +6758,10 @@ function 
boxModelAdjustment( elem, dimension, box, isBorderBox, styles, computed delta - extra - 0.5 - ) ); + + // If offsetWidth/offsetHeight is unknown, then we can't determine content-box scroll gutter + // Use an explicit zero to avoid NaN (gh-3964) + ) ) || 0; } return delta; @@ -6350,9 +6771,16 @@ function getWidthOrHeight( elem, dimension, extra ) { // Start with computed style var styles = getStyles( elem ), + + // To avoid forcing a reflow, only fetch boxSizing if we need it (gh-4322). + // Fake content-box until we know it's needed to know the true value. + boxSizingNeeded = !support.boxSizingReliable() || extra, + isBorderBox = boxSizingNeeded && + jQuery.css( elem, "boxSizing", false, styles ) === "border-box", + valueIsBorderBox = isBorderBox, + val = curCSS( elem, dimension, styles ), - isBorderBox = jQuery.css( elem, "boxSizing", false, styles ) === "border-box", - valueIsBorderBox = isBorderBox; + offsetProp = "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ); // Support: Firefox <=54 // Return a confounding non-pixel value or feign ignorance, as appropriate. @@ -6363,22 +6791,38 @@ function getWidthOrHeight( elem, dimension, extra ) { val = "auto"; } - // Check for style in case a browser which returns unreliable values - // for getComputedStyle silently falls back to the reliable elem.style - valueIsBorderBox = valueIsBorderBox && - ( support.boxSizingReliable() || val === elem.style[ dimension ] ); - // Fall back to offsetWidth/offsetHeight when value is "auto" - // This happens for inline elements with no explicit setting (gh-3571) - // Support: Android <=4.1 - 4.3 only - // Also use offsetWidth/offsetHeight for misreported inline dimensions (gh-3602) - if ( val === "auto" || - !parseFloat( val ) && jQuery.css( elem, "display", false, styles ) === "inline" ) { + // Support: IE 9 - 11 only + // Use offsetWidth/offsetHeight for when box sizing is unreliable. + // In those cases, the computed value can be trusted to be border-box. 
+ if ( ( !support.boxSizingReliable() && isBorderBox || + + // Support: IE 10 - 11+, Edge 15 - 18+ + // IE/Edge misreport `getComputedStyle` of table rows with width/height + // set in CSS while `offset*` properties report correct values. + // Interestingly, in some cases IE 9 doesn't suffer from this issue. + !support.reliableTrDimensions() && nodeName( elem, "tr" ) || + + // Fall back to offsetWidth/offsetHeight when value is "auto" + // This happens for inline elements with no explicit setting (gh-3571) + val === "auto" || + + // Support: Android <=4.1 - 4.3 only + // Also use offsetWidth/offsetHeight for misreported inline dimensions (gh-3602) + !parseFloat( val ) && jQuery.css( elem, "display", false, styles ) === "inline" ) && - val = elem[ "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ) ]; + // Make sure the element is visible & connected + elem.getClientRects().length ) { - // offsetWidth/offsetHeight provide border-box values - valueIsBorderBox = true; + isBorderBox = jQuery.css( elem, "boxSizing", false, styles ) === "border-box"; + + // Where available, offsetWidth/offsetHeight approximate border box dimensions. + // Where not available (e.g., SVG), assume unreliable box-sizing and interpret the + // retrieved value as a content box dimension. + valueIsBorderBox = offsetProp in elem; + if ( valueIsBorderBox ) { + val = elem[ offsetProp ]; + } } // Normalize "" and auto @@ -6424,6 +6868,13 @@ jQuery.extend( { "flexGrow": true, "flexShrink": true, "fontWeight": true, + "gridArea": true, + "gridColumn": true, + "gridColumnEnd": true, + "gridColumnStart": true, + "gridRow": true, + "gridRowEnd": true, + "gridRowStart": true, "lineHeight": true, "opacity": true, "order": true, @@ -6479,7 +6930,9 @@ jQuery.extend( { } // If a number was passed in, add the unit (except for certain CSS properties) - if ( type === "number" ) { + // The isCustomProp check can be removed in jQuery 4.0 when we only auto-append + // "px" to a few hardcoded values. 
+ if ( type === "number" && !isCustomProp ) { value += ret && ret[ 3 ] || ( jQuery.cssNumber[ origName ] ? "" : "px" ); } @@ -6553,7 +7006,7 @@ jQuery.extend( { } } ); -jQuery.each( [ "height", "width" ], function( i, dimension ) { +jQuery.each( [ "height", "width" ], function( _i, dimension ) { jQuery.cssHooks[ dimension ] = { get: function( elem, computed, extra ) { if ( computed ) { @@ -6579,18 +7032,29 @@ jQuery.each( [ "height", "width" ], function( i, dimension ) { set: function( elem, value, extra ) { var matches, styles = getStyles( elem ), - isBorderBox = jQuery.css( elem, "boxSizing", false, styles ) === "border-box", - subtract = extra && boxModelAdjustment( - elem, - dimension, - extra, - isBorderBox, - styles - ); + + // Only read styles.position if the test has a chance to fail + // to avoid forcing a reflow. + scrollboxSizeBuggy = !support.scrollboxSize() && + styles.position === "absolute", + + // To avoid forcing a reflow, only fetch boxSizing if we need it (gh-3991) + boxSizingNeeded = scrollboxSizeBuggy || extra, + isBorderBox = boxSizingNeeded && + jQuery.css( elem, "boxSizing", false, styles ) === "border-box", + subtract = extra ? + boxModelAdjustment( + elem, + dimension, + extra, + isBorderBox, + styles + ) : + 0; // Account for unreliable border-box dimensions by comparing offset* to computed and // faking a content-box to get border and padding (gh-3699) - if ( isBorderBox && support.scrollboxSize() === styles.position ) { + if ( isBorderBox && scrollboxSizeBuggy ) { subtract -= Math.ceil( elem[ "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ) ] - parseFloat( styles[ dimension ] ) - @@ -6758,9 +7222,9 @@ Tween.propHooks = { // Use .style if available and use plain properties where available. 
if ( jQuery.fx.step[ tween.prop ] ) { jQuery.fx.step[ tween.prop ]( tween ); - } else if ( tween.elem.nodeType === 1 && - ( tween.elem.style[ jQuery.cssProps[ tween.prop ] ] != null || - jQuery.cssHooks[ tween.prop ] ) ) { + } else if ( tween.elem.nodeType === 1 && ( + jQuery.cssHooks[ tween.prop ] || + tween.elem.style[ finalPropName( tween.prop ) ] != null ) ) { jQuery.style( tween.elem, tween.prop, tween.now + tween.unit ); } else { tween.elem[ tween.prop ] = tween.now; @@ -7315,7 +7779,7 @@ jQuery.fn.extend( { clearQueue = type; type = undefined; } - if ( clearQueue && type !== false ) { + if ( clearQueue ) { this.queue( type || "fx", [] ); } @@ -7398,7 +7862,7 @@ jQuery.fn.extend( { } } ); -jQuery.each( [ "toggle", "show", "hide" ], function( i, name ) { +jQuery.each( [ "toggle", "show", "hide" ], function( _i, name ) { var cssFn = jQuery.fn[ name ]; jQuery.fn[ name ] = function( speed, easing, callback ) { return speed == null || typeof speed === "boolean" ? @@ -7619,7 +8083,7 @@ boolHook = { } }; -jQuery.each( jQuery.expr.match.bool.source.match( /\w+/g ), function( i, name ) { +jQuery.each( jQuery.expr.match.bool.source.match( /\w+/g ), function( _i, name ) { var getter = attrHandle[ name ] || jQuery.find.attr; attrHandle[ name ] = function( elem, name, isXML ) { @@ -8243,7 +8707,9 @@ jQuery.extend( jQuery.event, { special.bindType || type; // jQuery handler - handle = ( dataPriv.get( cur, "events" ) || {} )[ event.type ] && + handle = ( + dataPriv.get( cur, "events" ) || Object.create( null ) + )[ event.type ] && dataPriv.get( cur, "handle" ); if ( handle ) { handle.apply( cur, data ); @@ -8354,7 +8820,10 @@ if ( !support.focusin ) { jQuery.event.special[ fix ] = { setup: function() { - var doc = this.ownerDocument || this, + + // Handle: regular nodes (via `this.ownerDocument`), window + // (via `this.document`) & document (via `this`). 
+ var doc = this.ownerDocument || this.document || this, attaches = dataPriv.access( doc, fix ); if ( !attaches ) { @@ -8363,7 +8832,7 @@ if ( !support.focusin ) { dataPriv.access( doc, fix, ( attaches || 0 ) + 1 ); }, teardown: function() { - var doc = this.ownerDocument || this, + var doc = this.ownerDocument || this.document || this, attaches = dataPriv.access( doc, fix ) - 1; if ( !attaches ) { @@ -8379,7 +8848,7 @@ if ( !support.focusin ) { } var location = window.location; -var nonce = Date.now(); +var nonce = { guid: Date.now() }; var rquery = ( /\?/ ); @@ -8467,6 +8936,10 @@ jQuery.param = function( a, traditional ) { encodeURIComponent( value == null ? "" : value ); }; + if ( a == null ) { + return ""; + } + // If an array was passed in, assume that it is an array of form elements. if ( Array.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { @@ -8507,7 +8980,7 @@ jQuery.fn.extend( { rsubmittable.test( this.nodeName ) && !rsubmitterTypes.test( type ) && ( this.checked || !rcheckableType.test( type ) ); } ) - .map( function( i, elem ) { + .map( function( _i, elem ) { var val = jQuery( this ).val(); if ( val == null ) { @@ -8969,12 +9442,14 @@ jQuery.extend( { if ( !responseHeaders ) { responseHeaders = {}; while ( ( match = rheaders.exec( responseHeadersString ) ) ) { - responseHeaders[ match[ 1 ].toLowerCase() ] = match[ 2 ]; + responseHeaders[ match[ 1 ].toLowerCase() + " " ] = + ( responseHeaders[ match[ 1 ].toLowerCase() + " " ] || [] ) + .concat( match[ 2 ] ); } } - match = responseHeaders[ key.toLowerCase() ]; + match = responseHeaders[ key.toLowerCase() + " " ]; } - return match == null ? null : match; + return match == null ? null : match.join( ", " ); }, // Raw string @@ -9118,7 +9593,8 @@ jQuery.extend( { // Add or update anti-cache param if needed if ( s.cache === false ) { cacheURL = cacheURL.replace( rantiCache, "$1" ); - uncached = ( rquery.test( cacheURL ) ? "&" : "?" 
) + "_=" + ( nonce++ ) + uncached; + uncached = ( rquery.test( cacheURL ) ? "&" : "?" ) + "_=" + ( nonce.guid++ ) + + uncached; } // Put hash and anti-cache on the URL that will be requested (gh-1732) @@ -9251,6 +9727,11 @@ jQuery.extend( { response = ajaxHandleResponses( s, jqXHR, responses ); } + // Use a noop converter for missing script + if ( !isSuccess && jQuery.inArray( "script", s.dataTypes ) > -1 ) { + s.converters[ "text script" ] = function() {}; + } + // Convert no matter what (that way responseXXX fields are always set) response = ajaxConvert( s, response, jqXHR, isSuccess ); @@ -9341,7 +9822,7 @@ jQuery.extend( { } } ); -jQuery.each( [ "get", "post" ], function( i, method ) { +jQuery.each( [ "get", "post" ], function( _i, method ) { jQuery[ method ] = function( url, data, callback, type ) { // Shift arguments if data argument was omitted @@ -9362,8 +9843,17 @@ jQuery.each( [ "get", "post" ], function( i, method ) { }; } ); +jQuery.ajaxPrefilter( function( s ) { + var i; + for ( i in s.headers ) { + if ( i.toLowerCase() === "content-type" ) { + s.contentType = s.headers[ i ] || ""; + } + } +} ); + -jQuery._evalUrl = function( url ) { +jQuery._evalUrl = function( url, options, doc ) { return jQuery.ajax( { url: url, @@ -9373,7 +9863,16 @@ jQuery._evalUrl = function( url ) { cache: true, async: false, global: false, - "throws": true + + // Only evaluate the response if it is successful (gh-4126) + // dataFilter is not invoked for failure responses, so using it instead + // of the default converter is kludgy but it works. 
+ converters: { + "text script": function() {} + }, + dataFilter: function( response ) { + jQuery.globalEval( response, options, doc ); + } } ); }; @@ -9656,24 +10155,21 @@ jQuery.ajaxPrefilter( "script", function( s ) { // Bind script tag hack transport jQuery.ajaxTransport( "script", function( s ) { - // This transport only deals with cross domain requests - if ( s.crossDomain ) { + // This transport only deals with cross domain or forced-by-attrs requests + if ( s.crossDomain || s.scriptAttrs ) { var script, callback; return { send: function( _, complete ) { - script = jQuery( "